📄 ch 07 - merging data with relational algebra.sql
字号:
VALUES( 'C', 'Z', 9)
INSERT RawData (X,Y,Data)
VALUES( 'D', 'Z', 10)
INSERT RawData (X,Y,Data)
VALUES( 'A', 'X', 2)
INSERT RawData (X,Y,Data)
VALUES( 'A', 'X', 3)
INSERT RawData (X,Y,Data)
VALUES( 'A', 'Y', 2)
INSERT RawData (X,Y,Data)
VALUES( 'A', 'Y', 5)
INSERT RawData (X,Y,Data)
VALUES( 'A', 'Y', 1)
INSERT RawData (X,Y,Data)
VALUES( 'D', 'Y', 50)
SELECT * FROM RawData
SELECT R.Y,
(SELECT SUM(Data)
FROM RawData
WHERE X = 'A' AND Y = R.Y) AS 'A',
(SELECT SUM(Data)
FROM RawData
WHERE X = 'B' AND Y = R.Y) AS 'B',
(SELECT SUM(Data)
FROM RawData
WHERE X = 'C' AND Y = R.Y) AS 'C',
(SELECT SUM(Data)
FROM RawData
WHERE X = 'D' AND Y = R.Y) AS 'D'
FROM RawData R
GROUP BY Y
-------------------------------------------------
-- Union
USE TempDB
SELECT OnePK, Thing1, 'from One' as Source
FROM dbo.One
UNION ALL
SELECT TwoPK, Thing2, 'from Two'
FROM dbo.Two
ORDER BY Thing1
-- Intersect Union
-- rows common to both tables
-- first create red thing and blue thing to two so there will be an intersection
INSERT dbo.Two(TwoPK, OnePK, Thing2)
VALUES(5,0, 'Red Thing')
INSERT dbo.Two(TwoPK, OnePK, Thing2)
VALUES(6,0, 'Blue Thing')
SELECT DISTINCT U.Thing1
FROM
(SELECT DISTINCT Thing1
FROM dbo.One
UNION ALL
SELECT DISTINCT Thing2
FROM dbo.Two) U
GROUP BY Thing1
HAVING Count(*) >1
-- Difference Union
SELECT Thing1
FROM
(SELECT DISTINCT Thing1
FROM dbo.One
UNION ALL
SELECT DISTINCT Thing2
FROM dbo.Two) U
GROUP BY Thing1
HAVING Count(*) = 1
SELECT Thing1
FROM
(SELECT DISTINCT Thing1
FROM dbo.One
UNION ALL
SELECT DISTINCT Thing1
FROM dbo.One
UNION ALL
SELECT DISTINCT Thing2
FROM dbo.Two) U
GROUP BY Thing1
HAVING Count(*) = 2
-------------------------------------------------
-- Relational Division
-- Exact Relational Division
go
USE OBXKites
go
DECLARE @OrderNumber INT
-- First Person orders exactly all toys
EXEC pOrder_AddNew
@ContactCode = '110',
@EmployeeCode = '120',
@LocationCode = 'CH',
@OrderDate='6/1/2002',
@OrderNumber = @OrderNumber output
EXEC pOrder_AddItem
@OrderNumber = @OrderNumber, -- must be a valid, open order. Get OrderNumber from pOrder_AddNew
@Code = '1049', -- if NULL then non-stock Product text description
@NonStockProduct = NULL,
@Quantity = 1, -- required
@UnitPrice = NULL, -- if Null then the sproc will lookup the correct current price for the customer
@ShipRequestDate = NULL, -- if NULL then today
@ShipComment = NULL
EXEC pOrder_AddItem @OrderNumber, '1050', NULL, 1, NULL, NULL, NULL
-- Second Person - has ordered exactly all toys - 1050 twice
EXEC pOrder_AddNew '111', '119', 'JR', '6/1/2002', @OrderNumber output
EXEC pOrder_AddItem @OrderNumber, '1049', NULL, 1, NULL, NULL, NULL
EXEC pOrder_AddItem @OrderNumber, '1050', NULL, 1, NULL, NULL, NULL
EXEC pOrder_AddNew '111', '119', 'JR', '6/1/2002', @OrderNumber output
EXEC pOrder_AddItem @OrderNumber, '1050', NULL, 1, NULL, NULL, NULL
-- Third Person - has order all toys plus some others
EXEC pOrder_AddNew '112', '119', 'JR', '6/1/2002', @OrderNumber output
EXEC pOrder_AddItem @OrderNumber, '1049', NULL, 1, NULL, NULL, NULL
EXEC pOrder_AddItem @OrderNumber, '1050', NULL, 1, NULL, NULL, NULL
EXEC pOrder_AddItem @OrderNumber, '1001', NULL, 1, NULL, NULL, NULL
EXEC pOrder_AddItem @OrderNumber, '1002', NULL, 1, NULL, NULL, NULL
-- Fourth Person - has order one toy
EXEC pOrder_AddNew '113', '119', 'JR', '6/1/2002', @OrderNumber output
EXEC pOrder_AddItem @OrderNumber, '1049', NULL, 1, NULL, NULL, NULL
SELECT *
FROM dbo.[order]
WHERE OrderDate = '6/1/2002'
--Relational Division with remainder
-- Is number of toys ordered...
SELECT Contact.ContactCode
FROM dbo.Contact
JOIN dbo.[Order]
ON Contact.ContactID = [Order].ContactID
JOIN dbo.OrderDetail
ON [Order].OrderID = OrderDetail.OrderID
JOIN dbo.Product
ON OrderDetail.ProductID = Product.ProductID
JOIN dbo.ProductCategory
ON Product.ProductCategoryID = ProductCategory.ProductCategoryID
WHERE ProductCategory.ProductCategoryName = 'Toy'
GROUP BY Contact.ContactCode
HAVING COUNT(DISTINCT Product.Code) =
-- equal to number of toys available?
(SELECT Count(Code)
FROM dbo.Product
JOIN dbo.ProductCategory
ON Product.ProductCategoryID
= ProductCategory.ProductCategoryID
WHERE ProductCategory.ProductCategoryName = 'Toy')
-- Exact Relational Division
-- Is number of all products ordered...
SELECT Contact.ContactCode
FROM dbo.Contact
JOIN dbo.[Order]
ON Contact.ContactID = [Order].ContactID
JOIN dbo.OrderDetail
ON [Order].OrderID = OrderDetail.OrderID
JOIN dbo.Product
ON OrderDetail.ProductID = Product.ProductID
JOIN dbo.ProductCategory P1
ON Product.ProductCategoryID = P1.ProductCategoryID
JOIN
-- and number of toys ordered
(SELECT Contact.ContactCode, Product.Code
FROM dbo.Contact
JOIN dbo.[Order]
ON Contact.ContactID = [Order].ContactID
JOIN dbo.OrderDetail
ON [Order].OrderID = OrderDetail.OrderID
JOIN dbo.Product
ON OrderDetail.ProductID = Product.ProductID
JOIN dbo.ProductCategory
ON Product.ProductCategoryID = ProductCategory.ProductCategoryID
WHERE ProductCategory.ProductCategoryName = 'Toy') ToysOrdered
ON Contact.ContactCode = ToysOrdered.ContactCode
GROUP BY Contact.ContactCode
HAVING COUNT(DISTINCT Product.Code) =
-- equal to number of toys available?
(SELECT Count(Code)
FROM dbo.Product
JOIN dbo.ProductCategory
ON Product.ProductCategoryID
= ProductCategory.ProductCategoryID
WHERE ProductCategory.ProductCategoryName = 'Toy')
-- AND equal to the total number of any product ordered?
AND COUNT(DISTINCT ToysOrdered.Code) =
(SELECT Count(Code)
FROM dbo.Product
JOIN dbo.ProductCategory
ON Product.ProductCategoryID
= ProductCategory.ProductCategoryID
WHERE ProductCategory.ProductCategoryName = 'Toy')
-- Set Difference
USE Tempdb
SELECT Thing1, Thing2
FROM dbo.One
LEFT OUTER JOIN dbo.Two
ON One.OnePK = Two.OnePK
WHERE Two.TwoPK IS NULL
USE OBXKites
SELECT LastName, FirstName
FROM dbo.Contact
LEFT OUTER JOIN dbo.[Order]
ON Contact.ContactID = [Order].ContactID
WHERE OrderID IS NULL
ORDER BY LastName, FirstName
SELECT LastName, FirstName
FROM dbo.Contact
WHERE ContactID NOT IN
(SELECT ContactID FROM dbo. [Order])
ORDER BY LastName, FirstName
USE Tempdb
SELECT Thing1, Thing2
FROM dbo.One
FULL OUTER JOIN dbo.Two
ON One.OnePK = Two.OnePK
WHERE Two.TwoPK IS NULL
OR One.OnePK IS NULL
-------------------------------------------------
-- Query Story #1
-- Northwind's Inventory Problem
-- Step 1: Determine the Demand
USE Northwind
SELECT ProductID, SUM(Quantity)
FROM dbo.Orders
JOIN dbo.[Order Details]
ON Orders.OrderID = [Order details].OrderID
WHERE ShippedDate IS NULL
GROUP BY ProductID
-- Step 2: Compare it with the Available Product
SELECT Products.ProductID,
Demand.TotalDemand - (Products.Unitsinstock + Products.UnitsOnOrder) AS Short
FROM dbo.Products
JOIN (SELECT ProductID, SUM(Quantity) AS TotalDemand
FROM dbo.Orders
JOIN dbo.[Order Details]
ON Orders.OrderID = [Order details].OrderID
WHERE ShippedDate IS NULL
GROUP BY ProductID) Demand
ON Products.ProductID = Demand.ProductID
AND Demand.TotalDemand > Products.Unitsinstock + Products.UnitsOnOrder
ORDER BY Short DESC
-------------------------------------------------
-- Query Story #2
-- Denormalizing Time Sequences
go
USE Tempdb
IF EXISTS(SELECT * FROM SysObjects Where [Name] = 'Event')
DROP TABLE Event
go
CREATE TABLE dbo.Event (
EventID INT Identity(1,1) PRIMARY KEY NONCLUSTERED,
Person CHAR(3),
Start BIT, -- true if begin, false if end
[Time] DATETIME,
EndEventID INT
)
go
ALTER TABLE dbo.Event ADD CONSTRAINT
FK_Event_End FOREIGN KEY (EndEventID) REFERENCES dbo.Event (EventID)
go
sp_Help Event
-- Day One
INSERT dbo.Event(Person, Start, [Time]) -- start 1 end 2
VALUES ('Sue', 1, '20011029 8:00')
INSERT dbo.Event(Person, Start, [Time])
VALUES ('Sue', 0, '20011029 13:10')
INSERT dbo.Event(Person, Start, [Time]) -- 3, 4
VALUES ('Joe', 1, '20011029 8:01')
INSERT dbo.Event(Person, Start, [Time])
VALUES ('Joe', 0, '20011029 11:58')
INSERT dbo.Event(Person, Start, [Time]) -- 5, null
VALUES ('Joe', 1, '20011029 12:41')
-- Day Two
INSERT dbo.Event(Person, Start, [Time]) -- 6, 9
VALUES ('Joe', 1, '20011030 8:00') -- Joe forgot to logout last night
INSERT dbo.Event(Person, Start, [Time]) -- 7, 8
VALUES ('Sue', 1, '20011030 8:00')
INSERT dbo.Event(Person, Start, [Time])
VALUES ('Sue', 0, '20011030 12:00')
INSERT dbo.Event(Person, Start, [Time])
VALUES ('Joe', 0, '20011030 12:00')
INSERT dbo.Event(Person, Start, [Time]) -- 10, 12
VALUES ('Sue', 1, '20011030 12:36')
INSERT dbo.Event(Person, Start, [Time]) -- 11, 13
VALUES ('Joe', 1, '20011030 13:05')
INSERT dbo.Event(Person, Start, [Time])
VALUES ('Sue', 0, '20011030 16:30')
INSERT dbo.Event(Person, Start, [Time])
VALUES ('Joe', 0, '20011030 15:15')
-- Check the inserts
SELECT * FROM dbo.Event
-- This join matched all starts with ends for each person
-- It's a start but doesn't match up the start with the next end
SELECT *
FROM dbo.Event A
JOIN (SELECT *
FROM dbo.Event
WHERE Start = 0) B
ON A.Person = B.Person
WHERE A.Start = 1
-- Add a Join condition to filter only those ends which are after corresponding start
SELECT A.EventID, MIN(B.EventID) as MinEvent
FROM dbo.Event A
JOIN (
SELECT *
FROM dbo.Event
WHERE Start = 0) B
ON A.Person = B.Person
AND A.[Time] <= B.[Time]
WHERE A.Start = 1
GROUP BY A.EventID
-- There's still a problem, start 5 and 6 both have end 9
-- only the last start before an end should have an end
-- another groupby will select the max from the previous pass
SELECT MAX(C.StartID) AS MaxStart, C.EndID
FROM (
SELECT A.EventID AS StartID, Min(B.EventID) AS EndID
FROM dbo.Event A
JOIN (SELECT *
FROM dbo.Event
WHERE Start = 0) B
ON A.Person = B.Person
AND A.[Time] <= B.[Time]
WHERE A.Start = 1
GROUP BY A.EventID) C
GROUP BY C. EndID
-- Write the endID into the start row to indicate that row is a complete time record
-- AND filter for start rows without an EndActivityID
UPDATE dbo.Event
SET EndEventID = D.EndID
FROM dbo.Event
JOIN (SELECT MAX(C.StartID) AS StartID, C.EndID
FROM (SELECT A.EventID AS StartID, Min(B.EventID) AS EndID FROM dbo.Event A
JOIN (SELECT * FROM dbo.Event WHERE Start = 0 ) B
ON A.Person = B.Person
AND A.[Time] <= B.[Time]
WHERE A.Start = 1 AND A.EndEventID IS NULL
GROUP BY A.EventID) C
GROUP BY C. EndID) D
ON Event.EventID = D.StartID
-- Check the results
SELECT * FROM dbo.Event where Start = 1
-- From this data it's now easy to calulate the elapsed time
SELECT S.EventID, S.Person, S.[Time] as Start,
DateDiff(mi,S.[Time],E.[Time]) as ElapsedMinutes
FROM dbo.Event AS S
JOIN dbo.Event AS E
ON S.EndEventID = E.EventID
-- Check for Open Starts
SELECT *
FROM dbo.Event
WHERE EndEventID IS NULL
AND Start = 1
-------------------------------------------------
-- Query Story #3
-- The Stockbroker Problem
IF EXISTS(SELECT * FROM SysObjects Where [Name] = 'RatingsBroker1')
BEGIN
DROP TABLE RatingsBroker1
DROP TABLE RatingsBroker2
DROP TABLE RatingsBroker3
END
CREATE TABLE dbo.RatingsBroker1(
PK INT IDENTITY,
Ticker VARCHAR(10),
Rating VARCHAR(10)
)
CREATE TABLE dbo.RatingsBroker2(
PK INT IDENTITY,
Ticker VARCHAR(10),
Rating VARCHAR(10)
)
CREATE TABLE dbo.RatingsBroker3(
PK INT IDENTITY,
Ticker VARCHAR(10),
Rating VARCHAR(10)
)
INSERT dbo.RatingsBroker1(Ticker, Rating)
VALUES('ABC', 'Buy')
INSERT dbo.RatingsBroker1(Ticker, Rating)
VALUES('MSFT', 'Buy')
INSERT dbo.RatingsBroker1(Ticker, Rating)
VALUES('UAL', 'Sell')
INSERT dbo.RatingsBroker2(Ticker, Rating)
VALUES('ABC', 'Buy')
INSERT dbo.RatingsBroker2(Ticker, Rating)
VALUES('GENE', 'Hold')
INSERT dbo.RatingsBroker3(Ticker, Rating)
VALUES('ABC', 'Hold')
INSERT dbo.RatingsBroker3(Ticker, Rating)
VALUES('MSFT', 'Buy')
INSERT dbo.RatingsBroker3(Ticker, Rating)
VALUES('GENE', 'Sell')
-- Get list of tickers
SELECT Ticker FROM dbo.RatingsBroker1
UNION
SELECT Ticker FROM dbo.RatingsBroker2
UNION
SELECT Ticker FROM dbo.RatingsBroker3
-- query
SELECT U.Ticker,
RatingsBroker1.Rating AS B1,
RatingsBroker2.Rating AS B2,
RatingsBroker3.Rating AS B3
FROM (SELECT Ticker
FROM dbo.RatingsBroker1
UNION
SELECT Ticker
FROM dbo.RatingsBroker2
UNION
SELECT Ticker
FROM dbo.RatingsBroker3) U
LEFT JOIN RatingsBroker1 ON U.Ticker = RatingsBroker1.Ticker
LEFT JOIN RatingsBroker2 ON U.Ticker = RatingsBroker2.Ticker
LEFT JOIN RatingsBroker3 ON U.Ticker = RatingsBroker3.Ticker
ORDER BY U.Ticker
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -