--##################################################### --##################################################### --##################################################### -- We will use the results from this table to calculate -- precision and recall inside the database. -- -- We will first calculate the table -- JOINresult (where S=XX AND sim>=YY) WHERE EXISTS Baseline (where sim>=YY) -- Then for precision we divide with the size of the join -- and for recall with the size of the baseline --##################################################### CREATE TABLE [dbo].[Baseline] ( [tid1] int NOT NULL, [tid2] int NOT NULL, [sim] float NOT NULL, PRIMARY KEY (tid1,tid2), FOREIGN KEY (tid1) REFERENCES R1, FOREIGN KEY (tid2) REFERENCES R2 ) GO INSERT INTO Baseline(tid1, tid2, sim) SELECT T1.tid AS tid1, T2.tid AS tid2, SUM(T1.weight*T2.weight) AS sim FROM R1Weights T1, R2Weights T2 WHERE T1.token = T2.token GROUP BY T1.tid, T2.tid HAVING SUM(T1.weight*T2.weight) >= 0.1 CREATE INDEX [similarities] ON [dbo].[Baseline] ([sim] desc ) WITH FILLFACTOR = 100 CREATE TABLE [dbo].[BaselineNumbers] ( [sim] float NOT NULL, [num] matches NOT NULL, PRIMARY KEY (sim) ) GO CREATE TABLE [dbo].[BaselineNumbers] ( [sim] float NOT NULL, [matches] int NOT NULL, PRIMARY KEY (sim) ) GO DECLARE @SIM float SET @SIM = 0.1 WHILE @SIM < 1.0 BEGIN INSERT INTO BaselineNumbers(sim, matches) SELECT @SIM AS sim, COUNT(*) AS matches FROM Baseline WHERE (Baseline.sim >= @SIM) SET @SIM = @SIM + 0.1 END GO