After creating the relations R1, R2, R1Tokens, R2Tokens you can run the preprocessing step, to create the auxiliary relations.
Download the SQL script preprocessing.sql, or use the code below.
You can run the script as isql -U <user> -P <passwd> -d <database name> -i preprocessing.sql
-- Create two dummy relations with the size of R1, R2 -- (used to create the RiIDF tables) CREATE TABLE [dbo].[R1Size] ( [s] int PRIMARY KEY ) CREATE TABLE [dbo].[R2Size] ( [s] int PRIMARY KEY ) INSERT INTO [dbo].[R1Size] SELECT COUNT(*) FROM R1 INSERT INTO [dbo].[R2Size] SELECT COUNT(*) FROM R2 GO -- Create the IDF tables CREATE TABLE [dbo].[R1IDF] ( [token] varchar (80) NOT NULL, [idf] float NOT NULL, PRIMARY KEY (token) ) CREATE TABLE [dbo].[R2IDF] ( [token] varchar (80) NOT NULL, [idf] float NOT NULL, PRIMARY KEY (token) ) INSERT INTO [dbo].[R1IDF] SELECT T.token, LOG(S.s)-LOG(COUNT(DISTINCT T.tid)) FROM R1Tokens T, R1Size S GROUP BY T.token, S.s INSERT INTO [dbo].[R2IDF] SELECT T.token, LOG(S.s)-LOG(COUNT(DISTINCT T.tid)) FROM R2Tokens T, R2Size S GROUP BY T.token, S.s GO -- Create the TF tables CREATE TABLE [dbo].[R1TF] ( [tid] int NOT NULL, [token] varchar (80) NOT NULL, [tf] int NOT NULL, PRIMARY KEY (tid,token), FOREIGN KEY (tid) REFERENCES R1, FOREIGN KEY (token) REFERENCES R1IDF ) CREATE TABLE [dbo].[R2TF] ( [tid] int NOT NULL, [token] varchar (80) NOT NULL, [tf] int NOT NULL, PRIMARY KEY (tid,token), FOREIGN KEY (tid) REFERENCES R2, FOREIGN KEY (token) REFERENCES R2IDF ) INSERT INTO [dbo].[R1TF] SELECT T.tid, T.token, COUNT(*) FROM R1Tokens T GROUP BY T.tid, T.token INSERT INTO [dbo].[R2TF] SELECT T.tid, T.token, COUNT(*) FROM R2Tokens T GROUP BY T.tid, T.token GO -- Create the Length tables CREATE TABLE [dbo].[R1Length] ( [tid] int NOT NULL, [length] float NOT NULL, PRIMARY KEY (tid), FOREIGN KEY (tid) REFERENCES R1 ) CREATE TABLE [dbo].[R2Length] ( [tid] int NOT NULL, [length] float NOT NULL, PRIMARY KEY (tid), FOREIGN KEY (tid) REFERENCES R2 ) INSERT INTO [dbo].[R1Length] SELECT T.tid, SQRT(SUM(I.idf*I.idf*T.tf*T.tf)) FROM R1IDF I, R1TF T WHERE I.token = T.token GROUP BY T.tid INSERT INTO [dbo].[R2Length] SELECT T.tid, SQRT(SUM(I.idf*I.idf*T.tf*T.tf)) FROM R2IDF I, R2TF T WHERE I.token = T.token GROUP BY T.tid GO -- Create the Weights tables CREATE TABLE [dbo].[R1Weights] ( [tid] int NOT NULL, [token] varchar (80) NOT NULL, [weight] float NOT NULL, PRIMARY KEY (tid,token), FOREIGN KEY (tid) REFERENCES R1, FOREIGN KEY (token) REFERENCES R1IDF ) CREATE TABLE [dbo].[R2Weights] ( [tid] int NOT NULL, [token] varchar (80) NOT NULL, [weight] float NOT NULL, PRIMARY KEY (tid,token), FOREIGN KEY (tid) REFERENCES R2, FOREIGN KEY (token) REFERENCES R2IDF ) INSERT INTO [dbo].[R1Weights] SELECT T.tid, T.token, I.idf*T.tf/L.length FROM R1IDF I, R1TF T, R1Length L WHERE I.token = T.token AND T.tid = L.tid INSERT INTO [dbo].[R2Weights] SELECT T.tid, T.token, I.idf*T.tf/L.length FROM R2IDF I, R2TF T, R2Length L WHERE I.token = T.token AND T.tid = L.tid GO -- Create the Sum tables CREATE TABLE [dbo].[R1Sum] ( [token] varchar (80) NOT NULL, [total] float NOT NULL, PRIMARY KEY (token), FOREIGN KEY (token) REFERENCES R1IDF ) CREATE TABLE [dbo].[R2Sum] ( [token] varchar (80) NOT NULL, [total] float NOT NULL, PRIMARY KEY (token), FOREIGN KEY (token) REFERENCES R2IDF ) INSERT INTO [dbo].[R1Sum] SELECT R.token, SUM(R.weight) FROM R1Weights R GROUP BY R.token INSERT INTO [dbo].[R2Sum] SELECT R.token, SUM(R.weight) FROM R2Weights R GROUP BY R.token GO -- Create the RV tables (NOTE: obsolete) CREATE TABLE [dbo].[R1V] ( [tid] int NOT NULL, [Tv] float NOT NULL, PRIMARY KEY (tid), FOREIGN KEY (tid) REFERENCES R1 ) CREATE TABLE [dbo].[R2V] ( [tid] int NOT NULL, [Tv] float NOT NULL, PRIMARY KEY (tid), FOREIGN KEY (tid) REFERENCES R2 ) INSERT INTO R1V(tid, Tv) SELECT R.tid, SUM(R.weight*S.total) FROM R1Weights R, R2Sum S WHERE R.token = S.token GROUP BY R.tid INSERT INTO R2V(tid, Tv) SELECT R.tid, SUM(R.weight*S.total) FROM R2Weights R, R1Sum S WHERE R.token = S.token GROUP BY R.tid GO
|