The tokens will be stored in two relations, R1Tokens and R2Tokens.
Download the SQL script qgrams.sql, or use the code below.
You can run the script as isql -U <user> -P <passwd> -d <database name> -i qgrams.sql
-- Creating q-grams as tokens
--
-- Reads the relations R1(tid, str) and R2(tid, str), which must already
-- exist, and fills R1Tokens / R2Tokens with one row per q-gram of each string.
-- The script can be re-run: every table it creates is dropped first if present.

-- Create an auxiliary relation N
-- It contains integers from 1 to @UPPERLIMIT
-- Guarded drop: an unconditional DROP TABLE raises an error on the first run,
-- when N does not exist yet.
IF OBJECT_ID('dbo.N') IS NOT NULL
    DROP TABLE [dbo].[N]
GO

CREATE TABLE [dbo].[N] (
    [i] int PRIMARY KEY
)

-- Using the WHILE-loop, populate the relation
-- @UPPERLIMIT bounds the number of q-grams generated per string: a string with
-- LEN(str) + (@Q-1) > @UPPERLIMIT silently loses its trailing q-grams, so raise
-- the limit if the input strings can be longer.
DECLARE @I int
DECLARE @UPPERLIMIT int
SET @I = 1
SET @UPPERLIMIT = 100
WHILE @I <= @UPPERLIMIT
BEGIN
    INSERT INTO [dbo].[N] ([i]) VALUES (@I)
    SET @I = @I + 1
END
GO

-- Remove token tables left over from a previous run. Without this, the
-- CREATE TABLE batch below fails on a re-run while the INSERT batch still
-- executes and stores every token a second time.
IF OBJECT_ID('dbo.R1Tokens') IS NOT NULL
    DROP TABLE [dbo].[R1Tokens]
IF OBJECT_ID('dbo.R2Tokens') IS NOT NULL
    DROP TABLE [dbo].[R2Tokens]
GO

-- Create the tables where we store the tokens
-- Be careful to allow enough characters to store
-- the tokens. Below we will use 3-grams, so
-- char(3) is ok. If @Q is changed, change the char(n) width to match.
CREATE TABLE [dbo].[R1Tokens] (
    [tid] int NOT NULL,
    [token] char(3) NOT NULL,
    FOREIGN KEY (tid) REFERENCES [R1](tid)
)

CREATE TABLE [dbo].[R2Tokens] (
    [tid] int NOT NULL,
    [token] char(3) NOT NULL,
    FOREIGN KEY (tid) REFERENCES [R2](tid)
)
GO

-- Populate the tokens relations by creating q-grams
-- Each string is upper-cased and padded with @Q-1 '$' characters on both
-- sides; the q-gram starting at position N.i is then cut out for every
-- N.i from 1 to LEN(str) + (@Q-1).
-- The padding literal holds 7 '$' characters, so this works for @Q <= 8.
-- NOTE: LEN() does not count trailing blanks, so a string that ends in spaces
-- yields fewer q-grams than its stored length suggests.
DECLARE @Q int
SET @Q = 3

INSERT INTO R1Tokens (tid, token)
SELECT
    R1.tid,
    SUBSTRING(
        SUBSTRING('$$$$$$$', 1, @Q-1) + UPPER(R1.str) + SUBSTRING('$$$$$$$', 1, @Q-1),
        N.i,
        @Q
    ) AS token
FROM N
INNER JOIN R1
    ON N.i <= LEN(R1.str) + (@Q-1)

INSERT INTO R2Tokens (tid, token)
SELECT
    R2.tid,
    SUBSTRING(
        SUBSTRING('$$$$$$$', 1, @Q-1) + UPPER(R2.str) + SUBSTRING('$$$$$$$', 1, @Q-1),
        N.i,
        @Q
    ) AS token
FROM N
INNER JOIN R2
    ON N.i <= LEN(R2.str) + (@Q-1)
GO