The tokens will be stored in two relations, R1Tokens and R2Tokens.
Download the SQL script qgrams.sql, or use the code below.
You can run the script with the command: isql -U <user> -P <passwd> -d <database name> -i qgrams.sql
-- Creating q-grams as tokens
-- Create an auxiliary relation N
-- It contains integers from 1 to @UPPERLIMIT
-- Drop the auxiliary relation N only if it already exists, so the script
-- also runs cleanly the first time (an unconditional DROP TABLE raises an
-- error when the table is missing).
-- OBJECTPROPERTY returns NULL when the object does not exist, which makes
-- the IF condition false and skips the DROP.
IF OBJECTPROPERTY(OBJECT_ID(N'[dbo].[N]'), N'IsUserTable') = 1
    DROP TABLE [dbo].[N]
GO
-- Auxiliary relation N: one row per integer position 1..@UPPERLIMIT.
CREATE TABLE [dbo].[N] (
[i] int NOT NULL PRIMARY KEY
)
-- Using the WHILE-loop, populate the relation with 1..@UPPERLIMIT.
-- The q-gram INSERTs later in this script join on
--   N.i <= LEN(str) + (@Q-1)
-- so @UPPERLIMIT must be at least the longest string length plus (@Q-1);
-- positions beyond @UPPERLIMIT have no row in N and the corresponding
-- q-grams would silently be missing.
DECLARE @I int
DECLARE @UPPERLIMIT int
SET @I = 1
SET @UPPERLIMIT = 100
-- Suppress the per-row "(1 row affected)" message emitted by each INSERT.
SET NOCOUNT ON
WHILE @I <= @UPPERLIMIT
BEGIN
    -- Explicit column list: the statement keeps working if N gains columns.
    INSERT INTO [dbo].[N] ([i]) VALUES (@I)
    SET @I = @I + 1
END
SET NOCOUNT OFF
GO
-- Create the tables where we store the tokens.
-- Each row is one q-gram (token) of the string identified by tid in the
-- corresponding base relation (R1 or R2).
-- Be careful to allow enough characters to store the tokens: the token
-- width must equal the q used when populating. Below we use 3-grams,
-- so char(3) is ok.
--
-- Drop any previous copies first. Without this, re-running the script
-- fails on CREATE TABLE while the populate batch that follows still runs
-- and inserts every token a second time.
IF OBJECTPROPERTY(OBJECT_ID(N'[dbo].[R1Tokens]'), N'IsUserTable') = 1
    DROP TABLE [dbo].[R1Tokens]
IF OBJECTPROPERTY(OBJECT_ID(N'[dbo].[R2Tokens]'), N'IsUserTable') = 1
    DROP TABLE [dbo].[R2Tokens]
CREATE TABLE [dbo].[R1Tokens] (
[tid] int NOT NULL,
[token] char(3) NOT NULL,
FOREIGN KEY (tid) REFERENCES [R1](tid)
)
CREATE TABLE [dbo].[R2Tokens] (
[tid] int NOT NULL,
[token] char(3) NOT NULL,
FOREIGN KEY (tid) REFERENCES [R2](tid)
)
GO
-- Populate the token relations by creating q-grams.
-- Each string is upper-cased and padded with (@Q-1) '$' characters on both
-- sides; the q-gram at position i is the @Q-character substring starting
-- at i, for i = 1 .. LEN(str) + (@Q-1). Positions come from relation N.
-- @Q must match the width of the token column (char(3) => @Q = 3).
-- NOTE(review): LEN() ignores trailing spaces, so strings that end in
-- blanks yield fewer positions than their stored length -- confirm that
-- R1.str / R2.str carry no significant trailing blanks.
DECLARE @Q int
DECLARE @PAD varchar(100)
SET @Q = 3
-- Build the padding from @Q instead of slicing a fixed 7-character
-- literal, which silently capped the usable @Q at 8.
SET @PAD = REPLICATE('$', @Q - 1)

INSERT INTO [dbo].[R1Tokens] (tid, token)
SELECT
    R1.tid,
    SUBSTRING(@PAD + UPPER(R1.str) + @PAD, N.i, @Q) AS token
FROM [dbo].[N] AS N
INNER JOIN R1
    ON N.i <= LEN(R1.str) + (@Q - 1)

INSERT INTO [dbo].[R2Tokens] (tid, token)
SELECT
    R2.tid,
    SUBSTRING(@PAD + UPPER(R2.str) + @PAD, N.i, @Q) AS token
FROM [dbo].[N] AS N
INNER JOIN R2
    ON N.i <= LEN(R2.str) + (@Q - 1)
GO
|