Creating Tokens

The tokens will be stored in two relations, R1Tokens and R2Tokens, one for each of the base relations R1 and R2.

Download the SQL script qgrams.sql, or use the code below.

You can run the script with the following command: isql -U <user> -P <passwd> -d <database name> -i qgrams.sql

-- Creating q-grams as tokens

-- Create an auxiliary relation N.
-- It contains the integers from 1 to @UPPERLIMIT. The q-gram queries
-- further down join against it to enumerate the start position of
-- every q-gram inside a padded string.

-- Drop N only when it already exists, so that the very first run of
-- the script does not report an error for a missing table.
IF OBJECT_ID('dbo.N') IS NOT NULL
	DROP TABLE [dbo].[N]
GO
CREATE TABLE [dbo].[N] (
[i] int PRIMARY KEY
)

-- Using the WHILE-loop, populate the relation.
-- @UPPERLIMIT must be at least (length of the longest string) + (q - 1):
-- the q-gram queries only produce a token for start positions found
-- in N, so longer strings would silently lose their trailing q-grams.
DECLARE @I int
DECLARE @UPPERLIMIT int
SET @I=1
SET @UPPERLIMIT=100
WHILE @I <= @UPPERLIMIT
	BEGIN
	-- Explicit column list keeps the INSERT valid if N ever gains columns.
	INSERT INTO [dbo].[N] ([i]) VALUES(@I)
	SET @I = @I+1
	END
GO

-- Create the tables where we store the tokens.
-- Be careful to allow enough characters to store
-- the tokens. Below we will use 3-grams, so
-- char(3) is ok.
-- Each token row points back to the tuple (tid) of the base relation
-- it was extracted from; R1 and R2 must therefore exist already.

-- Drop earlier copies first. Without this, re-running the script fails
-- on CREATE TABLE and the INSERT batch that follows appends a second,
-- duplicate set of tokens to the old tables.
IF OBJECT_ID('dbo.R1Tokens') IS NOT NULL
	DROP TABLE [dbo].[R1Tokens]
IF OBJECT_ID('dbo.R2Tokens') IS NOT NULL
	DROP TABLE [dbo].[R2Tokens]
GO

CREATE TABLE [dbo].[R1Tokens] (
[tid] int NOT NULL,
[token] char(3) NOT NULL,
FOREIGN KEY (tid) REFERENCES [R1](tid)
)

CREATE TABLE [dbo].[R2Tokens] (
[tid] int NOT NULL,
[token] char(3) NOT NULL,
FOREIGN KEY (tid) REFERENCES [R2](tid)
)

GO

-- Populate the tokens relations by creating q-grams.
--
-- Every string is upper-cased and padded with (@Q - 1) '$' characters
-- on both sides; a padded string built from LEN(str) characters then
-- has LEN(str) + (@Q - 1) q-grams, one per start position. The join
-- with N supplies those start positions, so N must contain integers up
-- to at least LEN(str) + (@Q - 1) for the longest string.
--
-- If @Q is changed, the width of the token columns (char(3)) has to be
-- changed accordingly.
-- NOTE(review): LEN() does not count trailing blanks, so strings that
-- end in spaces yield fewer q-grams than their stored length suggests;
-- confirm whether the str columns can contain trailing blanks.
DECLARE @Q int
DECLARE @PAD varchar(100)
SET @Q=3
-- Build the padding from @Q instead of slicing a fixed-length literal,
-- so that the padding is always exactly @Q - 1 characters long.
SET @PAD = REPLICATE('$', @Q-1)

INSERT INTO [dbo].[R1Tokens] ([tid], [token])
SELECT
	R1.tid,
	SUBSTRING(@PAD + UPPER(R1.str) + @PAD, N.i, @Q) AS token
FROM [dbo].[N] AS N
INNER JOIN R1
	ON N.i <= LEN(R1.str) + (@Q-1)

INSERT INTO [dbo].[R2Tokens] ([tid], [token])
SELECT
	R2.tid,
	SUBSTRING(@PAD + UPPER(R2.str) + @PAD, N.i, @Q) AS token
FROM [dbo].[N] AS N
INNER JOIN R2
	ON N.i <= LEN(R2.str) + (@Q-1)

GO