*	This SAS program performs the simulations and estimations reported in 
	"Stalking the efficient price in market microstructure specifications: An overview";

x 'cd c:\auser\docs\Cointegration JFM\Sas Programs';
options nocenter nosymbolgen mprint pagesize=54 ls=124;

%let nObs=50000;	*	number of observations to use in simulations;
%let np=20;			*	Order of estimated VECM;
%let nq=60;			*	Order of the cumulative impulse computation (VMA);

*	Description

	This code below consists of an estimation program, followed by the
	generation and estimation of the paper's models I, II and III.

	The model estimated in this program is the VECM:

	Dp(t) = A(1) Dp(t-1) + A(2) Dp(t-2) + ... + A(np) Dp(t-np) 
				+ g ([1 -1] x p(t-1)) + u(t)							(1)
	where:
	p(t)		2x1 vector of prices [p1(t) p2(t)]`
	Dp(t)		2x1 first difference
	A(1) ... A(np) are (2x2) coefficient matrices
	g			2x1 vector of adjustment coefficients
	u(t)		2x1 vector of disturbances

	The estimation procedure is implemented as a macro "PriceDiscoveryAnalysis".
	This macro assumes that a work dataset named "t1" has two price series ("p1" and "p2").
	Estimation is performed using the MODEL procedure.  Additional necessary calculations
	are performed using the IML ("Interactive Matrix Language") procedure.  The program 
	makes extensive use of SAS's macro facility.

	Programming notes:

	1.  Because the MODEL procedure does not allow a first difference on the lhs of an equation,
	the specification actually estimated is:

	p(t) = p(t-1) + A(1) Dp(t-1) + A(2) Dp(t-2) + ... + A(np) Dp(t-np) 
				+ g ([1 -1] x p(t-1)) + u(t)							(2)

	2.	The random-walk / information share calculation relies on the VMA representation:
		Dp(t) = Psi(L) u(t) 
	where Psi(L) is a matrix lag polynomial.  Only the sum is actually needed, that is,
	Psi(1).  This is obtained from the long-run cumulative impact function (from (2)) in
	response to unit shocks in p1 and p2.  The cumulative impact function is computed
	using the "forecast" component of the MODEL procedure.  The impact is computed through
	lead time "nq".

	3.	The estimation procedure in MODEL generates:
				The disturbance covariance matrix, Cov(u)
				Psi(1), described above
				The estimated adjustment coefficients, g.
	These are input to an IML ("Interactive Matrix Language") procedure that actually
	computes the random walk decompositions, information shares, and
	Granger-Gonzalo factor weights.

	4.	The MODEL procedure does not allow arrays of parameters.  The %Lz2 and %A macros
	facilitate a work-around.  An array-like reference "%A(6,1,2)" is expanded as
	"A060102".  That is, A(i,j,k) is the coefficient in equation j, of variable k, at lag i.

	5.	The %vset macro generates a sum where each term has the form:
		A(i,j,k) * Dp[k](t-i).
	This enables the matrix model (equation (2), above) to be specified without writing
	out each term explicitly.  The "list" option on the proc model statement can be used
	to print out the full expanded model that results.

	6.	SAS's IML package does not have a routine to compute a basis for the null space
	of a matrix.  (This is needed for the GG factor computation.)  I implemented an
	equivalent procedure using a singular value decompositon.  (See "Numerical recipes in C".)
;
%macro Lz2(i);
	%if	&i<10 %then 0&i;
	%else &i;
%mend Lz2;

%macro A(i,j,k);
%unquote(A%Lz2(&i)%Lz2(&j)%Lz2(&k))
%mend A;

*	The %vset macro generates sums of terms of the form: A(i,j,k) * lag(i, dif(v));
%macro vset(ieq,iv,LagStart,LagEnd,v);
	%do i=&LagStart %to &LagEnd;
		+ %A(&i,&ieq,&iv) * lag&np(&i,dif(&v))
	%end;
%mend vset;

*	The PriceDiscoveryAnalysis macro estimates a VECM and performs IS and PT/GG analyses.
*	Dataset "t1" is assumed to contain p1 and p2;
%macro PriceDiscoveryAnalysis;
proc datasets lib=work; *	Delete any old work datasets that are lying around.;
	delete shock1 shock2 t2 t3 GGWts PsiSumRow acv;
	run;
*	
	The shock1 and shock2 datasets generated in the following step
	will be used to generate the impulse response functions
	(VMA coefficients) by using the FORECAST option in proc model.;
data shock1 shock2;
*	lagged values assumed be zero;
	p1 = 0;	p2 = 0;
	do i=1 to &np; output; end;
*	impulses;
	i=&np+1;
	p1 = 1;	p2 = 0;
	output shock1;
	p1 = 0;	p2 = 1;
	output shock2;
*	future values;
	p1 = .; p2 = .;
	do i=&np+2 to &np+&nq+1; output; end;
	run;
proc model data=t1;
	p1 = lag(p1) + %vset(1,1,1,&np, p1) + %vset(1,2,1,&np,p2) + g1*(lag(p1)-lag(p2));
	p2 = lag(p2) + %vset(2,1,1,&np, p1) + %vset(2,2,1,&np,p2) + g2*(lag(p1)-lag(p2));
	fit p1 p2 / sur
		outs=vcov (drop=_nused_) 
		outest=est (keep=g1 g2) 
		out=res (rename=(p1=e1 p2=e2)); 
	solve / forecast data=shock1 out=ma1;
	solve / forecast data=shock2 out=ma2;
	title2 "Estimation and forecasting of VECM";
proc iml;
	start;
	print "Random-walk / information share analysis ordered to place p1 innovations first.";
	use ma1; read all var {p1 p2} into ma1 [colname=xname];
	use ma2; read all var {p1 p2} into ma2 [colname=xname];
	PsiSum = (ma1[nrow(ma1),] // ma2[nrow(ma2),])`;	*	Take only the last rows;
	print "Note: All rows should be the same-> "PsiSum;
	use vcov; read all into vcov [colname=cvname];
	print "VECM residual covariance matrix: " vcov;
	vw1 = PsiSum * vcov * PsiSum`;
	PsiSumRow = PsiSum[1,];
	*	Write weights to a dataset;
	create PsiSumRow var {w1 w2}; append from PsiSumRow;
	g = (PsiSumRow*t(root(vcov)))##2;	*	"root" performs the cholesky factorization.;
	vw = sum(g);
	print "Variance of rw component:" vw;
	is1 = g/vw;
	print "[Information share of p1; share of p2]: " is1;

	*	permute the variables to reverse the order;
	print "Random-walk / information share analysis ordered to place p2 innovations first.";
	vcovP = (vcov[2,2]||vcov[2,1])//(vcov[1,2]||vcov[1,1]);
	PsiSumRowP = PsiSumRow[1,2] || PsiSumRow[1,1];
	g = (PsiSumRowP*t(root(vcovP)))##2;
	vw = sum(g);
	print "Variance of rw component:" vw;
	is2 = g/vw;
	print "[Information share of p2; share of p1]: " is2;

	*	Granger-Gonzalo factors;
	use est; read all into g;	*	estimated cointegrating vector coefficients;
	print "Cointegrating vector coefficients:" g;
	call svd(u,q,v,g);	*	called to generate null space of g;
	if abs(q[2,1])>1e-10 then do;
		print "Error in null space analysis. svd q=" q;
		stop;
	end;
	GGWts = v[,2]`;
	GGWts = GGWts/sum(GGWts);	*	Normalize;
	print "Granger-Gonzalo factor weights:" GGWts;
	*	Write the weights to a dataset;
	create GGWts var {w1 w2};	append from GGWts;
	finish;
	run;
	quit;
data t2;	*	Construct GG common factor;
	retain w1 w2;
	if _n_=1 then set GGWts;
	set t1;
	f = w1*p1 + w2*p2;
proc arima data=t2;	*	Analyze GG common factor;
	identify var=f(1) nlag=5 noprint outcov=acv (keep=lag var n cov corr);
proc print data=acv noobs;
	title2 "Autocovariances of changes in GG common factor.";
	run;
data t3;	*	Construct (changes in) implicit random walk;
	retain w1 w2;
	if _n_=1 then set PsiSumRow;
	set res;	*	"res" contains the VECM residuals;
	dm = w1*e1 + w2*e2;
proc arima data=t3;
	identify var=dm nlag=5 noprint outcov=acv (keep=lag var n cov corr);
proc print data=acv noobs;
	title2 "Autocovariances of implicit random-walk.";
	run;
%mend PriceDiscoveryAnalysis;
*
	This ends the definition of the macros.  The remaining code simulates
	data for each of the models I, II and II, and invokes for each simulated
	dataset the PriceDiscoveryAnalysis macro.
;

/*title "(Hasbrouck) Price discovery example I: A two-market ""Roll"" model.";*/
/**	Generate the data;*/
/*data t1;*/
/*	sdu = 1;*/
/*	c=1;*/
/*	m = 0;*/
/*	do t=1 to &nObs;*/
/*		u = sdu*normal(123456);*/
/*		m = m+u;*/
/*		q1 = 1-2*ranbin(654321,1,0.5);*/
/*		q2 = 1-2*ranbin(678910,1,0.5);*/
/*		p1 = m+q1*c;*/
/*		p2 = m+q2*c;*/
/*		output;*/
/*		keep p1 p2;*/
/*	end;*/
/*	run;*/
/*%PriceDiscoveryAnalysis;*/

/*title "(Hasbrouck) Price discovery example II: Two markets with private informations.";*/
/**	Generate the data;*/
/*data t1;*/
/*	sdu = 1;*/
/*	c1=1;*/
/*	c2=1;*/
/*	m = 0;*/
/*	lambda = 1;*/
/*	do t=1 to &nObs;*/
/*		q1 = 1-2*ranbin(654321,1,0.5);*/
/*		q2 = 1-2*ranbin(678910,1,0.5);*/
/*		p2 = m+q2*c2;*/
/*		m = m + lambda * q1;*/
/*		u = sdu*normal(123456);*/
/*		p1 = m+q1*c1;*/
/*		output;*/
/*		keep p1 p2;*/
/*	end;*/
/*	run;*/
/*%PriceDiscoveryAnalysis;*/

title "(Hasbrouck) Price discovery example III: Two markets with public and private information.";
proc datasets lib=work;
	delete t1;
	run;
*	Generate the data;
data t1;
	sdu = 1;
	c1=1;
	c2=0;
	m = 0;
	lambda = 1;
	do t=1 to &nObs;
		q1 = 1-2*ranbin(654321,1,0.5);
		q2 = 1-2*ranbin(678910,1,0.5);
		p2 = m+q2*c2;
		u = sdu*normal(1234656);
		m = m + lambda * q1 + u;
		p1 = m+q1*c1;
		output;
		keep p1 p2;
	end;
	run;

%PriceDiscoveryAnalysis;