/*______________________________________________________________________________________________________
dpRegression.sas

Joel Hasbrouck
September 2011

This program runs on rnd.stern.nyu.
It builds a dataset of transactions for one ticker symbol, computes signed trades, and estimates
the generalized Roll model.

All comments use the block form because statement-style comments (asterisk ... semicolon) must not
contain unmatched quotation marks or internal semicolons.
______________________________________________________________________________________________________*/

options source nodate nocenter nonumber ps=max ls=110 orientation=landscape;
libname taq '/homedir/fin/fac/jhasbrou/public_html/ftp/phd2011Fall';
libname this '.';

/* Run parameters: the ticker symbol to analyze and the number of post-shock periods in each IRF. */
%let ticker   = ESSX;
%let nImpulse = 10;

/*______________________________________________________________________________________________________
Macro impulseResponse

Builds the innovation dataset U, solves the model stored in VAR against it, and prints the
cumulated responses (dataset CIRF).

   qFuture   value assigned to q in periods 1..&nImpulse of U. Use 0 to hold q fixed at zero after
             the shock. Use . to leave q missing (presumably so that SOLVE fills it in from the
             model equation for q -- confirm against the PROC MODEL FORECAST documentation).
   printIrf  Y to print the un-cumulated IRF dataset as well, anything else to skip it.

Reads the global macro variable nImpulse and the model file VAR written by the preceding
PROC MODEL ... OUTMODEL=VAR step. Overwrites WORK.U, WORK.IRF and WORK.CIRF.
______________________________________________________________________________________________________*/
%macro impulseResponse(qFuture=0, printIrf=N);

   /* Two zero-valued rows (t=-2,-1), then the one-unit buy at t=0, then the response horizon. */
   data u;
      do t=-2 to -1;
         q=0; Dp=0; output;
      end;
      /* The loop above exits with t=0, so this row is the shock observation. */
      q=1; Dp=.; output;
      do t=1 to &nImpulse;
         q=&qFuture; Dp=.; output;
      end;
   run;
   proc print data=u noobs; run;

   /* Solve the stored model with the innovation values. */
   proc model model=var;
      solve / data=u out=irf (keep=t Dp q) forecast;
      id t;
   run;
   quit;

   %if %upcase(&printIrf)=Y %then %do;
      proc print data=irf noobs; run;
   %end;

   /* Cumulate the responses. RETAIN precedes SET so cumDp and cumq stay the first two columns.
      Sum statements treat a missing Dp or q as zero instead of making the total missing. */
   data cirf;
      retain cumDp cumq 0;
      set irf;
      cumDp + Dp;
      cumq + q;
   run;
   proc print data=cirf noobs; run;

%mend impulseResponse;

/*______________________________________________________________________________________________________
Subset a ticker symbol.

The WHERE statement restricts the output to the symbol we want with nonmissing values for the
bbid and bofr. Outliers (with extremely high offers) and records after the normal market close
are removed. condFlag flags trades with special condition codes.
______________________________________________________________________________________________________*/
data this.myTicker;
   set taq.ctqall;
   where symbol="&ticker" and bbid^=. and bofr^=. and bofr/bbid<5
         and time<='16:10't and condflag^=1;
run;

/*______________________________________________________________________________________________________
Compute quote midpoints.

Keeps the last record in each (date, time) group and stamps it one second later, so that trades
are signed with respect to the quote of the prior second.
NOTE(review): the BY statement assumes this.myTicker is ordered by date and time -- confirm.
______________________________________________________________________________________________________*/
data quotes;
   set this.myTicker;
   by date time;
   keep symbol date time bbid bofr qMid;
   if last.time;
   if (bbid^=. and bofr^=.) then qMid=(bbid+bofr)/2;
   time = time+1;
run;

/*______________________________________________________________________________________________________
Merge back in and sign trades.

qMidPrevailing is retained with a MISSING initial value (a single trailing initial value in a
RETAIN list applies to every variable before it, so it must be given separately from the 0 for
tradeSeqno). It is also reset at the start of each date, consistent with the first-difference
step below. Trades with no prevailing midpoint get q missing rather than a spurious +1.
______________________________________________________________________________________________________*/
data qt;
   merge this.myTicker (where=(price^=.) keep=symbol date time price size)
         quotes        (keep=symbol date time qMid);
   by symbol date time;
   retain qMidPrevailing . tradeSeqno 0;
   drop qMid;
   if first.date then qMidPrevailing = .;
   if (qMid^=.) then qMidPrevailing = qMid;
   if price^=. then do;
      /* q is +1 above the midpoint, -1 below it, 0 at it; missing when no midpoint is known. */
      if qMidPrevailing^=. then q = sign(price-qMidPrevailing);
      tradeSeqno = tradeSeqno+1;
      output;
   end;
run;

/*______________________________________________________________________________________________________
Compute first differences. DIF is called on every observation so its queue stays in step;
the differences are then blanked on the first observation of each date.
______________________________________________________________________________________________________*/
data qt;
   set qt;
   by date;
   dp = dif(price);
   dq = dif(q);
   if first.date then call missing(dp, dq);
run;

proc print data=qt (obs=50);
   title "qt";
run;

/*______________________________________________________________________________________________________
Estimate the generalized Roll model.
______________________________________________________________________________________________________*/
title "Generalized Roll model";
proc model data=qt outmodel=var;
   Dp = c*dif(q) + lambda*q;
   fit Dp;
run;
quit;

/*______________________________________________________________________________________________________
Impulse response function (IRF) for a one unit buy order (q=+1), with q set to zero afterwards.
______________________________________________________________________________________________________*/
title2 'IRF calculations based on an initial one-unit buy order (q=+1)';
%impulseResponse(qFuture=0, printIrf=N)

/*______________________________________________________________________________________________________
Estimate the generalized Roll model WITH autocorrelated q(t).
______________________________________________________________________________________________________*/
title "Generalized Roll model with autocorrelated q(t)";
proc model data=qt outmodel=var;
   Dp = c*dif(q) + lambda*q;
   q = a*lag1(q);
   fit Dp q / covs;
run;
quit;

/*______________________________________________________________________________________________________
IRF for a one unit buy order (q=+1), with q left missing after the shock. The un-cumulated IRF
is printed as well.
______________________________________________________________________________________________________*/
title2 'IRF calculations based on an initial one-unit buy order (q=+1)';
%impulseResponse(qFuture=., printIrf=Y)