# This file contains S-PLUS commands to construct the figures in # Smoothing Methods in Statistics by Jeffrey S. Simonoff, published # in 1996 by Springer-Verlag. I make no claims that the code given # is as efficient as it could be, and I would be interested to learn # of more efficient approaches. # # The commands for each figure should be considered in isolation from # those for any other figure (so, for example, a setting of graphical # parameters using par() does not carry over to the next figure). # # All of the figures in the book were written to Postscript files using # the postscript() driver. Other graphics drivers might yield different # results. # # All data frames referred to here can be obtained by using the command # data.restore("smoothmeth.dmp"), where smoothmeth.dmp is the S-PLUS # data.dump file available at the book's WWW site. # # Some of the figures were originally created using code that (for various # reasons) is not publicly available. When publicly available code is # used below, this is noted. Generally speaking, generic S-PLUS functions # are used whenever possible. Six generally available libraries of S-PLUS # code are used below: # # ash - by David Scott, available at # http://lib.stat.cmu.edu/S/ash # # bv - by David Scott, available at # ftp://ftp.stat.rice.edu/pub/scottdw/Multi.Den.Est/hist.2d # # KernSmooth - by Matt Wand, available at # http://www.biostat.harvard.edu/~mwand/software.html # # locfit - by Catherine Loader, available at # http://cm.bell-labs.com/stat/project/locfit # # logspline - by Charles Kooperberg, available at # http://lib.stat.cmu.edu/S/logspline # # sm - by Adrian Bowman and Adelchi Azzalini, available at # http://www.stats.gla.ac.uk/~adrian/sm # or # http://www.stat.unipd.it/dip/homes/azzalini/SW/sm # # In addition, the following generally available Fortran code was used: # # bivar - by Jeffrey Simonoff, available at # http://www.stern.nyu.edu/~jsimonof/bivar.f # # projpurs - by Jerome Friedman, available at # http://lib.stat.cmu.edu/general/projpurs # # Figure 1.1 x<-c(0:1000)*.0015+7.5 y<-dnorm(x,mean=8.264,sd=.2987) plot(x,y,type="l",xlab="",ylab="",cex=.7) mtext("CD rate",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 1.2 hist(cdrate$Return,breaks=c(7.5,7.7,7.9,8.1,8.3,8.5,8.7,8.9),col=0, probability=T,xlab="",ylab="",cex=.7) mtext("CD rate",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 1.3 y<-dnorm(x,mean=8.3478,sd=.2535) plot(x,y,type="l",xlab="",ylab="",cex=.7) par(lty=3) y<-dnorm(x,mean=8.149,sd=.3217) lines(x,y) mtext("CD rate",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 1.4 plot(ksmooth(cdrate$Return[cdrate$Type==1],range.x=c(7.3,9),n.points=1000, ker="n",band=.25478),type="l",xlab="",ylab="",cex=.7) par(lty=3) lines(ksmooth(cdrate$Return[cdrate$Type==0],range.x=c(7.3,9),n.points=1000, ker="n",band=.35865)) mtext("CD rate",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 1.5 plot(diabetes$Age,diabetes$LogCpep,xlim=c(0,16),xlab="",ylab="",cex=.7) lines(ksmooth(diabetes$Age,diabetes$LogCpep,range.x=c(1.1,15.3),n.points=500, ker="n",band=3.53)) abline(lsfit(diabetes$Age,diabetes$LogCpep),lty=3) mtext("Age",side=1,line=2,cex=.7) mtext("Log C-peptide concentration",side=2,line=2,cex=.7) # Figure 1.6 plot(sulfate$Distance,sulfate$Corr,xlab="",ylab="",pch=".",cex=.7) lines(lowess(sulfate$Distance,sulfate$Corr,f=.4),lwd=5) mtext("Distance between stations (km)",side=1,line=2,cex=.7) mtext("Correlation",side=2,line=2,cex=.7) # Figure 2.1 par(mfrow=c(1,3)) hist(swissmon$Bottforg,nclass=28,probability=T,col=0,xlab="",ylim=c(0,.5)) mtext("Margin width (mm)",side=1,line=2) mtext("Density",side=2,line=2) mtext("(a)",side=3,line=2,cex=.75) hist(swissmon$Bottforg,nclass=12,probability=T,col=0,xlab="",ylim=c(0,.5)) mtext("Margin width (mm)",side=1,line=2) mtext("Density",side=2,line=2) mtext("(b)",side=3,line=2,cex=.75) hist(swissmon$Bottforg,nclass=6,col=0,probability=T,xlab="",ylim=c(0,.5)) mtext("Margin width (mm)",side=1,line=2) mtext("Density",side=2,line=2) mtext("(c)",side=3,line=2,cex=.75) # Figure 2.2 par(mfrow=c(2,3)) # Note that each call to rnorm creates new normal deviates, so the pictures # will not match up exactly with those in the book. Further, the given # x limits, y limits, and breaks might not be appropriate for newly generated # random deviates. norm1<-rnorm(100,0,1) norm2<-rnorm(100,0,1) norm3<-rnorm(100,0,1) hist(norm1,probability=T,col=0,xlab="",breaks=c(0:21)*.275-2.5, xlim=c(-2.5,3),ylim=c(0,.5)) lines(c(0:201)*.0275-2.5,dnorm(c(0:201)*.0275-2.5,0,1)) mtext("Gaussian sample 1",side=1,line=2) mtext("Density",side=2,line=2) mtext("(a)",side=3,line=2,cex=.75) hist(norm2,probability=T,col=0,xlab="",breaks=c(0:21)*.275-2.5, xlim=c(-2.5,3),ylim=c(0,.5)) lines(c(0:201)*.0275-2.5,dnorm(c(0:201)*.0275-2.5,0,1)) mtext("Gaussian sample 2",side=1,line=2) mtext("Density",side=2,line=2) mtext("(b)",side=3,line=2,cex=.75) hist(norm3,probability=T,col=0,xlab="",breaks=c(0:21)*.275-2.5, xlim=c(-2.5,3),ylim=c(0,.5)) lines(c(0:201)*.0275-2.5,dnorm(c(0:201)*.0275-2.5,0,1)) mtext("Gaussian sample 3",side=1,line=2) mtext("Density",side=2,line=2) mtext("(c)",side=3,line=2,cex=.75) hist(norm1,probability=T,col=0,xlab="",breaks=c(0:5)*1.375-2.5, xlim=c(-2.5,3),ylim=c(0,.4)) lines(c(0:201)*.0275-2.5,dnorm(c(0:201)*.0275-2.5,0,1)) mtext("Gaussian sample 1",side=1,line=2) mtext("Density",side=2,line=2) mtext("(d)",side=3,line=2,cex=.75) hist(norm2,probability=T,col=0,xlab="",breaks=c(0:5)*1.375-2.5, xlim=c(-2.5,3),ylim=c(0,.4)) lines(c(0:201)*.0275-2.5,dnorm(c(0:201)*.0275-2.5,0,1)) mtext("Gaussian sample 2",side=1,line=2) mtext("Density",side=2,line=2) mtext("(e)",side=3,line=2,cex=.75) hist(norm3,probability=T,col=0,xlab="",breaks=c(0:5)*1.375-2.5, xlim=c(-2.5,3),ylim=c(0,.4)) lines(c(0:201)*.0275-2.5,dnorm(c(0:201)*.0275-2.5,0,1)) mtext("Gaussian sample 3",side=1,line=2) mtext("Density",side=2,line=2) mtext("(f)",side=3,line=2,cex=.75) # Figure 2.3 par(mfcol=c(2,3)) hist(cdrate$Return[cdrate$Type==1],xlab="CD rate",probability=T, breaks=(41.35562549:48.35562549)*.1815714286,col=0,main="Thrifts",ylab="Density") hist(cdrate$Return[cdrate$Type==1],xlab="CD rate",probability=T, breaks=(206.7781275:241.7781275)*.03631428571,col=0,ylab="Density") hist(cdrate$Return[cdrate$Type==0],xlab="CD rate",probability=T, breaks=(32.83666378:37.83666378)*.2302,col=0,main="Comm. banks",ylab="Density") hist(cdrate$Return[cdrate$Type==0],xlab="CD rate",probability=T, breaks=(105.0773241:121.0773241)*.0719375,col=0,ylab="Density") hist(chondrit$Silica,xlab="Percentage silica",probability=T, breaks=(4.368421:7.368421)*4.75,col=0,main="Chondrites",ylab="Density") hist(chondrit$Silica,xlab="Percentage silica",probability=T, breaks=(10.193053:17.193053)*2.0357,col=0,ylab="Density") # Figure 2.4 par(mfrow=c(1,2),mex=.5) hist(cdrate$Return[cdrate$Type==1],probability=T, breaks=(41.35562549:48.35562549)*.1815714286,col=0,xlim=c(7.4,8.9),lty=3, xlab="CD rate",cex=.6,main="Thrifts") axis(1,labels=F) axis(2,labels=F) h<-hist(cdrate$Return[cdrate$Type==1],probability=T, breaks=(41.35562549:48.35562549)*.1815714286,plot=F) fpx<-c(7.418214286,h$breaks+.0907857143) fpy<-c(0,h$counts,0) lines(fpx,fpy,lwd=4) mtext("Density",side=2,line=3,cex=.6) hist(cdrate$Return[cdrate$Type==0],probability=T,breaks=(32.83666378:37.83666378)* .2302,col=0,xlim=c(7.3,8.85),lty=3,xlab="CD rate",cex=.6,main="Comm. banks") axis(1,labels=F) axis(2,labels=F) h<-hist(cdrate$Return[cdrate$Type==0],probability=T,breaks=(32.83666378:37.83666378)* .2302,plot=F) fpx<-c(7.4439,h$breaks+.1016) fpy<-c(0,h$counts,0) lines(fpx,fpy,lwd=4) mtext("Density",side=2,line=3,cex=.6) # Figure 2.5 # The bin centers and heights were determined using the code of Simonoff and Hurvich (1993) galfp<-matrix(c(8498.05,0.,9845.90,6.33350E-05,11193.7,0.,12541.6,0.,13889.4,0.,15237.3,0., 16585.1,1.80957E-05,17933.0,2.71436E-05,19280.8,1.53814E-04,20628.7,1.44766E-04,21976.5, 1.08574E-04,23324.4,1.17622E-04,24672.2,5.42872E-05,26020.0,9.04786E-06,27367.9,1.80957E-05, 28715.7,0.,30063.6,0.,31411.4,9.04786E-06,32759.3,9.04788E-06,34107.1,9.04788E-06,35455.0,0.), byrow=T,ncol=2) plot(galfp[,1],galfp[,2],type="l",xlab="",ylab="",cex=.7) mtext("Velocity (km/sec)",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 2.6 # The bin centers and heights were determined by external calculation # The published plot has the axes edited par(mfrow=c(3,1)) arb1<-matrix(c(0,0,250000,0.0000001412,550000,0.0000005367,850000,0.0000006780,1150000,0.0000004237, 1450000,0.0000002260,1750000,0.0000001977,2050000,0.0000002260,2350000,0.0000001695,2650000,0.0000002260, 2950000,0.0000001130,3250000,0.0000000565,3550000,0.0000001695,3850000,0.0000000565,4150000,0.0000000847, 4450000,0.0000000282,4700000,0),byrow=T,ncol=2) arb2<-matrix(c(5.15,0,5.25,0.0847457627,5.35,0.1694915254,5.45,0.0847457627,5.55,0.0847457627,5.65,0.5084745763, 5.75,0.5932203390,5.85,1.3559322034,5.95,1.5254237288,6.05,0.8474576271,6.15,0.7627118644,6.25,0.5932203390, 6.35,1.3559322034,6.45,1.0169491525,6.55,0.6779661017,6.65,0.3389830508,6.75,0),byrow=T,ncol=2) arb3<-matrix(c(141253.75,0,199526.23,.00002304,251188.64,.0000387129,316227.77,.0000153754,398107.17,.0000122131, 501187.23,.0000582072,630957.34,.0000539415,794328.23,.0000979367,1000000,.0000875181,1258925.4,.0000386212, 1584893.2,.0000276101,1995262.3,.0000170578,2511886.4,.0000309703,3162277.7,.0000184504,3981071.7,.0000097705, 5011872.3,.0000038805,5623413.25,0),byrow=T,ncol=2) plot(arb1[,1],arb1[,2],type="l",xlab="",ylab="") mtext("Salary (dollars)",side=1,line=2) mtext("Density",side=2,line=2) mtext("(a)",side=3,line=2,cex=.75) plot(arb2[,1],arb2[,2],type="l",xlab="",ylab="") mtext("Log(salary)",side=1,line=2) mtext("Density",side=2,line=2) mtext("(b)",side=3,line=2,cex=.75) plot(arb3[,1],arb3[,2],type="l",xlab="",ylab="") mtext("Salary (dollars)",side=1,line=2) mtext("Density",side=2,line=2) mtext("(c)",side=3,line=2,cex=.75) # Figure 2.7 par(mfrow=c(3,1)) hist(hckshoot$Shootpct,breaks=c(0:13)*.022,xlab="",probability=T,col=0,xlim=c(0,.3)) mtext("Shooting percentage",side=1,line=2) mtext("Density",side=2,line=2) mtext("(a)",side=3,line=2,cex=.75) hist(hckshoot$Shootpct,breaks=exp(c(0:13)*.28-4.5)/(exp(c(0:13)*.28 -4.5)+1),xlab="",probability=T,col=0,xlim=c(0,.3)) mtext("Shooting percentage",side=1,line=2) mtext("Density",side=2,line=2) mtext("(b)",side=3,line=2,cex=.75) hist(hckshoot$Shootpct[hckshoot$Position==1],breaks=c(0:27)*.01,xlab="", probability=T,col=.2,xlim=c(0,.3),ylim=c(0,18)) par(new=T) hist(hckshoot$Shootpct[hckshoot$Position==0],breaks=c(0:27)*.01,xlab="", probability=T,col=0,xlim=c(0,.3),ylim=c(0,18)) mtext("Shooting percentage",side=1,line=2) mtext("Density",side=2,line=2) mtext("(c)",side=3,line=2,cex=.75) # Figure 2.8 par(mfrow=c(3,2)) # Note that each call to rnorm and rbinom creates new random deviates, so # the pictures will not match up exactly with those in the book. Further, the # given breaks might not be appropriate for newly generated random deviates. number<-rbinom(1,50,.5) mixture<-rnorm(50) mixture[1:number]<-mixture[1:number]+3 hist(mixture,xlab=" ",breaks=-2.13:4.87,col=0,probability=T) hist(mixture,xlab=" ",breaks=-2.33:5.67,col=0,probability=T) hist(mixture,xlab=" ",breaks=-2.42:5.58,col=0,probability=T) hist(mixture,xlab=" ",breaks=-2.53:5.47,col=0,probability=T) hist(mixture,xlab=" ",breaks=-2.6:5.4,col=0,probability=T) hist(mixture,xlab=" ",breaks=-3.1:4.9,col=0,probability=T) # Figure 3.1 # Given bandwidth is equivalent to h=.14 using the ksmooth() scaling # of the kernel plot(ksmooth(cdrate$Return,band= 0.28,n.points = 200,range.x=c(7.4, 8.9)), type="l", xlab = "",ylab = "",cex=.7) mtext("CD rate",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) rug(jitter(cdrate$Return,factor=.3)) # Figure 3.2 # Given bandwidth is equivalent to h=.08 using the ksmooth() scaling # of the kernel plot(ksmooth(cdrate$Return,ker="n",band= 0.216,n.points = 200,range.x=c(7.4, 8.9)), type="l",ylim = c(-0.5, 1.8), xlab = "",ylab = "",yaxt="n",cex=.7) axis(2,at=c(0,.5,1,1.5),labels=c("0","0.5","1.0","1.5"),cex=.7) mtext("CD rate",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) for(i in 1:16) { lines(c(0:100)/200-.25 + cdrate$Return[i * 4 - 3], dnorm(c(0:100)/200-.25 + cdrate$Return[i * 4 - 3],mean = cdrate$Return[i * 4 - 3], sd = 0.08) * 0.09 - 0.5,lty=2) } rug(jitter(cdrate$Return,factor=.3)) # Figure 3.3 # Given bandwidths are equivalent to h=.04 and h=.16 using the ksmooth() scaling # of the kernel par(mfrow=c(1,2),mex=.5) plot(ksmooth(cdrate$Return,ker="n",band = 0.108,n.points = 200, range.x = c(7.4, 8.9)), type = "l", xlab = "CD rate",ylab = "", main="(a)",cex=.6) mtext("Density",side=2,line=3,cex=.6) plot(ksmooth(cdrate$Return, ker = "n", bandwidth = 0.432, n.points = 200, range.x = c(7.4, 8.9)), type = "l", xlab = "CD rate",ylab = "", main="(b)",cex=.6) mtext("Density",side=2,line=3,cex=.6) # Figure 3.4 # Given bandwidths are equivalent to h=.3108 and h=.2682 using the ksmooth() scaling # of the kernel # The Sheather-Jones bandwidths were calculated using a Fortran routine, but are also # available using Bowman and Azzalini's sm library or Wand's KernSmooth library par(mfrow=c(1,2),mex=.5) plot(ksmooth(swissmon$Bottforg,ker="n",band=.8384,range.x=c(7,13),n.points=200), type="l",xlab="Bottom margin",ylab="",cex=.6,main="Forged bills") mtext("Density",side=2,line=3,cex=.6) rug(jitter(swissmon$Bottforg)) plot(ksmooth(swissmon$Bottreal,ker="n",band=.7237,range.x=c(7,11),n.points=200), type="l",xlab="Bottom margin",ylab="",cex=.6,main="Real bills") mtext("Density",side=2,line=3,cex=.6) rug(jitter(swissmon$Bottreal)) # Figure 3.5 # Given bandwidth is equivalent to h=.2 using the ksmooth() scaling # of the kernel plot(ksmooth(c(swissmon$Diagforg,swissmon$Diagreal),ker="n",band=.54016), type="l", xlab = "",ylab = "",cex=.7) mtext("Diagonal length",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) rug(jitter(c(swissmon$Diagforg,swissmon$Diagreal))) # Figure 3.6 # Generation of density estimates using the Sheather-Jones bandwidth # selector for the variability plot is accomplished using Bowman and # Azzalini's sm library. # Note that each call to sample creates new random subsamples, so # the picture will not match up exactly with that in the book. library(sm) plot(ksmooth(swissmon$Bottforg,ker="n",band=.8384,x.points=c(1:200)*.03+7), type="l",xlab="",ylab="",cex=.7,ylim=c(0,.55)) mtext("Bottom margin",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) varplot<-matrix(NA,200,200) for (i in 1:200){ vardata<-sample(swissmon$Bottforg,replace=T) varplot[,i]<-sm.density(vardata,h=hsj(vardata),display="none", eval.points=c(1:200)*.03+7)$estimate} for (i in 1:200) varplot[i,]<-sort(varplot[i,]) lines(c(1:200)*.03+7,varplot[,195],lty=2) lines(c(1:200)*.03+7,varplot[,5],lty=2) # Figure 3.7 # The kernel estimate using a biweight kernel is obtained using # Wand's KernSmooth library library(KernSmooth) minekrn<-bkde(mineacci$Interval,kernel="biweight",bandwidth=10.5, range.x=c(0,50)) plot(minekrn,type="l",xlab = "",ylab = "",cex=.7) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) rug(jitter(mineacci$Interval[mineacci$Interval>0])) rug(mineacci$Interval[mineacci$Interval==0]) # Figure 3.8 # Given bandwidth is equivalent to h=1.435 using the ksmooth() scaling # of the kernel plot(ksmooth(marathon$Time,ker="n",band= 3.8756,n.points = 200), type="l", xlab = "",ylab = "",cex=.7) mtext("Marathon record time",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) rug(marathon$Time) # Figure 3.9 # Given bandwidth is equivalent to h=.052 using the ksmooth() scaling # of the kernel hist(racial$Propwhit,breaks=c(0:40)*.025,prob=T,col=0,xlab="",ylab="",cex=.7) lines(ksmooth(racial$Propwhit,ker="n",band= .1404,n.points = 200), type="l",lwd=1.3) mtext("Proportion of white students",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 3.10 # This plot was constructed using personally-written Fortran # code. It has been adapted here to be used with Bowman and # Azzalini's sm library. The normal kernel is used, rather than # the biweight, and the boundary kernel estimate has been constructed # using a local linear regression estimate (see page 236 of the book # for discussion of this). The resultant density estimate is slightly # different from that in the book. library(sm) tt<-hist(mineacci$Interval,breaks=c(0:51)-.5,plot=F)$counts minebnd<-sm.regression(x=c(0:50),y=tt/28,h=4,display="none") plot(minebnd$eval.points,minebnd$estimate,type="l",xlab = "",ylab = "",cex=.7) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 3.11 # This plot was constructed using personally-written Fortran # code. It has been adapted here to be used with Bowman and # Azzalini's sm library. The normal kernel is used, rather than # the biweight, and the boundary kernel estimate has been constructed # using a local linear regression estimate (see page 236 of the book # for discussion of this). The resultant density estimate is slightly # different from that in the book (and appears to be somewhat less variable # at the left boundary). # Note that each call to sample creates new random subsamples, so # the picture will not match up exactly with that in the book. library(sm) tt<-hist(mineacci$Interval,breaks=c(0:51)-.5,plot=F)$counts minebnd<-sm.regression(x=c(0:50),y=tt/28,h=4,display="none",ngrid=51) plot(minebnd$eval.points,minebnd$estimate,type="l",xlab = "",ylab = "",cex=.7, ylim=c(0,.3)) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) varplot<-matrix(NA,51,200) for (i in 1:200){ vardata<-sample(mineacci$Interval,replace=T) tt<-hist(vardata,breaks=c(0:51)-.5,plot=F)$counts varplot[,i]<-sm.regression(x=c(0:50),tt/28,h=1.683*hsj(vardata), display="none",ngrid=51)$estimate} for (i in 1:51) varplot[i,]<-sort(varplot[i,]) lines(c(0:50),varplot[,195],lty=2) lines(c(0:50),varplot[,5],lty=2) # Figure 3.12 # This plot has been adapted for use with Bowman and Azzalini's # sm library. library(sm) pilot<-ksmooth(marathon$Time,ker="n",band= 3.8756,x.points=marathon$Time) maradp<-sm.density(sort(marathon$Time),h=.466,h.weights=1/sqrt(pilot$y), display="none",eval.points=c(1280:1650)/10) plot(maradp$eval.points,maradp$estimate,type="l", xlab = "",ylab = "",cex=.7) mtext("Marathon record time",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) rug(marathon$Time) # Figure 3.13 # This plot has been adapted for use with Bowman and Azzalini's # sm library. library(sm) pilot<-ksmooth(marathon$Time,ker="n",band= 1.9378,x.points=marathon$Time) maradp1<-sm.density(sort(marathon$Time),h=.466,h.weights=1/sqrt(pilot$y), display="none",eval.points=c(1280:1650)/10) pilot<-ksmooth(marathon$Time,ker="n",band= 7.7512,x.points=marathon$Time) maradp2<-sm.density(sort(marathon$Time),h=.466,h.weights=1/sqrt(pilot$y), display="none",eval.points=c(1280:1650)/10) pilot<-ksmooth(marathon$Time,ker="n",band= 3.8756,x.points=marathon$Time) maradp3<-sm.density(sort(marathon$Time),h=.233,h.weights=1/sqrt(pilot$y), display="none",eval.points=c(1280:1650)/10) maradp4<-sm.density(sort(marathon$Time),h=.932,h.weights=1/sqrt(pilot$y), display="none",eval.points=c(1280:1650)/10) par(mfcol=c(2,2),mex=.5) plot(maradp1$eval.points,maradp1$estimate,type="l",xlab = "",ylab = "",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mtext("Changing pilot h",side=3,line=2,cex=.9) plot(maradp2$eval.points,maradp2$estimate,type="l",xlab = "",ylab = "",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) plot(maradp3$eval.points,maradp3$estimate,type="l",xlab = "",ylab = "",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mtext("Changing variable h",side=3,line=2,cex=.9) plot(maradp4$eval.points,maradp4$estimate,type="l",xlab = "",ylab = "",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) # Figure 3.14 # The figure was originally created using personally-written Fortran # code, which contained an error. THE FIGURE IN THE FIRST PRINTING OF # THE BOOK IS INCORRECT. IF YOU HAVE THE FIRST PRINTING OF THE BOOK, # YOU SHOULD OBTAIN A COPY OF THE CORRECT FIGURE UNDER THE ERRATA # SECTION OF THE BOOK'S WEB PAGE. # The following function uses Wand's KernSmooth library to # calculate the nearest neighbor density estimates kernnn<-function(x,kernel = "biweight",k=1,eval.points) # # Function to construct k-th nearest neighbor density estimate. # The function uses Wand's KernSmooth library # { estimate<-rep(NA,length(eval.points)) for (i in 1:length(eval.points)){ nndist<-sort(abs(eval.points[i]-x)) hev<-min(nndist[k:length(x)][nndist[k:length(x)]>0]) fixed<-bkde(x,kernel=kernel,bandwidth=hev,truncate=F, range.x=c(min(eval.points),max(eval.points))) estimate[i]<-fixed$y[order(abs(eval.points[i]-fixed$x))][1]} list(eval.points=eval.points,estimate=estimate)} library(KernSmooth) par(mfrow=c(2,2),mex=.5) marnn10<-kernnn(marathon$Time,k=10,eval.points=c(0:200)*.225+125) marnn20<-kernnn(marathon$Time,k=20,eval.points=c(0:200)*.225+125) marnn30<-kernnn(marathon$Time,k=30,eval.points=c(0:200)*.225+125) marnn40<-kernnn(marathon$Time,k=40,eval.points=c(0:200)*.225+125) plot(marnn10$eval.points,marnn10$estimate,type="l",xlab="",ylab="",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mtext("(a) 10 nearest neighbors",side=3,line=2,cex=.85) plot(marnn20$eval.points,marnn20$estimate,type="l",xlab="",ylab="",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mtext("(b) 20 nearest neighbors",side=3,line=2,cex=.85) plot(marnn30$eval.points,marnn30$estimate,type="l",xlab="",ylab="",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mtext("(c) 30 nearest neighbors",side=3,line=2,cex=.85) plot(marnn40$eval.points,marnn40$estimate,type="l",xlab="",ylab="",cex=.7) mtext("Marathon record time",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mtext("(d) 40 nearest neighbors",side=3,line=2,cex=.85) # Figure 3.15 # Given bandwidths are equivalent to h=5.5 and h=2 using the ksmooth() scaling # of the kernel. # Figue 3.15(c) was constructed using personally-written Fortran # code. It has been adapted here to be used with Bowman and # Azzalini's sm library. The boundary kernel estimate has been constructed # using a local linear regression estimate (see page 236 of the book # for discussion of this). The resultant density estimate is slightly # different from that in the book. library(sm) par(mfrow=c(3,1)) plot(ksmooth(quake$Depth,ker="n",band=14.85),type="l",xlab="",ylab="") rug(quake$Depth) mtext("Focal depth",side=1,line=2) mtext("Density",side=2,line=2) mtext("(a)",side=3,line=2,cex=.75) plot(ksmooth(quake$Depth,ker="n",band=5.4),type="l",xlab="",ylab="") mtext("Focal depth",side=1,line=2) mtext("Density",side=2,line=2) mtext("(b)",side=3,line=2,cex=.75) tt<-hist(quake$Depth,breaks=c(0:701)-.5,plot=F)$counts quakebnd<-sm.regression(x=c(0:700),y=tt/2178,h=5.5,display="none") plot(quakebnd$eval.points,quakebnd$estimate,type="l",xlab = "",ylab = "",cex=.7) mtext("Focal depth",side=1,line=2) mtext("Density",side=2,line=2) mtext("(c)",side=3,line=2,cex=.75) # Figure 3.16 # Given bandwidth is equivalent to h=5 using the ksmooth() scaling # of the kernel qdcenter<-quake$Depth-mean(quake$Depth) rangexy<-(exp(c(-140:193)/99)-1)*99 qdy<-log(1+qdcenter/99)*99 kkk<-ksmooth(qdy,ker="n",band=13.5,x.points=c(-140:193)) kkk$x<-rangexy+mean(quake$Depth) kkk$y<-kkk$y/(1+rangexy/99) plot(kkk,type="l",xlab="",ylab="",cex=.7,xlim=c(0,650)) mtext("Density",side=2,line=2,cex=.7) mtext("Focal depth",side=1,line=2,cex=.7) # Figure 3.17 # This plot uses Loader's locfit library library(locfit) plot(locfit(~racial$Propwhit, xlim=c(0,1), alpha=.7),type="l",xlab="",ylab="",cex=.7) rug(racial$Propwhit) mtext("Proportion of white students",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 3.18 # This plot uses Loader's locfit library # Note that each call to sample creates new random subsamples, so # the picture will not match up exactly with that in the book. varplot<-matrix(NA,201,200) for (i in 1:200){ vardata<-sample(mineacci$Interval,replace=T) ddd<-locfit(~vardata,xlim=c(0,50),alpha=.65) varplot[,i]<-exp(predict.locfit(ddd,c(0:200)/4)$fit)} for (i in 1:201) varplot[i,]<-sort(varplot[i,]) plot(locfit(~mineacci$Interval, xlim=c(0,50),alpha=.65),type="l",xlab="",ylab="", ylim=c(0,.6),cex=.7) lines(c(0:200)/4,varplot[,195],lty=2) lines(c(0:200)/4,varplot[,5],lty=2) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 3.19 # This plot uses Loader's locfit library. The more recent versions of this code # do not produce the strange behavior in the region of 5 to 10 days with 64% # span, but do with 60% span, so that has been used here. library(locfit) plot(locfit(~mineacci$Interval, xlim=c(0,50), alpha=.6),type="l",xlab="",ylab="",cex=.7) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 3.20 # This plot uses Kooperberg's logspline library library(logspline) par(mfrow=c(3,2)) fit<-logspline.fit(mineacci$Interval,lbound=0) ddd<-dlogspline(c(0:200)/4,fit) plot(c(0:200)/4,ddd,type="l",xlab="",ylab="",cex=.6,main="Automatic") mtext("Days",side=1,line=3,cex=.6) mtext("Density",side=2,line=3,cex=.6) fit<-logspline.fit(mineacci$Interval,lbound=0,nknots=6,delete=F) ddd<-dlogspline(c(0:200)/4,fit) plot(c(0:200)/4,ddd,type="l",xlab="",ylab="",cex=.6,main="Manual") mtext("Days",side=1,line=3,cex=.6) mtext("Density",side=2,line=3,cex=.6) fit<-logspline.fit(racial$Propwhit,lbound=0,ubound=1) ddd<-dlogspline(c(0:200)/200,fit) plot(c(0:200)/200,ddd,type="l",xlab="",ylab="",cex=.6) mtext("Proportion of white students",side=1,line=3,cex=.6) mtext("Density",side=2,line=3,cex=.6) fit<-logspline.fit(racial$Propwhit,lbound=0,ubound=1,nknots=6,delete=F) ddd<-dlogspline(c(0:200)/200,fit) plot(c(0:200)/200,ddd,type="l",xlab="",ylab="",cex=.6) mtext("Proportion of white students",side=1,line=3,cex=.6) mtext("Density",side=2,line=3,cex=.6) fit<-logspline.fit(quake$Depth,lbound=0) ddd<-dlogspline(c(0:675),fit) plot(c(0:675),ddd,type="l",xlab="",ylab="",cex=.6) mtext("Focal depth",side=1,line=3,cex=.6) mtext("Density",side=2,line=3,cex=.6) fit<-logspline.fit(quake$Depth,lbound=0,nknots=14,delete=F) ddd<-dlogspline(c(0:675),fit) plot(c(0:675),ddd,type="l",xlab="",ylab="",cex=.6) mtext("Focal depth",side=1,line=3,cex=.6) mtext("Density",side=2,line=3,cex=.6) # Figure 4.1 plot(baskball$APM,baskball$PPM,cex=.7,xlab="",ylab="") mtext("Assists per minute",side=1,line=2,cex=.7) mtext("Points per minute",side=2,line=2,cex=.7) # Figure 4.2 # This figure is constructed using Scott's bivariate histogram code bv ab<-matrix(0,2,2) ab[1,1]<-.04 ab[1,2]<-.36 ab[2,1]<-.1 ab[2,2]<-.9 nbin<-c(8,8) dyn.load2("bv.o") bv(cbind(baskball$APM,baskball$PPM-.0001),ab,nbin) lllx<-c(0.1289012,0.15301,NA,0.5919909,0.6158607,NA,1.0980476,1.123182, NA,-0.3437366,-0.3618814,NA,-0.6540545,-0.6695092,NA,-0.9550000,-0.9739205, NA,-1.235,-1.2531418) llly<-c(-1.4120000,-1.4876510,NA,-1.2600000,-1.3349011,NA,-1.09, -1.1688687,NA,-1.43,-1.4942923,NA,-1.2600000,-1.3149772,NA, -1.0750000,-1.1423035,NA,-0.9000000,-0.9629884) lines(lllx,llly) text(.19,-1.61,"0.1",srt=16,cex=.5) text(.652,-1.44,"0.2",srt=16,cex=.5) text(1.135,-1.28,"0.3",srt=16,cex=.5) text(.73,-1.65,"Assists per minute",srt=16,cex=.5) text(-.37,-1.617,"0.2",srt=-23.76,cex=.5) text(-.676,-1.427,"0.4",srt=-23.76,cex=.5) text(-.98,-1.27,"0.6",srt=-23.76,cex=.5) text(-1.26,-1.08,"0.8",srt=-23.76,cex=.5) text(-.9,-1.55,"Points per minute",srt=-23.76,cex=.5) # Figure 4.3 # This figure was originally constructed using Scott's bivariate histogram code. # A similar version is constructed below using the hist2d() function. persp(hist2d(baskball$APM,baskball$PPM,scale=T,xbreaks=c(0:8)*.04+.04, ybreaks=c(0:8)*.1+.1),xlab="Assists per minute", ylab="Points per minute",zlab="Density",box=F,cex=.5) # Figure 4.4 # This figure was originally constructed using Scott's bivariate histogram code. # A similar version is constructed below using the hist2d() function. contour(hist2d(baskball$APM,baskball$PPM,scale=T,xbreaks=c(0:8)*.04+.04, ybreaks=c(0:8)*.1+.1),xlab="",ylab="",cex=.7,levels=c(1:9)*2-.65,labex=0) mtext("Assists per minute",side=1,line=2,cex=.7) mtext("Points per minute",side=2,line=2,cex=.7) # Figure 4.5 # This figure was originally constructed based on personally-written code. # Bowman and Azzalini's sm library is used below to create a similar display. # Note that the contours in the original plot were based on levels of the # estimated density, while sm.density bases them on probability levels. library(sm) sm.density(cbind(baskball$APM,baskball$PPM),h=c(.025,.05), display="slice",labex=0,props=c(99,87,75,63,51,39,27,15,3),xlab="",ylab="") mtext("Assists per minute",side=1,line=2,cex=.7) mtext("Points per minute",side=2,line=2,cex=.7) # Figure 4.6 # This figure was originally constructed based on personally-written code. # Wand's KernSmooth library is used below to create a similar display. library(KernSmooth) par(mfrow=c(2,1),mex=.5) varplot<-array(NA,c(51,51,200)) for (i in 1:200){ ind<-sample(length(baskball$APM),length(baskball$APM),replace=T) vardata<-cbind(baskball$APM[ind],baskball$PPM[ind]) varplot[,,i]<-bkde2D(vardata,bandwidth=c(.025,.05),range.x=list(c(.05,.35), c(.15,.85)),gridsize=c(51,51))$fhat} for (i in 1:51) {for (j in 1:51) varplot[i,j,]<-sort(varplot[i,j,])} contour(c(0:50)*.006+.05,c(0:50)*.014+.15,varplot[,,5],cex=.7,xlab="", ylab="",main="Lower limit plot",labex=0,xaxt="n",yaxt="n",triangles=T) points(baskball$APM,baskball$PPM,col=.3,pch="o") mtext("Assists per minute",side=1,line=3,cex=.7) mtext("Points per minute",side=2,line=3,cex=.7) contour(c(0:50)*.006+.05,c(0:50)*.014+.15,varplot[,,195],cex=.7,xlab="",ylab="", main="Upper limit plot",labex=0,xaxt="n",yaxt="n",triangles=T) points(baskball$APM,baskball$PPM,col=.3,pch="o") mtext("Assists per minute",side=1,line=3,cex=.7) mtext("Points per minute",side=2,line=3,cex=.7) # Figure 4.7 # This figure was originally created with great personal pain and suffering! # Constructing the map correctly required much pre- and post-processing. # The world map database that was used is available at http://lib.stat.cmu.edu/maps/. # A new map, centered at the International Date Line, has now been submitted # to that location, which might make the construction of the map easier, # but I have not investigated that. I would be very interested to hear of # any successes by other people in this area. The code given below constructs # a similar contour plot to the one in the book, but without the map, # using Wand's KernSmooth library. Note that the estimate isn't quite right, # since it does not take into account that the data are on a sphere. The # function sm.sphere() in Bowman and Azzalini's sm library provides density # estimation on a sphere. library(KernSmooth) long<-quake$Longtude long[long<0]<-long[long<0]+360 bb<-bkde2D(cbind(long,quake$Latitude),bandwidth=c(13.33,5.4), range.x=list(c(0,360),c(-70,80))) contour(bb$x1,bb$x2,bb$fhat,xlab="Longitude",ylab="Latitude", xaxt="n",yaxt="n",labex=0,nlevels=12) axis(1,at=c(0,60,120,180,240,300,360),labels=c("0","60","120", "180","120","60","0")) axis(2,at=c(-60,-30,0,30,60),labels=c("60","30","0","30","60")) # Figure 4.8 # This plot is created using Scott's ash library. library(ash) long<-quake$Longtude long[long<0]<-long[long<0]+360 data<-cbind(-long[quake$Depth>0],quake$Latitude[quake$Depth>0], log10(quake$Depth[quake$Depth>0])) xyz<-compress(data,ab=matrix(c(-360,0,-70,80,0,3),byrow=T,ncol=2)) ashn(xyz,mv=c(2,4,2),ff=.08) 4 0 6 -1.5 4 4 0 99 # Figure 4.9 # This plot was created using personally-written Fortran code. # I don't know of any available software that will calculate # a three-dimensional kernel estimate and output the values # of the estimate at a grid of values (as both Bowman and # Azzalini's sm.density() function and Wand's bkde2D() # function will do for a two-dimensional kernel estimate). # The same issues in constructing the map in Figure 4.7 # arose here as well. Once a gridded value of the estimate # is obtained (on a longitude/latitude grid for each of the # four log(depth) values used), each of the plots in the # figure is constructed as Figure 4.7 was, restricting the # values to the given log(depth). # Figure 4.10 # This plot was created using personally-written Fortran code. # Bowman and Azzalini's sm library is used below to create a # similar display. library(sm) par(mfrow=c(2,1),mex=.5) sm.density(cbind(c(swissmon$Bottforg,swissmon$Bottreal), c(swissmon$Diagforg,swissmon$Diagreal)),display="slice", labex=0,props=c(1:9)*10+5,h=c(.397,.233),xlab="", ylab="",pch=".",main="Plug-in choice of h") mtext("Bottom margin",side=1,line=3,cex=.7) mtext("Diagonal length",side=2,line=3,cex=.7) sm.density(cbind(c(swissmon$Bottforg,swissmon$Bottreal), c(swissmon$Diagforg,swissmon$Diagreal)),display="slice", labex=0,props=c(1:9)*10+5,h=c(.1,.2),xlab="", ylab="",pch=".",main="Undersmoothed choice of h") mtext("Bottom margin",side=1,line=3,cex=.7) mtext("Diagonal length",side=2,line=3,cex=.7) # Figure 4.11 # The marginal/conditional estimator was constructed using # the Simonoff's bivar code. The gridded values of the # marginal/conditional estimate were stored in the file # fig4<-11.est. margcond<-matrix(scan("fig4<-11.est"),byrow=T,ncol=3) contour(interp(margcond[,1],margcond[,2],margcond[,3]),labex=0, levels=c(.01,.05,.09,.13,.17,.21,.25),xlab="",ylab="",triangles=T,cex=.7) points(c(swissmon$Bottforg,swissmon$Bottreal), c(swissmon$Diagforg,swissmon$Diagreal),pch=".") mtext("Bottom margin",side=1,line=2,cex=.7) mtext("Diagonal length",side=2,line=2,cex=.7) # Figure 4.12 # This plot was created using personally-written Fortran code. # Bowman and Azzalini's sm library can be used to create a # similar display. Simonoff's bivar code can be used to # get the marginal/conditional estimate at a fine grid. Then, # the inverse of the square root of the values of the # estimate at (or near) each data point are given as the # weights in a vector "weights". library(sm) sm.density(cbind(c(swissmon$Bottforg,swissmon$Bottreal), c(swissmon$Diagforg,swissmon$Diagreal)), h.weights=weights,h=c(.05,.05),display="slice", labex=0,props=c(1:9)*10+5,xlab="",ylab="",pch=".") mtext("Bottom margin",side=1,line=2,cex=.7) mtext("Diagonal length",side=2,line=2,cex=.7) # Figure 4.13 # This plot was created using personally-written code. # Bowman and Azzalini's sm library is used below to create # the display. library(sm) sm.density(cbind(quake$Depth,quake$Magntude),h=c(9.75,.05), xlab="Focal depth",ylab="Magnitude") # Figure 4.14 # This plot uses Loader's locfit library. I used an earlier # version of the code, and have been unable to reproduce # the picture with the latest version of the code. The # code below is not correct, but is of the right form. library(locfit) plot(locfit(~cbind(quake$Depth,quake$Magntude),alpha=6), type="persp") # Figure 4.15 # This plot uses Loader's locfit library library(locfit) plot(locfit(~cbind(c(swissmon$Bottforg,swissmon$Bottreal), c(swissmon$Diagforg,swissmon$Diagreal)),alpha=.25), labex=0,levels=c(1:10)*.03-.02,cex=.7) points(c(swissmon$Bottforg,swissmon$Bottreal), c(swissmon$Diagforg,swissmon$Diagreal),pch=".") mtext("Bottom margin",side=1,line=2,cex=.7) mtext("Diagonal length",side=2,line=2,cex=.7) # Figure 4.16 # This figure was constructed using Friedman's projpurs # code and personally-written Fortran code. The projection # pursuit values were put in the "carspp*.out" files, # which are then read into S-PLUS. The contour # plotting is handled below using Bowman and Azzalini's # sm library, yielding a similar figure. library(sm) carspp1<-matrix(scan("carspp1.out"),byrow=T,ncol=2) carspp2<-matrix(scan("carspp2.out"),byrow=T,ncol=2) par(mfrow=c(2,1),mex=.5) sm.density(carspp1,h=c(.6,.4),display="slice",props=c(1:9)*10+5, labex=0,xlab="Projection 1 (horizontal axis)", ylab="Projection 1 (vertical axis)",pch=".",cex=.7) sm.density(carspp2,h=c(.35,.2),display="slice",props=c(1:9)*10+5, labex=0,xlab="Projection 2 (horizontal axis)", ylab="Projection 2 (vertical axis)",pch=".",cex=.7) # Figure 4.17 plot(ksmooth(cars93$Engsiz,ker="n",band=.81,n.points=200),type="l", xlab="",ylab="",cex=.7) rug(jitter(cars93$Engsiz)) mtext("Engine size",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 4.18 # This figure was constructed using personally-written code. # Bowman and Azzalini's sm library is used below to yield # a similar picture. library(sm) par(mfrow=c(3,1)) sm.density(cbind(cars93$Engsiz,cars93$Price),h=c(.49,4.1),display="slice", props=c(1:9)*10+7,labex=0,xlab="Engine size",ylab="Price",pch=".",cex=.7) sm.density(cbind(cars93$Engsiz,cars93$EPACity),h=c(.49,2.6),display="slice", props=c(1:9)*10+8,labex=0,xlab="Engine size",ylab="City miles per gallon", pch=".",cex=.7) sm.density(cbind(cars93$Engsiz,cars93$Fuel),h=c(.49,1.5),display="slice", props=c(1:9)*10+8,labex=0,xlab="Engine size",ylab="Fuel tank capacity", pch=".",cex=.7) # Figure 5.1 plot(geyser$Duration,geyser$Interval,cex=.7,xlab="",ylab="") abline(lsfit(geyser$Duration,geyser$Interval)) mtext("Eruption duration time",side=1,line=2,cex=.7) mtext("Intereruption time",side=2,line=2,cex=.7) # Figure 5.2 # The bandwidth used is equivalent to h=.25 in the ksmooth() scaling plot(geyser$Duration,geyser$Interval,cex=.7,xlab="",ylab="") lines(ksmooth(geyser$Duration,geyser$Interval,kernel="n",band=.675)) mtext("Eruption duration time",side=1,line=2,cex=.7) mtext("Intereruption time",side=2,line=2,cex=.7) # Figure 5.3 plot(ethanol$E,ethanol$NOx,cex=.7,xlab="",ylab="") lines(ksmooth(ethanol$E,ethanol$NOx,ker="n",band=.0683,n.points=200)) lines(x,-19.09386+49.21772*x-27.29573*x*x,lty=2) mtext("Equivalence ratio",side=1,line=2,cex=.7) mtext("Concentration of nitrous oxides",side=2,line=2,cex=.7) # Figure 5.4 # The bandwidth used is equivalent to h=150 in the ksmooth() scaling plot(newscirc$Daily,newscirc$Sunday,cex=.7,xlab="",ylab="") lines(ksmooth(newscirc$Daily,newscirc$Sunday,kernel="n",band=405,n.points=200)) mtext("Daily circulation",side=1,line=2,cex=.7) mtext("Sunday circulation",side=2,line=2,cex=.7) # Figure 5.5 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) plot(newscirc$Daily,newscirc$Sunday,cex=.7,xlab="",ylab="",pch="o") mtext("Daily circulation",side=1,line=2,cex=.7) mtext("Sunday circulation",side=2,line=2,cex=.7) lines(locpoly(newscirc$Daily,newscirc$Sunday,bandwidth=150)) lines(locpoly(newscirc$Daily,newscirc$Sunday,bandwidth=150,degree=0),lty=2) # Figure 5.6 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) plot(elusage$Temperat,elusage$Usage,cex=.7,xlab="",ylab="",pch="o") mtext("Average daily temperature",side=1,line=2,cex=.7) mtext("Average daily usage",side=2,line=2,cex=.7) lines(locpoly(elusage$Temperat,elusage$Usage,bandwidth=9)) lines(locpoly(elusage$Temperat,elusage$Usage,bandwidth=9,degree=0),lty=2) # Figure 5.7 # This figure is constructed using Wand's KernSmooth library # Note that calls to smple will lead to different subsamples, so the # plot will not be exactly the same as in the book. library(KernSmooth) varplot<-matrix(NA,401,200) for (i in 1:200) { vardata<-cbind(elusage$Temperat,elusage$Usage)[sample(55,replace=T),] varplot[,i]<-locpoly(vardata[,1],vardata[,2],bandwidth=9, range.x=c(24,79))$y } for (i in 1:401) varplot[i,]<-sort(varplot[i,]) plot(elusage$Temperat,elusage$Usage,cex=.7,xlab="",ylab="",pch="o", ylim=c(10,112)) lines(locpoly(elusage$Temperat,elusage$Usage,bandwidth=9)) lines(c(0:400)*.1375+24,varplot[,195],lty=2) lines(c(0:400)*.1375+24,varplot[,5],lty=2) mtext("Average daily temperature",side=1,line=2,cex=.7) mtext("Average daily usage",side=2,line=2,cex=.7) # Figure 5.8 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) plot(ethanol$E,ethanol$NOx,cex=.7,xlab="",ylab="",pch="o") mtext("Equivalence ratio",side=1,line=2,cex=.7) mtext("Nitric oxide concentration",side=2,line=2,cex=.7) lines(locpoly(ethanol$E,ethanol$NOx,bandwidth=.04,degree=3)) lines(locpoly(ethanol$E,ethanol$NOx,bandwidth=.0253),lty=2) # Figure 5.9 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) vine<-vineyard$Lugs89+vineyard$Lugs90+vineyard$Lugs91 par(mfrow=c(2,1),mex=.5) plot(c(1:52),vine,cex=.7,xlab="",ylab="",main="h=3",pch="o") lines(locpoly(c(1:52),vine,bandwidth=3)) mtext("Row",side=1,line=3,cex=.7) mtext("Total number of lugs",side=2,line=3,cex=.7) plot(c(1:52),vine,cex=.7,xlab="",ylab="",main="h=1.5",pch="o") lines(locpoly(c(1:52),vine,bandwidth=1.5)) mtext("Row",side=1,line=3,cex=.7) mtext("Total number of lugs",side=2,line=3,cex=.7) # Figure 5.10 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) vine<-vineyard$Lugs89+vineyard$Lugs90+vineyard$Lugs91 plot(c(1:52),vine,cex=.7,xlab="",ylab="",pch="o") lines(locpoly(c(1:52),vine,bandwidth=3,degree=3)) mtext("Row",side=1,line=3,cex=.7) mtext("Total number of lugs",side=2,line=3,cex=.7) # Figure 5.11 # This figure is constructed using Wand's KernSmooth library # Note that calls to smple will lead to different subsamples, so the # plot will not be exactly the same as in the book. library(KernSmooth) par(mfrow=c(2,1),mex=.5) varplot<-matrix(NA,401,200) for (i in 1:200) { vardata<-cbind(elusage$Temperat,elusage$Usage)[sample(55,replace=T),] varplot[,i]<-locpoly(vardata[,1],vardata[,2],bandwidth=9,degree=2, range.x=c(24,79))$y } for (i in 1:401) varplot[i,]<-sort(varplot[i,]) plot(elusage$Temperat,elusage$Usage,cex=.7,xlab="",ylab="", main="Local quadratic",pch="o",ylim=c(10,145)) lines(locpoly(elusage$Temperat,elusage$Usage,bandwidth=9,degree=2)) lines(c(0:400)*.1375+24,varplot[,195],lty=2) lines(c(0:400)*.1375+24,varplot[,5],lty=2) mtext("Average daily temperature",side=1,line=3,cex=.7) mtext("Average daily usage",side=2,line=3,cex=.7) varplot<-matrix(NA,401,200) for (i in 1:200) { vardata<-cbind(elusage$Temperat,elusage$Usage)[sample(55,replace=T),] varplot[,i]<-locpoly(vardata[,1],vardata[,2],bandwidth=9,degree=3, range.x=c(24,79))$y } for (i in 1:401) varplot[i,]<-sort(varplot[i,]) plot(elusage$Temperat,elusage$Usage,cex=.7,xlab="",ylab="", main="Local cubic",pch="o",ylim=c(10,215)) lines(locpoly(elusage$Temperat,elusage$Usage,bandwidth=9,degree=3)) lines(c(0:400)*.1375+24,varplot[,195],lty=2) lines(c(0:400)*.1375+24,varplot[,5],lty=2) mtext("Average daily temperature",side=1,line=2,cex=.7) mtext("Average daily usage",side=2,line=2,cex=.7) # Figure 5.12 # This figure is constructed using Wand's KernSmooth library # Note that calls to smple will lead to different subsamples, so the # plot will not be exactly the same as in the book. library(KernSmooth) varplot<-matrix(NA,401,200) vine<-vineyard$Lugs89+vineyard$Lugs90+vineyard$Lugs91 for (i in 1:200) { vardata<-cbind(c(1:52),vine)[sample(52,replace=T),] varplot[,i]<-locpoly(vardata[,1],vardata[,2],bandwidth=3,degree=3, range.x=c(1,52))$y } for (i in 1:401) varplot[i,]<-sort(varplot[i,]) plot(c(1:52),vine,cex=.7,xlab="",ylab="",pch="o") lines(locpoly(c(1:52),vine,bandwidth=3,degree=3)) lines(c(0:400)*.1275+1,varplot[,195],lty=2) lines(c(0:400)*.1275+1,varplot[,5],lty=2) mtext("Row",side=1,line=3,cex=.7) mtext("Total number of lugs",side=2,line=3,cex=.7) # Figure 5.13 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) vine<-vineyard$Lugs89+vineyard$Lugs90+vineyard$Lugs91 plot(c(1:52),vine,cex=.7,xlab="",ylab="",pch="o") lines(locpoly(c(1:52),vine,bandwidth=dpill(c(1:52),vine))) mtext("Row",side=1,line=3,cex=.7) mtext("Total number of lugs",side=2,line=3,cex=.7) # Figure 5.14 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) vine<-cbind(c(rep(c(1:52),3)),c(vineyard$Lugs89-mean(vineyard$Lugs89), vineyard$Lugs90-mean(vineyard$Lugs90), vineyard$Lugs91- mean(vineyard$Lugs91))) plot(vine,cex=.7,xlab="",ylab="",pch="o") lines(locpoly(vine[,1],vine[,2],bandwidth=dpill(vine[,1],vine[,2])),lwd=2) mtext("Row",side=1,line=2,cex=.7) mtext("Deviation from annual average number of lugs",side=2,line=2,cex=.7) # Figure 5.15 lll<-loess(adptvisa$Log9291~adptvisa$Log9188,span=.75,degree=1) plot(adptvisa$Log9188,adptvisa$Log9291,cex=.7,xlab="",ylab="") mtext("Log[(1991 visas+1)/(1988 visas+1)]",side=1,line=2,cex=.7) mtext("Log[(1992 visas+1)/(1991 visas+1)]",side=2,line=2,cex=.7) lines(c(-54:341)/100,predict(lll,c(-54:341)/100)) # Figure 5.16 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) bandgrid<-rep(7.5,191) bandgrid[62:87]<-5 bandgrid[140:191]<-3 ll<-locpoly(c(1:96),birthrt$Births,band=bandgrid,gridsize=191,degree=3) plot(c(1:96),birthrt$Births,cex=.7,xlab="",ylab="",axes=F,pch="o") mtext("Year",side=1,line=2,cex=.7) mtext("Birth rate",side=2,line=2,cex=.7) axis(2,cex=.7) axis(1,cex=.7,at=c(1,13,25,37,49,61,73,85,97),labels=c("1940","","1942", "","1944","","1946","","1948")) lines(c(1:191)/2+.5,ll$y) abline(v=31.5,lty=2) abline(v=44.5,lty=2) abline(v=70.5,lty=2) text(15,2800,"h=7.5",cex=.5) text(37,2800,"h=5",cex=.5) text(57,2800,"h=7.5",cex=.5) text(88,2200,"h=3",cex=.5) box() # Figure 5.17 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) bandgrid<-rep(7.5,191) bandgrid[62:87]<-5 bandgrid[140:191]<-3 ll<-locpoly(c(1:96),birthrt$Births,band=bandgrid,gridsize=191,degree=3) ll96<-rep(0,96) for (i in 1:96) ll96[i]<-ll$y[2*i-1] plot(c(1:96),birthrt$Births-ll96,cex=.7,xlab="",ylab="",axes=F,type="l") mtext("Year",side=1,line=2,cex=.7) mtext("Residuals",side=2,line=2,cex=.7) axis(2,cex=.7) axis(1,cex=.7,at=c(1,13,25,37,49,61,73,85,97),labels=c("1940","","1942", "","1944","","1946","","1948")) box() # Figure 5.18 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) bandgrid<-rep(11,228) bandgrid[1:22]<-5.5 bandgrid[23:39]<-2.75 bandgrid[124:164]<-6 ll<-locpoly(whale$Period,whale$Hudson,band=bandgrid,gridsize=228,degree=2) plot(whale$Period,whale$Hudson,cex=.7,xlab="",ylab="",pch="o") mtext("Period",side=1,line=2,cex=.7) mtext("Nursing time",side=2,line=2,cex=.7) lines(c(1:228),ll$y,lwd=2) # Figure 5.19 lllg<-loess(salmon$Recruits~salmon$Spawners,span=.6) llls<-loess(salmon$Recruits~salmon$Spawners,span=.6,family="s") pppg<-predict(lllg,c(50:1100)) ppps<-predict(llls,c(50:1100)) plot(salmon$Spawners,salmon$Recruits,cex=.7,xlab="",ylab="") mtext("Spawners",side=1,line=2,cex=.7) mtext("Recruits",side=2,line=2,cex=.7) lines(c(50:1100),pppg,lty=2) lines(c(50:1100),ppps) # Figure 5.20 lllg<-loess(calibrat$Counts~calibrat$ConcTSH,span=.36) llls<-loess(calibrat$Counts~calibrat$ConcTSH,span=.36,family="s") pppg<-predict(lllg,c(0:200)/2) ppps<-predict(llls,c(0:200)/2) plot(c(0:200)/2,ppps,type="l",cex=.7,xlab="",ylab="",cex=.7) points(calibrat$ConcTSH,calibrat$Counts) mtext("Concentration",side=1,line=2,cex=.7) mtext("Counts",side=2,line=2,cex=.7) lines(c(0:200/2),pppg,lty=2) # Figure 5.21 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) par(mfrow=c(2,1),mex=.5) plot(votfraud$Machine,votfraud$Absentee,cex=.7,xlab="",ylab="", pch="o",main="(a)") mtext("Democrat - Republican machine vote",side=1,line=3,cex=.7) mtext("Democrat - Republican absentee vote",side=2,line=3,cex=.7) lines(locpoly(votfraud$Machine,votfraud$Absentee, bandwidth=dpill(votfraud$Machine,votfraud$Absentee))) voteclean<-cbind(votfraud$Machine,votfraud$Absentee) voteclean[18,1]<-NA voteclean[22,1]<-NA voteclean<-na.omit(voteclean) plot(votfraud$Machine,votfraud$Absentee,cex=.7,xlab="",ylab="", pch="o",main="(b)") mtext("Democrat - Republican machine vote",side=1,line=3,cex=.7) mtext("Democrat - Republican absentee vote",side=2,line=3,cex=.7) lines(locpoly(votfraud$Machine,votfraud$Absentee, bandwidth=dpill(voteclean[,1],voteclean[,2]))) # Figure 5.22 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) voteclean<-cbind(votfraud$Machine,votfraud$Absentee) voteclean[18,1]<-NA voteclean[22,1]<-NA voteclean<-na.omit(voteclean) plot(votfraud$Machine,votfraud$Absentee,cex=.7,xlab="",ylab="",pch="o") mtext("Democrat - Republican machine vote",side=1,line=3,cex=.7) mtext("Democrat - Republican absentee vote",side=2,line=3,cex=.7) lines(locpoly(voteclean[,1],voteclean[,2], bandwidth=dpill(voteclean[,1],voteclean[,2]))) lll<-loess(votfraud$Absentee~votfraud$Machine,family="s",span=.5) lines(c(-20:80)*1000,predict(lll,c(-20:80)*1000),lty=2) # Figure 5.23 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) birthts<-ts(data=birthrt$Births,start=1940,frequency=12) brthtsll<-ts(data=locpoly(c(1:96),birthrt$Births,bandwidth=dpill(c(1:96), birthrt$Births),gridsize=96)$y,start=1940,frequency=12) tsplot(birthts,ylab="",xlab="",type="p",pch="o",cex=.7) mtext("Year",side=1,line=2,cex=.7) mtext("Birth rate",side=2,line=2,cex=.7) tslines(brthtsll) # Figure 5.24 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) par(mfrow=c(2,1),mex=.5) plot(geyser$Duration,geyser$Interval,cex=.7,xlab="",ylab="", pch="o",main="Geyser eruptions") mtext("Eruption duration time",side=1,line=3,cex=.7) mtext("Intereruption time",side=2,line=3,cex=.7) lines(locpoly(geyser$Duration,geyser$Interval, bandwidth=dpill(geyser$Duration,geyser$Interval))) plot(elusage$Temperat,elusage$Usage,cex=.7,xlab="",ylab="", pch="o",main="Electricity usage") mtext("Average daily temperature",side=1,line=3,cex=.7) mtext("Average daily usage",side=2,line=3,cex=.7) lines(locpoly(elusage$Temperat,elusage$Usage, bandwidth=dpill(elusage$Temperat,elusage$Usage))) # Figure 5.25 par(mfrow=c(2,1),mex=.5) plot(diabetes$Age,diabetes$LogCpep,xlim=c(0,16),xlab="",ylab="",cex=.7, main="Diabetes data") mtext("Age",side=1,line=3,cex=.7) mtext("Log C-peptide concentration",side=2,line=3,cex=.7) lines(smooth.spline(diabetes$Age,diabetes$LogCpep)) plot(sulfate$Distance,sulfate$Corr,xlab="",ylab="",pch=".",cex=.7, main="Acid deposition data") lines(smooth.spline(sulfate$Distance,sulfate$Corr),lwd=3) mtext("Distance between stations",side=1,line=3,cex=.7) mtext("Correlation",side=2,line=3,cex=.7) # Figure 5.26 plot(adptvisa$Log9188,adptvisa$Log9291,cex=.7,xlab="",ylab="") mtext("Log[(1991 visas+1)/(1988 visas+1)]",side=1,line=2,cex=.7) mtext("Log[(1992 visas+1)/(1991 visas+1)]",side=2,line=2,cex=.7) sss<-smooth.spline(adptvisa$Log9188,adptvisa$Log9291) lines(c(-54:341)/100,predict(sss,c(-54:341)/100)$y) # Figure 5.27 # The smoothing parameter is equivalent to alpha=.102 using # smooth.spline()'s scaling vine<-vineyard$Lugs89+vineyard$Lugs90+vineyard$Lugs91 plot(c(1:52),vine,cex=.7,xlab="",ylab="") mtext("Row",side=1,line=2,cex=.7) mtext("Total number of lugs",side=2,line=2,cex=.7) lines(c(10:520)/10,predict(smooth.spline(c(1:52),vine,spar=.00004), c(10:520)/10)$y) # Figure 5.28 # The smoothing parameters are equivalent to alpha=1.92 X 10^{-7} # and alpha=245223, respectively, using smooth.spline()'s scaling par(mfrow=c(2,1),mex=.5) plot(ethanol$E,ethanol$NOx,cex=.7,xlab="",ylab="",main="Ethanol data") mtext("Equivalence ratio",side=1,line=3,cex=.7) mtext("Concentration of nitric oxides",side=2,line=3,cex=.7) lines(c(535:1232)/1000,predict(smooth.spline(ethanol$E,ethanol$NOx, spar=.00005),c(535:1232)/1000)$y,lty=2,lwd=2) lines(c(535:1232)/1000,predict(smooth.spline(ethanol$E,ethanol$NOx), c(535:1232)/1000)$y) plot(newscirc$Daily,newscirc$Sunday,cex=.7,xlab="",ylab="", main="Newspaper circulation data") mtext("Daily circulation",side=1,line=3,cex=.7) mtext("Sunday circulation",side=2,line=3,cex=.7) lines(c(120:1280),predict(smooth.spline(newscirc$Daily, newscirc$Sunday,spar=.003),c(120:1280))$y,lty=2,lwd=2) lines(c(120:1280),predict(smooth.spline(newscirc$Daily, newscirc$Sunday),c(120:1280))$y) # Figure 5.29 # The smoothing parameter is equivalent to alpha=8.372 X 10^{10} # using smooth.spline()'s scaling plot(votfraud$Machine,votfraud$Absentee,cex=.7,xlab="",ylab="") mtext("Democrat - Republican machine vote",side=1,line=2,cex=.7) mtext("Democrat - Republican absentee vote",side=2,line=2,cex=.7) lines(c(-132:718)*100,predict(smooth.spline(votfraud$Machine, votfraud$Absentee),c(-132:718)*100)$y) lines(c(-132:718)*100,predict(smooth.spline(votfraud$Machine, votfraud$Absentee,spar=.003),c(-132:718)*100)$y,lty=2) # Figure 5.30 birthts<-ts(data=birthrt$Births,start=1940,frequency=12) sss<-predict(smooth.spline(c(1:96),birthrt$Births),c(1:96)) brthtsss<-ts(data=sss$y,start=1940,frequency=12) tsplot(birthts,ylab="",xlab="",type="p",pch="o",cex=.7) mtext("Year",side=1,line=2,cex=.7) mtext("Birth rate",side=2,line=2,cex=.7) tslines(brthtsss) # Figure 5.31 attach(baskball) nbalo<-loess(PPM~MPG*Height,degree=1,span=.3) H.marginal<-seq(min(Height),max(Height),length=50) M.marginal<-seq(min(MPG),max(MPG),length=50) MH.marginal<-list(MPG=M.marginal,Height=H.marginal) nba.fit<-predict(nbalo,expand.grid(MH.marginal)) persp(M.marginal,H.marginal,nba.fit,box=F,xlab="Minutes per game", ylab="Height (cm)",zlab="Points per minute",cex=.5) # Figure 5.32 # Edit the Postscript file to change x labels of the plots attach(baskball) nbagam<-gam(PPM~lo(MPG,span=.5,degree=1)+lo(Height,span=.5,degree=1)) par(mfrow=c(2,1),mex=.5,pch="o",cex=.7) plot(nbagam,rug=F,residuals=T,ask=F,scale=.5) # Figure 5.33 attach(baskball) nbagam<-gam(PPM~lo(MPG,span=.5,degree=1)+lo(Height,span=.5,degree=1)) H.marginal<-seq(min(Height),max(Height),length=50) M.marginal<-seq(min(MPG),max(MPG),length=50) MH.marginal<-list(MPG=M.marginal,Height=H.marginal) nba.fit<-predict(nbagam,expand.grid(MH.marginal)) persp(M.marginal,H.marginal,nba.fit,box=F,xlab="Minutes per game", ylab="Height (cm)",zlab="Points per minute",cex=.5) # Figure 5.34 # Edit the Postscript file to change x labels of the plots attach(baskball) nbagam<-gam(APM~s(MPG)+s(Height)) par(mfrow=c(2,1),mex=.5,pch="o",cex=.7) plot(nbagam,rug=F,residuals=T,ask=F,scale=.5) # Figure 5.35 plot(gascons$Pricegas,gascons$Consump,cex=.7,xlab="",ylab="") mtext("Price index for gasoline",side=1,line=2,cex=.7) mtext("Total gasoline consumption",side=2,line=2,cex=.7) lines(c(91:411)/100,predict(smooth.spline(gascons$Pricegas, gascons$Consump,spar=.0005),c(91:411)/100)$y) # Figure 5.36a # This figure was created interactively, by requesting plots # s(PCIncome) and then s(Prcusecr). The Postscript file was # then edited to change the x labels. attach(gascons) gasgam<-gam(Consump~s(Pricegas)+s(PCIncome)+s(Prcusecr), bf.maxit=100) par(mfrow=c(2,1),mex=.5,pch="o",cex=.7) plot(gasgam,rug=F,residuals=T,scale=175,ask=T) # Figure 5.36b # This figure was created interactively, by requesting a plot # s(Pricegas). The Postscript file was then edited to change # the x label. attach(gascons) gasgam<-gam(Consump~s(Pricegas)+s(PCIncome)+s(Prcusecr), bf.maxit=100) plot(gasgam,rug=F,residuals=T,scale=175,ask=T) # Figure 5.37 attach(schlvote) taxvote<-na.omit(data.frame(Taxrate,Result)) attach(taxvote) votegam<-gam(Result~s(Taxrate),family=binomial(link=logit)) par(pch="o",cex=.7) plot(votegam,rug=F,residuals=T,xlab="Tax rate",ylab="Residuals") # Figure 5.38 attach(schlvote) taxvote<-na.omit(data.frame(Taxrate,Result)) attach(taxvote) o<-order(Taxrate) votegam<-gam(Result~s(Taxrate),family=binomial(link=logit)) fit<-fitted(votegam) par(pch="o",cex=.7) plot(Taxrate[o],fit[o],type="l",xlab="",ylab="") mtext("Tax rate",side=1,line=2,cex=.7) mtext("Probability of budget passing",side=2,line=2,cex=.7) votegam<-gam(Result~Taxrate,family=binomial(link=logit)) fit<-fitted(votegam) lines(Taxrate[o],fit[o],lty=2) points(Taxrate,Result) # Figure 6.1 par(mfrow=c(3,1)) sal6<-salary$Salary6[!is.na(salary$Salary6)] sal12<-salary$Salary12[!is.na(salary$Salary12)] plot(c(1:6),sal6/147,type="b",pch="o",xlab="",ylab="") mtext("Cell",side=1,line=2) mtext("Relative frequency",side=2,line=2) mtext("6 cells",side=3,line=2,cex=.75) plot(c(1:12),sal12/147,type="b",pch="o",xlab="",ylab="") mtext("Cell",side=1,line=2) mtext("Relative frequency",side=2,line=2) mtext("12 cells",side=3,line=2,cex=.75) plot(c(1:28),salary$Salary28/147,type="b",pch="o",xlab="",ylab="") mtext("Cell",side=1,line=2) mtext("Relative frequency",side=2,line=2) mtext("28 cells",side=3,line=2,cex=.75) # Figure 6.2 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) sal6<-salary$Salary6[!is.na(salary$Salary6)] sal12<-salary$Salary12[!is.na(salary$Salary12)] lll6<-locpoly(c(1:6),sal6/147,degree=0,band=.45,gridsize=6) lll12<-locpoly(c(1:12),sal12/147,degree=0,band=.8,gridsize=12) lll28<-locpoly(c(1:28),salary$Salary28/147,degree=0,band=1.6,gridsize=28) lll6$y<-lll6$y/sum(lll6$y) lll12$y<-lll12$y/sum(lll12$y) lll28$y<-lll28$y/sum(lll28$y) par(mfrow=c(3,1)) plot(c(1:6),sal6/147,type="n",xlab="",ylab="") lines(c(1:6),sal6/147,lty=2) lines(lll6,type="b",pch="o") mtext("Cell",side=1,line=2) mtext("Probability",side=2,line=2) mtext("6 cells",side=3,line=2,cex=.75) plot(c(1:12),sal12/147,type="n",xlab="",ylab="") lines(c(1:12),sal12/147,lty=2) lines(lll12,type="b",pch="o") mtext("Cell",side=1,line=2) mtext("Probability",side=2,line=2) mtext("12 cells",side=3,line=2,cex=.75) plot(c(1:28),salary$Salary28/147,type="n",xlab="",ylab="") lines(c(1:28),salary$Salary28/147,lty=2) lines(lll28,type="b",pch="o") mtext("Cell",side=1,line=2) mtext("Probability",side=2,line=2) mtext("28 cells",side=3,line=2,cex=.75) # Figure 6.3 plot(c(1:55),mineexpl$Count/109,type="b",pch="o",cex=.7,xlab="",ylab="") mtext("Cell",side=1,line=2,cex=.7) mtext("Relative frequency",side=2,line=2,cex=.7) # Figure 6.4 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) lll0<-locpoly(c(1:55),mineexpl$Count/109,degree=0,band=1.6,gridsize=55) lll1<-locpoly(c(1:55),mineexpl$Count/109,degree=1,band=1.6,gridsize=55) lll0$y<-lll0$y/sum(lll0$y) lll1$y<-lll1$y/sum(lll1$y) par(mfrow=c(2,1),mex=.5) plot(c(1:55),mineexpl$Count/109,type="n",xlab="",ylab="",cex=.7, main="Local constant estimate") lines(lll0,type="b",pch="o") lines(c(1:55),mineexpl$Count/109,lty=2) mtext("Cell",side=1,line=3,cex=.7) mtext("Probability",side=2,line=3,cex=.7) plot(c(1:55),mineexpl$Count/109,type="n",xlab="",ylab="",cex=.7, main="Local linear estimate") lines(lll1,type="b",pch="o") lines(c(1:55),mineexpl$Count/109,lty=2) mtext("Cell",side=1,line=3,cex=.7) mtext("Probability",side=2,line=3,cex=.7) # Figure 6.5 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) lll0<-locpoly(c(1:55),mineexpl$Count/109,degree=0,band=1.6,gridsize=55) lll1<-locpoly(c(1:55),mineexpl$Count/109,degree=1,band=1.6,gridsize=55) lll0$y<-lll0$y/sum(lll0$y) lll1$y<-lll1$y/sum(lll1$y) plot(lll1,type="b",pch="o",cex=.7,xlab="",ylab="") lines(lll0,type="b",pch="x",lty=2) mtext("Cell",side=1,line=2,cex=.7) mtext("Probability",side=2,line=2,cex=.7) # Figure 6.6 plot(c(1:50),caco2$Count/55,type="b",pch="o",cex=.7,xlab="",ylab="") mtext("Cell",side=1,line=2,cex=.7) mtext("Relative frequency",side=2,line=2,cex=.7) # Figure 6.7 # This figure is constructed using Wand's KernSmooth library library(KernSmooth) lll1<-locpoly(c(1:50),caco2$Count/55,degree=1,band=6.4,gridsize=50) lll2<-locpoly(c(1:50),caco2$Count/55,degree=2,band=6.4,gridsize=50) lll1$y<-lll1$y/sum(lll1$y) lll2$y<-lll2$y/sum(lll2$y) par(mfrow=c(2,1),mex=.5) plot(lll1,type="b",pch="o",xlab="",ylab="",cex=.7, main="Local linear estimate") mtext("Cell",side=1,line=3,cex=.7) mtext("Probability",side=2,line=3,cex=.7) plot(lll2,type="b",pch="o",xlab="",ylab="",cex=.7, main="Local quadratic estimate") mtext("Cell",side=1,line=3,cex=.7) mtext("Probability",side=2,line=3,cex=.7) # Figure 6.8 # This figure is constructed using Wand's KernSmooth library lll3<-locpoly(c(1:50),caco2$Count/55,degree=3,band=6.4,gridsize=50) lll3$y<-lll3$y/sum(lll3$y) plot(lll3,type="b",pch="o",cex=.7,xlab="",ylab="") mtext("Cell",side=1,line=2,cex=.7) mtext("Probability",side=2,line=2,cex=.7) # Figure 6.9 ll<-loess(mbasurv$Count~mbasurv$Statist*mbasurv$Economic,span=.25, degree=1) survey.tab<-matrix(mbasurv$Count,ncol=6) survlin.smo<-matrix(predict(ll),ncol=6)/sum(predict(ll))*55 par(mfrow=c(2,1),mex=.5) image(c(1:7),7-c(1:6),survey.tab,xlab="Economics",ylab="", xaxt="n",yaxt="n",cex=.7) axis(1,at=c(1:7),labels=c("1","2","3","4","5","6","7")) axis(2,at=c(1:6),labels=c("7","6","5","4","3","2")) title(main="Unsmoothed counts",cex=.6) mtext(side=2,line=2,"Statistics",cex=.7) image(c(1:7),7-c(1:6),floor(survlin.smo),xlab="Economics",ylab="", xaxt="n",yaxt="n",cex=.7) axis(1,at=c(1:7),labels=c("1","2","3","4","5","6","7")) axis(2,at=c(1:6),labels=c("7","6","5","4","3","2")) mtext(side=2,line=2,"Statistics",cex=.7) title(main="Smoothed counts",cex=.6) # Figure 6.10 ll<-loess(mbasurv$Count~mbasurv$Statist*mbasurv$Economic,span=.2, degree=0) survcon.smo<-matrix(predict(ll),ncol=6)/sum(predict(ll))*55 image(c(1:7),7-c(1:6),floor(survcon.smo),xlab="Economics",ylab="Statistics", xaxt="n",yaxt="n") axis(1,at=c(1:7),labels=c("1","2","3","4","5","6","7")) axis(2,at=c(1:6),labels=c("7","6","5","4","3","2")) # Figure 6.11 salary.tab<-matrix(salyear$Count,ncol=12) par(mfrow=c(2,1),mex=.5) image(c(1:10),13-c(1:12),salary.tab,xlab="Years since degree", ylab="",xaxt="n",yaxt="n",cex=.7) axis(1,at=c(1:10),labels=c("0-2","3-5","6-8","9-11","12-14","15-17","18-23", "24-29","30-35","36+"),cex=.5) mtext(side=2,line=2,"Monthly salary",cex=.7) axis(2,at=c(1:12),labels=c("","","","","","","","","","","","")) title(main="Unsmoothed counts",cex=.6) ll<-loess(salyear$Count~salyear$Salary*salyear$Yearsdeg,span=.35) salquad.smo<-matrix(predict(ll),ncol=12)/sum(predict(ll))*147 image(c(1:10),13-c(1:12),floor(salquad.smo),xlab="Years since degree", ylab="",xaxt="n",yaxt="n",cex=.7) axis(1,at=c(1:10),labels=c("0-2","3-5","6-8","9-11","12-14","15-17","18-23", "24-29","30-35","36+"),cex=.5) mtext(side=2,line=2,"Monthly salary",cex=.7) axis(2,at=c(1:12),labels=c("","","","","","","","","","","","")) title(main="Smoothed counts",cex=.6) # Figure 6.12 llh<-loess(salyear$Count~salyear$Salary*salyear$Yearsdeg,span=.13, degree=0) ll2h<-loess(salyear$Count~salyear$Salary*salyear$Yearsdeg,span=.52, degree=0) salgeom.smo<-((matrix(predict(llh),ncol=12)/sum(predict(llh)))^(4/3))/ ((matrix(predict(ll2h),ncol=12)/sum(predict(ll2h)))^(1/3))*147 image(c(1:10),13-c(1:12),floor(salgeom.smo),xlab="Years since degree", ylab="Monthly salary",xaxt="n",yaxt="n",cex=.7) axis(1,at=c(1:10),labels=c("0-2","3-5","6-8","9-11","12-14","15-17","18-23", "24-29","30-35","36+"),cex=.5) axis(2,at=c(1:12),labels=c("3350","3050","2850","2650", "2450","2250","2050","1850","1650","1450","1250","1050"),cex=.5) # Figure 6.13 # The density estimate is the fitted values from the loess fit, # standardized to have unit mean, divided by the length of the # range of evaluation. minefreq<-hist(mineacci$Interval,breaks=c(0:51)-.5,probability=T, plot=F)$counts lll<-loess(minefreq~c(0:50),degree=1,span=.4) plot(c(0:50),minefreq,type="n",cex=.7,xlab="",ylab="") points(c(0:50),minefreq,col=.3) lines(c(0:50),predict(lll)/mean(predict(lll))/50) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 6.14 # The density estimate is the fitted values from the loess fit, # standardized to have unit mean, divided by the area of the # region of evaluation. marginal.grid<-list(Rural85=c(0:50)*2,Rural90=c(0:50)*2) newdata<-expand.grid(marginal.grid) watrcount<-as.vector(hist2d(safewatr$Rural85,safewatr$Rural90, xbreaks=c(0:10)*10+.001,ybreaks=c(0:10)*10+.001,scale=T)$z) Rural90<-rep(NA,100) for (i in 1:10) Rural90[(i*10-9):(i*10)]<-rep(i*10-5,10) Rural85<-rep(c(1:10)*10-5,10) lll<-loess(watrcount~Rural85*Rural90,degree=2,span=.3) ppp<-predict(lll,newdata) par(mfrow=c(2,1),mex=.5) plot(safewatr$Rural85,safewatr$Rural90,pch="o",xlab="",ylab="",cex=.7) mtext("Rural access to safe water, 1985",side=1,line=3,cex=.7) mtext("Safe water access, 1990",side=2,line=3,cex=.7) contour(c(0:50)*2,c(0:50)*2,ppp/mean(ppp[!is.na(ppp)])/10000,xlab="", ylab="",cex=.5,labex=0,levels=c(0:4)/7000) mtext("Rural access to safe water, 1985",side=1,line=3,cex=.7) mtext("Safe water access, 1990",side=2,line=3,cex=.7) points(safewatr$Rural85,safewatr$Rural90,pch="o") # Figure 6.15 # The density estimate is the fitted values from the loess fit, # standardized to have unit mean, divided by the area of the # region of evaluation. # The z-axis label was incorrect in the first printing of the # book, and is given correctly below. Note that it is 7000 # times the density that is actually plotted. marginal.grid<-list(Rural85=c(0:50)*2,Rural90=c(0:50)*2) newdata<-expand.grid(marginal.grid) watrcount<-as.vector(hist2d(safewatr$Rural85,safewatr$Rural90, xbreaks=c(0:10)*10+.001,ybreaks=c(0:10)*10+.001,scale=T)$z) Rural90<-rep(NA,100) for (i in 1:10) Rural90[(i*10-9):(i*10)]<-rep(i*10-5,10) Rural85<-rep(c(1:10)*10-5,10) lll<-loess(watrcount~Rural85*Rural90,degree=2,span=.3) ppp<-predict(lll,newdata) persp(c(0:50)*2,c(0:50)*2,ppp/mean(ppp[!is.na(ppp)])/10000*7000, xlab="Rural access to safe water, 1985", ylab="Rural access to safe water, 1990",zlab="Density X 7000", cex=.5,box=F) # Figure 6.16 # The density estimate is the fitted values from the loess fit, # standardized to have unit mean, divided by the length of the # range of evaluation. minefreq<-hist(mineacci$Interval,breaks=c(0:51)-.5,probability=T, plot=F)$counts lll<-loess(minefreq~c(0:50),span=.7) plot(c(0:50),predict(lll)/mean(predict(lll))/50,type="l",cex=.7, xlab="",ylab="") abline(h=0) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 6.17 # The density estimate is the fitted values from the loess gam fit, # standardized to have unit mean, divided by the length of the # range of evaluation. minefreq<-hist(mineacci$Interval,breaks=c(0:51)-.5,probability=T, plot=F)$counts days<-c(0:50) ggg<-gam(minefreq~lo(days,degree=2,span=.7),family=poisson) ppp<-predict(ggg,type="response") plot(c(0:50),ppp/mean(ppp)/50,type="l",cex=.7,xlab="",ylab="") mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 6.18 # The density estimate is the fitted values from the loess gam fit, # standardized to have unit mean, divided by the length of the # range of evaluation. # Note that each call to sample changes the data chosen, so the # picture will not be exactly the same as in the book. minefreq<-hist(mineacci$Interval,breaks=c(0:51)-.5,probability=T, plot=F)$counts days<-c(0:50) ggg<-gam(minefreq~lo(days,degree=2,span=.7),family=poisson) ppp<-predict(ggg,type="response") plot(c(0:50),ppp/mean(ppp)/50,type="l",cex=.7,xlab="",ylab="",ylim=c(0,.2)) mtext("Days",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) varplot<-matrix(NA,51,200) for (i in 1:200){ vardata<-sample(mineacci$Interval,replace=T) minefreq<-hist(vardata,breaks=c(0:51)-.5,probability=T, plot=F)$counts ggg<-gam(minefreq~lo(days,degree=2,span=.7),family=poisson) ppp<-predict(ggg,type="response") varplot[,i]<-ppp/mean(ppp)/50} for (i in 1:51) varplot[i,]<-sort(varplot[i,]) lines(c(0:50),varplot[,195],lty=2) lines(c(0:50),varplot[,5],lty=2) # Figure 6.19 # The density estimate is the fitted values from the loess gam fit, # standardized to have unit mean, divided by the length of the # range of evaluation. options(object.size=10000000) racefreq<-hist(racial$Propwhit,breaks=c(0:1000)/1000,probability=T, plot=F)$counts prop<-c(1:1000)/1000-.0005 ggg<-gam(racefreq~lo(prop,degree=2,span=.8),family=poisson) ppp1<-predict(ggg,type="response") ggg<-gam(racefreq~lo(prop,degree=2,span=.5),family=poisson) ppp2<-predict(ggg,type="response") ggg<-gam(racefreq~lo(prop,degree=2,span=.2),family=poisson) ppp3<-predict(ggg,type="response") ppp<-ppp1 ppp[462:810]<-ppp2[462:810] ppp[811:1000]<-ppp3[811:1000] plot(prop,ppp/mean(ppp),type="l",cex=.7,xlab="",ylab="") mtext("Proportion of white students",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) # Figure 7.1 par(mfrow=c(2,1),mex=.5) malemean<-mean(mbasalry$Salary[mbasalry$Gender==0]) femmean<-mean(mbasalry$Salary[mbasalry$Gender==1]) sdjnt<-sqrt((var(mbasalry$Salary[mbasalry$Gender==0])*64+ var(mbasalry$Salary[mbasalry$Gender==1])*25)/89) plot(c(0:104)*1000,dnorm(c(0:104)*1000,mean=malemean,sd=sdjnt)*.65, type="l",xlab="",ylab="",cex=.7,main="Full data set") rug(mbasalry$Salary[mbasalry$Gender==0]) lines(c(0:104)*1000,dnorm(c(0:104)*1000,mean=femmean,sd=sdjnt)*.35,lty=2) rug(mbasalry$Salary[mbasalry$Gender==1],side=3) mtext("Salary",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mbaout<-cbind(mbasalry$Gender[mbasalry$Salary<104000], mbasalry$Salary[mbasalry$Salary<104000]) malemean<-mean(mbaout[,2][mbaout[,1]==0]) femmean<-mean(mbaout[,2][mbaout[,1]==1]) sdjnt<-sqrt((var(mbaout[,2][mbaout[,1]==0])*64+ var(mbaout[,2][mbaout[,1]==1])*24)/88) plot(c(0:100)*1000,dnorm(c(0:100)*1000,mean=malemean,sd=sdjnt)*.65, type="l",xlab="",ylab="",cex=.7,main="Omitting outlier") rug(mbaout[,2][mbaout[,1]==0]) lines(c(0:100)*1000,dnorm(c(0:100)*1000,mean=femmean,sd=sdjnt)*.35,lty=2) rug(mbaout[,2][mbaout[,1]==1],side=3) mtext("Salary",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) # Figure 7.2 # The bandwidths correspond to h=5200 and h=3150, respectively, in the # ksmooth() scaling. par(mfrow=c(2,1),mex=.5) kkkm<-ksmooth(mbasalry$Salary[mbasalry$Gender==0],x.points=c(0:104)*1000, ker="n",band=14029.4) kkkf<-ksmooth(mbasalry$Salary[mbasalry$Gender==1],x.points=c(0:104)*1000, ker="n",band=8498.58) plot(kkkm$x,kkkm$y*.65,type="n",xlab="",ylab="",cex=.7,ylim=c(0,.000021), main="Kernel-based discriminant analysis") polygon(c(30890,38029,38029,30890),c(0,0,.000021,.000021),col=6,border=F) polygon(c(100018,104000,104000,100018),c(0,0,.000021,.000021),col=6,border=F) lines(kkkm$x,kkkm$y*.65) lines(kkkf$x,kkkf$y*.35,lty=2) rug(mbasalry$Salary[mbasalry$Gender==0]) rug(mbasalry$Salary[mbasalry$Gender==1],side=3) mtext("Salary",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) mbaout<-cbind(mbasalry$Gender[mbasalry$Salary<104000], mbasalry$Salary[mbasalry$Salary<104000]) malemean<-mean(mbaout[,2][mbaout[,1]==0]) femmean<-mean(mbaout[,2][mbaout[,1]==1]) malesd<-sqrt(var(mbaout[,2][mbaout[,1]==0])) femsd<-sqrt(var(mbaout[,2][mbaout[,1]==1])) plot(c(0:104)*1000,dnorm(c(0:104)*1000,mean=malemean,sd=malesd)*.65, type="n",xlab="",ylab="",cex=.7,ylim=c(0,.000023), main="Quadratic discriminant analysis, outlier omitted") polygon(c(26700,38060,38060,26700),c(0,0,.000023,.000023),col=6,border=F) lines(c(0:104)*1000,dnorm(c(0:104)*1000,mean=malemean,sd=malesd)*.65) lines(c(0:104)*1000,dnorm(c(0:104)*1000,mean=femmean,sd=femsd)*.35,lty=2) rug(mbasalry$Salary[mbasalry$Gender==0]) rug(mbasalry$Salary[mbasalry$Gender==1],side=3) mtext("Salary",side=1,line=3,cex=.7) mtext("Density",side=2,line=3,cex=.7) # Figure 7.3 # This figure was constructed using personally-written Fortran code. # Bowman and Azzalini's sm library is used below to construct a # similar display. library(sm) sm.density(cbind(mbagrade$GMAT[mbagrade$Gender==0], mbagrade$GPA[mbagrade$Gender==0]),h=c(25,.15), display="slice",pch="x",xlim=c(420,720), ylim=c(2.5,4),props=c(1:5)*20-10,labex=0, xlab="GMAT score",ylab="Grade point average") sm.density(cbind(mbagrade$GMAT[mbagrade$Gender==1], mbagrade$GPA[mbagrade$Gender==1]),h=c(45,.12), display="slice",props=c(1:5)*20-10,labex=0,add=T, xlim=c(420,720),ylim=c(2.5,4),lty=2) points(mbagrade$GMAT[mbagrade$Gender==1], mbagrade$GPA[mbagrade$Gender==1]) # Figure 7.4 # This figure was constructed using personally-written Fortran code. # Bowman and Azzalini's sm library is used below to construct a # similar display. library(sm) mden<-sm.density(cbind(mbagrade$GMAT[mbagrade$Gender==0], mbagrade$GPA[mbagrade$Gender==0]),h=c(25,.15), display="none",xlim=c(420,720),ylim=c(2.5,4)) fden<-sm.density(cbind(mbagrade$GMAT[mbagrade$Gender==1], mbagrade$GPA[mbagrade$Gender==1]),h=c(45,.12), display="none",xlim=c(420,720),ylim=c(2.5,4)) diffdens<-mden$estimate*.65-fden$estimate*.35 contour(mden$eval.points[,1],mden$eval.points[,2],diffdens, levels=c(0),labex=0,xlab="GMAT score",ylab="Grade point average",cex=.7) points(mbagrade$GMAT[mbagrade$Gender==0], mbagrade$GPA[mbagrade$Gender==0],pch="x") points(mbagrade$GMAT[mbagrade$Gender==1], mbagrade$GPA[mbagrade$Gender==1]) text(x=675,y=3.1,"FEMALE",cex=.6) text(x=475,y=2.5,"FEMALE",cex=.6) # Figure 7.5 # The bandwidth corresponds to h=4.8 using the ksmooth() scaling. plot(c(-100:700)/10,dnorm(c(-100:700)/10,mean=mean(jantemp$Jantemp), sd=sqrt(var(jantemp$Jantemp)*49/50)),type="l",ylim=c(0,.035),xlab="", ylab="",cex=.7) mtext("Normal minimum January temperature",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) lines(ksmooth(jantemp$Jantemp,ker="n",band=12.95, range.x=c(-10,70)),lty=2) rug(jantemp$Jantemp) # Figure 7.6 # This figure was constructed using personally-written Fortran # code based on a grid search minimization. The nlminb() # function returns a slightly different answer below. kkj<-ksmooth(jantemp$Jantemp,ker="n",band=12.95,n.points=801) parj<-c(mean(jantemp$Jantemp),sqrt(var(jantemp$Jantemp)*49/50)) dsq<-function(parj,kkj){ sum((dnorm(kkj$x,mean=parj[1],sd=parj[2])^(.5)-kkj$y^(.5))^2)} parj<-nlminb(start=parj,obj=dsq,kkj=kkj)$parameters plot(c(-100:700)/10,dnorm(c(-100:700)/10,mean=parj[1],sd=parj[2]), type="l",ylim=c(0,.035),xlab="",ylab="",cex=.7) mtext("Normal minimum January temperature",side=1,line=2,cex=.7) mtext("Density",side=2,line=2,cex=.7) lines(ksmooth(jantemp$Jantemp,ker="n",band=12.95, range.x=c(-10,70)),lty=2) # Figure 7.7 accasia<-hist(airaccid$Accident,breaks=c(0:14)-.5,plot=F)$counts/17 plot(c(0:13),accasia,type="b",pch="x",xlab="",ylab="",cex=.7) mtext("Number of accidents",side=1,line=2,cex=.7) mtext("Probability",side=2,line=2,cex=.7) lines(c(0:13),dpois(c(0:13),lambda=mean(airaccid$Accident)),lty=2, pch="o",type="b") # Figure 7.8 # Wand's KernSmooth library is used here. # This figure was constructed using personally-written Fortran # code based on a grid search minimization. The nlminb() # function is used below. library(KernSmooth) dsq<-function(parj,llj){ sum((dpois(llj[,1],lambda=parj)^(.5)-llj[,2]^(.5))^2)} accasia<-hist(airaccid$Accident,breaks=c(0:14)-.5,plot=F)$counts/17 para<-mean(airaccid$Accident) llj<-cbind(c(0:13),accasia) para<-nlminb(start=para,obj=dsq,llj=llj)$parameters par(mfrow=c(2,1),mex=.5) plot(llj[,1],llj[,2],type="b",pch="x",xlab="",ylab="",ylim=c(0,.62), cex=.7,main="Using unsmoothed counts") mtext("Number of accidents",side=1,line=3,cex=.7) mtext("Probability",side=2,line=3,cex=.7) lines(c(0:13),dpois(c(0:13),lambda=para),lty=2,pch="o",type="b") llj<-locpoly(c(0:13),accasia,degree=1,band=.91,gridsize=14) llj$y[llj$y<0]<-0 llj$y<-llj$y/sum(llj$y) llj<-cbind(llj$x,llj$y) para<-mean(airaccid$Accident) para<-nlminb(start=para,obj=dsq,llj=llj)$parameters plot(c(0:13),llj[,2],type="b",pch="*",xlab="",ylab="",main= "Using smoothed counts") mtext("Number of accidents",side=1,line=3,cex=.7) mtext("Probability",side=2,line=3,cex=.7) lines(c(0:13),dpois(c(0:13),lambda=para),lty=2,pch="o",type="b")