# ----------------------------------------------------------------------------
#  pwt70_test.R  
#  Grabs Penn World Tables 7.0 
#  Data locations:  
#    http://pwt.econ.upenn.edu/php_site/pwt_index.php
#    http://pwt.econ.upenn.edu/Downloads/pwt70/pwt70_05102011version.zip
#  Data is large flat file, each row a (country, date) pair, cols are vars 
#  Variables include   
#    1=country, 2=isocode, 3=year, 4=POP,  ..
#   24=rgdpl (Laspeyres real gdp per capita), 
#   26=rgdpch (chained real gdp per capita),
#   29=ki (real investment share, based on rgdpl), 
#   31=rgdpwok (chained real gdp per worker),   
#  On reading spreadsheets (read.xls):  
#    http://cran.r-project.org/doc/manuals/R-data.html#Reading-Excel-spreadsheets
#  Comment:  we like the automation of reading the xls straight from the
#  zipped source.  But if you save as csv, input is much faster.  
#  Written by Ram Yamarthy and Espen Henriksen, September 2010 
#  Adapted by Dave Backus 
# ----------------------------------------------------------------------------
# 0. Preliminaries 

# clear workspace ("remove" everything in list generated by ls command) 
rm(list=ls()) 
# set working directory for output 
# NB: check your own directory, and make sure to use / not \ 
#setwd("c:/Documents and Settings/dbackus/My Documents/Userdata/Papers/BCH/data/PWT")
setwd("c:/Users/dbackus/My Documents/Papers/BCH/data/PWT")

# packages needed to read xls files 
# (if not installed, do so, also install perl at perl.org) 

library(gtools)
library(gdata)

# 1. Download data
# we do this in two steps:  download zip, then read xls inside it

noquote("Data download...")
# step 1:  download.file command grabs data and saves as local file
# we add date to name just in case source changes or disappears 
zipname = paste("PWT_",Sys.Date(),".zip",sep="")  # creates filename string with date

# download.file [from package utils] gets data from internet, saves as destfile 
# Espen likes to add quiet=TRUE  
download.file(url="http://pwt.econ.upenn.edu/Downloads/pwt70/pwt70_05102011version.zip", destfile=zipname, method="auto", quiet=FALSE) 

# read.xls inputs flat file of data, countries stacked in alpha order 
pwt <- read.xls(unzip(zipfile=zipname, files="pwt70_w_country_names.xls", list=FALSE), sheet=1, skip=0, header=TRUE)  

# check contents 
names(pwt)
head(pwt) 
tail(pwt) 

# save for faster access (ie, start with load in future)  
save(pwt,file="PWT.RData")

rm(list=ls())
load("PWT.RData")

# 2. Data manipulation 
# get columns we want and put into variables 
# rgdp is real gdp  
# ki is investment share of real gdp 

# select countries 
noquote("Collect countries we want...")
countries <- c("AUS","CAN","CHN","CH2","GER","FRA","ITA","JPN","NOR","GBR","USA")

subset <- {data.frame(country <- pwt$country,
                      isocode <- pwt$isocode,
                      year    <- pwt$year,
                      rgdp    <- pwt$rgdpl,
                      ki      <- pwt$ki)}

pwt.inv <- NULL
pwt.gdp <- NULL
for(i in 1:length(countries)){
  pwt.inv <- cbind(pwt.inv,subset[subset$isocode == countries[i],]$rgdp*subset[subset$isocode == countries[i],]$ki/100)
  pwt.gdp <- cbind(pwt.gdp,subset[subset$isocode == countries[i],]$rgdp)
}

noquote("Compute capital stocks...")
# depreciation rate 
delta <- 0.06

dimnames(pwt.inv)[[2]] <- countries
pwt.inv <- ts(pwt.inv,start=1950,frequency=1)

first <- mat.or.vec(length(countries),1)
for(i in 1:length(countries)){
  j <- 1
  while(is.na(pwt.inv[j,i])){
    j <- j + 1
  }
  first[i] <- j
}
rm(i,j)

g <- mat.or.vec(length(countries),1)
for(i in 1:length(countries)){
  g[i] <- (pwt.inv[22,i]/pwt.inv[first[i],i])^(1/(22-first[i]))
}

K  <- mat.or.vec(dim(pwt.inv)[1]+1,dim(pwt.inv)[2])*NaN
KY <- mat.or.vec(dim(pwt.inv)[1],dim(pwt.inv)[2])*NaN
for(i in 1:dim(pwt.inv)[2]){
  for(j in first[i]:dim(pwt.inv)[1]){
    if(j == first[i]){
      K[j,i] <- pwt.inv[j,i]/((g[i]-1)+delta)
    }
    K[j+1,i] = (1-delta)*K[j,i] + pwt.inv[j,i]
    if(j <= dim(pwt.inv)[1]){
      KY[j,i] <- K[j,i]/pwt.gdp[j,i]
    }
  }
}

KY <- ts(KY,start=1950,frequency=1)

# 3. Plots 

noquote("Plot capital-output ratios...")
ts.plot(KY,gpars=list(xlab="Year",ylab="Capital-to-output ratio",ylim=c(0,6),col=c(1:ncol(KY)),lwd = 1.75,axes=F))
axis(1, labels=dimnames(KY)[[1]], las=1)
axis(2)
legend("topleft",legend = countries,cex=.8,ncol=5,lwd=2.0,col=c(1:ncol(KY)))
mtext("Data source:Penn World Tables",side=4,line=-.2,cex=.4,adj=0)
dev.print(device=pdf, file="pwtKYratios.pdf")