# Name of the data directory.
# NOTE(review): not referenced by the functions visible here; presumably it is
# what callers pass as `directory` below -- confirm.
data_dir <- "data"

# Download data and unzip
#
# Downloads the PWT 7.1 zip archive into the current working directory (under
# the archive's own file name) and extracts it into `directory`.
#
# Args:
#   directory: directory the archive is extracted into.
# Returns:
#   The value of unzip(), i.e. the paths of the extracted files.
download_pwt_data <- function(directory) {
  zip_url <- "http://pwt.econ.upenn.edu/Downloads/pwt71/pwt71_07262012version.zip"
  zip_file <- basename(zip_url)
  # A zip archive must be transferred in binary mode; state it explicitly
  # instead of relying on download.file()'s extension-based default.
  download.file(zip_url, zip_file, method = "auto", mode = "wb")
  unzip(zip_file, exdir = directory)
}

# Read the csv into a data frame
#
# Args:
#   directory: directory containing "pwt71_w_country_names.csv".
# Returns:
#   The data frame produced by read.csv().
import_pwt_data <- function(directory) {
  read.csv(file.path(directory, "pwt71_w_country_names.csv"))
}

# Create a table of countries and their iso codes from the `country` and
# `isocode` columns of the data frame
#
# Args:
#   dataset: data frame with `country` and `isocode` columns.
# Returns:
#   A two-column character matrix with one row per distinct
#   (country, isocode) pair; column 1 is the country, column 2 the iso code.
country_index <- function(dataset) {
  unique(cbind(as.character(dataset$country), as.character(dataset$isocode)))
}

# From here you can just use 'subset' to get specific countries and variables.
# This code gives the same subset used in Espen's code (pwt_ky_ratios_db.R)
#subset(pwt_dataframe,
#  isocode %in% c("CHN", "JPN", "IND", "USA"),         # countries (rows)
#  select=c("country", "isocode", "year", "rdpl", "ki") # variables (columns)
#)

# Compute the capital series for one country
# Algorithm adapted from STATA code by Gian Luca Clementi
#
# Args:
#   dataset:      data frame with columns isocode, year, rgdpch, POP and ki.
#   code:         iso code of the country to compute capital for.
#   depreciation: depreciation rate used in the recurrence (default 0.06).
# Returns:
#   A numeric matrix with columns "capital" and "year", one row per year for
#   which rgdpch, POP and ki are all present. A country with no such year
#   yields a matrix with zero rows. If fewer than 10 such years exist the
#   growth rates cannot be computed and every capital value is NA.
capital <- function(dataset, code, depreciation = 0.06) {
  # Subset data by country code. Plain indexing (rather than subset()) keeps
  # `code` from being shadowed by a dataset column of the same name, and
  # which() drops rows whose isocode is NA.
  cdata <- dataset[which(as.character(dataset$isocode) == code), , drop = FALSE]

  # Remove years with missing data
  usable <- !is.na(cdata$rgdpch) & !is.na(cdata$POP) & !is.na(cdata$ki)
  cdata <- cdata[usable, , drop = FALSE]

  # The recurrence below walks the rows in order, so make sure they are
  # chronological.
  cdata <- cdata[order(cdata$year), , drop = FALSE]
  n_obs <- nrow(cdata)

  # Compute investment
  investment <- cdata$rgdpch * cdata$ki / 100

  # Compute growth rates from the 1st and 10th observations; indexing past
  # the end of a shorter series gives NA, which propagates to the result.
  # NOTE(review): observations 1 and 10 are nine rows apart but the
  # difference is divided by 10, as in the original implementation --
  # confirm against the STATA source.
  growth_y <- (log(cdata$rgdpch[10]) - log(cdata$rgdpch[1])) / 10
  growth_pop <- (log(cdata$POP[10]) - log(cdata$POP[1])) / 10

  # Compute initial capital (k0)
  init_capital <- investment[1] /
    (exp(growth_y + growth_pop) - 1 + depreciation)

  # Compute the capital series according to the recurrence relation
  #   k[t] = (1 - depreciation) * k[t - 1] + investment[t - 1]
  # into a preallocated vector. seq_len(n_obs)[-1] is empty for 0 or 1
  # observations, unlike 2:n_obs which would count downwards.
  k <- numeric(n_obs)
  if (n_obs > 0) {
    k[1] <- init_capital
  }
  for (t in seq_len(n_obs)[-1]) {
    k[t] <- (1 - depreciation) * k[t - 1] + investment[t - 1]
  }

  # Return table with data and year, for merging back into the full dataset.
  # matrix() keeps both columns even when there are no rows.
  matrix(
    c(k, cdata$year),
    ncol = 2,
    dimnames = list(NULL, c("capital", "year"))
  )
}

# Adds a new column to the pwt dataframe
# containing capital numbers as computed by the above function.
#
# Args:
#   dataset: data frame with `isocode` and `year` columns, plus whatever
#            columns capital() needs.
# Returns:
#   `dataset` with an extra numeric column `capital`. Rows for which
#   capital() reports no value for that country and year are NA.
add_capital_column <- function(dataset) {
  iso_codes <- as.character(dataset$isocode)

  # Initialize the column with NAs; rep() also works for a zero-row dataset,
  # where assigning a single NA would fail.
  capital_values <- rep(NA_real_, nrow(dataset))

  # Calculate capital for each country and fill in that country's rows
  for (code in unique(iso_codes[!is.na(iso_codes)])) {
    series <- capital(dataset, code)
    rows <- which(iso_codes == code)

    # For each row, find the series row with the matching year...
    hit <- match(dataset$year[rows], series[, "year"])
    found <- !is.na(hit)

    # ...and take the observation from the capital column. Rows with no
    # matching year keep their NA.
    capital_values[rows[found]] <- series[hit[found], "capital"]
  }

  dataset$capital <- capital_values
  dataset
}