# Default directory name for the data files
data_dir <- "data"

# Download the PWT 7.1 zip archive and unzip it.
#
# The archive is saved in the current working directory under the file name
# taken from the URL, and its contents are then extracted into `directory`.
#
# Arguments:
#   directory  path handed to unzip() as the extraction directory
#
# Returns the value of unzip() (the extracted file paths, invisibly).
# Stops with an error if download.file() reports a non-zero status.
download_pwt_data <- function(directory) {
	zip_url <- "http://pwt.econ.upenn.edu/Downloads/pwt71/pwt71_07262012version.zip"
	zip_file <- basename(zip_url)
	# mode="wb": the archive is binary, and a text-mode transfer can corrupt it
	# on Windows
	status <- download.file(zip_url, zip_file, method="auto", mode="wb")
	if (status != 0) {
		stop("download of ", zip_url, " failed with status ", status, call.=FALSE)
	}
	unzip(zip_file, exdir=directory)
}

# Load the PWT table found in `directory` as a data frame.
# The file read is "pwt71_w_country_names.csv" inside that directory.
import_pwt_data <- function(directory) {
	csv_name <- "pwt71_w_country_names.csv"
	csv_path <- paste0(directory, "/", csv_name)
	read.csv(csv_path)
}

# Build a lookup table of countries and their iso codes.
# Returns a character matrix with one row per distinct (country, isocode)
# pair found in the data frame: country names in column 1, iso codes in
# column 2, and no column names.
country_index <- function(dataset) {
	country_names <- as.character(dataset$country)
	iso_codes <- as.character(dataset$isocode)
	# deparse.level=0 keeps the matrix free of column names
	pairs <- cbind(country_names, iso_codes, deparse.level=0)
	unique(pairs)
}

# From here you can just use 'subset' to get specific countries and variables.
# This code gives the same subset used in Espen's code (pwt_ky_ratios_db.R)
#subset(pwt_dataframe,
#	isocode %in% c("CHN", "JPN", "IND", "USA"), # countries (rows)
#	select=c("country", "isocode", "year", "rdpl", "ki") # variables (columns)
#)

# Compute the capital series for a single country.
# Algorithm adapted from STATA code by Gian Luca Clementi
#
# Arguments:
#   dataset       data frame with columns isocode, year, rgdpch, POP and ki
#   code          a single country code, compared against dataset$isocode
#   depreciation  depreciation rate used in the recurrence (default 0.06)
#
# Returns a numeric matrix with columns "capital" and "year", one row per
# retained observation (rows with missing rgdpch, POP or ki are dropped).
# A country with no usable observations gives a 0-row matrix. With fewer
# than 10 usable observations the growth rates, and therefore the whole
# capital column, are NA.
#
# NOTE(review): rows are used in the order they appear in `dataset`; this
# presumably expects them sorted by year within a country -- confirm.
capital <- function(dataset, code, depreciation=0.06) {
	# rows of the requested country with complete data; which() discards NA
	# comparisons. Plain indexing is used instead of subset() so that a column
	# named `code` in the data frame cannot shadow the argument.
	keep <- which(
		dataset$isocode == code &
		!is.na(dataset$rgdpch) & !is.na(dataset$POP) & !is.na(dataset$ki)
	)
	cdata <- dataset[keep, , drop=FALSE]
	n_obs <- nrow(cdata)

	# nothing to compute: return an empty table of the usual shape
	if (n_obs == 0) {
		return(matrix(
			numeric(0), nrow=0, ncol=2,
			dimnames=list(NULL, c("capital", "year"))
		))
	}

	# compute investment
	investment <- cdata$rgdpch * cdata$ki / 100

	# compute growth rates from the 1st and 10th retained observations
	# (index 10 is NA when fewer than 10 rows remain, which makes k0 NA)
	growth_y <- (log(cdata$rgdpch[10]) - log(cdata$rgdpch[1])) / 10
	growth_pop <- (log(cdata$POP[10]) - log(cdata$POP[1])) / 10

	# compute capital series according to the recurrence relation
	#   k[1] = investment[1] / (exp(growth_y + growth_pop) - 1 + depreciation)
	#   k[t] = (1 - depreciation) * k[t-1] + investment[t-1]
	k <- numeric(n_obs)
	k[1] <- investment[1] / (exp(growth_y + growth_pop) - 1 + depreciation)
	# seq_len(n)[-1] is empty for a single observation, unlike 2:n
	for (t in seq_len(n_obs)[-1]) {
		k[t] <- (1 - depreciation) * k[t - 1] + investment[t - 1]
	}

	# return table with data and year, for merging back into full dataset
	cbind(capital=k, year=cdata$year)
}

# Adds a new column `capital` to the pwt dataframe, holding the capital
# numbers computed by capital() for each country.
#
# Arguments:
#   dataset  data frame with at least the columns isocode and year, plus
#            whatever capital() reads from it
#
# Returns `dataset` with a numeric `capital` column. A row gets NA when its
# isocode or year is NA, or when the country's capital series has no entry
# for that year. If a series lists a year more than once, the first entry
# is used.
add_capital_column <- function(dataset) {
	row_codes <- as.character(dataset$isocode)
	# Start with NAs; rows without a matching observation stay NA
	values <- rep(NA_real_, nrow(dataset))

	# Fill in the values one country at a time
	for (code in unique(row_codes[!is.na(row_codes)])) {
		series <- capital(dataset, code)
		if (nrow(series) == 0) {
			next
		}
		rows <- which(row_codes == code)
		# Position of each row's year within this country's series;
		# incomparables=NA keeps an NA year from matching anything
		hit <- match(dataset$year[rows], series[, "year"], incomparables=NA)
		# An NA position yields NA, i.e. no capital number for that row
		values[rows] <- series[hit, "capital"]
	}

	dataset$capital <- values
	dataset
}