# Default directory in which the Barro-Lee CSV files are stored.
data_dir <- "data"

# Download the Barro-Lee (2010, v1.2) educational attainment CSV files
# (populations aged 25+ and 15+) into `directory`.
#
# Args:
#   directory: path of the folder that receives the files; it is created
#              if it does not exist yet.
#
# Returns:
#   NULL, invisibly. Called for its side effect of writing the files.
download_bl_data <- function(directory) {
  csv_urls <- c(
    "http://www.barrolee.com/data/BL_v1.2/BL(2010)_MF2599_v1.2.csv",
    "http://www.barrolee.com/data/BL_v1.2/BL(2010)_MF1599_v1.2.csv"
  )

  # download.file() fails when the destination folder is missing.
  if (!dir.exists(directory)) {
    dir.create(directory, recursive = TRUE)
  }

  for (url in csv_urls) {
    download.file(url, file.path(directory, basename(url)), method = "auto")
  }
  invisible(NULL)
}

# Read the Barro-Lee "MF1599" CSV file from `directory`.
#
# Args:
#   directory: folder containing "BL(2010)_MF1599_v1.2.csv".
#
# Returns:
#   The data frame produced by read.csv().
import_bl15_data <- function(directory) {
  read.csv(file.path(directory, "BL(2010)_MF1599_v1.2.csv"))
}

# Read the Barro-Lee "MF2599" CSV file from `directory`.
#
# Args:
#   directory: folder containing "BL(2010)_MF2599_v1.2.csv".
#
# Returns:
#   The data frame produced by read.csv().
import_bl25_data <- function(directory) {
  read.csv(file.path(directory, "BL(2010)_MF2599_v1.2.csv"))
}

# Fill in missing values by linear interpolation.
#
# NAs that lie between two observed values are replaced by the linear
# interpolation of those neighbours (by position in the vector). Leading and
# trailing NAs are left untouched, because there is nothing to interpolate
# from on one side.
#
# Args:
#   x: numeric vector, possibly containing NAs.
#
# Returns:
#   `x` with interior NAs filled in. If fewer than two values are observed,
#   a warning is raised and `x` is returned unchanged.
interp_nas <- function(x) {
  known <- which(!is.na(x))
  if (length(known) < 2) {
    warning("Not enough data to interpolate.")
    return(x)
  }

  # Only positions between the first and last observation are rewritten;
  # approx() returns the observed values exactly at the known positions.
  span <- known[1]:known[length(known)]
  x[span] <- approx(known, x[known], xout = span)$y
  x
}

# Add the specified column from `edudata` to `pwtdata` and fill in missing
# values by per-country linear interpolation.
#
# Args:
#   pwtdata: data frame with (at least) the columns "year" and "isocode".
#   edudata: data frame with (at least) the columns "year", "WBcode" and the
#            column named by `colname`.
#   colname: name of the column of `edudata` to merge in and interpolate.
#
# Returns:
#   `pwtdata` left-joined with the requested column (all rows of `pwtdata`
#   are kept), with interior gaps of that column interpolated within each
#   `isocode` group. Rows are grouped by country in the result.
add_education_column <- function(pwtdata, edudata, colname = "yr_sch") {
  # Keep only year, country code, and the column we want to merge. Plain `[`
  # indexing is used so `colname` is always treated as a string, never
  # resolved against the columns of `edudata`.
  edudata <- edudata[, c("year", colname, "WBcode"), drop = FALSE]

  # merge() sorts on the key columns by default, so within each country the
  # rows come out ordered by year, which the interpolation relies on.
  combined <- merge(
    pwtdata, edudata,
    by.x = c("year", "isocode"),
    by.y = c("year", "WBcode"),
    all.x = TRUE # include all years in the result
  )

  # Nothing to interpolate; also avoids do.call(rbind, list()) returning NULL.
  if (nrow(combined) == 0) {
    return(combined)
  }

  # Interpolate the merged column separately for every country.
  per_country <- lapply(
    split(combined, combined$isocode),
    function(cdata) {
      cdata[[colname]] <- interp_nas(cdata[[colname]])
      cdata
    }
  )
  do.call(rbind, per_country)
}