data_dir <- "data"

# Download the AMECO zip archive and unzip it into `directory`.
#
# The archive is saved in the current working directory under its original
# file name before being extracted.
#
# directory: path the archive contents are extracted to.
# Returns (invisibly) whatever unzip() returns for the extraction.
download_ameco_data <- function(directory) {
  zip_url <- "http://ec.europa.eu/economy_finance/db_indicators/ameco/documents/ameco0.zip"
  zip_file <- basename(zip_url)
  # The archive is binary, so request a binary transfer explicitly instead of
  # relying on platform-specific defaults.
  status <- download.file(
    zip_url,
    destfile = zip_file,
    method = "auto",
    mode = "wb"
  )
  if (status != 0) {
    stop("Download of ", zip_url, " failed (status ", status, ")",
         call. = FALSE)
  }
  unzip(zip_file, exdir = directory)
}

# Read every AMECO*.TXT file found in `directory` into one big data frame.
#
# directory: path containing the unzipped AMECO text files.
# Returns the row-bound contents of all files, with the leading "X" removed
# from the column names and the trailing garbage column dropped.
# Stops with an error if no AMECO*.TXT file is found.
import_ameco_data <- function(directory) {
  files <- Sys.glob(file.path(directory, "AMECO*.TXT"))
  if (length(files) == 0) {
    stop("No AMECO*.TXT files found in ", directory, call. = FALSE)
  }
  data <- do.call(rbind, lapply(files, read.csv2))
  # remove leading "X" on year columns
  colnames(data) <- sub("^X", "", colnames(data))
  # get rid of trailing garbage column
  data[, -ncol(data), drop = FALSE]
}

# Creates a table of code-name pairs from the CODE and COUNTRY columns of the
# dataset. The country code is the part of CODE before the first ".".
#
# Returns a character matrix with one row per distinct (code, name) pair.
country_index <- function(dataset) {
  country_codes <- sub("[.].*$", "", as.character(dataset$CODE))
  unique(
    cbind(country_codes, as.character(dataset$COUNTRY), deparse.level = 0)
  )
}

# Same as above, but for the variable codes: the part of CODE after the last
# ".", paired with the SUB.CHAPTER and TITLE columns.
#
# Returns a character matrix with one row per distinct combination.
variable_index <- function(dataset) {
  variable_codes <- sub("^.*[.]", "", as.character(dataset$CODE))
  unique(
    cbind(
      variable_codes,
      as.character(dataset$SUB.CHAPTER),
      as.character(dataset$TITLE),
      deparse.level = 0
    )
  )
}

# Writes an index created by one of the above functions to a tab-separated
# text file, for easy visual inspection.
dump_index <- function(idx, filename) {
  # Tab-separated, with neither row names nor a header line.
  write.table(
    idx,
    filename,
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE
  )
}

# Takes a vector of country codes (see country_index above) and
# returns a subset of the given data frame containing only those countries.
# A row matches when its CODE starts with one of the codes followed by ".".
# An empty `codes` selects no rows.
subset_by_country <- function(dataset, codes) {
  if (length(codes) == 0) {
    # An empty alternation would be the pattern "", which matches every row.
    return(dataset[0, , drop = FALSE])
  }
  pattern <- paste0("^", codes, "[.]", collapse = "|")
  dataset[grepl(pattern, dataset$CODE), , drop = FALSE]
}

# Takes a vector of variable codes (see variable_index above) and
# returns a subset of the given data frame containing only those variables.
# A row matches when its CODE ends with five dot-terminated fields followed
# by one of the codes. An empty `codes` selects no rows.
subset_by_variable <- function(dataset, codes) {
  if (length(codes) == 0) {
    # An empty alternation would be the pattern "", which matches every row.
    return(dataset[0, , drop = FALSE])
  }
  pattern <- paste0("([^.]*[.]){5}", codes, "$", collapse = "|")
  dataset[grepl(pattern, dataset$CODE), , drop = FALSE]
}

# Create a multivariate timeseries object from an AMECO data frame (or subset).
# Uses the "CODE" field from the database for the column names.
#
# dataset:        data frame with a CODE column; the columns from
#                 `first_year_col` to the last one hold the observations and
#                 are named by year.
# first_year_col: index of the first year column (default 6).
# Returns a ts object with one column per row of `dataset`.
ameco_to_ts <- function(dataset, first_year_col = 6L) {
  stopifnot(ncol(dataset) >= first_year_col)
  year_cols <- seq(first_year_col, ncol(dataset))
  years <- as.numeric(colnames(dataset)[year_cols])
  ts_data <- ts(
    # ts expects data in columns; drop = FALSE keeps a single year column
    # from collapsing to a vector before the transpose
    t(dataset[, year_cols, drop = FALSE]),
    start = years[1],
    end = years[length(years)],
    frequency = 1
  )
  colnames(ts_data) <- as.character(dataset$CODE)
  ts_data
}

# Calculate an index of a time series relative to a given base year.
#
# ts_data: a ts object (one series, or a matrix with one series per column).
# base:    the base year; each series is divided by its own observation in
#          that year.
# Returns a ts with the same start, end and frequency as `ts_data`.
# If the base-year observation is missing (NA, or `base` lies outside the
# span of the series) a warning is raised and the affected series become NA.
index_ts <- function(ts_data, base) {
  univariate <- is.null(dim(ts_data))
  # Work on a plain numeric matrix so one series and many series are handled
  # the same way; the time-series metadata is re-attached at the end.
  values <- matrix(
    as.numeric(ts_data),
    nrow = NROW(ts_data),
    ncol = NCOL(ts_data),
    dimnames = list(NULL, colnames(ts_data))
  )
  base_row <- base - start(ts_data)[1] + 1
  if (base_row >= 1 && base_row <= nrow(values)) {
    base_values <- values[base_row, ]
  } else {
    # A zero or negative row index would silently select the wrong rows.
    base_values <- rep(NA_real_, ncol(values))
  }
  if (anyNA(base_values)) {
    # division by NA is NA
    warning("Base year observation missing (NA); data will be clobbered")
  }
  indexed <- sweep(values, 2, base_values, "/")
  if (univariate) {
    indexed <- indexed[, 1]
  }
  ts(
    indexed,
    start = start(ts_data)[1],
    end = end(ts_data)[1],
    frequency = frequency(ts_data)
  )
}

# Create a plot of a single variable across multiple countries.
#
# dataset: AMECO data frame.
# v_code:  variable code to plot (also used as the y-axis label).
# c_codes: vector of country codes to include.
#
# The legend is built from the rows that were actually selected, in the order
# they are plotted, so every label lines up with its line colour even when
# `c_codes` is ordered differently from the dataset or names a country that
# has no data.
plot_variable <- function(dataset, v_code, c_codes) {
  selection <- subset_by_country(
    subset_by_variable(dataset, v_code),
    c_codes
  )
  if (nrow(selection) == 0) {
    stop("No rows match the requested variable and country codes",
         call. = FALSE)
  }
  series_codes <- as.character(selection$CODE)
  # Label each series by its country code; fall back to the full code when
  # that is ambiguous (e.g. several variable codes were requested).
  series_labels <- sub("[.].*$", "", series_codes)
  if (anyDuplicated(series_labels) > 0) {
    series_labels <- series_codes
  }
  # One distinct colour per plotted series.
  line_colors <- rainbow(nrow(selection))
  plot(
    ameco_to_ts(selection),
    plot.type = "single",
    col = line_colors,
    ylab = v_code
  )
  legend("topright", fill = line_colors, legend = series_labels)
}