# Name of the data directory; presumably what callers pass as the
# `directory` argument of the download/import helpers -- confirm at call site
data_dir <- "data"

# Download the AMECO zip archive and unzip it into `directory`.
#
# The archive itself is saved in the current working directory under its
# original file name (ameco0.zip); only the extracted files go to `directory`.
#
# directory: path of the directory the archive is extracted into
# Returns whatever unzip() returns for the extraction.
# Stops with an error if download.file() reports a non-zero status.
download_ameco_data <- function(directory) {
	zip_url <- "http://ec.europa.eu/economy_finance/db_indicators/ameco/documents/ameco0.zip"
	zip_file <- basename(zip_url)
	# mode = "wb": a zip archive is binary; a text-mode download can corrupt
	# it on Windows
	status <- download.file(zip_url, destfile = zip_file, method = "auto", mode = "wb")
	if (status != 0) {
		stop("Download of ", zip_url, " failed with status ", status, call. = FALSE)
	}
	unzip(zip_file, exdir = directory)
}

# Read every AMECO*.TXT file found in `directory` into one big data frame.
#
# Each file is parsed with read.csv2() and the per-file data frames are
# stacked with rbind(), so all files must share the same columns.
#
# directory: path of the directory containing the AMECO*.TXT files
# Returns the combined data frame (visibly), with the "X" prefix stripped
# from the column names and the last column removed.
# Stops with an error if no matching file is found.
import_ameco_data <- function(directory) {
	files <- Sys.glob(file.path(directory, "AMECO*.TXT"))
	if (length(files) == 0) {
		stop("No AMECO*.TXT files found in '", directory, "'", call. = FALSE)
	}
	data <- do.call(rbind, lapply(files, read.csv2))
	# read.csv2 makes header names syntactically valid, which puts a leading
	# "X" on the year columns; strip it again
	colnames(data) <- sub("^X", "", colnames(data))
	# get rid of the trailing garbage column (presumably produced by a
	# trailing separator at the end of each line)
	data[, -ncol(data)]
}

# Builds a lookup table of country codes and country names.
#
# The country code is the part of the CODE column before the first ".";
# the name comes from the COUNTRY column. Duplicate rows are removed.
#
# dataset: data frame with CODE and COUNTRY columns
# Returns a two-column matrix without column names. Its cells are list
# elements, because the codes are collected with lapply().
country_index <- function(dataset) {
	code_parts <- strsplit(as.character(dataset$CODE), ".", fixed = TRUE)
	country_codes <- lapply(code_parts, `[`, 1)
	country_names <- as.character(dataset$COUNTRY)
	# deparse.level = 0 keeps the result free of column names
	pairs <- cbind(country_codes, country_names, deparse.level = 0)
	unique(pairs)
}

# Builds a lookup table of variable codes with their sub-chapter and title.
#
# The variable code is the part of the CODE column after the last ".";
# the other two columns come from SUB.CHAPTER and TITLE. Duplicate rows are
# removed.
#
# dataset: data frame with CODE, SUB.CHAPTER and TITLE columns
# Returns a three-column matrix without column names. Its cells are list
# elements, because the codes are collected with lapply().
variable_index <- function(dataset) {
	code_parts <- strsplit(as.character(dataset$CODE), ".", fixed = TRUE)
	variable_codes <- lapply(code_parts, function(parts) parts[length(parts)])
	sub_chapters <- as.character(dataset$SUB.CHAPTER)
	titles <- as.character(dataset$TITLE)
	# deparse.level = 0 keeps the result free of column names
	entries <- cbind(variable_codes, sub_chapters, titles, deparse.level = 0)
	unique(entries)
}

# Saves an index (as built by country_index or variable_index) to a
# tab-separated text file so it can be inspected by eye.
#
# idx:      the index table to write
# filename: path of the output file
# No row names and no header line are written.
dump_index <- function(idx, filename) {
	write.table(x = idx, file = filename, sep = "\t", row.names = FALSE, col.names = FALSE)
}

# Takes a vector of country codes (see country_index above) and returns a
# subset of the given data frame containing only those countries.
#
# A row is kept when its CODE starts with one of the codes followed by ".".
# The codes are pasted into a regular expression as-is, so they must not
# contain regex metacharacters.
#
# dataset: data frame with a CODE column
# codes:   vector of country codes; an empty vector selects no rows
# Returns the matching rows with all columns.
subset_by_country <- function(dataset, codes) {
	if (length(codes) == 0) {
		# An empty pattern would match every row, so short-circuit
		return(dataset[integer(0), TRUE])
	}
	pattern <- paste0("^", codes, "[.]", collapse = "|")
	dataset[grep(pattern, dataset$CODE), TRUE]
}

# Takes a vector of variable codes (see variable_index) and returns a
# subset of the given data frame containing only those variables.
#
# A row is kept when its CODE ends with five dot-terminated components
# followed by one of the codes. The codes are pasted into a regular
# expression as-is, so they must not contain regex metacharacters.
#
# dataset: data frame with a CODE column
# codes:   vector of variable codes; an empty vector selects no rows
# Returns the matching rows with all columns.
subset_by_variable <- function(dataset, codes) {
	if (length(codes) == 0) {
		# An empty pattern would match every row, so short-circuit
		return(dataset[integer(0), TRUE])
	}
	pattern <- paste0("([^.]*[.]){5}", codes, "$", collapse = "|")
	dataset[grep(pattern, dataset$CODE), TRUE]
}

# Converts an AMECO data frame (or a subset of one) into a multivariate
# time-series object with one series per row of the data frame.
#
# Columns 6 onward are taken as the observations; their column names are
# read as the years, the first giving the start and the last the end of an
# annual (frequency 1) series. Each series is named after the row's CODE.
#
# dataset: AMECO data frame whose observations start in column 6
# Returns the ts object.
ameco_to_ts <- function(dataset) {
	last_col <- ncol(dataset)
	year_labels <- colnames(dataset)
	# ts wants one series per column, hence the transpose
	observations <- t(dataset[, 6:last_col])
	series <- ts(
		observations,
		start = as.numeric(year_labels[6]),
		end = as.numeric(year_labels[last_col]),
		frequency = 1
	)
	colnames(series) <- as.character(dataset$CODE)
	series
}

# Calculate an index of a time series relative to a given base year:
# every series is divided by its own observation in the base year.
#
# The position of the base year is computed as a whole-year offset from the
# start of the series, so this assumes annual data.
#
# ts_data: multivariate time series (one series per column)
# base:    base year; must lie within the range covered by ts_data
# Returns a new ts object with the same start, end and frequency.
# Stops with an error if the base year is outside the series; warns (once
# per affected series) if the base-year observation is NA, because that
# series then becomes all NA.
index_ts <- function(ts_data, base) {
	# Row holding the base-year observation; the same for every column
	base_row <- base - start(ts_data)[1] + 1
	if (length(base_row) != 1 || is.na(base_row) ||
		base_row < 1 || base_row > NROW(ts_data)) {
		stop("Base year ", base, " is outside the range of the time series", call. = FALSE)
	}
	indexed <- apply(
		ts_data,
		2, # iterate over columns (1 for rows)
		function(col) {
			if (is.na(col[base_row])) {
				# division by NA is NA
				warning("Base year observation missing (NA); data will be clobbered")
			}
			as.numeric(col) / as.numeric(col[base_row])
		}
	)
	# apply() returns a plain matrix, so the time-series metadata has to be
	# copied over manually
	ts(
		indexed,
		start = start(ts_data)[1],
		end = end(ts_data)[1],
		frequency = frequency(ts_data)
	)
}

# Create a plot of a single variable across multiple countries.
#
# dataset: AMECO data frame
# v_code:  variable code to plot (also used as the y-axis label)
# c_codes: vector of country codes to include
# Draws all selected series in one plot and adds a legend.
# Stops with an error if no series matches the variable and countries.
plot_variable <- function(dataset, v_code, c_codes) {
	selection <- subset_by_country(
		subset_by_variable(dataset, v_code),
		c_codes
	)
	if (nrow(selection) == 0) {
		stop("No series found for the requested variable and countries", call. = FALSE)
	}
	series <- ameco_to_ts(selection)

	# Label each line from the series it actually shows: the series follow
	# the row order of the dataset, not the order of c_codes, and some
	# requested countries may be missing. The country code is the part of
	# the series name (CODE) before the first ".".
	series_labels <- sub("[.].*$", "", colnames(series))

	# Choose colors for the lines at random, one per series actually drawn.
	# This is a simple way to generate an arbitrary number of colors
	# procedurally; feel free to replace it with a nicer palette.
	line_colors <- sample(colors(), ncol(series))

	plot(
		series,
		plot.type = "single",
		col = line_colors,
		ylab = v_code
	)
	legend("topright", fill = line_colors, legend = series_labels)
}