# ------------------------------------------------------------------------------
#  UN_LE_test.R  
#  Grabs UN's life expectancy data 
#  See:  http://esa.un.org/unpd/wpp2008/
#  Data is large flat file, each row a country or region 
#  Written by Espen Henriksen, September 2010 
#  Adapted by Dave Backus 
# ------------------------------------------------------------------------------
# 0. Preliminaries 

# clear workspace ("remove" everything in list generated by ls command) 
rm(list=ls()) 

# set working directory for output 
# NB: check your own directory, and make sure to use / not \ 
setwd("c:/Documents and Settings/dbackus/My Documents/Userdata/Papers/BCH/data/UNPop")

# load libraries of apps (must be installed -- and the second needs perl, too)
library(gtools)
library(gdata)

# 1.  Download data 
lifeexp.historic   <- data.frame(read.xls("http://esa.un.org/unpd/wpp2008/xls_2008/DB01_Period_Indicators/WPP2008_DB1_F05_1_LIFE_EXPECTANCY_0_BOTH_SEXES.XLS",sheet=1,skip=10))
lifeexp.projection <- data.frame(read.xls("http://esa.un.org/unpd/wpp2008/xls_2008/DB01_Period_Indicators/WPP2008_DB1_F05_1_LIFE_EXPECTANCY_0_BOTH_SEXES.XLS",sheet=2,skip=10))

head(lifeexp.historic)
head(lifeexp.projection)


# DOWNLOAD AND INITIALIZE COUNTRY CODES, COUNTRY NAMES, AND PERIOD NAMES
country.codes <- read.csv("countrycodes.csv",header=F)
dimnames(country.codes)[[2]] <- c("name","code")

period.names <- {c("1950-1955","1955-1960","1960-1965","1965-1970",
                   "1970-1975","1975-1980","1980-1985","1985-1990",
                   "1990-1995","1995-2000","2000-2005","2005-2010",
                   "2010-2015","2015-2020","2020-2025","2025-2030",
                   "2030-2035","2035-2040","2040-2045","2045-2050")}


# FORMAT AND MERGE DOWNLOADED DATA
hist <- (lifeexp.historic[lifeexp.historic$Country.code ==  country.codes[1,2],][,3:17])
for(i in 1 : (nrow(country.codes)-1)){
  hist <- rbind(hist,lifeexp.historic[lifeexp.historic$Country.code ==  country.codes[i+1,2],][,3:17])
}
rm(i)
hist[,3] <- NULL
hist[,2] <- NULL
hist[,1] <- NULL

proj <- (lifeexp.projection[lifeexp.projection$Country.code == country.codes[1,2],][,3:13])
for(i in 1 : (nrow(country.codes)-1)){
  proj <- rbind(proj,lifeexp.projection[lifeexp.projection$Country.code ==  country.codes[i+1,2],][,3:13])
}
rm(i)
proj[,3] <- NULL
proj[,2] <- NULL
proj[,1] <- NULL

dataset <- cbind(hist,proj)
rm(lifeexp.historic,lifeexp.projection,hist,proj)

dimnames(dataset)[[2]] <- period.names
dimnames(dataset)[[1]] <- country.codes[,1]

# PLOT DENSITITES OF LIFE EXPECTANCIES

plot(density(dataset[,1]),col=c(1),lwd = 2,ylim = c(0,0.055),xlim=c(0,100),xlab="",ylab="Density of life expectancies across all countries",main="",axes=F)
for(i in 1 : 6){
  lines(density(dataset[,(2*i + 1)]),lwd = 2,col=c(i+1))
}
axis(1)
axis(2)

legend("topleft",legend = c("1950-1955","1960-1965","1970-1975","1980-1985","1990-1995","2000-2005","2010-2015"),cex=.9,lwd=2.0,col=c(1:7))
mtext("Data source: UN World Population Prospects, 2008 rev.",side=4,line=-.2,cex=.6,adj=0)
mtext("Backus, Cooley, and Henriksen (2010)",side=4,line=-.7,cex=.6,adj=0)
dev.print(device=pdf, file="LifeExpDensity.pdf")