# URL of the zip archive that this script downloads and splits into parts.
# This can (in theory) be changed to point to other datasets without breaking anything
data.url = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors.zip"

# Some of this code is formatted using LISP-style indentation, with function arguments
# stacked vertically instead of listed horizontally. For example, in LISP style, this:
#   ifelse(x%%2 == 0, "even", "odd")
# would be rendered like this:
#   ifelse(
#     x%%2 == 0,
#     "even",
#     "odd"
#   )

# Build the local, datestamped filename under which a downloaded archive is saved.
#
# The last /-delimited component of the URL is taken as the filename, its final
# extension is dropped, and today's date plus ".zip" is appended, e.g.
#   "http://host/dir/data.zip"  ->  "data_2024-01-31.zip"
#
# Arguments:
#   url  A single URL (or path) string pointing at the archive.
# Returns:
#   A length-one character vector of the form "<name>_<YYYY-MM-DD>.zip".
get.zipname = function (url) {
	# Use the url argument rather than the global data.url, so the function
	# works for whatever URL the caller passes in
	filename = basename(url)
	# Strip only the final extension, so dotted names ("data.v2.zip") keep
	# everything before the last dot
	stem = sub("\\.[^.]*$", "", filename)
	paste0(stem, "_", Sys.Date(), ".zip")
}

zipname = get.zipname(data.url)

# Download the file and save it with a datestamp.
# mode = "wb" forces a binary transfer: a zip archive written in text mode is
# corrupted on Windows, and R only switches to binary by itself when the URL
# ends in a recognised archive extension.
# download.file() returns a non-zero code on failure, so stop here instead of
# failing later with a confusing error from unzip().
if (download.file(data.url, zipname, mode = "wb") != 0) {
	stop("Download of ", data.url, " failed")
}

# Splits x into sub-vectors at the indices contained in idxs.
#
# The elements at the split indices act as delimiters and are excluded from
# the result (like strsplit).
#
# Arguments:
#   x     The vector to split.
#   idxs  Positions in x at which to split; expected in ascending order and
#         within 1..length(x), as returned by grep() or which().
# Returns:
#   A list of length(idxs) + 1 sub-vectors of x. A part is zero-length when
#   two delimiters are adjacent or a delimiter sits at either end of x.
split.at = function(x, idxs) {
	# Add virtual delimiters just before the first element and just after the
	# last one, so every part lies strictly between two consecutive boundaries
	bounds = c(0, idxs, length(x) + 1)
	# lapply (not sapply) so the result is always a list; sapply would collapse
	# equal-length parts into a matrix and break parts[[n]] for callers
	lapply(
		seq_len(length(bounds) - 1),
		function(n) {
			# Number of elements strictly between the two boundaries
			size = bounds[n + 1] - bounds[n] - 1
			# seq_len gives an empty index for an empty part, where start:end
			# would count downwards and return the wrong elements
			x[bounds[n] + seq_len(max(size, 0))]
		}
	)
}

# Iterative version of the above function, for reference
#split.at = function(x, idxs) {
#	parts = list()
#	for (n in 0:length(idxs)) {
#		start = ifelse(n==0, 1, idxs[n] + 1)
#		end = ifelse(n==length(idxs), length(x), idxs[n+1] - 1) # Exclude delimiting elements
#		parts[[n + 1]] = x[start:end]
#	}
#	parts
#}

# Extract the zipfile; unzip() returns the paths of the files it extracted
files = unzip(zipname)

# Split each extracted file into its blank-line-delimited tables and write
# each table to its own file, named <input file>.part<N>
for (f in files) {
	text = readLines(f)
	parts = split.at(text, grep("^$", text)) # divide the file on blank lines
	print(sprintf("Writing %d parts for input file %s", length(parts), f))
	# seq_along() yields an empty sequence for zero parts, where
	# 1:length(parts) would count down through c(1, 0)
	for (pn in seq_along(parts)) {
		writeLines(parts[[pn]], paste0(f, ".part", pn))
	}
}

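# Functional (as opposed to procedural) version of above code, with the additional feature
# that the filenames of the resulting parts are returned (sapply simplifies them to a
# matrix when every input file yields the same number of parts, and to a list otherwise)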
#outfiles = sapply(
#	files,
#	function(f) {
#		text = readLines(f)
#		parts = split.at(text, grep("^$", text)) # divide the file on blank lines
#		print(sprintf("Writing %d parts for input file %s", length(parts), f))
#		ofs = sapply(
#			1:length(parts),
#			function(pn) {
#				of = paste(f, ".part", pn, sep="")
#				writeLines(parts[[pn]], of)
#				of
#			}
#		)
#		ofs
#	}
#)