#!/usr/bin/env python3

# edumunds2csv.py
# -John Taylor
# May-26-2016

# This script takes Edmunds monthly "$199 Lease Deals" and converts it to a CSV file which
# is then suitable to use in a spreasdsheet such as Microsoft Excel, Apple Numbers or Google Sheets.
# You will then be able to sort data by different columns.

# Start from: http://www.edmunds.com/car-leasing/monthly-199-lease-deals.html
# Download the PDF that is linked from this page
# Example: http://static.ed.edmunds-media.com/unversioned/img/pdf/lease.deals/199.lease.deals.may.2016.pdf

# PDF to Text
# ------------
# Download: https://pdfbox.apache.org/download.cgi
# java -jar pdfbox-app-2.0.1.jar ExtractText 199.lease.deals.may.2016.pdf 199.txt
#

# Text to CSV
# -----------
# Finally, this script takes the java program's output, 199.txt and subsequently uses it as input (this Python script)
# It will output a csv file
# Example: python edumunds2csv.py > 199.csv

import re, sys

# the filename generated by the pdfbox-app jar program
fname="199.txt"

# set to 1 if the fname file includes a header line (with column fields), otherwise set it to 0
header_line = 1

#############################################################################################################################################################################

#                       make   model,        down             other       months      miles per          lease          total         cost per      each add'l      add'l
#                              price/mon     payment          charges                  months             cost          miles           miles       mile cost       terms
entry_re = re.compile("(.*?) ([0-9].*?\$) .*?([-0-9].*?\$) .*?([#0-9]+) ([0-9][0-9]) ([0-9]{1,4}) .*? ([0-9].*?\$) .*? ([0-9]{5}) .*? (\$[\.0-9]+) ([\.0-9]+\$) .*? (.*)")

#                              model
#                        year  trim     price
model_re = re.compile("(\d{4}) (.*?) ([\.0-9]+\$)")


def process_entry(entry):
	make, year_model_price, initial_payment, other_charges, months, miles_per_month, lease_cost, total_miles, cost_per_mile, each_addl_mile, addl_terms = entry

	match = model_re.findall(year_model_price)
	if not match:
		print(); print("error with:", year_model_price); print(); sys.exit(1)

	year, model, price = match[0]

	price = price.replace("$","").replace(".00","")
	initial_payment = initial_payment.replace("$","").replace(".00","").replace("-", "0")
	lease_cost = lease_cost.replace("$","").replace(".00","")
	cost_per_mile = cost_per_mile.replace("$","")
	each_addl_mile = each_addl_mile.replace("$","")
	addl_terms = addl_terms.strip()
	if not len(addl_terms): addl_terms = "No additional terms"

	#print(make, year, model, price)
	#print(initial_payment, other_charges, months, miles_per_month)
	#print(lease_cost, total_miles, cost_per_mile, each_addl_mile)
	#print(addl_terms)

	return ( make, year, model, price, initial_payment, other_charges, months, miles_per_month, lease_cost, total_miles, cost_per_mile, each_addl_mile, addl_terms )


def make_csv(entry):
	return ",".join(entry)


def main():
	global header_line
	with open(fname) as fp: lines = fp.read().splitlines()
	
	print("make, year, model, price, initial payment, other charges, months, miles per month, lease cost, total miles, cost per mile, each addl mile, addl terms")

	for line in lines:
		# skip the first line as it may be the header
		if header_line:
			header_line = 0
			continue
		if len(line) <= 60: continue
		line = line.replace(",","")

		match = entry_re.findall(line)
		if not match:
			print(); print("error with:\n", line); sys.exit(1)

		current = process_entry(match[0])
		csv = make_csv( current )
		print(csv)

	return 0


if __name__ == '__main__':
	sys.exit( main() )