#!/usr/bin/python2.2

# jsu.py - JS Usage Meter reader
# Version 0.9.3
#
# Copyright (C) 2003 Alex King
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


from HTMLParser import HTMLParser
import urllib,urllib2,Cookie,urlparse,cPickle,string,re

class ceurl(Cookie.BaseCookie):
	"""URL Retriever "Cookie Enhanced"

	Fetches URLs through urllib2 while remembering cookies per host.
	self.data is a plain dict mapping a host name to the Cookie.BaseCookie
	holding the cookies that host has set.  Stored cookies are sent back
	on later requests to the same host.
	"""

	def __init__(self, cd=None):
		"""Create a retriever, optionally restoring saved cookies.

		cd -- pickled cookie store as produced by savecookies(); when it
		      is None or empty the retriever starts with no cookies.
		"""
		if cd:
			# SECURITY: unpickling can execute arbitrary code.  Only pass
			# in data that this program wrote itself via savecookies().
			self.data=cPickle.loads(cd)
		else:
			self.data={}

	def _cookieheader(self,host):
		"""Return the Cookie request header value for host ("" if none).

		Cookies are joined with "; " as name=value pairs; cookie
		attributes such as path or expires are not sent back.
		"""
		if host not in self.data:
			return ""
		return "; ".join(["%s=%s" % (m.key,m.coded_value)
			for m in self.data[host].values()])

	def _setcookievalue(self,headerline):
		"""Return the value part of one raw Set-Cookie header line.

		getallmatchingheaders() yields complete header lines, including
		the "Set-Cookie:" name and the line ending; continuation lines
		start with whitespace and carry no header name.
		"""
		if headerline[:1].isspace():
			return headerline.strip()
		parts=headerline.split(":",1)
		if len(parts)==2:
			return parts[1].strip()
		return headerline.strip()

	def geturl(self,url,data=None):
		"""Open url and return the urllib2 response object.

		url  -- the URL to fetch.
		data -- optional dict of form inputs; each value is itself a dict
		        and is submitted only if it has both a "name" and a
		        "value" entry.  When data is given (and non-empty) the
		        request carries the url-encoded pairs as its body.

		Cookies already stored for the request's host are sent, and any
		Set-Cookie headers in the response are stored under the host of
		the final URL reported by the response.
		"""
		ro=urllib2.Request(url)
		if data:
			pairs=[]
			for key in data.keys():
				try:
					pairs.append((data[key]["name"],data[key]["value"]))
				except (KeyError,TypeError):
					# Inputs lacking a name or a value are not submitted.
					pass
			ro.add_data(urllib.urlencode(pairs))
		cookies=self._cookieheader(ro.get_host())
		if cookies:
			ro.add_header("Cookie",cookies)
		rv=urllib2.urlopen(ro)
		host=urlparse.urlparse(rv.geturl())[1]
		for item in rv.info().getallmatchingheaders("Set-Cookie"):
			if host not in self.data:
				self.data[host]=Cookie.BaseCookie()
			# Hand load() only the cookie text, not the header name.
			self.data[host].load(self._setcookievalue(item))
		return rv

	def savecookies(self):
		"""Return the cookie store pickled to a string, for ceurl(cd)."""
		return cPickle.dumps(self.data)

class parseform(HTMLParser):
	"""Parse html pages for a specified form, and save the input elements

	After construction:
	  nexturl -- the "action" attribute of the form whose name is frmtag,
	             or None if no such form was seen or it has no action.
	  inputs  -- dict mapping the name of each named <input> inside that
	             form to a dict of the element's attributes.
	"""

	class StopProcessing(Exception):
		"""Raised internally to stop parsing once the wanted form closes."""

	def __init__(self,f,frmtag):
		"""Read the page from f and parse it.

		f      -- object with a read() method returning the page text.
		frmtag -- value of the name attribute of the form to look for.
		"""
		HTMLParser.__init__(self)
		self.pf=0
		self.inputs={}
		self.nexturl=None
		self.frmtag=frmtag
		while 1:
			data=f.read()
			if not data:
				break
			#hack to fix a stupid html error on Telecom's site:
			#a closing quote immediately followed by topmargin gets a
			#space inserted after the quote
			offset=data.find("'topmargin")
			if offset!=-1:
				data=data[:offset+1]+" "+data[offset+1:]
			try:
				self.feed(data)
			except self.StopProcessing:
				# The wanted form is complete; ignore the rest of the page.
				break

	def dattrs(self,attrs):
		"""Return the (name, value) attribute pairs in attrs as a dict.

		Items that are not indexable pairs are skipped.
		"""
		rv={}
		for item in attrs:
			try:
				rv[item[0]]=item[1]
			except (IndexError,TypeError):
				pass
		return rv

	def handle_starttag(self, tag, attrs):
		"""Note the wanted form's action and collect its named inputs."""
		if tag=="form":
			d=self.dattrs(attrs)
			if d.get("name")==self.frmtag:
				self.pf=1
				self.nexturl=d.get("action")
		elif tag=="input" and self.pf:
			d=self.dattrs(attrs)
			if "name" in d:
				self.inputs[d["name"]]=d

	def handle_endtag(self, tag):
		"""Stop parsing at the end of the wanted form."""
		if tag=="form" and self.pf:
			raise self.StopProcessing()
				
class parseusage(HTMLParser):
	"""Parse the usage page and pick out the usage figures.

	After construction:
	  heading  -- the first text seen after the first <h2> tag ("" if the
	              page has no <h2>).
	  total    -- "<number> <word>" taken from the first matching table
	              cell text after a cell reading "Total:"; "" if there is
	              no such cell, "Looking" if no figure followed it.
	  reflects -- the first table cell text starting with "This Meter"
	              ("" if none).
	  previous -- list of (period, usage) string pairs, one per table row
	              after a "Previous Usage" header cell that holds both a
	              reporting period and a usage figure; None if that
	              header was never seen.
	"""

	def __init__(self,f):
		"""Read the whole page from f (an object with read()) and parse it."""
		HTMLParser.__init__(self)
		self.td=0
		self.th=0
		self.total=""
		self.heading=""
		self.reflects=""
		self.previous=None
		# Per-row matches; set here too so that cells or a </tr> appearing
		# before any <tr> do not hit a missing attribute.
		self.pperiod=None
		self.pusage=None
		self.repperiod=re.compile(r"\d{1,2} [A-Z][a-z]{2} 20\d{2} -  ?\d{1,2} [A-Z][a-z]{2} 20\d{2}")
		self.repusage=re.compile(r"\d+[.]\d{2}")
		# A number (one or more digits, optional decimal point) followed
		# by whitespace and a word.
		self.retotal=re.compile(r"(\d*[.]?\d+)\s+(\w+)")
		self.feed(f.read())

	def parse_starttag(self,i):
		"""Ignore font start tags

		Multiple attribute values in the font tags are crashing
		the parser, so simply ignore them"""
		if self.rawdata[i+1:i+5].lower()=="font":
			end=self.rawdata.find(">",i)
			if end==-1:
				# Tag is not complete; report that rather than returning
				# offset 0, which would restart parsing from the beginning.
				return -1
			return end+1
		return HTMLParser.parse_starttag(self,i)

	def handle_starttag(self,tag,attrs):
		"""Track which kind of cell we are in and reset per-row state."""
		if tag=="td":
			self.td=1
		elif tag=="th":
			self.th=1
		elif tag=="h2" and self.heading=="":
			self.heading="Looking"
		elif tag=="tr":
			self.pperiod=None
			self.pusage=None

	def handle_endtag(self,tag):
		"""Leave cells; at the end of a row record a complete period/usage pair."""
		if tag=="td":
			self.td=0
		elif tag=="th":
			self.th=0
		elif tag=="tr" and self.pperiod and self.pusage:
			self.previous.append((self.pperiod.group(),self.pusage.group()))

	def handle_data(self,data):
		"""Route text to whichever value is currently being looked for."""
		if self.heading=="Looking":
			self.heading=data
		elif self.td==1:
			if self.total=="" and data=="Total:":
				self.total="Looking"
			elif self.total=="Looking":
				x=self.retotal.search(data)
				if x:
					self.total="%s %s" % (x.group(1),x.group(2))
				# otherwise keep looking in the following cell text
			elif self.reflects=="" and data[:10]=="This Meter":
				self.reflects=data
			elif self.previous is not None:
				if not self.pperiod:
					self.pperiod=self.repperiod.match(data)
				if not self.pusage:
					self.pusage=self.repusage.search(data)
		elif self.th==1:
			if data=="Previous Usage":
				self.previous=[]


def main(username,password):
	"""Log on to Telecom's usage meter and return the parsed usage page.

	username, password -- the logon details placed into the logon form.

	Cookies are read from and written back to the file "Cookies" in the
	current directory; failure to read or write that file is ignored.

	Returns a parseusage object.  Raises KeyError if the logon form does
	not contain the expected input fields; errors from fetching the
	pages are not caught here.
	"""
	# Load saved cookies, if available.
	cs=None
	try:
		cf=open("Cookies")
		try:
			cs=cf.read()
		finally:
			cf.close()
	except IOError:
		pass
	# Get a "Read Object", with saved cookies if available.
	ro=ceurl(cs)
	# Use it to get Telecom's usage meter logon page.
	x=ro.geturl("http://www.telecom.co.nz/chm/0,5123,200343-202546,00.html")
	# Parse the returned page for the logon form.
	f=parseform(x,"frmMain")
	for key in ['IAF_LogonUserID','IAF_userID']:
		f.inputs[key]["value"]=username
	f.inputs['IAF_LogonPassword']["value"]=password
	# Log on and get the next page, which has a link to the usage meter.
	# urljoin leaves an absolute action untouched and resolves a relative
	# one against the page it came from.
	x=ro.geturl(urlparse.urljoin(x.geturl(),f.nexturl), f.inputs)
	# Parse the returned page for the link, which is again a form.
	f=parseform(x,"ViewCDR")
	# Get the usage page.
	x=ro.geturl(urlparse.urljoin(x.geturl(),f.nexturl), f.inputs)
	# Save our cookies, if possible.  The pickle is produced before the
	# file is opened so a failure there does not truncate an existing file.
	try:
		pickled=ro.savecookies()
		cf=open("Cookies","w")
		try:
			cf.write(pickled)
		finally:
			cf.close()
	except (IOError,OSError,cPickle.PickleError):
		pass
	# Parse the page and return the results.
	return parseusage(x)
	# parse the page and return the results

if __name__=="__main__":
	import sys
	if len(sys.argv)==3:
		d=main(sys.argv[1],sys.argv[2])
		# Print out the results.
		print(d.total)
		print(d.heading)
		print(d.reflects)
		# previous is None when the page had no "Previous Usage" table.
		for item in d.previous or []:
			print("%s %s" % (item[0],item[1]))
	else:
		print("""Usage: %s username password

Telecom Jetstream Usage Meter Retrival program

This program prints out JetStream/JetStart Usage stats from the web page,
and saves some data to the file "Cookies"
""" % sys.argv[0])
