UWA Minutes Aggregator

From UWA
Jump to: navigation, search

The Python script below is used to create the Teleconference Records. To recreate what is shown there, download the code below, save it as "minuteripper.py" and then run:

python minuteripper.py uwawg 3 162

The output will be stored in "uwawg.wiki", the contents of which can then be put on the wiki.

NOTE: You must have Python 2 with the Beautiful Soup 3 package (the "BeautifulSoup" module) installed in order for this to work. Also NOTE: this is a complete hack and my first Python script ever. Be forewarned.

"""
This program is my first python script, it's a quick hack to generate
a table of the UWA weekly minutes.  It requires the BeautifulSoup
package, but everything else should be builtin I think.

My apologies in advance.

-M

"""
import datetime
import calendar
import urllib2
import os
import re
import sys
import getpass

from BeautifulSoup import BeautifulSoup

# set the MINUTES_USER and MINUTES_PASS environment variables if you're
# sick of typing your username and password over and over

# Command line: <group short name> <meeting weekday> <weeks to look back>.
# Exit status 2 means the arguments were malformed, 3 means out of range.
if len(sys.argv) != 4:
    print("arguments: <group short name> <0-6 day of week 0 being Monday> <num weeks back>")
    sys.exit(2)

shortname = sys.argv[1]
try:
    mtgday = int(sys.argv[2])
    weeksback = int(sys.argv[3])
except ValueError:
    # A non-numeric day or week count is a usage error, not a traceback.
    print("arguments: <group short name> <0-6 day of week 0 being Monday> <num weeks back>")
    sys.exit(2)

if mtgday < 0 or mtgday > 6:
    # mtgday is an int, so it has to be converted before being concatenated
    # into the message (str + int raises TypeError).
    print("Error: second argument is day of week 0-6, 0 being Monday:" + str(mtgday) + " is not valid")
    sys.exit(3)

if weeksback < 0 or weeksback > 162:
    print("Error: weeks to go back must be less than 3 years (162) and greater than 0")
    sys.exit(3)


# Credentials come from the environment when available; anything missing
# is asked for interactively (the password prompt does not echo).
username = os.getenv("MINUTES_USER")
if username is None:
    username = raw_input("Please enter your w3c username: ")

password = os.getenv("MINUTES_PASS")
if password is None:
    password = getpass.getpass("Please enter your w3c password: ")

print("Using username: " + username)

# Matches a run of two or more whitespace characters; used to collapse
# wrapped/indented text into single spaces.  Raw string so "\s" reaches the
# regex engine without relying on Python passing unknown escapes through.
spaces = re.compile(r"\s\s+")

def findNextDD(dt):
    """Return the first child of the <dd> that follows the node *dt*.

    Walks the ``nextSibling`` chain starting after *dt*.  Siblings that
    have no ``name`` attribute (text nodes and, e.g., comments) are
    skipped.  The walk stops at the first sibling named "dd" or "dt".

    Returns the first entry of the <dd>'s ``contents``, or "" when *dt*
    is falsy, when the <dd> is empty, when another <dt> is reached first,
    or when the siblings run out.
    """
    if not dt:
        return ""
    node = dt.nextSibling
    while node is not None:
        # Only real tags carry a usable name; looking it up with a default
        # keeps name-less nodes (not just plain NavigableString instances)
        # from raising AttributeError.
        name = getattr(node, "name", None)
        if name == "dd":
            if node.contents:
                return node.contents[0]
            return ""
        if name == "dt":
            # Reached the next term without seeing a definition.
            return ""
        node = node.nextSibling
    return ""


# day of the week we meet on: supplied as the second command-line argument (see mtgday above)


import urllib2
# HTTP Basic authentication for the W3C pages, following the example at
#   http://docs.python.org/library/urllib2.html#examples
# The handler is given the credentials collected above for the 'W3CACL'
# realm on www.w3.org.
auth_handler = urllib2.HTTPBasicAuthHandler()
auth_handler.add_password('W3CACL', 'http://www.w3.org', username, password)

# Build an opener around the handler and make it the process-wide default,
# so plain urllib2.urlopen() calls send the credentials.
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)

# started this code from:
#   http://www.daniweb.com/code/snippet236.html

# Reference date plus the two step sizes used for the date arithmetic.
today = datetime.date.today()
oneday = datetime.timedelta(1)
oneweek = datetime.timedelta(weeks=1)

# URLs that answered 404; collected but not used for anything afterwards.
fourOfours = list()

# Meant to hold every unique participant; nothing is done with it either.
participants = list()
# The result is written to "<shortname>.wiki" in the current directory as
# MediaWiki table markup (this started out as HTML output and could be
# turned back into it).
minfile = open(shortname + ".wiki", "w")

# Downloaded minutes are kept in minutes_cache/ so reruns can reuse them.
# Create the directory only when it is missing: an "already exists" error
# can then no longer happen, and any other failure (e.g. permissions) is
# reported here instead of being silently swallowed.
if not os.path.isdir("minutes_cache"):
    os.mkdir("minutes_cache")

# Table heading and column titles.
minfile.write("""
= Minutes =
{|
! Minutes !! Scribe !! Topics and Resolutions
""")
# Most recent date (today included) that falls on the meeting weekday:
# step back by the number of days separating today's weekday from mtgday.
nextmtg = today - datetime.timedelta(days=(today.weekday() - mtgday) % 7)

oneweek = datetime.timedelta(weeks=1)

# Loop state for the table generation below:
#   week              - how many weeks back the next iteration looks
#   lastYearProcessed - year of the last row written (drives year headers)
#   style             - row attribute toggled to alternate backgrounds
week = 0
lastYearProcessed = 0
style = ""

# Emit one wiki table row per meeting date, newest first, stepping back one
# week at a time.  Each page is fetched once and then served from
# minutes_cache/ on later runs.
while week < weeksback:
    mtg = nextmtg - (oneweek * week)
    week += 1
    # URL format: http://www.w3.org/2007/06/14-uwawg-minutes.html
    url = "http://www.w3.org/" + mtg.strftime("%Y/%m/%d") + "-" + shortname + "-minutes.html"
    filename = "minutes_cache/" + mtg.strftime("%Y-%m-%d") + "-" + shortname + "-minutes.html"

    if not os.path.isfile(filename):
        print("No cache for " + filename + " fetching: " + url)
        try:
            f = urllib2.urlopen(url)
        except urllib2.URLError as e:
            # Only HTTPError carries a status code; a plain URLError (DNS
            # failure, refused connection, ...) has no .code attribute.
            code = getattr(e, "code", None)
            if code != 404:
                if code is None:
                    print(url + " failed: " + str(e))
                else:
                    print("%s failed due, HTTP code:  %s" % (url, code))
                # Nothing is cached, so the fetch is retried on the next run.
                continue
            # 404: remember it and cache a placeholder so it is not fetched
            # again.  Fall through so this run renders the same "No minutes"
            # row that later (cached) runs produce for this date.
            fourOfours.append(url)
            with open(filename, "w") as cachefile:
                cachefile.write("no minutes :(")
        else:
            # Read the whole page before touching the cache file, so a failed
            # download cannot leave an empty cache entry behind.
            try:
                page = f.read()
            finally:
                f.close()
            with open(filename, "w") as cachefile:
                cachefile.write(page)

    print("Using cache:  " + filename)

    # Start a new header row whenever the year changes.
    if mtg.year != lastYearProcessed:
        lastYearProcessed = mtg.year
        line = '|-\n! id="min_' + str(mtg.year) + '" colspan="3" style="background: #e2edfe; color: #005a9c;" | Minutes for ' + str(mtg.year) + "\n"
        minfile.write(line.encode("utf-8"))

    with open(filename, "r") as cachefile:
        soup = BeautifulSoup(cachefile.read())

    # The meeting summary is the <dl> inside <div class="intro">.  Pages
    # without it (including the 404 placeholder) get a red "No minutes" row.
    intro_div = soup.find("div", {"class": "intro"})
    intro = None
    if intro_div is not None:
        intro = intro_div.dl
    if intro is None:
        print("No minutes for " + filename)
        line = '|-\n! colspan="3" style="background: red; color: #005a9c;" | [' + url + ' No minutes ' + mtg.strftime("%d %b %Y") + "]\n"
        minfile.write(line.encode("utf-8"))
        continue

    # Only the scribe makes it into the table; the Present/Regrets entries
    # are not part of the output.
    scribe = ""
    foundtext = intro.find(text='Scribe')
    if foundtext:
        scribe = findNextDD(foundtext.parent)

    # One user-page link per comma-separated scribe; blank entries are
    # skipped so a missing scribe does not produce an empty [[User:|]] link.
    scribenames = [name.strip() for name in scribe.split(",")]
    scribes = "".join("[[User:" + name + "|" + name + "]] " for name in scribenames if name)

    # Swap each row between the two backgrounds.
    if style == "":
        style = "style='background-color: lightgrey'"
    else:
        style = ""

    # Each row gets an id of "min_DAY_MON_YEAR".
    line = '|- id="min_' + mtg.strftime("%d_%b_%Y") + '" ' + style + '\n|[' + url + ' ' + mtg.strftime("%d %b %Y") + '] || ' + scribes + ' || \n'

    # Text taken from the parsed page is unicode; encode it explicitly,
    # because a Python 2 file object would otherwise try ASCII and fail on
    # non-ASCII names.
    minfile.write(line.encode("utf-8"))

    # Topics: every link inside the first <ol> of the page.
    agenda = soup.find("ol")
    if agenda is not None:
        minfile.write("Topics:\n")
        for anchor in agenda.findAll("a"):
            # Anchors with no text or no href cannot be rendered as a link.
            if not anchor.contents or anchor.get("href") is None:
                continue
            contents = anchor.contents[0].replace("\n", "")
            contents = spaces.sub(" ", contents)
            line = "# [" + url + anchor["href"] + " " + contents + "]\n"
            minfile.write(line.encode("utf-8"))

    # Officially recorded resolutions are marked with class "resolution".
    resolutions = soup.findAll(attrs={"class": "resolution"})
    if resolutions:
        minfile.write("Resolutions:\n")
        for resolution in resolutions:
            if not resolution.contents:
                continue
            contents = resolution.contents[0].replace("\n", "")
            contents = contents.replace("RESOLUTION:", "")
            contents = spaces.sub(" ", contents)
            # Write inside the loop: every resolution gets its own bullet
            # (writing after the loop kept only the last one).
            line = "* " + contents + "\n"
            minfile.write(line.encode("utf-8"))
# Close the wiki table and append the attribution footer.
minfile.write("|}")
minfile.write("""

------
Generated using [[UWA Minutes Aggregator]]
""")
# Close explicitly so everything is flushed to disk before reporting success.
minfile.close()

print("Done!")