Warning:
This wiki has been archived and is now read-only.
UWA Minutes Aggregator
From UWA
The Python script below is used to create the Teleconference Records. To recreate what is shown there, download the code below, save it as "minuteripper.py", and then run:
- python minuteripper.py uwawg 3 162
The output will be stored in "uwawg.wiki", the contents of which can then be put on the wiki.
NOTE: You must have python with the Beautiful Soup package installed in order for this to work. Also NOTE: this is a complete hack and my first python script ever. Be forewarned.
""" This program is my first python script, it's a quick hack to generate a table of the UWA weekly minutes. It requires the BeautifulSoup package, but everything else should be builtin I think. My apologies in advance. -M """ import datetime import calendar import urllib2 import os import re import sys import getpass from BeautifulSoup import BeautifulSoup # set these two variables if you're sick of typing your username # and password over and over if len(sys.argv) != 4: print "arguments: <group short name> <0-6 day of week 0 being Monday> <num weeks back>" sys.exit(2) shortname = sys.argv[1] mtgday = int(sys.argv[2]) weeksback = int(sys.argv[3]) if (mtgday < 0 or mtgday >6): print "Error: second argument is day of week 0-6, 0 being Monday:" + mtgday + " is not valid" sys.exit(3) if (weeksback < 0 or weeksback > 162): print "Error: weeks to go back must be less than 3 years (162) and greater than 0" sys.exit(3) username = os.getenv("MINUTES_USER") password = os.getenv("MINUTES_PASS") if username == None: username=raw_input("Please enter your w3c username: ") if password == None: password=getpass.getpass("Please enter your w3c password: ") print "Using username: " + username spaces = re.compile("\s\s+") def findNextDD(dt): if not dt: return "" while (dt.nextSibling != None): dt=dt.nextSibling if dt.__class__.__name__ != "NavigableString": if dt.name == "dd": if len(dt.contents) > 0: return dt.contents[0] return "" if dt.name == "dt": return "" return "" # day of the week we meet on: import urllib2 # from: http://docs.python.org/library/urllib2.html#examples # Create an OpenerDirector with support for Basic HTTP Authentication... auth_handler = urllib2.HTTPBasicAuthHandler() auth_handler.add_password(realm='W3CACL', uri='http://www.w3.org', user=username, passwd=password) opener = urllib2.build_opener(auth_handler) # ...and install it globally so it can be used with urlopen. 
urllib2.install_opener(opener) # started this code from: # http://www.daniweb.com/code/snippet236.html today = datetime.date.today() oneday = datetime.timedelta(days=1) oneweek = datetime.timedelta(days=7) # list of our 404's we don't do anything with 'm fourOfours = [] #list of all unique participants, don't do anything with them either. participants = [] minfile = open(shortname+".wiki","w") # I started this as writing html, should be possible to make it that # way again, as I commented it all out #minfile.write("<table border='1'>") try: os.mkdir("minutes_cache") except OSError: # hm, I can't figure out why as doesn't work,let's just pass since # this is likely just an errno 17. pass #minfile.write('<tr><td>Date</td><td>Scribe</td><td>Topics</td></tr>\n') minfile.write(""" = Minutes = {| ! Minutes !! Scribe !! Topics and Resolutions """) # find last day that there would have been a meeting starting from today and looping backwards nextmtg = today while nextmtg.weekday() != mtgday: nextmtg -= oneday oneweek = datetime.timedelta(days=7) week = 0 lastYearProcessed = 0 style="" while week < weeksback: mtg = nextmtg - (oneweek*week) week +=1 # format: http://www.w3.org/2007/06/14-uwawg-minutes.html url = "http://www.w3.org/" + mtg.strftime("%Y/%m/%d") + "-"+shortname+"-minutes.html" filename = "minutes_cache/" + mtg.strftime("%Y-%m-%d") + "-"+shortname+"-minutes.html" if not os.path.isfile(filename): print "No cache for "+filename +" fetching: " + url try: f = urllib2.urlopen(url) except urllib2.URLError, e: if e.code == 404: fourOfours.append(url) FILE = open(filename,"w") FILE.writelines("no minutes :(") FILE.close else: print url, "failed due, HTTP code: ", e.code continue FILE = open(filename,"w") FILE.writelines(f.read()) FILE.close print "Using cache: ", filename if mtg.year != lastYearProcessed: lastYearProcessed = mtg.year line='|-\n! 
id="min_'+str(mtg.year)+'" colspan="3" style="background: #e2edfe; color: #005a9c;" | Minutes for ' + str(mtg.year) +"\n" minfile.write(line.encode("utf-8")) FILE = open(filename,"r") soup = BeautifulSoup(FILE.read()) FILE.close try: intro = soup.find("div",{"class":"intro"}).dl except: print "No minutes for " + filename line='|-\n! colspan="3" style="background: red; color: #005a9c;" | [' + url + ' No minutes ' + mtg.strftime("%d %b %Y") +"]\n" minfile.write(line.encode("utf-8")) continue present="" regrets="" scribe="" foundtext = intro.find(text='Present') if foundtext: present = findNextDD(foundtext.parent) present = present.strip() foundtext = intro.find(text='Scribe') if foundtext: scribe = findNextDD(foundtext.parent) foundtext = intro.find(text='Regrets') if foundtext: regrets = findNextDD(foundtext.parent) # [participants.append(x.strip()) for x in present.split(",") if x.strip() not in participants] scribes="" for x in scribe.split(","): scribes=scribes+("[[User:"+x.strip()+"|"+ x.strip()+"]] ") # some other attempts, HTML with presents/regrets in there somewhere, unused now. # line='<tr><td><a href="'+url+'">'+mtg.strftime("%Y/%m/%d")+'</a></td><td>'+ scribe+ '</td><td>'+ regrets+ '</td><td>'+present+'</td></tr>\n' # line='<tr><td><a href="'+url+'">'+mtg.strftime("%Y/%m/%d")+'</a></td><td>'+ scribe+ '</td><td>\n' # line='|-\n|['+url+' ' +mtg.strftime("%d %b %Y")+'] || '+scribes+' || style="border:1px solid darkgray;" |\n' #swap each line between the two backgrounds. if style=="": style="style='background-color: lightgrey'" else: style="" # put a an id of "min_DAY_MON_YEAR" on each minutes now. line='|- id="min_'+ mtg.strftime("%d_%b_%Y") +'" '+style+'\n|['+url+' ' +mtg.strftime("%d %b %Y")+'] || '+scribes+' || \n' minfile.write(line.encode("utf-8")) # what was all this utf-8 encoding business about? 
I can't remember agenda = soup.find("ol") if agenda != None: alltopics = agenda.findAll("a") minfile.write("Topics:\n") for x in alltopics: # line="<a href="+url+x["href"]+">"+x.contents[0]+"</a><br />" contents=x.contents[0].replace("\n","") contents = spaces.sub(" ",contents) line="# ["+url+x["href"]+" "+contents+"]\n" minfile.write(line.encode("utf-8")) # each of the resolutions that are officially recorded are in a # <strong> with a class of 'resolution'. resolutions = soup.findAll(attrs={"class":"resolution"}) if len(resolutions) != 0: minfile.write("Resolutions:\n") line="" for x in resolutions: contents = x.contents[0].replace("\n","") contents = contents.replace("RESOLUTION:","") contents = spaces.sub(" ",contents) line="* " + contents minfile.write(line.encode("utf-8")) minfile.write("\n") minfile.write("|}") minfile.write(""" ------ Generated using [[UWA Minutes Aggregator]] """) print "Done!"