UWA Minutes Aggregator
The Python script below is used to create the Teleconference Records. To recreate what is shown there, download the code below, save it as "minuteripper.py", and then run:
- python minuteripper.py uwawg 3 162
The first argument is the group's short name, the second is the meeting day of the week (0-6, 0 being Monday, so 3 means Thursday), and the third is how many weeks back to scan. The output will be stored in "uwawg.wiki", the contents of which can then be pasted onto the wiki.
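For reference, the generated file contains a MediaWiki table roughly like the sketch below (illustrative only; the dates, scribe names and topics come from whatever minutes the script finds):
 = Minutes =
 {|
 ! Minutes !! Scribe !! Topics and Resolutions
 |-
 ! id="min_2007" colspan="3" style="background: #e2edfe; color: #005a9c;" | Minutes for 2007
 |- id="min_14_Jun_2007" style='background-color: lightgrey'
 |[http://www.w3.org/2007/06/14-uwawg-minutes.html 14 Jun 2007] || [[User:SomeScribe|SomeScribe]] ||
 Topics:
 # [http://www.w3.org/2007/06/14-uwawg-minutes.html#item01 Some agenda topic]
 |}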
NOTE: You must have Python with the Beautiful Soup package installed for this to work. Also NOTE: this is a complete hack and my first Python script ever. Be forewarned.
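The script imports the 3.x series of Beautiful Soup ("from BeautifulSoup import BeautifulSoup"), not the newer bs4. If it is missing, something like the following should fetch it (assuming setuptools/easy_install is available):
- easy_install BeautifulSoup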
"""
This program is my first python script, it's a quick hack to generate
a table of the UWA weekly minutes. It requires the BeautifulSoup
package, but everything else should be builtin I think.
My apologies in advance.
-M
"""
import datetime
import calendar
import urllib2
import os
import re
import sys
import getpass
from BeautifulSoup import BeautifulSoup
# set the MINUTES_USER and MINUTES_PASS environment variables if
# you're sick of typing your username and password over and over
if len(sys.argv) != 4:
    print "arguments: <group short name> <0-6 day of week 0 being Monday> <num weeks back>"
    sys.exit(2)
shortname = sys.argv[1]
mtgday = int(sys.argv[2])
weeksback = int(sys.argv[3])
if (mtgday < 0 or mtgday > 6):
    print "Error: second argument is day of week 0-6, 0 being Monday: " + str(mtgday) + " is not valid"
    sys.exit(3)
if (weeksback < 0 or weeksback > 162):
    print "Error: weeks to go back must be between 0 and 3 years (162)"
    sys.exit(3)
username = os.getenv("MINUTES_USER")
password = os.getenv("MINUTES_PASS")
if username == None:
    username = raw_input("Please enter your w3c username: ")
if password == None:
    password = getpass.getpass("Please enter your w3c password: ")
print "Using username: " + username
spaces = re.compile("\s\s+")
def findNextDD(dt):
    # walk forward through the siblings of a <dt> until the matching
    # <dd> turns up; give up if we hit another <dt> first
    if not dt:
        return ""
    while (dt.nextSibling != None):
        dt = dt.nextSibling
        if dt.__class__.__name__ != "NavigableString":
            if dt.name == "dd":
                if len(dt.contents) > 0:
                    return dt.contents[0]
                return ""
            if dt.name == "dt":
                return ""
    return ""
# from: http://docs.python.org/library/urllib2.html#examples
# Create an OpenerDirector with support for Basic HTTP Authentication...
auth_handler = urllib2.HTTPBasicAuthHandler()
auth_handler.add_password(realm='W3CACL',
                          uri='http://www.w3.org',
                          user=username,
                          passwd=password)
opener = urllib2.build_opener(auth_handler)
# ...and install it globally so it can be used with urlopen.
urllib2.install_opener(opener)
# started this code from:
# http://www.daniweb.com/code/snippet236.html
today = datetime.date.today()
oneday = datetime.timedelta(days=1)
oneweek = datetime.timedelta(days=7)
# list of our 404s; we don't do anything with them
fourOfours = []
# list of all unique participants; we don't do anything with them either
participants = []
minfile = open(shortname+".wiki","w")
# I started this as writing html, should be possible to make it that
# way again, as I commented it all out
#minfile.write("<table border='1'>")
try:
    os.mkdir("minutes_cache")
except OSError:
    # the cache directory most likely already exists (errno 17), so
    # just keep going
    pass
#minfile.write('<tr><td>Date</td><td>Scribe</td><td>Topics</td></tr>\n')
minfile.write("""
= Minutes =
{|
! Minutes !! Scribe !! Topics and Resolutions
""")
# find the last day there would have been a meeting, starting from
# today and stepping backwards
nextmtg = today
while nextmtg.weekday() != mtgday:
    nextmtg -= oneday
week = 0
lastYearProcessed = 0
style=""
while week < weeksback:
    mtg = nextmtg - (oneweek * week)
    week += 1
    # format: http://www.w3.org/2007/06/14-uwawg-minutes.html
    url = "http://www.w3.org/" + mtg.strftime("%Y/%m/%d") + "-" + shortname + "-minutes.html"
    filename = "minutes_cache/" + mtg.strftime("%Y-%m-%d") + "-" + shortname + "-minutes.html"
    if not os.path.isfile(filename):
        print "No cache for " + filename + " fetching: " + url
        try:
            f = urllib2.urlopen(url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                fourOfours.append(url)
                FILE = open(filename, "w")
                FILE.writelines("no minutes :(")
                FILE.close()
            else:
                print url, "failed, HTTP code:", e.code
            continue
        FILE = open(filename, "w")
        FILE.writelines(f.read())
        FILE.close()
    print "Using cache: ", filename
    if mtg.year != lastYearProcessed:
        lastYearProcessed = mtg.year
        line = '|-\n! id="min_' + str(mtg.year) + '" colspan="3" style="background: #e2edfe; color: #005a9c;" | Minutes for ' + str(mtg.year) + "\n"
        minfile.write(line.encode("utf-8"))
    FILE = open(filename, "r")
    soup = BeautifulSoup(FILE.read())
    FILE.close()
    try:
        intro = soup.find("div", {"class": "intro"}).dl
    except:
        print "No minutes for " + filename
        line = '|-\n! colspan="3" style="background: red; color: #005a9c;" | [' + url + ' No minutes ' + mtg.strftime("%d %b %Y") + "]\n"
        minfile.write(line.encode("utf-8"))
        continue
present=""
regrets=""
scribe=""
foundtext = intro.find(text='Present')
if foundtext:
present = findNextDD(foundtext.parent)
present = present.strip()
foundtext = intro.find(text='Scribe')
if foundtext:
scribe = findNextDD(foundtext.parent)
foundtext = intro.find(text='Regrets')
if foundtext:
regrets = findNextDD(foundtext.parent)
    # [participants.append(x.strip()) for x in present.split(",") if x.strip() not in participants]
    scribes = ""
    for x in scribe.split(","):
        scribes = scribes + "[[User:" + x.strip() + "|" + x.strip() + "]] "
    # some earlier attempts: HTML output with present/regrets columns, unused now
    # line='<tr><td><a href="'+url+'">'+mtg.strftime("%Y/%m/%d")+'</a></td><td>'+scribe+'</td><td>'+regrets+'</td><td>'+present+'</td></tr>\n'
    # line='<tr><td><a href="'+url+'">'+mtg.strftime("%Y/%m/%d")+'</a></td><td>'+scribe+'</td><td>\n'
    # line='|-\n|['+url+' '+mtg.strftime("%d %b %Y")+'] || '+scribes+' || style="border:1px solid darkgray;" |\n'
    # alternate each row between the two background colours
    if style == "":
        style = "style='background-color: lightgrey'"
    else:
        style = ""
    # put an id of "min_DAY_MON_YEAR" on each row
    line = '|- id="min_' + mtg.strftime("%d_%b_%Y") + '" ' + style + '\n|[' + url + ' ' + mtg.strftime("%d %b %Y") + '] || ' + scribes + ' || \n'
    minfile.write(line.encode("utf-8"))
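    # for example, the row written above comes out roughly as (scribe
    # name illustrative):
    #   |- id="min_14_Jun_2007" style='background-color: lightgrey'
    #   |[http://www.w3.org/2007/06/14-uwawg-minutes.html 14 Jun 2007] || [[User:SomeScribe|SomeScribe]] ||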
    # encode as utf-8: BeautifulSoup hands back unicode strings, which
    # can't be written to a plain file if they contain non-ASCII
    agenda = soup.find("ol")
    if agenda != None:
        alltopics = agenda.findAll("a")
        minfile.write("Topics:\n")
        for x in alltopics:
            # line="<a href="+url+x["href"]+">"+x.contents[0]+"</a><br />"
            contents = x.contents[0].replace("\n", "")
            contents = spaces.sub(" ", contents)
            line = "# [" + url + x["href"] + " " + contents + "]\n"
            minfile.write(line.encode("utf-8"))
    # each officially recorded resolution is in a <strong> with a
    # class of 'resolution'
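    # an illustrative example of the markup being matched (assumed shape):
    #   <strong class="resolution">RESOLUTION: publish the new draft</strong>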
    resolutions = soup.findAll(attrs={"class": "resolution"})
    if len(resolutions) != 0:
        minfile.write("Resolutions:\n")
        for x in resolutions:
            contents = x.contents[0].replace("\n", "")
            contents = contents.replace("RESOLUTION:", "")
            contents = spaces.sub(" ", contents)
            # newline per item, so each resolution gets its own "*" bullet
            line = "* " + contents + "\n"
            minfile.write(line.encode("utf-8"))
minfile.write("|}")
minfile.write("""
------
Generated using [[UWA Minutes Aggregator]]
""")
print "Done!"