# NOTE(review): stray extraction residue, not part of the script: %s

#!/usr/bin/python
# Script to produce http://www.w3.org/2005/06/poak/ pages and feeds.
# Dean Jackson http://www.w3.org/People/Dean/
#
#
# Like many useful programs, this started out as a little
# hack that has evolved into something bigger. And, like nearly
# every program I write, it's at the stage where it is
# useful enough that it should be rewritten so that it is
# no longer a hack. Oh well.
#
# NOTE(review): this copy of the script had lost its line structure and all
# of the markup inside its string literals.  The element and attribute names
# written by outputHTML() and outputRSS() are a reconstruction (the text
# content and the format arguments are the surviving originals) -- compare
# with a pristine copy before relying on the exact markup.  The code below
# targets Python 3.

import calendar
import re
import sys
import time
from urllib.parse import quote

# A regular expression used to extract the images from the
# flickr RSS feed.  Group 1 is the whole medium-size image URL, group 2 is
# the same URL without the "http://" prefix and the "_m.jpg" suffix.
fre = re.compile(r"src\=\"(http://(.*?)\_m\.jpg)\"")

# Holds all configuration parameters
config = {}

# Opening of the generated page.  Format arguments, in order: page title,
# configuration name (for the feed link), page title, subtitle.
_HTML_HEAD = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>%s</title>
<link rel="alternate" type="application/rss+xml" title="RSS" href="%s.rss" />
</head>
<body>
<h1>%s</h1>
<h2>%s</h2>
'''

# Opening of the generated feed.  Format arguments, in order: title,
# subtitle, generation time.
_RSS_HEAD = '''<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns="http://purl.org/rss/1.0/">
<channel rdf:about="http://www.w3.org/">
  <title>%s</title>
  <link>http://www.w3.org/</link>
  <description>%s</description>
  <dc:language>en</dc:language>
  <dc:publisher>http://www.w3.org/</dc:publisher>
  <dc:date>%s</dc:date>
  <items>
    <rdf:Seq>
'''


# basic XML escaping
def escape(text):
    """Return text with "&", "<" and ">" replaced by XML entities.

    "&" is handled first so that the ampersands introduced for "<" and ">"
    are not escaped a second time.
    """
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def _attr(text):
    """Escape text for use inside a double-quoted attribute value."""
    return escape(text).replace('"', "&quot;")


def _day(timestamp):
    """Format an epoch timestamp as e.g. "01 Jan 1970" (UTC)."""
    return time.strftime("%d %b %Y", time.gmtime(timestamp))


def _utc_timestamp(entry):
    """Return a feedparser entry's modification time as epoch seconds.

    Returns None when the entry has no usable 'modified_parsed' value, so
    that callers can skip it.
    """
    parsed = entry.get('modified_parsed')
    if not parsed:
        return None
    # feedparser reports parsed dates in UTC.  time.mktime() would read the
    # tuple as local time and shift every date by the local UTC offset,
    # which then shows up in the gmtime()-based output.
    return calendar.timegm(parsed)


def _thumbnail_url(description):
    """Return the small ("_s") flickr image URL found in description.

    Returns None when description contains no medium-size image.  Only the
    trailing "_m.jpg" size marker is swapped; an "_m" elsewhere in the URL
    is left alone.
    """
    found = fre.search(description)
    if found is None:
        return None
    return "http://" + found.group(2) + "_s.jpg"


class Entry:
    """A dated, titled link.  Entries sort newest first."""

    def __init__(self, title, link, date):
        # Text is kept as str; encoding happens once, when a file is written.
        self.title = title
        self.link = link
        self.date = date  # seconds since the epoch

    def __lt__(self, other):
        # list.sort() only needs "<"; a later date sorts earlier.
        return self.date > other.date


class FeedEntry(Entry):
    """An entry from a news feed, with its source site and a summary."""

    def __init__(self, title, link, date, basename, baselink, summary):
        Entry.__init__(self, title, link, date)
        self.basename = basename  # title of the originating site
        self.baselink = baselink  # link to the originating site
        self.summary = summary


class FlickrEntry(Entry):
    """An entry from a flickr feed, with the URL of its thumbnail."""

    def __init__(self, title, link, date, photo):
        Entry.__init__(self, title, link, date)
        self.photo = photo


def aggregate(entrieslist, num):
    """Merge several lists of entries and return the num newest."""
    merged = [entry for entries in entrieslist for entry in entries]
    merged.sort()
    return merged[:num]


def feeds(feedlist, num):
    """Fetch every feed in feedlist and return the num newest FeedEntry items.

    feedlist holds tuples whose first element is the feed URL.  Entries
    without a parsed modification date are skipped.
    """
    # Imported here so the formatting code stays importable without the
    # feed library installed.
    import feedparser

    collected = []
    for feed in feedlist:
        print("Fetching %s" % feed[0])
        rss = feedparser.parse(feed[0])
        for e in rss.entries:
            date = _utc_timestamp(e)
            if date is None:
                continue
            if e.get('summary'):
                summary = e.summary
            elif e.get('content') and e.content[0].value:
                summary = e.content[0].value
            else:
                summary = ""
            collected.append(FeedEntry(e.get('title', ''), e.get('link', ''),
                                       date,
                                       rss.feed.get('title', ''),
                                       rss.feed.get('link', ''),
                                       summary))
    collected.sort()
    return collected[:num]


def flickr(feedlist, num):
    """Fetch every flickr feed in feedlist and return the num newest photos.

    Entries without a parsed modification date, or whose description holds
    no medium-size image, are skipped.
    """
    import feedparser

    collected = []
    for feed in feedlist:
        print("Fetching %s" % feed[0])
        rss = feedparser.parse(feed[0])
        for e in rss.entries:
            date = _utc_timestamp(e)
            if date is None:
                continue
            photo = _thumbnail_url(e.get('description') or "")
            if photo is None:
                continue
            collected.append(FlickrEntry(e.get('title', ''),
                                         e.get('link', ''), date, photo))
    collected.sort()
    return collected[:num]


def cosmos(feedlist, num):
    """Query Technorati for each (kind, value) pair in feedlist.

    kind "tag" looks up a tag; anything else is treated as a URL whose
    cosmos is fetched.  Returns the num newest results as FeedEntry items.
    """
    import pytechnorati

    collected = []
    for feed in feedlist:
        if feed[0] == "tag":
            print("Fetching tag %s" % feed[1])
            c = pytechnorati.tag(feed[1])
        else:
            print("Fetching url %s" % feed[1])
            c = pytechnorati.cosmos(feed[1])
        for e in c.item:
            # NOTE(review): 'created' carries no zone; it is read as local
            # time, as the original did -- confirm what Technorati reports.
            created = time.mktime(time.strptime(e.created,
                                                "%Y-%m-%d %H:%M:%S"))
            collected.append(FeedEntry(e.title, e.permalink, created,
                                       e.name, e.url, e.excerpt))
    collected.sort()
    return collected[:num]


def _photo_strip(photos):
    """Return the markup for one row of linked flickr thumbnails."""
    cells = ['<a href="%s"><img src="%s" alt="%s" /></a>\n'
             % (_attr(p.link), _attr(p.photo), _attr(p.title))
             for p in photos]
    return '<div class="photos">\n' + "".join(cells) + '</div>\n'


def _news_item(item):
    """Return the markup for one FeedEntry in the news section."""
    summary = item.summary
    # Summaries that are already paragraph markup are written as they are.
    if summary[:3] != "<p>":
        summary = '<p>' + summary + '</p>'
    return "".join([
        '<div class="item">\n',
        '<h3><a href="%s">' % _attr(item.link), item.title, '</a></h3>\n',
        summary, '\n',
        '<p class="meta">%s ' % _day(item.date),
        ' by <a href="%s">' % _attr(item.baselink), item.basename, '</a> ',
        # NOTE(review): the search URL is a guess; only the percent-encoding
        # of the entry link survived in the damaged source.
        '<a href="http://technorati.com/search/%s">'
        % quote(item.link, safe=":"),
        ' Get Conversations</a></p>\n',
        '</div>\n',
    ])


def _cosmos_item(item):
    """Return the markup for one Technorati result."""
    return "".join([
        '<div class="item">\n',
        '<h3><a href="%s">' % _attr(item.link), item.title, '</a></h3>\n',
        '<p>', item.summary, '</p>\n',
        '<p class="meta">%s ' % _day(item.date),
        'from <a href="%s">' % _attr(item.baselink), item.basename, '</a>',
        '</p>\n</div>\n',
    ])


def _source_links(sources, text_index):
    """Return one link per configured source, each followed by a space.

    The link target is field 1 of the source tuple and the link text is
    field text_index.
    """
    return "".join('<a href="%s">%s</a> ' % (_attr(s[1]), s[text_index])
                   for s in sources)


def outputHTML(feeds, flickr, delicious, cosmos, num, filename):
    """Write the aggregated page to filename (UTF-8).

    feeds, delicious and cosmos are lists of FeedEntry, flickr a list of
    FlickrEntry.  News items are written five at a time up to num, with a
    row of five photos after each group while photos remain.  Titles and
    summaries are written as received from the feeds, which is why the page
    itself warns that it may not be valid XHTML.
    """
    page = []
    emit = page.append

    emit(_HTML_HEAD % (config["title"], config["name"],
                       config["title"], config["subtitle"]))
    emit('<div id="content">\n')
    emit(_photo_strip(flickr[:5]))

    emit('<h2>Latest News</h2>\n')
    count = 0
    while count < num:
        for item in feeds[count:count + 5]:
            emit(_news_item(item))
        count = count + 5
        if len(flickr) > count:
            emit(_photo_strip(flickr[count:count + 5]))

    # NOTE(review): the code for this section did not survive in the damaged
    # source (only its "Links" heading did); this is a minimal stand-in.
    if delicious:
        emit('<h2>Links</h2>\n<ul>\n')
        for item in delicious:
            emit('<li><a href="%s">' % _attr(item.link))
            emit(item.title)
            emit('</a></li>\n')
        emit('</ul>\n')

    emit('<h2>Discussion</h2>\n')
    emit('<p>Via technorati</p>\n')
    for item in cosmos:
        # Progress output, kept from the original.
        print(item.title)
        print(item.link)
        emit(_cosmos_item(item))

    emit('<p>The data displayed here comes from the following sources: \n')
    emit('RSS Feeds ( ' + _source_links(config["feed"], 2) + '), \n')
    emit('Flickr ( ' + _source_links(config["flickr"], 2) + '), \n')
    # Technorati sources are (kind, value) pairs, so the value is both the
    # link target and the link text.
    emit('Technorati ( ' + _source_links(config["technorati"], 1) + '), \n')
    emit('and Del.icio.us ( ' + _source_links(config["delicious"], 2)
         + ')</p>\n')
    emit('<p>News aggregation available as an <a href="%s.rss">RSS feed</a>.'
         '</p>\n' % config["name"])
    emit('<p>Unfortunately due to the nature of the content we are unable '
         'to guarantee this page is valid XHTML. Some portions are '
         'Copyright W3C 2005.</p>\n')
    emit('<p><a href="http://www.w3.org/2005/06/poak/">Information on POAK'
         '</a> including configuration and source code - '
         '<a href="http://www.w3.org/People/Dean/">Dean Jackson</a>.</p>\n')
    emit('</div>\n')
    emit('</body>\n</html>\n')

    # The page is assembled first so a failure cannot leave half a file.
    with open(filename, "w", encoding="utf-8") as f:
        f.write("".join(page))


def outputRSS(entries, num, filename):
    """Write the first num entries to filename as an RSS 1.0 feed (UTF-8).

    entries must be FeedEntry items.  Titles, links and summaries are
    XML-escaped; all dates are written in UTC.
    """
    newest = entries[:num]
    feed = []
    emit = feed.append

    emit(_RSS_HEAD % (config["title"], config["subtitle"],
                      time.strftime("%Y-%m-%dT%H:%M+00:00", time.gmtime())))
    for entry in newest:
        emit('      <rdf:li rdf:resource="%s" />\n' % _attr(entry.link))
    emit('    </rdf:Seq>\n  </items>\n</channel>\n')

    for entry in newest:
        emit('<item rdf:about="%s">\n' % _attr(entry.link))
        emit('  <title>')
        emit(escape(entry.title))
        emit('</title>\n')
        emit('  <link>')
        emit(escape(entry.link))
        emit('</link>\n')
        # The time is rendered with gmtime(), so the offset must say UTC.
        emit('  <dc:date>%s</dc:date>\n'
             % time.strftime("%Y-%m-%dT%H:%M:%S+00:00",
                             time.gmtime(entry.date)))
        emit('  <description>')
        emit(escape(entry.summary))
        emit('</description>\n')
        emit('</item>\n')
    emit('</rdf:RDF>\n')

    with open(filename, "w", encoding="utf-8") as f:
        f.write("".join(feed))


def _read_config(configname):
    """Load "<configname>.config" into the global config dictionary.

    Each non-blank line is a directive followed by whitespace-separated
    fields: "title" and "subtitle" take free text; "feed", "delicious" and
    "flickr" take two fields plus a free-text name; "technorati" takes a
    kind and a value.  Unknown directives are ignored.

    Raises ValueError when a directive has fewer than two fields.
    """
    path = "%s.config" % configname
    with open(path) as source:
        for lineno, line in enumerate(source, 1):
            fields = line.split()
            if not fields:
                continue
            directive, args = fields[0], fields[1:]
            if directive in ("title", "subtitle"):
                config[directive] = " ".join(args)
            elif directive in ("feed", "delicious", "flickr", "technorati"):
                if len(args) < 2:
                    raise ValueError(
                        "%s line %d: '%s' needs at least two fields"
                        % (path, lineno, directive))
                if directive == "technorati":
                    config[directive].append((args[0], args[1]))
                else:
                    config[directive].append(
                        (args[0], args[1], " ".join(args[2:])))


def _main(argv):
    """Run the aggregator; returns the process exit status."""
    if len(argv) != 3:
        print("Usage: python %s configname technoratikey" % argv[0])
        print("where configname is the basename of a configuration file")
        print("eg. 'w3c' for the file 'w3c.config'")
        print("and technoratikey is the API key from technorati")
        return 1

    import pytechnorati

    configname = argv[1]
    config["name"] = configname
    config["title"] = "No Title"
    config["subtitle"] = "Subtitle"
    config["feed"] = []
    config["flickr"] = []
    config["delicious"] = []
    config["technorati"] = []

    pytechnorati.setLicense(argv[2])
    _read_config(configname)

    print("Doing feeds")
    print("-----------")
    feedlist = feeds(config["feed"], 30)

    print("\nDoing delicious")
    print("---------------")
    deliciouslist = feeds(config["delicious"], 20)

    print("\nDoing flickr")
    print("---------------")
    flickrlist = flickr(config["flickr"], 20)

    print("\nDoing technorati")
    print("---------------")
    cosmoslist = cosmos(config["technorati"], 20)

    print("\nProducing HTML")
    print("----------------")
    outputHTML(feedlist, flickrlist, deliciouslist, cosmoslist, 40,
               "%s.html" % configname)

    print("\nProducing RSS")
    print("---------------")
    outputRSS(feedlist, 20, "%s.rss" % configname)
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))

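# NOTE(review): the fragments below are stray pieces of this script's own
# string literals (apparently left behind when its HTML was stripped).  They
# are not valid Python, so they are kept only as comments.
# %s
#
# %s
#
# Latest News
#
# ') f.write(i.title) f.write('
#
# Links
#
# Discussion
#
# ' % i.link) f.write(i.title) f.write('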