#!/usr/bin/python

# Script to produce http://www.w3.org/2005/06/poak/ pages and feeds.
# Dean Jackson http://www.w3.org/People/Dean/ 
# 
#
# Like many useful programs, this started out as a little
# hack that has evolved into something bigger. And, like nearly
# every program I write, it's at the stage where it is 
# useful enough that it should be rewritten so that it is
# no longer a hack. Oh well.

import calendar
import re
import sys
import time

import feedparser
import pytechnorati


# A regular expression used to extract the images from the 
# flickr RSS feed. It looks for a src="..." attribute whose value is an
# http:// URL ending in "_m.jpg": group 1 is that whole URL, group 2 is
# the part between "http://" and "_m.jpg".
fre = re.compile(r"src\=\"(http://(.*?)\_m\.jpg)\"")

# Holds all configuration parameters. The __main__ section fills it in
# (keys "name", "title", "subtitle", "feed", "flickr", "delicious" and
# "technorati"); outputHTML() and outputRSS() read from it.
config = {}

# basic XML escaping
def escape(text):
    """Return text with &, < and > replaced by their XML entities.

    Quote characters are left untouched, so the result is meant for
    element content rather than for attribute values.
    """
    # "&" must go first: the entities produced for "<" and ">" contain an
    # ampersand themselves and must not be escaped a second time.
    for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        text = text.replace(char, entity)
    return text
  

class Entry:
    """An item collected from one of the sources, ordered newest first.

    Attributes:
      title -- the title, stored UTF-8 encoded
      link  -- URL of the item, stored as given
      date  -- timestamp in seconds since the epoch; decides the ordering
    """

    def __init__(self, title, link, date):
        self.title = title.encode('utf-8')
        self.link = link
        self.date = date

    def __cmp__(self, other):
        """Three-way comparison: negative when self is newer than other.

        Always returns -1, 0 or 1. Returning the raw difference of two
        dates would hand back a float, and a fractional difference would
        be truncated to 0 ("equal") when the interpreter converts it.
        """
        return (other.date > self.date) - (other.date < self.date)

    def __lt__(self, other):
        """Rich "less than" with the same newest-first meaning as __cmp__.

        list.sort() only needs this operator, so sorting keeps working on
        interpreters that never call __cmp__.
        """
        return self.date > other.date

class FeedEntry(Entry):
    """An Entry that also records where it came from and a summary.

    basename and baselink are the name and URL of the originating source
    (the callers in this file pass a feed's or a site's title and link);
    summary is the body text. basename and summary are stored UTF-8
    encoded, baselink is stored as given.
    """

    def __init__(self, title, link, date, basename, baselink, summary):
        Entry.__init__(self, title, link, date)
        self.basename, self.baselink = basename.encode('utf-8'), baselink
        self.summary = summary.encode('utf-8')
    
class FlickrEntry(Entry):
    """An Entry for a photo.

    photo is the URL of the image shown for the entry; it is stored as
    given, on top of the title, link and date kept by Entry.
    """

    def __init__(self, title, link, date, photo):
        Entry.__init__(self, title, link, date)
        self.photo = photo

def aggregate(entrieslist, num):
    """Merge several collections of entries into one sorted, truncated list.

    entrieslist -- an iterable of iterables of entries
    num         -- maximum number of items to return

    The merged items are sorted with their own ordering (for Entry
    objects that means newest first) and the first num are returned.
    """
    merged = [item for group in entrieslist for item in group]
    merged.sort()
    return merged[:num]

def feeds(feedlist, num):
  feedList = []
  for feed in feedlist:
    print "Fetching", feed[0]
    rss = feedparser.parse(feed[0])
    for e in rss.entries:
      if not e.has_key('modified_parsed'):
        continue
      date = time.mktime(e.modified_parsed)
      if e.has_key('summary') and e.summary != '':
        summary = e.summary
      elif e.has_key('content') and e.content[0].value != '':
        summary = e.content[0].value
      else:
        summary = ""
      
      feedList.append(FeedEntry(e.title, e.link, date, rss.feed.title, rss.feed.link, summary))
    
  feedList.sort()
  return feedList[:num]

def flickr(feedlist, num):
    feedList = []
    for feed in feedlist:
        print "Fetching", feed[0]
        rss = feedparser.parse(feed[0])
        for e in rss.entries:
            if not e.has_key('modified_parsed'):
                continue
            desc = e.description
            m = fre.search(desc)
            photo = m.group(1).replace("_m", "_s")
            feedList.append(FlickrEntry(e.title, e.link, time.mktime(e.modified_parsed), photo))
    
    feedList.sort()
    return feedList[:num]

def cosmos(feedlist, num):
    feedList = []
    for feed in feedlist:
        if feed[0] == "tag":
            print "Fetching tag", feed[1]
            c = pytechnorati.tag(feed[1])
        else:
            print "Fetching url", feed[1]
            c = pytechnorati.cosmos(feed[1])

        for e in c.item:
            t = FeedEntry(e.title, e.permalink, time.mktime(time.strptime(e.created, "%Y-%m-%d %H:%M:%S")), e.name, e.url, e.excerpt)
            feedList.append(t)
    
    feedList.sort()
    return feedList[:num]

def outputHTML(feeds, flickr, delicious, cosmos, num, filename):
    f = open(filename, "w")
    f.write('''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
    "http://www.w3.org/TR/xhtml11/DTD/xhtml1.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-au">
  <head>
    <title>%s</title>
    <link rel="stylesheet" href="poak.css" type="text/css"/>
    <link rel="alternate" type="application/rss+xml" title="RSS 1.0" href="%s.rss"/>
  </head>
  <body>
  <div id="header">
    <h1>%s</h2>
    <h2>%s</h2>
  </div>
''' % (config["title"], config["name"], config["title"], config["subtitle"]))

    f.write('<div id="main">\n')

    f.write('<div class="news">\n')
    f.write('<div class="photos">\n')
    for i in flickr[:5]:
        f.write('<a href="%s"><img src="%s" alt="%s"/></a> \n' % (i.link, i.photo, i.title))
    f.write('</div>\n')

    f.write('<h2>Latest News</h2>\n')

    count = 0
    while count < num:
      
      for i in feeds[count:count+5]:
        f.write('<div class="newsitem">\n')
        f.write('<h3><a href="')
        f.write(i.link)
        f.write('">')
        f.write(i.title)
        f.write('</a></h3>\n')
        if i.summary[:3] == "<p>":
          f.write(i.summary)
          f.write('\n')
        else:
          f.write('<p>')
          f.write(i.summary)
          f.write('</p>\n')

        f.write('<p class="iteminfo"><span class="date">%s</span> ' % time.strftime("%d %b %Y", time.gmtime(i.date)))
        f.write(' by <a href="')
        f.write(i.baselink)
        f.write('">')
        f.write(i.basename)
        f.write('</a> ')
        f.write(' <a class="techoratilink" href="http://www.technorati.com/cosmos/search.html?rank=&amp;url=%s">' % i.link.replace("#", "%23").replace("/", "%2F"))
        f.write('<img src="bubble_h17.gif" class="cosmos" alt="Get Conversations" /></a></p>\n')
        f.write('</div>\n')

      count = count + 5

      if len(flickr) > count:

        f.write('<div class="photos">\n')
        for i in flickr[count:count+5]:
          f.write('<a href="')
          f.write(i.link)
          f.write('"><img src="')
          f.write(i.photo)
          f.write('" alt="')
          f.write(i.title)
          f.write('"/></a> \n')
        f.write('</div>\n')

    f.write('</div>\n')

    f.write('</div>\n')

    f.write('<div class="sidebar">\n')
    f.write('<div class="links">\n')
    f.write('<h2>Links</h2>\n')
    f.write('<p>Via <a href="http://del.icio.us/">del.icio.us</a></p>\n')
    f.write('<ul>\n')
    for i in delicious[:10]:
      f.write('<li><a href="')
      f.write(i.link)
      f.write('">')
      f.write(i.title)
      f.write('</a> from <a href="')
      f.write(i.baselink)
      f.write('">')
      f.write(i.basename.replace('del.icio.us/', '').replace('tag/', ''))
      f.write('</a> <a class="techoratilink" href="http://www.technorati.com/cosmos/search.html?rank=&amp;url=')
      f.write(i.link.replace("#", "%23").replace("/", "%2F"))
      f.write('"><img src="bubble_h17.gif" class="cosmos" alt="Get Conversations" /></a></li>\n')
    f.write('</ul>\n')
    f.write('</div>\n')

    f.write('<div class="discussion">\n')
    f.write('<h2>Discussion</h2>\n')
    f.write('<p>Via <a href="http://www.technorati.com/">technorati</a></p>\n')

    for i in cosmos:
      f.write('<div class="discussionitem">\n')
      f.write('<h3><a href="%s">' % i.link)
      f.write(i.title)
      f.write('</a></h3>\n')
      print i.title
      print i.link
      f.write('<p>')
      f.write(i.summary)
      f.write('</p>\n')
      f.write('<p><span class="date">%s</span> ' % (time.strftime("%d %b %Y", time.gmtime(i.date))))
      f.write('from <a href="')
      f.write(i.baselink)
      f.write('">')
      f.write(i.basename)
      f.write('</a>')
      f.write('</p>\n')
      f.write('</div>\n')

    f.write('</div>\n')

    f.write('</div>\n')

    f.write('<div class="info">\n')
    f.write('<p>The data displayed here comes from the following sources: \n')
    f.write('RSS Feeds ( ')
    for i in config["feed"]:
      f.write('<a href="%s">%s</a> ' % (i[1], i[2]))
    f.write('), \n')

    f.write('Flickr ( ')
    for i in config["flickr"]:
      f.write('<a href="%s">%s</a> ' % (i[1], i[2]))
    f.write('), \n')

    f.write('Technorati ( ')
    for i in config["technorati"]:
      f.write('<a href="http://www.technorati.com/tags/%s">%s</a> ' % (i[1], i[1]))
    f.write('), \n')

    f.write('and Del.icio.us ( ')
    for i in config["delicious"]:
      f.write('<a href="%s">%s</a> ' % (i[1], i[2]))
    f.write(')</p>\n')

    f.write('<p>News aggregation available as an RSS <a href="%s.rss">feed</a>.</p>\n' % config["name"])
    
    f.write('<p>Unfortunately due to the nature of the content we are unable to guarantee this page is valid XHTML. Some portions are Copyright W3C 2005.</p>\n')
    f.write('<p><a href=".">Information on POAK</a> including configuration and source code - <a href="http://www.w3.org/People/Dean/">Dean Jackson</a>.</p>\n')

    f.write('</div>\n')

    f.write('</body>\n</html>\n')
    f.close()



def _xml_attr(value):
    """Return value escaped for use inside a double-quoted XML attribute."""
    return escape(value).replace('"', '&quot;')


def outputRSS(entries, num, filename):
    """Write the first num entries to filename as an RSS 1.0 (RDF) feed.

    entries  -- list of FeedEntry objects, already in the wanted order
    num      -- maximum number of items to include
    filename -- path of the file to (over)write

    The channel title and description come from the module-level config
    dictionary. All text placed in the document is XML-escaped, so an
    "&" in a link or title cannot make the feed malformed.
    """
    selected = entries[:num]
    f = open(filename, "w")
    try:
        f.write('''<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:content="http://purl.org/rss/1.0/modules/content/"
   xmlns="http://purl.org/rss/1.0/">

 <channel rdf:about="http://www.w3.org/">
   <title>%s</title>
   <link>http://www.w3.org/</link>
   <description>%s</description>
   <dc:language>en</dc:language>
   <dc:creator>http://www.w3.org/</dc:creator>
   <dc:date>%s</dc:date>
   <items>
     <rdf:Seq>
''' % (escape(config["title"]), escape(config["subtitle"]), time.strftime("%Y-%m-%dT%H:%M+00:00", time.gmtime())))

        # Table of contents: one rdf:li per item, in output order.
        for entry in selected:
            f.write('      <rdf:li rdf:resource="%s"/>\n' % _xml_attr(entry.link))

        f.write('''    </rdf:Seq>
   </items>
 </channel>
''')

        for entry in selected:
            f.write('<item rdf:about="%s">\n' % _xml_attr(entry.link))
            # Title and summary are UTF-8 byte strings and are written
            # with their own write() calls, never formatted together
            # with the link.
            f.write('  <title>')
            f.write(escape(entry.title))
            f.write('</title>\n')
            f.write('  <link>')
            f.write(escape(entry.link))
            f.write('</link>\n')
            # The time is formatted with gmtime, so the offset that goes
            # with it is +00:00.
            f.write('  <dc:date>%s</dc:date>\n' % time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(entry.date)))
            f.write('  <description>')
            f.write(escape(entry.summary))
            f.write('</description>\n')
            f.write('</item>\n')

        f.write('</rdf:RDF>\n')
    finally:
        # Close the file even when one of the writes above fails.
        f.close()

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print "Usage: python %s configname technoratikey" % sys.argv[0]
        print "where configname is the basename of a configuration file"
        print "eg. 'w3c' for the file 'w3c.config'"
        print "and technoratikey is the API key from technorati"
        sys.exit(1)
    
    configname = sys.argv[1]
    config["name"] = configname
    config["title"] = "No Title"
    config["subtitle"] = "Subtitle"
    config["feed"] = []
    config["flickr"] = []
    config["delicious"] = []
    config["technorati"] = []
    
    pytechnorati.setLicense(sys.argv[2])
    
    for l in open("%s.config" % configname).readlines():
        l = l.strip()
        if l == "": continue
        if l.split()[0] == "title":
            config["title"] = " ".join(l.split()[1:])
        elif l.split()[0] == "subtitle":
            config["subtitle"] = " ".join(l.split()[1:])
        elif l.split()[0] == "feed":
            config["feed"].append((l.split()[1], l.split()[2], " ".join(l.split()[3:])))
        elif l.split()[0] == "delicious":
            config["delicious"].append((l.split()[1], l.split()[2], " ".join(l.split()[3:])))
        elif l.split()[0] == "flickr":
            config["flickr"].append((l.split()[1], l.split()[2], " ".join(l.split()[3:])))            
        elif l.split()[0] == "technorati":
            config["technorati"].append((l.split()[1], l.split()[2]))            
    
    print "Doing feeds"
    print "-----------"
    feedlist = feeds(config["feed"], 30)

    print "\nDoing delicious"
    print "---------------"
    deliciouslist = feeds(config["delicious"], 20)

    print "\nDoing flickr"
    print "---------------"
    flickrlist = flickr(config["flickr"], 20)
    
    print "\nDoing technorati"
    print "---------------"
    cosmoslist = cosmos(config["technorati"], 20)
        
    print "\nProducing HTML"
    print "----------------"
    outputHTML(feedlist, flickrlist, deliciouslist, cosmoslist, 40, "%s.html" % configname) 
    
    print "\nProducing RSS"
    print "---------------"
    outputRSS(feedlist, 20, "%s.rss" % configname)
