#!/usr/bin/python

# POAK Lite
# Script to produce http://www.w3.org/2005/06/poak/ pages and feeds.
# Dean Jackson http://www.w3.org/People/Dean/ 
# 
#
# Like many useful programs, this started out as a little
# hack that has evolved into something bigger. And, like nearly
# every program I write, it's at the stage where it is 
# useful enough that it should be rewritten so that it is
# no longer a hack. Oh well.

import calendar
import re
import sys
import time

import feedparser

# Holds all configuration parameters. The script's entry point fills in
# the keys "name", "title", "subtitle" and "feed" (a list of 3-tuples built
# from the "feed" lines of the config file); the output functions read them.
config = {}

# Basic XML escaping of the three markup characters; quotes are left alone.
def escape(text):
  """Return text with &, < and > replaced by their XML entities."""
  # The ampersand has to be handled first, otherwise the ampersands of the
  # entities produced for the angle brackets would be escaped again.
  for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
    text = text.replace(char, entity)
  return text
  

class Entry:
  """A titled, linked, dated item. Instances sort newest (largest date) first."""

  def __init__(self, title, link, date):
    """Store the entry's fields.

    title -- text object with an encode() method; kept UTF-8 encoded
    link  -- stored as given
    date  -- number used for ordering (larger dates sort earlier)
    """
    self.title = title.encode('utf-8')
    self.link = link
    self.date = date

  def __lt__(self, other):
    """Order entries by descending date.

    list.sort() and sorted() only need "<". Defining it explicitly keeps
    sorting correct for dates less than one unit apart and on interpreters
    that never consult __cmp__.
    """
    return self.date > other.date

  def __cmp__(self, other):
    """Three-way comparison (descending date) for cmp()-style callers.

    Returns a real -1/0/1 integer instead of the raw date difference,
    which can be a fractional float that is not a valid __cmp__ result.
    """
    return (other.date > self.date) - (other.date < self.date)

class FeedEntry(Entry):
  """An Entry that also records its source feed and a summary text."""

  def __init__(self, title, link, date, basename, baselink, summary):
    """Initialise the Entry part, then store the feed-specific fields.

    basename and summary are kept UTF-8 encoded (they must provide an
    encode() method); baselink is stored as given.
    """
    # title, link and date are handled by the base class.
    Entry.__init__(self, title, link, date)
    encoded_name = basename.encode('utf-8')
    encoded_summary = summary.encode('utf-8')
    self.basename = encoded_name
    self.baselink = baselink
    self.summary = encoded_summary
    

def aggregate(entrieslist, num):
  """Merge several lists of entries and return the first num after sorting.

  entrieslist -- iterable of iterables of mutually comparable items
  num         -- slice bound applied to the sorted, merged list

  The ordering is whatever the items' own comparison defines.
  """
  merged = [item for group in entrieslist for item in group]
  merged.sort()
  return merged[:num]

def feeds(feedlist, num):
  """Fetch every feed in feedlist and return the first num entries, sorted.

  feedlist -- sequence of tuples; element 0 is the feed URL handed to
              feedparser and element 2 is the name shown in the progress
              output
  num      -- slice bound applied to the sorted list of entries

  Returns a list of FeedEntry objects in FeedEntry sort order. Entries
  without a usable 'modified_parsed' value are skipped because they cannot
  be placed in date order.
  """
  collected = []
  for feed in feedlist:
    print("  - %s - %s" % (feed[2], feed[0]))
    rss = feedparser.parse(feed[0])
    for e in rss.entries:
      # Covers a missing key as well as None and '' values.
      parsed = e.get('modified_parsed')
      if not parsed:
        continue
      # feedparser documents its parsed dates as UTC time tuples, so they
      # must be converted with timegm(). mktime() would read them as local
      # time and shift every timestamp by the host's UTC offset.
      date = calendar.timegm(parsed)

      # Prefer the summary, fall back to the first content block, and
      # otherwise use an empty string.
      summary = e.get('summary')
      if not summary:
        content = e.get('content')
        if content and content[0].value:
          summary = content[0].value
        else:
          summary = ""

      collected.append(FeedEntry(e.title, e.link, date,
                                 rss.feed.title, rss.feed.link, summary))

  collected.sort()
  return collected[:num]


def outputHTML(feeds, num, filename):
    """Write the first num entries of feeds to filename as an XHTML page.

    feeds    -- sequence of entries providing link, title, summary, date,
                baselink and basename attributes
    num      -- maximum number of entries to put on the page
    filename -- path of the file to (over)write

    Page title, subtitle, RSS file name and the list of sources come from
    the module-level config dictionary.
    """
    f = open(filename, "w")
    try:
        f.write('''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-au">
  <head>
    <title>%s</title>
    <link rel="stylesheet" href="/2005/12/room/room.css" type="text/css"/>
    <link rel="alternate" type="application/rss+xml" title="RSS 1.0" href="%s.rss"/>
  </head>
  <body>
    <h1>%s</h1>
    <h2>%s</h2>
''' % (config["title"], config["name"], config["title"], config["subtitle"]))

        # Each piece is written with its own write() call on purpose: the
        # entry texts are UTF-8 byte strings while the links are stored as
        # given, and the two should not be combined in one format string.
        for item in feeds[:num]:
            f.write('<div class="newsitem">\n')
            f.write('<h3><a href="')
            # Links go into an attribute value, so markup characters such
            # as the "&" of a query string have to be escaped.
            f.write(escape(item.link))
            f.write('">')
            # NOTE(review): title and summary are written unescaped and may
            # carry markup from the feed -- confirm this is intended.
            f.write(item.title)
            f.write('</a></h3>\n')
            if item.summary.startswith("<p>"):
                # Summary already starts with its own paragraph markup.
                f.write(item.summary)
                f.write('\n')
            else:
                f.write('<p>')
                f.write(item.summary)
                f.write('</p>\n')

            f.write('<p class="iteminfo"><span class="date">%s</span> ' % time.strftime("%d %b %Y", time.gmtime(item.date)))
            f.write(' by <a href="')
            f.write(escape(item.baselink))
            f.write('">')
            f.write(item.basename)
            f.write('</a></p>\n')
            f.write('</div>\n')

        f.write('<div class="info">\n')
        f.write('<p>The data displayed here comes from the following sources: \n')
        for source in config["feed"]:
            f.write('<a href="%s">%s</a> ' % (source[1], source[2]))
        f.write('</p>\n')

        f.write('<p>News aggregation available as an RSS <a href="%s.rss">feed</a>.</p>\n' % config["name"])

        f.write('<p>Unfortunately due to the nature of the content we are unable to guarantee this page is valid XHTML. Some portions are Copyright W3C 2005.</p>\n')
        f.write('<p><a href=".">Information on POAK</a> including configuration and source code - <a href="http://www.w3.org/People/Dean/">Dean Jackson</a>.</p>\n')

        f.write('</div>\n')

        f.write('</body>\n</html>\n')
    finally:
        # Release the file handle even when one of the writes fails.
        f.close()



def outputRSS(entries, num, filename):
    """Write the first num entries to filename as an RSS 1.0 (RDF) feed.

    entries  -- sequence of entries providing link, title, date and summary
                attributes
    num      -- maximum number of entries to include
    filename -- path of the file to (over)write

    Channel title and description come from the module-level config
    dictionary.
    """
    selected = entries[:num]

    f = open(filename, "w")
    try:
        f.write('''<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:content="http://purl.org/rss/1.0/modules/content/"
   xmlns="http://purl.org/rss/1.0/">

 <channel rdf:about="http://www.w3.org/">
   <title>%s</title>
   <link>http://www.w3.org/</link>
   <description>%s</description>
   <dc:language>en</dc:language>
   <dc:creator>http://www.w3.org/</dc:creator>
   <dc:date>%s</dc:date>
   <items>
     <rdf:Seq>
''' % (config["title"], config["subtitle"], time.strftime("%Y-%m-%dT%H:%M+00:00", time.gmtime())))

        # Table of contents: one rdf:li per item, in output order.
        for entry in selected:
            f.write('      <rdf:li rdf:resource="%s"/>\n' % escape(entry.link))

        f.write('''    </rdf:Seq>
   </items>
 </channel>
''')

        for entry in selected:
            # Link and title are escaped like the summary so that a "&" or
            # "<" in them cannot make the document ill-formed.
            link = escape(entry.link)
            f.write('<item rdf:about="%s">\n' % link)
            f.write('  <title>')
            f.write(escape(entry.title))
            f.write('</title>\n')
            f.write('  <link>')
            f.write(link)
            f.write('</link>\n')
            # gmtime() yields UTC, so the offset written next to it must be
            # +00:00, matching the channel-level dc:date above.
            f.write('  <dc:date>%s</dc:date>\n' % time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(entry.date)))
            f.write('  <description>')
            f.write(escape(entry.summary))
            f.write('</description>\n')
            f.write('</item>\n')

        f.write('</rdf:RDF>\n')
    finally:
        # Release the file handle even when one of the writes fails.
        f.close()

if __name__ == "__main__":
    # Command-line entry point: read <configname>.config, fetch the feeds
    # it lists, then write <configname>.html and <configname>.rss.
    if len(sys.argv) != 2:
        print("Usage: python %s configname" % sys.argv[0])
        print("where configname is the basename of a configuration file")
        print("eg. 'w3c' for the file 'w3c.config'")
        sys.exit(1)

    configname = sys.argv[1]
    # Defaults, used when the config file does not override them.
    config["name"] = configname
    config["title"] = "No Title"
    config["subtitle"] = "Subtitle"
    config["feed"] = []

    # Read the whole file up front so the handle is closed before any
    # network or output work starts.
    config_file = open("%s.config" % configname)
    try:
        config_lines = config_file.readlines()
    finally:
        config_file.close()

    # Each line is "<keyword> <values...>"; blank lines and unknown
    # keywords are ignored.
    for line in config_lines:
        words = line.split()
        if not words:
            continue
        keyword = words[0]
        if keyword == "title":
            config["title"] = " ".join(words[1:])
        elif keyword == "subtitle":
            config["subtitle"] = " ".join(words[1:])
        elif keyword == "feed":
            # feed <feed URL> <link> <display name...>
            if len(words) < 3:
                sys.exit("%s.config: feed line needs at least two fields: %r"
                         % (configname, line.strip()))
            config["feed"].append((words[1], words[2], " ".join(words[3:])))

    print("* Doing feeds")
    feedlist = feeds(config["feed"], 30)

    print("* Producing HTML")
    outputHTML(feedlist, 20, "%s.html" % configname)

    print("* Producing RSS")
    outputRSS(feedlist, 20, "%s.rss" % configname)

    print("Done.")