#!/usr/bin/env python

"""
This allows me to easily enter data into various logs from the
command line. For example:
  'b http://wwws.sun.com/software/xml/developers/diffmk/refentry.html w misc Noted ^'
  results in 
  '<li>030121: misc] Noted <a href="http://wwws.sun.com/software/xml/developers/diffmk/refentry.html">diffmk</a>.</li>'
This script can be easily extended with new "schemes" that parse the input
and output it in a specified format.
"""

def times():
    """
    Return the current local date as a six-digit 'YYMMDD' token.
    """

    import time

    return time.strftime("%y%m%d", time.localtime())

def hyperize(entry, title, url):
    """
    Replace the '^' placeholder in an entry with a hypertext link.

    On each line of `entry` the last '^' is replaced with
    '<a href="url">title</a>'. Text that contains no '^' is returned
    unchanged.

    entry -- text of the log entry, optionally containing '^'
    title -- link text, inserted verbatim
    url   -- link target, inserted verbatim
    """

    import re

    link = '<a href="%s">%s</a>' % (url, title)

    # A callable replacement is used instead of a template string so that
    # backslashes in the title or URL are inserted literally rather than
    # being parsed as escapes or group references.
    return re.sub(r'(.*)\^(.*)',
                  lambda match: match.group(1) + link + match.group(2),
                  entry)

    
def encode(s):
    '''
    Escape the XML markup characters in a string.

    '&', '<' and '>' become their entity references and each tab becomes
    two spaces. '&' is handled first so that the ampersands introduced by
    the later replacements are not escaped again.
    '''

    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\t", "  "),
    )
    for raw, escaped in substitutions:
        s = s.replace(raw, escaped)
    return s

    
def printUsage(message):
    """
    Print `message` followed by the command-line usage summary.

    message -- explanation of what went wrong, printed on its own line
    """

    # The single-argument, parenthesised form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print(message)
    print("Usage: b [url]? scheme [scheme parameters]? entry")
    

def getScheme(options=""):
    """
    Match the option string to a scheme grammar and run its output function.

    options -- the command-line arguments joined into a single string

    Each scheme pairs a regular expression (its grammar) with an output
    function. The named groups of the first grammar that matches are
    passed to that function as a dict and the function's result is
    returned. If no grammar matches, the usage message is printed and the
    script exits with a non-zero status.
    """

    import re
    import sys

    # A tuple (rather than a dict) keeps the order in which the schemes
    # are tried deterministic.
    schemes = (
        (r'(?P<url>(\.|http)\S*)? ?(?P<scheme>g) (?P<entry>.*)', outGoatee),
        (r'(?P<url>(\.|http)\S*)? ?(?P<scheme>w) (?P<activity>\w*) (?P<entry>.*)', outW3C),
        (r'(?P<scheme>qw) (?P<query>.*)', queryW3C),
    )

    for grammar, handler in schemes:
        # Match once, with the same flags used to extract the parameters:
        # DOTALL lets an entry span several lines and IGNORECASE accepts
        # the scheme letter in either case.
        smatch = re.match(grammar, options, re.DOTALL | re.IGNORECASE)
        if smatch:
            # Every grammar has named groups, so groupdict() is never empty.
            return handler(smatch.groupdict())

    printUsage("Sorry, I don't understand the scheme or URI")
    sys.exit(1)


def outGoatee(params=None):
    """
    Insert a dated, titled link entry into the Goatee "nifty stuff" page.

    params -- dict of scheme parameters:
        'entry' -- text placed in the <dd> of the new item
        'url'   -- address to link to; may be None or missing

    The entry is inserted directly after the first
    '<dl style="clear:left">' tag in the page. If that tag is not found
    the file is left untouched and the usage message is printed.
    """

    import re

    if params is None:
        params = {}

    print("Outputting to Goatee Nifty")

    entry = params['entry']
    # The URL group of the grammar is optional, so it may be None.
    raw_url = params.get('url') or ''
    title = ''

    # Fetch the title using the URL as typed; only the copy written into
    # the page is escaped for XML.
    if raw_url:
        title = getTitle(raw_url)
    url = encode(raw_url)
    date_token = times()
    formatted_entry = '<dt><a href="%s">%s</a> (%s)</dt><dd>%s</dd>' % (url, title, date_token, entry)

    path = '/home/reagle/data/2web/goatee/nifty-stuff.html'
    with open(path) as fd:
        content = fd.read()

    # Flags belong to compile(); the third positional argument of
    # Pattern.sub()/subn() is a replacement count, not a flag set.
    insertion_pattern = re.compile('(<dl style="clear:left">)', re.DOTALL | re.IGNORECASE)

    # A callable replacement keeps backslashes in the entry literal, and
    # subn() reports whether the insertion point was actually found.
    newcontent, hits = insertion_pattern.subn(
        lambda match: '%s \n  %s' % (match.group(1), formatted_entry),
        content, count=1)
    if hits:
        with open(path, 'w') as fd:
            fd.write(newcontent)
    else:
        printUsage("Sorry, output regexp subsitution failed.")
        
    
def outW3C(params=None):
    """
    Insert a dated list item into the "Done Work" list of the W3C log.

    params -- dict of scheme parameters:
        'activity' -- activity label placed before the entry text
        'entry'    -- entry text; a '^' in it is replaced by a link to
                      the URL (see hyperize)
        'url'      -- address to link to; may be None or missing

    The item is inserted directly after the first
    '<h2>Done Work</h2> <ol>' opening in the page. If that opening is not
    found the file is left untouched and the usage message is printed.
    """

    import re

    if params is None:
        params = {}

    print("Outputting to W3C log")

    activity = params['activity']
    entry = params['entry']
    # The URL group of the grammar is optional, so it may be None.
    raw_url = params.get('url') or ''
    title = ''

    # Fetch the title using the URL as typed; only the copy written into
    # the page is escaped for XML.
    if raw_url:
        title = getTitle(raw_url)
    url = encode(raw_url)
    date_token = times()
    formatted_entry = "<li>%s: %s] %s</li>" % (date_token, activity, hyperize(entry, title, url))
    print("ENTRY IS %s" % formatted_entry)

    path = '/home/reagle/data/2web/WWW/Team/Reagle/Overview.html'
    with open(path) as fd:
        content = fd.read()

    # Flags belong to compile(); the third positional argument of
    # Pattern.sub()/subn() is a replacement count, not a flag set.
    insertion_pattern = re.compile(r'(<h2>Done Work</h2>\s*<ol>)', re.DOTALL | re.IGNORECASE)

    # A callable replacement keeps backslashes in the entry literal, and
    # subn() reports whether the insertion point was actually found.
    newcontent, hits = insertion_pattern.subn(
        lambda match: '%s \n  %s' % (match.group(1), formatted_entry),
        content, count=1)
    if hits:
        with open(path, 'w') as fd:
            fd.write(newcontent)
    else:
        printUsage("Sorry, output regexp subsitution failed.")
        
    
def getTitle(url):
    """
    Fetch the resource at `url` and return its title.

    url -- address of the resource to download

    The title is extracted with a regular expression chosen by the URL:
    a few mail archives have their own pattern and everything else falls
    back to the contents of the <title> element. If no title can be
    extracted, the usage message is printed and the script exits with a
    non-zero status.
    """

    import re
    import sys

    try:
        from urllib.request import urlopen    # Python 3
    except ImportError:
        from urllib import urlopen            # Python 2

    # Tried in order, so the site-specific patterns must come before the
    # catch-all '.*' at the end.
    title_regexps = (
        ('http://lists.w3.org/.*',                 '<!-- subject="(.*?)" -->'),
        ('http://impressive.net/archives/fogo/.*', r'<title>fogo archive: (.*?) by .*?</title>'),
        ('http://lists.kde.org/.*',                r"<title>MARC: msg '(.*?)'</title>"),
        ('http://lists.debian.org/.*',             r'<title>(.*?)</title>'),
        ('.*',                                     r'<title>(.*?)</title>'),
    )

    for url_pattern, title_pattern in title_regexps:
        if re.match(url_pattern, url):
            break

    sock = urlopen(url)
    try:
        htmlSource = sock.read()
    finally:
        # Close the connection even if the read fails.
        sock.close()

    # Python 3 returns bytes; decode so the text patterns above can be
    # applied. NOTE(review): UTF-8 is assumed here, the page's declared
    # charset is not consulted.
    if not isinstance(htmlSource, str):
        htmlSource = htmlSource.decode('utf-8', 'replace')

    tmatch = re.search(title_pattern, htmlSource, re.DOTALL | re.IGNORECASE)
    if tmatch:
        return tmatch.group(1)

    printUsage("Sorry, but I don't know how to extract the title of the resource.")
    sys.exit(1)

        
def queryW3C(params=None):
    """
    Search the W3C log files and show the matching items in a browser.

    params -- dict of scheme parameters:
        'query' -- regular expression, matched case-insensitively
                   against each <li>...</li> item of the log files

    The matching items are wrapped in an XHTML page, written to the
    result file and opened with the configured browser command. Nothing
    is written or opened when no item matches.
    """

    import os
    import re

    if params is None:
        params = {}

    print("Querying W3C log")

    browser = "kfmclient openProfile webbrowsing "
    in_files = ["/home/reagle/data/2web/WWW/Team/Reagle/Overview.html",
        "/home/reagle/data/2web/WWW/Team/Reagle/history.html"]
    out_file = "/home/reagle/tmp/sponge_query_result.html"

    query_pattern = re.compile(params['query'], re.DOTALL | re.IGNORECASE)
    li_pattern = re.compile(r'<li>.*?</li>', re.DOTALL | re.IGNORECASE)

    # Collect the matching items in file order and join them once at the
    # end instead of growing a string inside the loop.
    matches = []
    for path in in_files:
        with open(path) as in_fd:
            content = in_fd.read()
        matches.extend(li for li in li_pattern.findall(content)
                       if query_pattern.search(li))

    if matches:
        out_str = '''<?xml version="1.0" encoding="iso-8859-1"?>
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
            "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
            <html xmlns="http://www.w3.org/1999/xhtml">
            <head profile="http://www.w3.org/Team/Reagle/min2rss">
            <title>Reagle's Team Page</title>
            <link rel="stylesheet" type="text/css" 
               href="/home/reagle/data/2web/WWW/Team/Reagle/0103.css" />
            </head>
            <body xml:lang="en" lang="en"> 
              <ol>''' + ''.join(matches) + '''</ol></body></html>'''

        with open(out_file, 'w') as out_fd:
            out_fd.write(out_str)
        os.system(browser + out_file)

        
# Run the logger only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":

    # Both imports are kept so the module-level names `string` and `sys`
    # stay bound exactly as before.
    import string
    import sys

    # Re-join the arguments with single spaces so the scheme grammars can
    # parse them as one string. str.join is used because string.join()
    # exists only in Python 2.
    options = " ".join(sys.argv[1:])
    getScheme(options)







   
