#!/usr/bin/python
"""
This is a sort of IMAP<->HTTP proxy service.

It's limited to my way of using IMAP
(in particular, I archive all mail in Archive/YYYY-MMWW
mailboxes. WW is week number).

See changelog at end.

"""
# Revision identifier in RCS/CVS "$Id$" keyword form (file name, revision,
# date, author); presumably rewritten by the version control system on checkin.
__version__ = '$Id: mid_proxy.py,v 1.7 2002/02/12 15:07:04 connolly Exp $'

import string
import imaplib2 # patched/kludged in this directory
import urllib
import getpass
import time # http://www.python.org/doc/current/lib/module-time.html

from imap_sort import parseList, intern, assoc # just SEXPR stuff
import msgHeaderExt

import BaseHTTPServer, SocketServer
import cgi

# see http://www.w3.org/2000/10/swap/
# for these modules...
import notation3
from RDFSink import SYMBOL, LITERAL

# Request-path prefixes recognised by Handler.do_GET and emitted as link
# targets by InformalFormatter.
# Prefix of a request path that is itself a mid: URI; the rest is the message-id.
MID_scheme="mid:"
# HTTP path prefix for the same lookup; the rest is URL-unquoted before use.
MID_path="/mid/"
# Path prefix for a single body part; followed by the mid/part/mt query string.
part_path="/part?"
# Not referenced anywhere else in the visible source.
MID="urn:mid:" #@@ugh... bow to URN support (rather than general URI support) in Netscape 4

def main(argv):
    """Run the proxy until interrupted.

    argv is the full command line; argv[1] is parsed as the TCP port to
    listen on (all interfaces).  Constructing the Server logs in to IMAP
    first, so this prompts for a password before serving.  Ctrl-C
    (KeyboardInterrupt) ends the serve loop quietly.
    """
    portNum = string.atoi(argv[1])
    server = Server(('', portNum), Handler)

    print("@@ serving at: http://127.0.0.1:%d/" % (portNum,))
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # interactive stop; nothing to clean up here
        pass


class Server(BaseHTTPServer.HTTPServer):
    """HTTP server holding one logged-in IMAP connection for all requests.

    Handlers reach the connection through self.server._imapConn.
    """

    def __init__(self, addr, port):
        """Bind to addr and log in to IMAP.

        NOTE: despite its name, `port` is passed straight through as the
        second argument of TCPServer.__init__, and main() supplies the
        request handler class there.
        """
        SocketServer.TCPServer.__init__(self, addr, port)
        imapConn = myIMAP()
        imapConn.loginDan()  # prompts for the password
        self._imapConn = imapConn


class Handler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Answer HTTP GET requests out of the server's shared IMAP connection.

    Routes (see do_GET):
      /                            query form (homePage)
      /listMessages?q=&min=&max=   search results (listMessages)
      mid:<message-id>             whole message (showMessage)
      /mid/<message-id>            whole message; the id is URL-unquoted
      /part?mid=&part=&mt=         one body part (showPart)

    Messages can only be shown after a listMessages request has recorded
    them in the connection's midCache.
    """

    def do_GET(self):
        """Dispatch on the request path; unknown paths get a 404."""
        path = self.path

        if path == "/":
            self.homePage()
        elif path.startswith('/listMessages?'):
            self.listMessages()
        elif path.startswith(MID_scheme):
            self.showMessage(path[len(MID_scheme):])
        elif path.startswith(MID_path):
            self.showMessage(urllib.unquote(path[len(MID_path):]))
        elif path.startswith(part_path):
            params = cgi.parse_qs(path[len(part_path):])
            try:
                mid = params['mid'][0]
                mediaType = params['mt'][0]
            except KeyError:
                self._fail(400, "@@ need mid and mt parameters")
                return
            # parse_qs drops blank values, so a missing part means "whole body"
            part = params.get('part', [''])[0]
            self.showPart(mid, part, mediaType)
        else:
            self._fail(404)

    def _fail(self, code, message=None):
        """Send a complete, body-less error response (status line + headers)."""
        self.send_response(code, message)
        self.end_headers()

    def _selectMessage(self, mid):
        """Select, read-only, the folder that midCache records for mid.

        Returns (conn, uid) on success.  On a cache miss or a failed
        SELECT, a 500 response is sent and None is returned.
        """
        conn = self.server._imapConn
        print("@@midCache[%s]?" % (mid,))
        try:
            folderName, uidvalidity, uid = conn.midCache[mid]
        except KeyError:
            self._fail(500, "@@ need to implement cache miss")
            return None
        res, data = conn.select(folderName, 'readonly')
        if res != 'OK':
            self._fail(500, "@@bogus select: %s" % (data,))
            return None
        return conn, uid

    def showMessage(self, mid):
        """Send the header and text of message mid as message/rfc822."""
        selected = self._selectMessage(mid)
        if selected is None:
            return  # error response already sent
        conn, uid = selected

        res, data = conn.fetch(uid, '(RFC822.HEADER RFC822.TEXT)')
        if res != 'OK':
            self._fail(500, "@@fetch problem: %s" % (data,))
            return

        self.send_response(200)
        self.send_header("Content-type", "message/rfc822")
        self.end_headers()
        print("@@fetch data: %s" % (data,))
        for hit in data:
            # hit[0] is the message number; the rest parses to
            # [name, header, name, text]
            hitP = parseList(hit[1:])
            self.wfile.write(hitP[1] + hitP[3])

    def showPart(self, mid, part, mediaType):
        """Send one body part of message mid, labelled with mediaType.

        part is a dotted section number such as '2.3.2', or '' to fetch
        plain BODY.  Both part and mediaType come from the request URL, so
        they are checked before being put into an IMAP command or a
        response header.
        """
        if part and not part.replace('.', '').isdigit():
            self._fail(400, "@@ bad part number")
            return
        if '\r' in mediaType or '\n' in mediaType:
            self._fail(400, "@@ bad media type")
            return

        selected = self._selectMessage(mid)
        if selected is None:
            return  # error response already sent
        conn, uid = selected

        if part:
            res, data = conn.fetch(uid, 'BODY[%s]' % (part,))
        else:
            # NOTE(review): a bare BODY fetch asks for structure rather
            # than content in IMAP4rev1 -- confirm this is intended
            res, data = conn.fetch(uid, 'BODY')
        if res != 'OK':
            self._fail(500, "@@fetch problem: %s" % (data,))
            return

        self.send_response(200)
        self.send_header("Content-type", mediaType)
        self.end_headers()
        parts = data[0]
        content = parseList(parts[2:]) # skip "BODY[nn]"
        self.wfile.write(content)

    def listMessages(self):
        """Run IMAP search q over the archive folders between min and max.

        Query parameters: q (IMAP SEARCH criteria), min and max (folder
        name bounds, compared as strings under IMAP/Archive/), and
        optionally formal (RDF output instead of HTML).  Every hit is
        recorded in the connection's midCache so it can be fetched later.
        """
        params = cgi.parse_qs(self.path[len('/listMessages?'):])
        try:
            q = params['q'][0]
            minDate = params['min'][0]
            maxDate = params['max'][0]
        except KeyError:
            self._fail(400, "@@ need q, min and max parameters")
            return
        if params.has_key('formal'):
            fm = FormalFormatter(self.wfile)
        else:
            fm = InformalFormatter(self.wfile.write)

        def listInFolder(conn, flags, sep, folderSym,
                         fm=fm, q=q, minDate=minDate, maxDate=maxDate):
            """Search one folder; a true result tells eachFolder to stop.

            This presumes folders come in reverse chron/lexical order.
            """
            folderName = repr(folderSym)
            if folderName < ("IMAP/Archive/%s" % (minDate,)):
                return -1 # stop iterating
            if folderName > ("IMAP/Archive/%s" % (maxDate,)):
                return 0 # keep going

            selRes = conn.select(folderName, 'readonly')
            if selRes[0] != 'OK':
                print("@@I think response [%s] is OK to ignore for folder '%s'"
                      % (repr(selRes), folderName))
                return 0

            uidvalidity = conn.response('UIDVALIDITY')
            print("@@ folder %s: uidvalidity %s, flags %s, recent %s"
                  % (folderName, uidvalidity,
                     conn.response('FLAGS'),
                     conn.response('RECENT')))
            try:
                resp = conn.search(None, q)
            except conn.error:
                fm.reportError(q)
                return 0 #@@reraise?
            if resp[0] != 'OK':
                # report the search response, not the earlier select one
                raise IOError("@@what is going on? [%s] for folder '%s'"
                              % (repr(resp), folderName))

            hits = resp[1][0]
            if not hits:
                return 0

            # search gives "1 2 3"; fetch wants the message set "1,2,3"
            fetchRes = conn.fetch(",".join(hits.split(" ")),
                                  '(ENVELOPE BODYSTRUCTURE UID)')
            if fetchRes[0] != 'OK':
                return 0

            details = fetchRes[1]
            details.reverse()
            for hit in details:
                # l is [name, envelope, name, bodystructure, name, uid]
                l = parseList(hit[1:])
                fm.startHit()
                mid = l[1][9][1:-1]  # message-id without the <> brackets
                uid = l[5]
                conn.midCache[mid] = folderName, uidvalidity, uid

                fm.showEnvelope(l[1])
                fm.showBodySummary(mid, l[3])
                fm.endHit()
            return 0

        #@@handle if-modified-since?
        self.send_response(200)
        self.send_header("Content-type", fm.mediaType())
        self.end_headers()
        fm.startDoc(q)

        eachFolder(self.server._imapConn, listInFolder,
                   reverseChrono, "IMAP/Archive")
        fm.endDoc()

    def homePage(self):
        """Send the HTML page holding the search form."""
        form = """
  <form action="listMessages" method="GET">
   <p>timespan: <input name="min" value="2000-01"/>
<input name="max" value="2000-12"/></p>

   <p>IMAP query: <input name="q"/></p>
   <p><input name="formal" type="checkbox"/> formal</p>
   <p>examples:</p>
    <ul>
     <li><tt>HEADER From "Connolly"</tt></li>
     <li><tt>or (from "connolly" to "emiller") (from "emiller" to "connolly")</tt></li>
    </ul>
   <p>spec: <a href="http://www.faqs.org/rfcs/rfc2060.html">RFC2060</a>
   </p>
   <p><input type="submit"/></p>
  </form>
"""
        #@@ handle if-modified-since?
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        startHTMLDoc(self.wfile.write, "Mail thingy@@")
        self.wfile.write(form)
        endHTMLDoc(self.wfile.write, "@@RCSId of this script")


def startHTMLDoc(w, title):
    """Write the opening of an XHTML page, up to and including <body>.

    w is a callable taking one string; title is inserted as-is into the
    <title> element.
    """
    template = """
<html xmlns='http://www.w3.org/1999/xhtml'
><head><title>%s</title></head
><body
>"""
    opening = template % (title,)
    w(opening)

def endHTMLDoc(w, sigMeta=''):
    """Write the signature block and close the page begun by startHTMLDoc.

    w is a callable taking one string; sigMeta is inserted as-is inside
    the <address> element, after the author line.
    """
    template = """<address
>Dan Connolly@@ <br class=''/>
%s</address
></body
></html>
"""
    closing = template % (sigMeta,)
    w(closing)


#"""emacs python-mode got confused


class InformalFormatter:
    """Format the body of an HTTP response informally,
    i.e. for human consumption, in HTML.

    Everything that originates in a mail message or in the request (the
    query, subjects, names, addresses, message-ids, MIME labels) is
    HTML-escaped before being written.
    """

    def __init__(self, w):
        """w is a callable taking one string, e.g. a file's write method."""
        self._w = w

    def _esc(self, value):
        """Return value, formatted with %s, safe for HTML text and for
        double-quoted attribute values."""
        text = "%s" % (value,)
        text = text.replace("&", "&amp;")  # must be first
        text = text.replace("<", "&lt;")
        text = text.replace(">", "&gt;")
        return text.replace('"', "&quot;")

    def mediaType(self):
        """Return the Content-type of the output."""
        return "text/html"

    def startDoc(self, q):
        """Write the page opening, the heading and the start of the hit list."""
        write = self._w
        shown = self._esc(q)
        startHTMLDoc(write, "IMAP query: %s" % (shown,))
        write("<h1>IMAP Query</h1><p>query: %s</p\n>" % (shown,))
        write("<ul\n>")

    def endDoc(self):
        """Close the hit list and the page."""
        write = self._w

        write("</ul\n>")

        endHTMLDoc(write, "@@RCSId of this script + last modification of IMAP mailboxes checked")

    def startHit(self):
        """Open the list item for one matching message."""
        self._w("<li\n>")

    def endHit(self):
        """Close the list item for one matching message."""
        self._w("</li\n>")

    def showEnvelope(self, env):
        """Write subject (linked by mid: and by /mid/), first sender and date.

        env is a parsed ENVELOPE: a 10-item sequence of date, subject,
        from, sender, reply-to, to, cc, bcc, in-reply-to, message-id.
        Only the first from entry is shown; items 0, 2 and 3 of it are
        used as display name, mailbox and host.
        """
        write = self._w
        esc = self._esc

        env_date, env_subject, \
                  env_from, env_sender, env_reply_to, \
                  env_to, env_cc, env_bcc, \
                  env_in_reply_to, env_message_id = env

        # strip the enclosing <>; only HTML-escaped here. URI-escape this?
        mid = esc(env_message_id[1:-1])
        sender = env_from[0]

        #@@ what's from[1]???
        write('<a href="%s%s"><b>%s</b></a> (<a href="%s%s">http</a>)<br class=""/>%s <tt>&lt;%s@%s></tt><br />%s'
              % (MID_scheme, mid, esc(env_subject), MID_path, mid,
                 esc(sender[0]), esc(sender[2]), esc(sender[3]),
                 esc(env_date)) )

    def showBodySummary(self, mid, bstruct, part=None):
        """Write a nested list describing the MIME structure of a message.

        bstruct is a parsed BODYSTRUCTURE.  A multipart is recognised by
        its first item being a list; its leading list items are the
        subparts.  part is the list of 1-based section numbers leading to
        bstruct ([] or None at the top) and is never modified.
        """
        write = self._w
        esc = self._esc
        if part is None:
            part = []

        if isinstance(bstruct[0], list): # multipart
            write('<ul\n>')
            for i, sub in enumerate(bstruct):
                if not isinstance(sub, list):
                    break  # past the subparts: multipart subtype etc.
                write("<li\n>")
                self.showBodySummary(mid, sub, part + [i + 1])
                write("</li\n>")
            write('</ul\n>')
        else:
            print("@@bstruct:  %s" % (bstruct,))
            body_type, body_subtype, body_params, body_id, \
                       body_description, body_encoding, body_size = bstruct[:7]
            if body_type == 'MESSAGE': #@@ use symbols?
                # an embedded message carries its own envelope and body
                self.showEnvelope(bstruct[7])
                self.showBodySummary(mid, bstruct[8], part + [1])
            else:
                partS = '.'.join([str(n) for n in part]) # [2,3,2] => '2.3.2'
                #@@ do something with description?
                write('<a href="%spart=%s&amp;mid=%s&amp;mt=%s%%2F%s"><strong>%s</strong> [%s/%s %s bytes]</a\n>'
                      % (part_path, partS, esc(mid),
                         esc(body_type), esc(body_subtype),
                         esc(body_description),
                         esc(body_type), esc(body_subtype), esc(body_size)))

    def reportError(self, q):
        """Write a list item saying that query q was rejected."""
        write = self._w
        write("<li>@@oops... bad query'%s';<br class=''/> should not have given you a 200 OK</li\n>" % (self._esc(q),))
        


class Namespace:
    """A collection of URIs with a common prefix.

    Any attribute that is not otherwise defined evaluates to the prefix
    followed by the attribute name, e.g. Namespace("http://x/#").date is
    "http://x/#date".  Use sym() for local names that are not usable as
    attributes (such as the keyword 'from').

    ACK: AaronSw / #rdfig
    http://cvs.plexdev.org/viewcvs/viewcvs.cgi/plex/plex/plexrdf/rdfapi.py?rev=1.6&content-type=text/vnd.viewcvs-markup
    """

    def __init__(self, nsname):
        """nsname is the common URI prefix."""
        self.nsname = nsname

    def __getattr__(self, lname):
        """Return the URI for local name lname.

        Raises AttributeError for double-underscore protocol names, so
        that repr(), copy, pickle and hasattr() probes see an ordinary
        object instead of a string, and for 'nsname' itself, which would
        otherwise recurse forever on an instance that has not been
        initialised.
        """
        if lname == 'nsname' or (lname[:2] == '__' and lname[-2:] == '__'):
            raise AttributeError(lname)
        return self.nsname + lname

    def sym(self, lname):
        """Return the URI for local name lname, whatever its spelling."""
        return self.nsname + lname

# Vocabulary for the RDF output: FormalFormatter takes its property URIs
# (date, subject, from, to, cc, bcc, phrase, addr, mbox) from this namespace.
SwMail = Namespace("http://www.w3.org/2000/10/swap/pim/email@@#")

class FormalFormatter:
    """Format the results of an IMAP query formally,
    i.e. for machine consumption, in RDF.

    Offers the same methods as InformalFormatter, but emits statements
    through a notation3.ToRDF sink instead of writing HTML.
    """

    def __init__(self, fp):
        """fp is the output file handed to the RDF sink."""
        self.sink = notation3.ToRDF(fp, 'bogus@@:')
        self._serial = 1  # counter behind something()

    def something(self):
        """Return a fresh made-up mid: URI; the first one is numbered 2."""
        self._serial = self._serial + 1
        return "mid:%s@example" % (self._serial,)

    def mediaType(self):
        """Return the Content-type of the output."""
        return "text/xml" #@@cf issue rdfms-mime-type or whatever

    def startDoc(self, q):
        """Start the document and bind the email prefix; q is not used."""
        sink = self.sink
        sink.startDoc()
        sink.bind("email", (SYMBOL, SwMail.nsname))

    def endDoc(self):
        """Finish the document."""
        self.sink.endDoc()

    def startHit(self):
        """Nothing to emit at the start of a hit."""
        pass

    def endHit(self):
        """Nothing to emit at the end of a hit."""
        pass

    def showEnvelope(self, env):
        """Emit date, subject and from/cc/to/bcc statements for a message.

        env is a parsed ENVELOPE: a 10-item sequence of date, subject,
        from, sender, reply-to, to, cc, bcc, in-reply-to, message-id.
        The statement subject is "mid:" plus the message-id without its
        enclosing brackets.  A subject that cannot be encoded as
        us-ascii is left out.
        """
        (env_date, env_subject,
         env_from, env_sender, env_reply_to,
         env_to, env_cc, env_bcc,
         env_in_reply_to, env_message_id) = env

        subj = "mid:%s" % (env_message_id[1:-1],) #@@escaping?

        emit = self.sink.makeStatement
        context = (SYMBOL, "bogus2@@:")
        message = (SYMBOL, subj)

        emit((context, (SYMBOL, SwMail.date), message,
              (LITERAL, env_date)))

        if env_subject:
            try:
                env_subject.encode("us-ascii")
            except UnicodeError: # bogus stuff in the subject field
                plain = 0
            else:
                plain = 1
            if plain:
                emit((context, (SYMBOL, SwMail.subject), message,
                      (LITERAL, env_subject)))

        roles = ((SwMail.sym('from'), env_from),
                 (SwMail.cc, env_cc),
                 (SwMail.to, env_to),
                 (SwMail.bcc, env_bcc))
        for prop, parties in roles:
            self.showWho(context, subj, prop, parties)

        #@@in reply to/refs, sender?

    def showWho(self, c, subj, prop, parties):
        """Emit one anonymous node per address in parties.

        c is the statement context, subj the message URI and prop the
        property URI linking the message to each party.  Items 0, 2 and 3
        of a party are used as display phrase, mailbox and host.
        """
        emit = self.sink.makeStatement
        message = (SYMBOL, subj)
        role = (SYMBOL, prop)

        for who in parties:
            node = (SYMBOL, self.something())
            self.sink.startAnonymous((c, role, message, node))
            #@@what properties to use here?
            rawPhrase = who[0]
            if rawPhrase:
                try:
                    phrase = msgHeaderExt.decode(rawPhrase)
                except (LookupError, UnicodeError):
                    # e.g.
                    # From: "=?ks_c_5601-1987?B?wNPB9sjGXChuZXRzZ28uY29tXCk=?=" <maxim98@netsgo.com>
                    # or From: Sebastian Mu<F1>iz <sjmuniz@...>
                    # we just don't know what that phrase is
                    pass
                else:
                    emit((c, (SYMBOL, SwMail.phrase), node,
                          (LITERAL, phrase)))
            address = '%s@%s' % (who[2], who[3])
            emit((c, (SYMBOL, SwMail.addr), node, (LITERAL, address)))
            emit((c, (SYMBOL, SwMail.mbox), node,
                  (SYMBOL, 'mailto:' + address)))
            self.sink.endAnonymous(message, role)

    def showBodySummary(self, mid, bstruct, part=[]):
        """Emit only a placeholder comment; the arguments are ignored."""
        self.sink.makeComment("body summary@@")

    def reportError(self, q):
        """Emit a comment saying that query q could not be processed."""
        self.sink.makeComment("error in processing query [%s]" % q)
    
def reverseChrono(x, y):
    """Comparison function that sorts folder triples in descending name order.

    x and y are (flags, sep, folder) triples; the reprs of the folder
    items are compared as strings.  Returns -1, 0 or 1.
    """
    later, earlier = repr(y[2]), repr(x[2])
    return (later > earlier) - (later < earlier)

def cmdline_main(argv):
    """ OBSOLETE

    Command-line lookup: search the IMAP/Archive folders for the first
    message whose header argv[1] contains argv[2], and print its address
    as built by myNSAddr.  argv[3] and argv[4] (earliest and latest) are
    read if present but not used by the search.
    """
    fieldName, target = argv[1], argv[2]

    earliest = None
    if len(argv) > 3:
        earliest = argv[3]

    latest = None
    if len(argv) > 4:
        latest = argv[4]

    imapConn = myIMAP()
    imapConn.loginDan()

    def doFolder(imapConn, flags, sep, folder,
                 fieldName=fieldName, target=target):
        """Search one folder; a true result tells eachFolder to stop."""
        selRes = imapConn.select(repr(folder), 'readonly')
        if selRes[0] != 'OK':
            print("@@ %s %s" % (selRes, folder))
            return

        try:
            uid = findFirstByField(imapConn, fieldName, target)
            print(myNSAddr(folder, uid))
            return 1 # done
        except KeyError:
            # no hit in this folder; carry on with the next one
            pass

        return 0

    eachFolder(imapConn, doFolder, None, "IMAP/Archive")


def eachFolder(imapConn, thunk, order=None, directory='""', pattern='*'):
    """Call thunk once per folder listed under directory.

    Each LIST reply line is parsed, in place, into a (flags, sep,
    folderSym) triple.  If order is given it is used as the comparison
    function for sorting the triples.  thunk is called as
    thunk(imapConn, flags, sep, folderSym); a true result ends the
    iteration.  Nothing happens if the LIST command does not answer OK.

    @@haven't implemented the "selectable" part yet.
    """
    typ, dat = imapConn.list(directory, pattern)

    if intern(typ) is not OK:
        return

    for i, line in enumerate(dat):
        dat[i] = parseList(('(', line, ')'))

    if order:
        print("@@unsorted:  %s" % (dat,))
        dat.sort(order)
        print("@@sorted:  %s" % (dat,))

    for flags, sep, folderSym in dat:
        print("@@ %s %s %s" % (flags, sep, folderSym))
        if thunk(imapConn, flags, sep, folderSym):
            break


def oldCode():
    """Leftover folder-walking loop, kept for reference only.

    Steps back one day at a time from now, derives the archive folder
    name YYYY-MMWW for each day and, once per distinct week/month, tries
    to select that folder and look up a message in it.

    NOTE(review): this reads earliest, latest, imapConn, fieldName,
    target, hostName, portNum and getPointer, none of which is defined in
    this function or at module level in the visible source, and `dir`
    here is the builtin.  Nothing in the visible source calls this
    function.
    """
    # iterate backwards by day/week/month

    t = time.time()
    done = 0
    dayInSeconds = (60*60*24)
    # week and month of the folder tried most recently
    oWeek = None
    oMonth = None

    while not done:
	t = t - dayInSeconds

	ttup = time.gmtime(t)
	year = ttup[0]
	month = ttup[1]

	# ug... convert python time tuple parts 6,7:
	# weekday range [0,6], Monday is 0
        # Julian day range [1,366]
	# to date %U format: week  number  of  year 
	# with Sunday as first day of week (00..53)

	# subtract 1 from Julian day to get 0-based dayOfYear
	# shift 0=monday to 0=sunday
	# hmm... had to add one to get it to match; is %U
	# actually 1-based somehow?
	week = ( (ttup[7] - 1) + ((ttup[6] + 1) % 7) + 6) / 7
	print "DEBUG week:", week, ttup[1], ttup[2], ttup[7], ttup[6], t

	# same folder as the previous day: nothing new to try
	if week == oWeek and month == oMonth:
	    continue
	else:
	    oWeek = week
	    oMonth = month

	folder = "%04d-%02d%02d" % (year, month, week)
	print "@@ folder:", folder

	# folder names compare as strings; stop once before the earliest bound
	if (earliest and (folder < earliest)):
	    done = 1
	    break

	# not yet back as far as the latest bound: skip this folder
	if (latest and (folder > latest)):
	    continue

	folder = 'Archive/' + folder

	selRes = imapConn.select(dir + folder, 'readonly')
	print "@@selRes", selRes

	if selRes[0] == 'OK':
	    try:
		print getPointer(imapConn, fieldName, target,
				 hostName, portNum, folder)
		done = 1
	    except KeyError:
		# KeyError is taken to mean no match in this folder; keep going back
		pass


# cf mit-vpn ssh tunnel thingy
# Host and port that myIMAP connects to; myNSAddr also puts them in the
# addresses it builds.
MyHostName = 'localhost'
MyPortNum = 4000
# Folder-name prefix that myNSAddr strips (by length) from folder names.
MyIMAP_dir = 'IMAP/'

class myIMAP(imaplib2.IMAP4):
    """IMAP4 connection to MyHostName:MyPortNum with a message-id cache.

    midCache maps a message-id (without its <> brackets) to the
    (folderName, uidvalidity, uid) triple recorded when the message was
    listed; the HTTP handler fills and reads it.
    """

    def __init__(self):
        """Connect to the configured host and port."""
        # cf mit-vpn ssh tunnel thingy
        imaplib2.IMAP4.__init__(self, MyHostName, MyPortNum)
        self.midCache = {}

    def loginDan(self):
        """Log in as user 'connolly', prompting for the password."""
        account = 'connolly'
        secret = getpass.getpass()

        self.login(account, secret)


def findFirstByField(imapConn, fieldName, target):
    """Return the first message found whose header fieldName contains target.

    Assumes imapConn is logged in and a folder is selected.  The search
    result is a list whose first item holds the matching message numbers
    separated by spaces; only the first number is returned, as a string.

    Raises KeyError if nothing matches.

    @@hmm... I think I'm taking advantage of a coincidental
    alignment of message numbers and UIDs here.
    """
    resp = imapConn.search(None, 'HEADER %s "%s"' % (fieldName, target))
    print("@@resp %s" % (resp,))
    hits = resp[1]

    # no result item at all, or an empty/None one: nothing matched
    if not hits or not hits[0]:
        raise KeyError(target)

    numbers = hits[0].split()
    if not numbers:
        raise KeyError(target)
    return numbers[0]


def myNSAddr(folderSym, uid):
    """Build the IMAP:// address string for message uid in a folder.

    The first len(MyIMAP_dir) characters of the folder name are dropped
    and the rest is URL-quoted with no safe characters, so '/' is
    escaped too.
    """
    relPath = str(folderSym)[len(MyIMAP_dir):]
    quotedPath = urllib.quote(relPath, "")

    fields = (MyHostName, MyPortNum, quotedPath, uid)
    return 'IMAP://%s:%d?fetch>UID>/%s>%s' % fields

# Symbols (from imap_sort.intern) used to name fetch items and to compare
# response types by identity.
INTERNALDATE = intern('INTERNALDATE')
# was intern('INTERNALDATE'), a copy-paste slip that made RFC822 an alias
# of INTERNALDATE
RFC822 = intern('RFC822')
OK = intern('OK')

def writeMessage(outFp, imapConn, num):
    """Fetch message num and look up its RFC822 item.

    Unfinished: the item is looked up but nothing is written to outFp,
    and nothing is returned.  Does nothing further if the fetch does not
    answer OK.
    """
    wanted = repr((INTERNALDATE, RFC822))
    typ, dat = imapConn.fetch(num, wanted)

    if intern(typ) is not OK:
        return

    pdat = parseList(dat[1:])
    msg = assoc(pdat, RFC822)  # @@ not used yet




# Script entry point: start the proxy with the full command line
# (main reads the port number from argv[1]).
if __name__ == '__main__':
    import sys
    main(sys.argv)



"""
clues...

Python 1.5.2 (#0, Apr  3 2000, 14:46:48)  [GCC 2.95.2 20000313 (Debian GNU/Linux)] on linux2
Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
>>> import imaplib2.py
Traceback (innermost last):
  File "<stdin>", line 1, in ?
ImportError: No module named py
>>> import imaplib2
>>> tux=imaplib2.IMAP4("localhost", 4000)
>>> tux.login('connolly', XXXX)
('OK', ['LOGIN completed'])
>>> tux.select('IMAP/Archive/1999-1248')
('OK', ['508'])
>>> tux.search('HEADER Message-ID "<Pine.LNX.4.20.9912011737130.12278-100000@tux.w3.org>"')
Traceback (innermost last):
  File "<stdin>", line 1, in ?
TypeError: not enough arguments; expected 3, got 2
>>> tux.search(None, 'HEADER Message-ID "<Pine.LNX.4.20.9912011737130.12278-100000@tux.w3.org>"')
('OK', ['177'])
>>> resp = tux.search(None, 'HEADER Message-ID "<Pine.LNX.4.20.9912011737130.12278-100000@tux.w3.org>"')
>>> envs = tux.fetch(resp[1], 'ENVELOPE')
Traceback (innermost last):
  File "<stdin>", line 1, in ?
  File "imaplib2.py", line 257, in fetch
    typ, dat = self._simple_command(name, message_set, message_parts)
  File "imaplib2.py", line 657, in _simple_command
    return self._command_complete(name, self._command(name, dat1, dat2))
  File "imaplib2.py", line 526, in _command_complete
    raise self.error('%s command error: %s %s' % (name, typ, data))
imaplib2.error: FETCH command error: BAD ['Bogus sequence in FETCH']
>>> resp[1]
['177']
>>> envs = tux.fetch('177', 'ENVELOPE')
>>> envs
('OK', [['177', '(ENVELOPE ("Wed, 1 Dec 1999 17:38:46 -0500 (EST)" "Guide form" (("Dan Brickley" NIL "danbri" "w3.org")) (("Dan Brickley" NIL "danbri" "w3.org")) (("Dan Brickley" NIL "danbri" "w3.org")) ((NIL NIL "connolly" "w3.org")) ((NIL NIL "charles" "w3.org")) NIL NIL "<Pine.LNX.4.20.9912011737130.12278-100000@tux.w3.org>"))']])


hmm... netscape IMAP: URI syntax is goofy. Some documentation:

IMAP Interoperability Feature Test Plan: Messaging Server
http://www.mozilla.org/quality/mailnews/tests/sea_mn_imap_function.html
Wed, 13 Sep 2000 23:20:11 GMT

<-

http://www.google.com/search?q=IMAP+netscape+fetch+URL

"""

# $Log: mid_proxy.py,v $
# Revision 1.7  2002/02/12 15:07:04  connolly
# picked up AaronSw's Namespace trick
# started moving email namespace into swap/pim
# handle bogus headers (with non-ascii chars)
#
# Revision 1.6  2002/02/10 04:59:10  connolly
# handle non-ascii header text, per RFC1522
#
# Revision 1.5  2002/01/26 08:14:17  connolly
# added formal output in RDF.
# still thinking about what's the right schema for
# email from/to/cc properties.
#
# Revision 1.4  2002/01/26 06:45:27  connolly
# factored out HTML-writing code, in preparation for
# RDF-writing code.
#
