﻿#!/usr/bin/python
# -*- coding: utf-8 -*-
# Count the top 20 posters of a mailing list, given an mbox (.mbx) archive file.
# 
#
#   You are welcome to edit this program and modify it if you say
#   what you have done here and send a copy of your comments to
#   karl@w3.org cc: w3t-archive+qareview@w3.org
#   

# Module metadata.  The values of __cvsversion__ and __date__ look like
# CVS/RCS keyword expansions -- presumably maintained by the version-control
# system rather than edited by hand (TODO confirm).
__author__ = "Karl Dubost <karl@w3.org>"
__copyright__ = "Copyright (c) 2006 W3C"
__license__ = "W3C Software License"
__version__ = "0.2"
__cvsversion__ = "$Revision: 1.4 $"
__date__ = "$Date: 2008/06/18 11:44:02 $"

import mailbox
import email
import email.Utils
from  operator import itemgetter
from optparse import OptionParser
from email.header import decode_header
import pprint

# Module-level accumulator of (name, email) tuples: main() passes it to
# stats(), which appends to it in place.
addresslist=[]

class groupby(dict):
   """Dictionary mapping key(value) to the list of values sharing that key.

   seq -- iterable of values to group.
   key -- function computing the grouping key (default: the value itself).

   Values keep their order of appearance inside each group.  Unlike a
   plain dict, iterating over an instance yields (key, values) pairs, so
   ``for k, g in groupby(seq)`` works directly.
   """

   def __init__(self, seq, key=lambda x: x):
      for value in seq:
         self.setdefault(key(value), []).append(value)

   def __iter__(self):
      # dict.iteritems exists on Python 2 only; items() is available on
      # both Python 2 and Python 3 and yields the same (key, values) pairs.
      return iter(self.items())

def realname(addr):
   """Split an address header value into (display name, e-mail address).

   addr -- raw header value such as 'Jane Doe <jane@example.org>'.  None or
           an empty string (header absent) yields ("", "").

   Returns the 2-tuple produced by email.utils.parseaddr(), which is
   ("", "") when the value cannot be parsed.
   """
   # email.utils is the module name available on both Python 2.5+ and
   # Python 3; the legacy alias email.Utils exists on Python 2 only.
   from email.utils import parseaddr

   if not addr:
      return "", ""
   display_name, email_address = parseaddr(addr)
   return display_name, email_address

def stats(mailboxfile, addresslist):
   """Collect the sender of every message in an mbox file.

   mailboxfile -- path of the mbox archive to read.
   addresslist -- list extended in place with one (name, email) tuple per
                  message; the same list object is returned.

   The display name is decoded with getheader(); when it is empty the
   e-mail address is used as the name instead.  A message without a From
   header is recorded with empty name and address.

   Raises mailbox.NoSuchMailboxError if mailboxfile does not exist.
   """
   # create=False: the default (create=True) would silently create an empty
   # mbox file when the path is mistyped.
   box = mailbox.mbox(mailboxfile, create=False)
   try:
      for msg in box:
         # msg.get() returns None when the header is absent.
         sender = msg.get("From") or ""
         name, emailadd = realname(sender)
         name = getheader(name) or emailadd
         addresslist.append((name, emailadd))
   finally:
      # Release the underlying file handle even if a message fails to parse.
      box.close()
   return addresslist

def getheader(header_text, default="ascii"):
    """Decode an RFC 2047 encoded header value into a single text string.

    header_text -- raw header value, possibly containing encoded words
                   such as '=?utf-8?q?Ren=C3=A9?='.
    default     -- charset used for sections that declare none, and as a
                   fallback when a declared charset is unknown.

    Returns the decoded sections concatenated.  Bytes that cannot be
    decoded are replaced with U+FFFD instead of raising, so one malformed
    header does not abort processing of a whole mailbox.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    sections = []
    for text, charset in decode_header(header_text):
        if isinstance(text, text_type):
            # Python 3 returns unencoded headers already as text.
            sections.append(text)
            continue
        try:
            sections.append(text.decode(charset or default, "replace"))
        except LookupError:
            # The declared charset is not known to Python: use the default.
            sections.append(text.decode(default, "replace"))
    return u"".join(sections)
    
def main():
   """Command-line entry point: print the most active posters of an mbox file.

   Options:
     -f/--file FILE  mbox archive to analyse (required).
     -n/--top N      number of posters to list (default: 20).

   For each listed poster the output shows the rounded percentage of all
   messages, the message count and the poster name, followed by the total
   number of messages and of distinct (name, email) contributors.
   A missing or invalid option is reported through parser.error(), which
   prints the usage text and exits with status 2.
   """
   import os.path

   usage = "%prog [options] -f mailarchive_file\n\n"

   parser = OptionParser(usage=usage)
   parser.add_option("-f", "--file", type="string", dest="filename", help="Input filename")
   parser.add_option("-n", "--top", type="int", dest="top", default=20, help="Number of posters to list (default: 20)")
   options, _ = parser.parse_args()

   mailboxfile = options.filename
   if not mailboxfile:
      parser.error("no mail archive given; use -f FILE")
   if not os.path.isfile(mailboxfile):
      parser.error("mail archive not found: %s" % mailboxfile)
   if options.top < 0:
      parser.error("--top must not be negative")

   result = stats(mailboxfile, addresslist)
   totalmail = len(result)

   # One profile per distinct (name, email) pair with its message count.
   fulllist = [{'nb': len(g), 'nom': k[0], 'email': k[1]} for k, g in groupby(result)]
   sortedlist = sorted(fulllist, key=itemgetter('nb'), reverse=True)

   # Each print() receives a single pre-formatted string so the statement
   # behaves the same under Python 2 and Python 3.
   for item in sortedlist[:options.top]:
      # totalmail cannot be 0 here: an empty mailbox yields no profiles.
      percentmail = int(round(float(item['nb']) / float(totalmail) * 100.))
      print("%3d %% %4d   %s" % (percentmail, item['nb'], item['nom']))
   print("-------------------------------")
   print("Total nb of mails:  %d" % totalmail)
   print("Total nb of contributors:  %d" % len(sortedlist))

# Run the report only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
   main()
   

