"""mfurllib - extend urllib with support for Media Fragments
Jack Jansen, <Jack.Jansen@cwi.nl>, March 2008.

Usage: import this module and call install() to install Media 
Fragment support into standard urllib classes and functions.

You can call uninstall() to revert to the original situation.
"""

import urllib
import urlparse
import sys
import re
import string
import postproc_ffmpeg

# An exception that signals syntax errors in the media fragment
class MediaFragmentError(ValueError):
    """Raised when the fragment part of a URL is not a valid Media Fragment.

    Instances are created with a human-readable message followed by the
    offending piece of the fragment, both of which end up in ``args``.
    """
    
#
# Keep references to the original urllib opener classes. install() rebinds
# urllib.URLopener and urllib.FancyURLopener to the augmented classes
# defined further down; the originals are still needed to reach the
# unmodified open_* methods and so that uninstall() can put them back.
#
orig_URLopener = urllib.URLopener
orig_FancyURLopener = urllib.FancyURLopener

#
# Helper class to add range_info support to a urllib-like extended file.
#
class addrangeinfo(urllib.addbase):
    
    def __init__(self, fp):
        urllib.addbase.__init__(self, fp)
        self.range_info = None
        
    def mf_range_info(self):
        if self.range_info == None:
            self.init_range_info()
        return self.range_info
        
    def init_range_info(self):
        self.range_info = {}
        for h in self.fp.headers:
            lower_h = h.lower()
            if h[:15] == 'content-range: ':
                cr = h[15:]
                space_pos = cr.find(' ')
                rinfo_name = cr[:space_pos].strip()
                rinfo_value = cr[space_pos:].strip()
                self.range_info[rinfo_name] = rinfo_value
#
# Classes to handle a single fragment selection axis.
#
class MFSelector:
    """Interface shared by all single-axis media fragment selectors.

    One class with this set of methods exists per selection axis
    (temporal, spatial, track, ...). A selector parses its part of the
    URL fragment, contributes HTTP request headers, and drives any
    post-processing of the reply. The methods here are do-nothing
    defaults.
    """

    def __init__(self):
        # Only reached when a concrete selector does not define its own
        # constructor, which is reported as "not implemented yet".
        axis_name = self.__class__.__name__
        message = "WARNING: Media Fragment: %s not implemented yet" % axis_name
        sys.stderr.write(message + "\n")

    def parse_mf_tag_value(self, prefix, value):
        """Parse the fragment value for this axis and remember the result."""
        return None

    def gen_http_request(self, opener):
        """Add the HTTP request headers for this selector to opener."""
        return None

    def gen_selector(self, opener):
        """Tell opener, through its select methods, which content is wanted."""
        return None

    def content_ranges_wanted(self):
        """Return the set of Content-Range reply header units this selector needs."""
        wanted = set()
        return wanted

    def match_info(self, info):
        """Check the reply's range information against this selector.

        Returns true if no post-processing is needed."""
        return False

    def dump(self):
        """Debug aid: print a human-readable description of the selector."""
        return None
        
class MFSelTime(MFSelector):
    """Temporal axis of a media fragment (the "t=begin,end" specifier).

    Begin and end are stored as floating point seconds in begin_ss and
    end_ss. None means that side of the range was not specified.
    """
    # TBD begin and end must use the same notation (both plain seconds or
    # both hh:mm:ss); a mix of the two is not recognised.
    # TBD SMPTE timecodes are recognised but not converted yet.

    def __init__(self):
        self.begin_ss = None
        self.end_ss = None

    # Regular expressions for MF parsing. In the clock pattern the optional
    # "s" suffix sits outside the named groups so the captured text is
    # always a plain number.
    PAT_TIME_NPT_CLOCK = re.compile(r"^(?:npt:)?(?:(?P<begin_ss>\d+(?:\.\d+)?)s?)?,(?:(?P<end_ss>\d+(?:\.\d+)?)s?)?$")
    PAT_TIME_NPT_HHMMSS = re.compile(r"^(?:npt:)?(?:(?P<begin_hh>\d+):(?P<begin_mm>\d{2}):(?P<begin_ss>\d{2}(?:\.\d+)?))?,(?:(?P<end_hh>\d+):(?P<end_mm>\d{2}):(?P<end_ss>\d{2}(?:\.\d+)?))?$")
    PAT_TIME_SMPTE = re.compile(r"^(?P<format>smpte[-a-z0-9]*):(?:(?P<begin_hh>\d+):(?P<begin_mm>\d{2}):(?P<begin_ss>\d{2})(?::(?P<begin_ff>\d{2})(?:\.(?P<begin_zz>\d{2}))?)?)?,(?:(?P<end_hh>\d+):(?P<end_mm>\d{2}):(?P<end_ss>\d{2})(?::(?P<end_ff>\d{2})(?:\.(?P<end_zz>\d{2}))?)?)?$")

    def parse_mf_tag_value(self, prefix, value):
        """Parse the value of a "t=" specifier into begin_ss/end_ss.

        Accepts npt seconds ("10,20", "npt:10.5s,"), npt hh:mm:ss
        ("0:01:00,0:02:00") and smpte timecodes. Raises
        MediaFragmentError if value matches none of these.
        """
        # First check whether it is npt clock values
        match = self.PAT_TIME_NPT_CLOCK.match(value)
        if match:
            if match.group('begin_ss') is not None:
                self.begin_ss = float(match.group('begin_ss'))
            if match.group('end_ss') is not None:
                self.end_ss = float(match.group('end_ss'))
            return

        # Next check npt hh:mm:ss values
        match = self.PAT_TIME_NPT_HHMMSS.match(value)
        if match:
            if match.group('begin_hh') is not None:
                self.begin_ss = self._hhmmss_to_seconds(
                    match.group('begin_hh'),
                    match.group('begin_mm'),
                    match.group('begin_ss'))
            if match.group('end_hh') is not None:
                self.end_ss = self._hhmmss_to_seconds(
                    match.group('end_hh'),
                    match.group('end_mm'),
                    match.group('end_ss'))
            return

        # Finally check smpte values
        match = self.PAT_TIME_SMPTE.match(value)
        if match:
            self.begin_ss = self._smpte_to_npt(
                match.group('format'),
                match.group('begin_hh'),
                match.group('begin_mm'),
                match.group('begin_ss'),
                match.group('begin_ff'),
                match.group('begin_zz'))
            self.end_ss = self._smpte_to_npt(
                match.group('format'),
                match.group('end_hh'),
                match.group('end_mm'),
                match.group('end_ss'),
                match.group('end_ff'),
                match.group('end_zz'))
            return

        # Unknown format
        raise MediaFragmentError('Cannot parse Media Fragment time', value)

    def _hhmmss_to_seconds(self, hh, mm, ss):
        """Convert hh, mm, ss strings (ss may have a fraction) to seconds."""
        return float(ss) + 60 * (int(mm) + 60 * int(hh))

    def gen_http_request(self, opener):
        """Add a "Range: seconds=begin-end" request header to opener.

        Nothing is added when neither begin nor end was specified. An
        unspecified side is left empty in the header.
        """
        if self.begin_ss is None and self.end_ss is None:
            return
        bss = ess = ''
        # Compare against None: a begin time of 0 is a real value and must
        # not turn the header into "seconds=-end".
        if self.begin_ss is not None:
            bss = str(self.begin_ss)
        if self.end_ss is not None:
            ess = str(self.end_ss)
        opener.addheader('Range', 'seconds=%s-%s' % (bss, ess))

    def match_info(self, info):
        """Compare the 'seconds' content range in info with the request.

        Returns true if no post-processing is needed: either no temporal
        range was requested, or the reported range does not start before
        the requested begin and does not run past the requested end.
        Returns false when the range is unknown or needs trimming.
        """
        # Trivial match: no range has been specified at all
        if self.begin_ss is None and self.end_ss is None:
            return True
        if 'seconds' not in info:
            return False
        # Format is nnn.nn-nnn.nn/xxxx where we ignore xxxx
        seconds = info['seconds'].split('/')[0]
        begin_info_ss, _, end_info_ss = seconds.partition('-')
        begin_info_ss = begin_info_ss.strip()
        end_info_ss = end_info_ss.strip()
        # Do we need to trim from the beginning?
        if self.begin_ss is not None and begin_info_ss:
            if float(begin_info_ss) < self.begin_ss:
                return False
        # Do we need to trim from the end?
        if self.end_ss is not None and end_info_ss:
            if float(end_info_ss) > self.end_ss:
                return False
        return True

    def gen_selector(self, opener):
        """Ask opener to select the requested time range, if there is one."""
        if self.begin_ss is not None or self.end_ss is not None:
            opener.select_time(self.begin_ss, self.end_ss)

    def content_ranges_wanted(self):
        """Return the content range units needed by match_info()."""
        return set(['seconds'])

    def _smpte_to_npt(self, format, hh, mm, ss, ff, zz):
        """Convert an SMPTE timecode to seconds (not implemented yet).

        Returns None when the endpoint was not specified (hh is None).
        Otherwise prints a warning and returns 0 as a placeholder.
        """
        if hh is None:
            return None
        sys.stderr.write('SMPTE time not implemented yet\n')
        return 0

    def dump(self):
        """Debug method: print the begin/end timestamps to stderr."""
        if self.begin_ss is not None:
            sys.stderr.write('Start timestamp: %s\n' % (self.begin_ss,))
        if self.end_ss is not None:
            sys.stderr.write('End timestamp: %s\n' % (self.end_ss,))
        
class MFSelSpace(MFSelector):
    """Spatial axis of a media fragment ("xywh=..." and "aspect=w:h").

    A crop rectangle is kept in x, y, w, h, in pixels or (while percent
    is true) in percentages of the picture size. An aspect ratio request
    is kept in aspect_w/aspect_h. match_info() converts percentage and
    aspect requests to a pixel rectangle once the picture size is known.
    """

    def __init__(self):
        self.aspect_w = None
        self.aspect_h = None
        self.x = self.y = self.h = self.w = None
        self.percent = False

    # Regular expressions:

    PAT_ASPECT = re.compile(r"^(?P<w>\d+):(?P<h>\d+)$")
    PAT_XYWH = re.compile(r"^(?:pixel:)?(?P<x>\d+),(?P<y>\d+),(?P<w>\d+),(?P<h>\d+)$")
    # An unprefixed value always means pixels (PAT_XYWH is tried first), so
    # the percentage form requires its prefix. "percent:" is accepted next
    # to the original "%:" spelling.
    PAT_PERCENT = re.compile(r"^(?:%|percent):(?P<x>\d+),(?P<y>\d+),(?P<w>\d+),(?P<h>\d+)$")

    def parse_mf_tag_value(self, prefix, value):
        """Parse an "aspect" or "xywh" specifier value.

        prefix selects which of the two forms value is in. Raises
        MediaFragmentError when value is malformed for that form or when
        an aspect ratio contains a zero term. Any other prefix is ignored.
        """
        if prefix == 'aspect':
            match = self.PAT_ASPECT.match(value)
            if not match:
                raise MediaFragmentError('Incorrect Media Fragment aspect specification', value)
            aspect_w = int(match.group('w'))
            aspect_h = int(match.group('h'))
            if aspect_w == 0 or aspect_h == 0:
                # A zero term is not a usable ratio, and would divide by
                # zero during the later conversion to pixels.
                raise MediaFragmentError('Incorrect Media Fragment aspect specification', value)
            self.aspect_w = aspect_w
            self.aspect_h = aspect_h
            return
        if prefix == 'xywh':
            match = self.PAT_XYWH.match(value)
            percent = False
            if not match:
                match = self.PAT_PERCENT.match(value)
                percent = True
            if not match:
                # Report the error, as is done for aspect, rather than
                # silently ignoring the specifier.
                raise MediaFragmentError('Incorrect Media Fragment xywh specification', value)
            self.x = int(match.group('x'))
            self.y = int(match.group('y'))
            self.w = int(match.group('w'))
            self.h = int(match.group('h'))
            self.percent = percent
            return

    def gen_http_request(self, opener):
        """Add X-Jack-Spatial-Select request headers describing the request."""
        if self.aspect_w is not None:
            assert self.aspect_h is not None
            opener.addheader('X-Jack-Spatial-Select', 'aspect %s' % ((self.aspect_w, self.aspect_h),))
        if self.x is not None:
            assert self.y is not None
            assert self.w is not None
            assert self.h is not None
            if self.percent:
                style = 'percent'
            else:
                style = 'pixel'
            opener.addheader('X-Jack-Spatial-Select', '%s %s' % (style, (self.x, self.y, self.w, self.h)))

    def gen_selector(self, opener):
        """Ask opener to crop to the pixel rectangle, if there is one.

        match_info() must have resolved any aspect or percentage request
        to pixels before this is called; the assertions check that.
        """
        assert self.aspect_w is None and self.aspect_h is None
        assert not self.percent
        if self.x is not None:
            assert self.y is not None
            assert self.w is not None
            assert self.h is not None
            opener.select_xywh(self.x, self.y, self.w, self.h)

    def match_info(self, info):
        """Compare the 'x_jack_xywh' content range in info with the request.

        Returns true if no post-processing is needed. As a side effect,
        aspect and percentage requests are rewritten into a pixel
        rectangle as soon as the picture size is known from info.
        """
        if (self.x is None and self.y is None and self.w is None
                and self.h is None and self.aspect_w is None
                and self.aspect_h is None):
            return True
        if 'x_jack_xywh' not in info:
            return False
        xywh_info = info['x_jack_xywh']
        # SECURITY: this value comes from an HTTP reply header, so eval()
        # executes whatever expression the remote server sends. Kept for
        # compatibility with the existing header format; should be
        # replaced by a strict parser of four integers.
        info_x, info_y, info_w, info_h = eval(xywh_info) # XXX Brrrr....
        # Did the user ask for aspect? Convert to pixels
        if self.aspect_w is not None:
            # Compare the ratios by cross-multiplication; this stays in
            # integers and avoids comparing floats for equality.
            wanted = self.aspect_w * info_h
            got = self.aspect_h * info_w
            if wanted == got:
                # Picture already has the requested aspect: request done.
                self.aspect_w = self.aspect_h = None
                return True
            # It is not correct. Convert aspect request to xywh request,
            # based on the fact that we now know the size.
            if wanted > got:
                # Picture we got is narrower than wanted. Keep the full
                # width and crop off top/bottom.
                new_w = info_w
                new_h = int(float(info_w) * self.aspect_h / self.aspect_w + 0.5)
            else:
                # Picture we got is wider than wanted. Keep the full
                # height and crop off left/right.
                new_h = info_h
                new_w = int(float(info_h) * self.aspect_w / self.aspect_h + 0.5)
            # Centre the crop rectangle in the picture.
            self.x = (info_w - new_w) // 2
            self.y = (info_h - new_h) // 2
            self.w = new_w
            self.h = new_h
            self.aspect_w = self.aspect_h = None
            return False

        # Next case: user asked for percentages. These we can now also convert to pixels.
        if self.percent:
            self.x = int((self.x/100.0) * info_w + 0.5)
            self.y = int((self.y/100.0) * info_h + 0.5)
            self.w = int((self.w/100.0) * info_w + 0.5)
            self.h = int((self.h/100.0) * info_h + 0.5)
            # The rectangle is in pixels from here on; without this a
            # second call would convert it again.
            self.percent = False
        # Finally we check whether postprocessing is needed.
        return (self.x == info_x and self.y == info_y
                and self.w == info_w and self.h == info_h)

    def content_ranges_wanted(self):
        """Return the content range units needed by match_info()."""
        return set(['x_jack_xywh'])

    def dump(self):
        """Debug method: print the aspect and/or crop request to stderr."""
        if self.aspect_w is not None:
            sys.stderr.write('Aspect ratio: %d:%d\n' % (self.aspect_w, self.aspect_h))
        if self.x is not None:
            if self.percent:
                sys.stderr.write('Spatial crop: topleft=(%d%%, %d%%), size=(%d%%, %d%%)\n' % (self.x, self.y, self.w, self.h))
            else:
                sys.stderr.write('Spatial crop: topleft=(%d, %d), size=(%d, %d)\n' % (self.x, self.y, self.w, self.h))
    
class MFSelTrack(MFSelector):
    """Track axis of a media fragment (the "track='name'" specifier)."""

    def __init__(self):
        # Requested track name, or None when no track was specified.
        self.track = None

    def parse_mf_tag_value(self, prefix, value):
        """Parse a single-quoted, URL-quoted track name.

        Raises MediaFragmentError if value is not enclosed in single
        quotes.
        """
        if len(value) <= 1 or value[0] != "'" or value[-1] != "'":
            raise MediaFragmentError("Media Fragment track value must be single-quoted string", value)
        # urllib.unquote is the documented unquoting function;
        # urlparse.unquote is an implementation detail that older
        # Python 2 releases do not have.
        self.track = urllib.unquote(value[1:-1])

    def gen_http_request(self, opener):
        """Add an X-Jack-Track-Select request header if a track is wanted."""
        if self.track is not None:
            opener.addheader('X-Jack-Track-Select', self.track)

    def gen_selector(self, opener):
        """Ask opener to select the requested track, if there is one."""
        if self.track is not None:
            opener.select_track(self.track)

    def match_info(self, info):
        """Return true if no track was requested or info reports that track."""
        if self.track is None:
            return True
        if 'x_jack_track' not in info:
            return False
        return info['x_jack_track'] == self.track

    def content_ranges_wanted(self):
        """Return the content range units needed by match_info()."""
        return set(['x_jack_track'])

    def dump(self):
        """Debug method: print the requested track to stderr."""
        if self.track is not None:
            sys.stderr.write('Track: %s\n' % self.track)
    
class MFSelID(MFSelector):
    """Named-fragment axis of a media fragment (the "id='name'" specifier)."""

    def __init__(self):
        # Requested fragment id, or None when no id was specified.
        self.id = None

    def parse_mf_tag_value(self, prefix, value):
        """Parse a single-quoted, URL-quoted fragment id.

        Raises MediaFragmentError if value is not enclosed in single
        quotes.
        """
        if len(value) <= 1 or value[0] != "'" or value[-1] != "'":
            raise MediaFragmentError("Media Fragment id value must be single-quoted string", value)
        # urllib.unquote is the documented unquoting function;
        # urlparse.unquote is an implementation detail that older
        # Python 2 releases do not have.
        self.id = urllib.unquote(value[1:-1])

    def gen_http_request(self, opener):
        """Add an X-Jack-ID-Select request header if an id is wanted."""
        if self.id is not None:
            opener.addheader('X-Jack-ID-Select', self.id)

    def gen_selector(self, opener):
        """Ask opener to select the requested id, if there is one."""
        if self.id is not None:
            opener.select_id(self.id)

    def match_info(self, info):
        """Return true if no id was requested or info reports that id."""
        if self.id is None:
            return True
        if 'x_jack_id' not in info:
            return False
        return info['x_jack_id'] == self.id

    def content_ranges_wanted(self):
        """Return the content range units needed by match_info()."""
        return set(['x_jack_id'])

    def dump(self):
        """Debug method: print the requested id to stderr."""
        if self.id is not None:
            sys.stderr.write('ID: %s\n' % self.id)
    
#
# Map axis names (the part before "=" in each fragment component) to the
# selector class implementing that axis. "xywh" and "aspect" share
# MFSelSpace, which tells them apart by the prefix that is passed to its
# parse_mf_tag_value() method.
#
AXIS_TO_CLASS = {
    "t" : MFSelTime,
    "xywh" : MFSelSpace,
    "aspect" : MFSelSpace,
    "track" : MFSelTrack,
    "id" : MFSelID,
}

class MFMixIn:
    """Mixin class implementing Media Fragment handling.

    Meant to be combined with a urllib URLopener-style class that
    supplies addheader() and the open_http_nofrag(), open_https_nofrag()
    and open_file_nofrag() methods used below.
    """
    # TBD This implementation keeps per-URL state (the selectors and the
    # request headers they generate) on the opener itself. The state is
    # reset at the start of every retrieval, but an opener still cannot be
    # used for overlapping retrievals.
    # Need to do urllib2-based implementation.

    def __init__(self):
        self.selectors = []
        # Request headers added by the selectors for the current URL.
        self._mf_headers = []

    def _forget_mf_headers(self):
        """Remove the request headers that were added for the previous URL."""
        stale = getattr(self, '_mf_headers', [])
        # urllib's URLopener.addheader() stores headers in the addheaders
        # list; the mixin on its own has no such list.
        headers = getattr(self, 'addheaders', None)
        if headers is not None:
            for header in stale:
                if header in headers:
                    headers.remove(header)
        self._mf_headers = []

    def _prepare_mf_retrieval(self, url):
        """Split off media fragment specifier.

        Parses MF specifier (if any), lets the selectors add their HTTP
        request headers, and returns url without media fragment
        specifier. Raises MediaFragmentError for a malformed specifier.
        """
        # Start from a clean slate: selectors and headers of a previously
        # opened URL must not leak into this retrieval.
        self._forget_mf_headers()
        self.selectors = []

        baseurl, tag = urllib.splittag(url)
        if not tag:
            return url
        self.selectors = self._parse_mf_tag(tag)

        #
        # The following code is for debugging: we complain if the
        # tag isn't a media fragment. In a production system we
        # would simply continue and use the normal tag handling
        #
        if not self.selectors:
            raise MediaFragmentError('Not a Media Fragment', tag)
        #
        # Let the selectors put their information in the http
        # headers, and remember which headers they added so they
        # can be removed again before the next retrieval.
        #
        headers = getattr(self, 'addheaders', None)
        mark = 0
        if headers is not None:
            mark = len(headers)
        for sel in self.selectors:
            sel.gen_http_request(self)
        if headers is not None:
            self._mf_headers = headers[mark:]
        return baseurl

    def _parse_mf_tag(self, tag):
        """Parse MF tag, return list of MFSelector instances.

        The list is in the order in which the axes first appear in tag.
        Raises MediaFragmentError for a component that is not of the form
        name=value, for an unknown axis name, and for an id axis that is
        combined with other axes.
        """
        # First split into (prefix, value) pairs. When an axis is given
        # more than once the first occurrence wins.
        axes = {}
        order = []
        for subtag in tag.split('&'):
            pv = subtag.split('=')
            if len(pv) != 2:
                raise MediaFragmentError('Media Fragment axis not of form name=value', subtag)
            prefix, value = pv
            if prefix in axes:
                # raise MediaFragmentError('Media Fragment axis specified twice', prefix)
                continue
            axes[prefix] = value
            order.append(prefix)

        # Test that id axis is not combined with others
        if 'id' in axes and len(axes) != 1:
            raise MediaFragmentError('Media Fragment id must be used standalone', tag)

        # Instantiate the axis selectors
        rv = []
        for prefix in order:
            if prefix not in AXIS_TO_CLASS:
                raise MediaFragmentError('Unknown media fragment prefix', prefix)
            inst = AXIS_TO_CLASS[prefix]()
            inst.parse_mf_tag_value(prefix, axes[prefix])
            rv.append(inst)

        return rv

    def _wrap_mf_connection(self, fp):
        """Wrap connection to skip unneeded media portions.

        MF-aware server may return a larger temporal fragment than
        requested, and a non-MF-aware server always does this.
        This wrapper skips the unneeded data. Returns fp itself when no
        selector needs post-processing, otherwise the post-processing
        wrapper around fp.
        """
        #
        # First we check whether there is any selector that wants wrapping.
        # Selectors want this if they can see that the media fragment returned
        # needs more processing, or if they cannot be sure yet whether more
        # processing is needed.
        #
        need_wrap = False
        wrapped_fp = addrangeinfo(fp)
        cr_wanted = set()
        info = wrapped_fp.mf_range_info()
        for sel in self.selectors:
            if not sel.match_info(info):
                need_wrap = True
                cr_wanted = cr_wanted | sel.content_ranges_wanted()
        sys.stderr.write("DEBUG: needwrap= %s\n" % (need_wrap,))
        if not need_wrap:
            # All selection has been handled by the server.
            # Return the connection without wrapping.
            return fp
        #
        # Wrapping is needed. Create the wrapper, and tell it the information
        # that the selectors want (so it can inspect the original data
        # and retrieve the needed information).
        #
        fp = postproc_ffmpeg.MFPostProc(fp)
        fp.ensure_content_ranges(cr_wanted)
        #
        # Now we tell the selectors everything we know about the content info,
        # and ask them to tell the wrapper what it should select.
        #
        info = fp.mf_range_info()
        for sel in self.selectors:
            sel.match_info(info)
            sel.gen_selector(fp)
        #
        # All set! Tell the post-processor to start doing its thing.
        #
        fp.process()

        return fp

    def _addheader_mf(self, key, value):
        """Debug helper: print a header to stderr instead of sending it."""
        sys.stderr.write('MF: %s=%s\n' % (key, value))

    def open_http(self, url, data=None):
        """Open an HTTP url, possibly with a Media Fragment specifier"""
        baseurl = self._prepare_mf_retrieval(url)
        fp = self.open_http_nofrag(baseurl, data)
        return self._wrap_mf_connection(fp)

    def open_https(self, url, data=None):
        """Open an HTTPS url, possibly with a Media Fragment specifier"""
        baseurl = self._prepare_mf_retrieval(url)
        fp = self.open_https_nofrag(baseurl, data)
        return self._wrap_mf_connection(fp)

    def open_file(self, url, data=None):
        """Open a file: url, possibly with a Media Fragment specifier"""
        # Data cannot be posted to a file: URL.
        assert data is None
        baseurl = self._prepare_mf_retrieval(url)
        fp = self.open_file_nofrag(baseurl)
        return self._wrap_mf_connection(fp)

    def dump(self):
        """Debug method: let every current selector describe itself."""
        for sel in self.selectors:
            sel.dump()
        
class MFURLopener(urllib.URLopener, MFMixIn):
    """urllib.URLopener with Media Fragment support mixed in.

    For every supported scheme the original URLopener method is kept
    under the name open_<scheme>_nofrag, and open_<scheme> itself is
    bound to the fragment-aware version from MFMixIn.
    """
    # TBD The scheme list should be built dynamically, by looking up all
    # open_* methods.

    # http
    open_http = MFMixIn.open_http
    open_http_nofrag = orig_URLopener.open_http

    # https
    open_https = MFMixIn.open_https
    open_https_nofrag = orig_URLopener.open_https

    # file
    open_file = MFMixIn.open_file
    open_file_nofrag = orig_URLopener.open_file

    def __init__(self, proxies=None, **x509):
        """Initialise the URLopener part first, then the mixin part."""
        orig_URLopener.__init__(self, proxies, **x509)
        MFMixIn.__init__(self)

class MFFancyURLopener(urllib.FancyURLopener, MFMixIn):
    """Glue class to combine original FancyURLopener with MFMixIn.

    For every supported scheme the original method is kept under the
    name open_<scheme>_nofrag, and open_<scheme> itself is bound to the
    fragment-aware version from MFMixIn.
    """
    # TBD Need to do this dynamically, by looking up all open_*
    # methods.

    open_http_nofrag = orig_FancyURLopener.open_http
    open_https_nofrag = orig_FancyURLopener.open_https
    open_file_nofrag = orig_FancyURLopener.open_file
    open_http = MFMixIn.open_http
    open_https = MFMixIn.open_https
    open_file = MFMixIn.open_file

    def __init__(self, proxies=None, **x509):
        """Initialise the FancyURLopener part first, then the mixin part."""
        # Must be the FancyURLopener constructor, not URLopener's:
        # otherwise the state FancyURLopener sets up for itself is
        # never created.
        orig_FancyURLopener.__init__(self, proxies, **x509)
        MFMixIn.__init__(self)

def install():
    """Install Media Fragment support into the urllib module.

    Rebinds urllib.URLopener and urllib.FancyURLopener to the Media
    Fragment aware classes.
    """
    urllib.URLopener = MFURLopener
    urllib.FancyURLopener = MFFancyURLopener
    # urllib.urlopen() creates its opener once and keeps it in
    # urllib._urlopener. Drop a plain FancyURLopener left there by an
    # earlier call so the next call builds a fragment-aware one. An
    # opener of any other class was put there deliberately; leave it.
    cached = getattr(urllib, '_urlopener', None)
    if cached is not None and cached.__class__ is orig_FancyURLopener:
        urllib._urlopener = None
    
def uninstall():
    """Revert urllib to the situation before install() was called.

    Rebinds urllib.URLopener and urllib.FancyURLopener to the original
    classes.
    """
    urllib.URLopener = orig_URLopener
    urllib.FancyURLopener = orig_FancyURLopener
    # urllib.urlopen() keeps the opener it created in urllib._urlopener.
    # Drop a fragment-aware opener left there so the next call builds a
    # standard one again.
    cached = getattr(urllib, '_urlopener', None)
    if cached is not None and cached.__class__ is MFFancyURLopener:
        urllib._urlopener = None
    
def main():
    """Command line entry point.

    With "--dump url ..." every url is opened and the media fragment
    selectors parsed from it are printed to stderr. With any other
    arguments, or none at all, urllib's own main() is run with Media
    Fragment support installed.
    """
    install()
    # Check the length first: there may be no arguments at all.
    if len(sys.argv) > 1 and sys.argv[1] == '--dump':
        for url in sys.argv[2:]:
            sys.stderr.write('Opening %s:\n' % url)
            conn = urllib.urlopen(url)
            try:
                # urlopen() leaves the opener it used in urllib._urlopener.
                urllib._urlopener.dump()
            finally:
                # Do not leave the connection open until garbage collection.
                close = getattr(conn, 'close', None)
                if close is not None:
                    close()
    else:
        urllib.main()
    
# Run the command line interface when executed as a script.
if __name__ == "__main__":
    main()
    
    