"""mfurllib - extend urllib with support for Media Fragments Jack Jansen, , March 2008. Usage: import this module and call install() to install Media Fragment support into standard urllib classes and functions. You can call uninstall() to revert to the original situation. """ import urllib import urlparse import sys import re import string import postproc_ffmpeg # An exception that signals syntax errors in the media fragment class MediaFragmentError(ValueError): pass # # We keep the original classes. We will later replace these by our # augmented implementations, but we need the originals. # orig_URLopener = urllib.URLopener orig_FancyURLopener = urllib.FancyURLopener # # Helper class to add range_info support to a urlib-like extended file. # class addrangeinfo(urllib.addbase): def __init__(self, fp): urllib.addbase.__init__(self, fp) self.range_info = None def mf_range_info(self): if self.range_info == None: self.init_range_info() return self.range_info def init_range_info(self): self.range_info = {} for h in self.fp.headers: lower_h = h.lower() if h[:15] == 'content-range: ': cr = h[15:] space_pos = cr.find(' ') rinfo_name = cr[:space_pos].strip() rinfo_value = cr[space_pos:].strip() self.range_info[rinfo_name] = rinfo_value # # Classes to handle a single fragment selection axis. # class MFSelector: """Abstract base class for implementing a single axis of media fragment selection. For each axis (temporal, spatial, track) we provide a class with this signature. It handles parsing of the URL fragment, generation of http headers, post-processing, etc""" def __init__(self): print >>sys.stderr, "WARNING: Media Fragment: %s not implemented yet" % self.__class__.__name__ def parse_mf_tag_value(self, prefix, value): """Parses URL fragment value for this axis. Stores relevant values.""" pass def gen_http_request(self, opener): """Generate HTTP headers for this selector, stores them in opener.""" pass def gen_selector(self, opener): """Call apppropriate methods on opener to select requested content.""" pass def content_ranges_wanted(self): """Return a set of the HTTP Content-Range: reply headers this selector would like.""" return set() def match_info(self, info): """Compare HTTP reply headers to this selector. Returns true if no post-processing is needed.""" return False def dump(self): """Debug method: print human-readable description of selector""" pass class MFSelTime(MFSelector): # TBD This implementation currently stores begin/end times as floating point seconds. # TBD begin/end time can be independently in seconds or hh:mm:ss def __init__(self): self.begin_ss = None self.end_ss = None # Regular expressions for MF parsing PAT_TIME_NPT_CLOCK = re.compile(r"^(?:npt:)?(?P\d+(?:\.\d+)??(?:s)?)?,(?:(?P\d+(?:\.\d+)?)(?:s)?)?$") PAT_TIME_NPT_HHMMSS = re.compile(r"^(?:npt:)?(:?(?P\d+):(?P\d{2}):(?P\d{2}(?:\.\d+)?))?,(?:(?P\d+):(?P\d{2}):(?P\d{2}(?:\.\d+)?))?$") PAT_TIME_SMPTE = re.compile(r"^(?Psmpte[-a-z0-9]*):(?:(?P\d+):(?P\d{2}):(?P\d{2})(?::(?P\d{2})(?:\.(?P\d{2}))?)?)?,(?:(?P\d+):(?P\d{2}):(?P\d{2})(?::(?P\d{2})(?:\.(?P\d{2}))?)?)?$") def parse_mf_tag_value(self, prefix, value): # First check whether it is npt clock values match = self.PAT_TIME_NPT_CLOCK.match(value) if match: if match.group('begin_ss'): self.begin_ss = string.atof(match.group('begin_ss')) if match.group('end_ss'): self.end_ss = string.atof(match.group('end_ss')) return # Next check npt hh:mm:ss values match = self.PAT_TIME_NPT_HHMMSS.match(value) if match: if match.group('begin_hh'): hh = string.atoi(match.group('begin_hh')) mm = string.atoi(match.group('begin_mm')) ss = string.atof(match.group('begin_ss')) self.begin_ss = ss + 60*(mm + 60*hh) if match.group('end_hh'): hh = string.atoi(match.group('end_hh')) mm = string.atoi(match.group('end_mm')) ss = string.atof(match.group('end_ss')) self.end_ss = ss + 60*(mm + 60*hh) return # Finally check smpte values match = self.PAT_TIME_SMPTE.match(value) if match: self.begin_ss = self._smpte_to_npt( match.group('format'), match.group('begin_hh'), match.group('begin_mm'), match.group('begin_ss'), match.group('begin_ff'), match.group('begin_zz')) self.end_ss = self._smpte_to_npt( match.group('format'), match.group('end_hh'), match.group('end_mm'), match.group('end_ss'), match.group('end_ff'), match.group('end_zz')) return # Unknown format raise MediaFragmentError, ('Cannot parse Media Fragment time', value) def gen_http_request(self, opener): if self.begin_ss != None or self.end_ss != None: bss = ess = '' if self.begin_ss: bss = str(self.begin_ss) if self.end_ss: ess = str(self.end_ss) opener.addheader('Range', 'seconds=%s-%s' % (bss, ess)) def match_info(self, info): # Trivial matches: if no range has been specified if self.begin_ss == None or self.end_ss == None: return True if not 'seconds' in info: return False # Format is nnn.nn-nnn.nn/xxxx where we ignore xxxx seconds = info['seconds'] if '/' in seconds: seconds = seconds.split('/')[0] begin_info_ss, end_info_ss = seconds.split('-') # Do we need to trim from the beginning? if self.begin_ss: if begin_info_ss: begin_info_ss = string.atof(begin_info_ss) if begin_info_ss < self.begin_ss: return False else: return True else: return True if self.end_ss: if end_info_ss: end_info_ss = string.atof(end_info_ss) if end_info_ss > self.end_ss: return False else: return True else: return True return False def gen_selector(self, opener): if self.begin_ss != None or self.end_ss != None: opener.select_time(self.begin_ss, self.end_ss) def content_ranges_wanted(self): return set(['seconds']) def _smpte_to_npt(self, format, hh, mm, ss, ff, zz): print >>sys.stderr, 'SMPTE time not implemented yet' return 0 def dump(self): if self.begin_ss != None: print >>sys.stderr, 'Start timestamp:', self.begin_ss if self.end_ss != None: print >>sys.stderr, 'End timestamp:', self.end_ss class MFSelSpace(MFSelector): def __init__(self): self.aspect_w = None self.aspect_h = None self.x = self.y = self.h = self.w = None self.percent = False # Regular expressions: PAT_ASPECT = re.compile(r"^(?P\d+):(?P\d+)$") PAT_XYWH = re.compile(r"^(?:pixel:)?(?P\d+),(?P\d+),(?P\d+),(?P\d+)$") PAT_PERCENT = re.compile(r"^(?:%:)?(?P\d+),(?P\d+),(?P\d+),(?P\d+)$") def parse_mf_tag_value(self, prefix, value): if prefix == 'aspect': match = self.PAT_ASPECT.match(value) if match: self.aspect_w = string.atoi(match.group('w')) self.aspect_h = string.atoi(match.group('h')) return raise MediaFragmentError, ('Incorrect Media Fragment aspect specification', value) if prefix == 'xywh': match = self.PAT_XYWH.match(value) if match: self.x = string.atoi(match.group('x')) self.y = string.atoi(match.group('y')) self.w = string.atoi(match.group('w')) self.h = string.atoi(match.group('h')) return match = self.PAT_PERCENT.match(value) if match: self.x = string.atoi(match.group('x')) self.y = string.atoi(match.group('y')) self.w = string.atoi(match.group('w')) self.h = string.atoi(match.group('h')) self.percent = True return def gen_http_request(self, opener): if self.aspect_w != None: assert self.aspect_h != None opener.addheader('X-Jack-Spatial-Select', 'aspect %s' % ((self.aspect_w, self.aspect_h),)) if self.x != None: assert self.y != None assert self.w != None assert self.h != None if self.percent: style = 'percent' else: style = 'pixel' opener.addheader('X-Jack-Spatial-Select', '%s %s' % (style, (self.x, self.y, self.w, self.h))) def gen_selector(self, opener): assert self.aspect_w == self.aspect_h == None assert not self.percent if self.x != None: assert self.y != None assert self.w != None assert self.h != None opener.select_xywh(self.x, self.y, self.w, self.h) def match_info(self, info): if self.x == self.y == self.w == self.h == self.aspect_w == self.aspect_h == None: return True if not 'x_jack_xywh' in info: return False xywh_info = info['x_jack_xywh'] info_x, info_y, info_w, info_h = eval(xywh_info) # XXX Brrrr.... # Did the user ask for aspect? Convert to to pixels if self.aspect_w: # XXX Warning: mathematics ahead. This needs to be tested! asp_wanted = float(self.aspect_w) / float(self.aspect_h) asp_got = float(info_w) / float(info_h) if asp_wanted == asp_got: return True # It is not correct. Convert aspect request to xywh request, based on the fact # that we now know the size. if asp_wanted > asp_got: # Picture we got is narrower than wanted. Need to crop off top/bottom new_h = int(info_w * asp_wanted / asp_got) self.x = 0 self.y = (info_h - new_h + 0.5) / 2 self.w = info_w self.h = new_h else: new_w = int(info_h * asp_wanted / asp_got) self.x = (info_w - new_w + 0.5) / 2 self.y = 0 self.w = new_w self.h = new_h self.aspect_w = self.aspect_h = None return False # Next case: user asked for percentages. These we can now also convert to pixels. if self.percent: self.x = int((self.x/100.0) * info_w + 0.5) self.y = int((self.y/100.0) * info_h + 0.5) self.w = int((self.w/100.0) * info_w + 0.5) self.h = int((self.h/100.0) * info_h + 0.5) # Finally we check whether postprocessing is needed. if self.x == info_x and self.y == info_y and self.w == info_w and self.h == info_h: return True return False def content_ranges_wanted(self): return set(['x_jack_xywh']) def dump(self): if self.aspect_w != None: print >>sys.stderr, 'Aspect ratio: %d:%d' % (self.aspect_w, self.aspect_h) if self.x != None: if self.percent: print >>sys.stderr, 'Spatial crop: topleft=(%d%%, %d%%), size=(%d%%, %d%%)' % (self.x, self.y, self.w, self.h) else: print >>sys.stderr, 'Spatial crop: topleft=(%d, %d), size=(%d, %d)' % (self.x, self.y, self.w, self.h) class MFSelTrack(MFSelector): def __init__(self): self.track = None def parse_mf_tag_value(self, prefix, value): if len(value) <= 1 or value[0] != "'" or value[-1] != "'": raise MediaFragmentError, ("Media Fragment track value must be single-quoted string", value) self.track = urlparse.unquote(value[1:-1]) def gen_http_request(self, opener): if self.track != None: opener.addheader('X-Jack-Track-Select', self.track) def gen_selector(self, opener): if self.track != None: opener.select_track(self.track) def match_info(self, info): if self.track == None: return True if not 'x_jack_track' in info: return False track_info = info['x_jack_track'] if track_info == self.track: return True return False def content_ranges_wanted(self): return set(['x_jack_track']) def dump(self): if self.track != None: print >>sys.stderr, 'Track: %s' % self.track class MFSelID(MFSelector): def __init__(self): self.id = None def parse_mf_tag_value(self, prefix, value): if len(value) <= 1 or value[0] != "'" or value[-1] != "'": raise MediaFragmentError, ("Media Fragment id value must be single-quoted string", value) self.id = urlparse.unquote(value[1:-1]) def gen_http_request(self, opener): if self.id != None: opener.addheader('X-Jack-ID-Select', self.id) def gen_selector(self, opener): if self.id != None: opener.select_id(self.id) def match_info(self, info): if self.id == None: return True if not 'x_jack_id' in info: return False id_info = info['x_jack_id'] if id_info == self.id: return True return False def content_ranges_wanted(self): return set(['x_jack_id']) def dump(self): if self.id != None: print >>sys.stderr, 'ID: %s' % self.id # # Map axis names to implementation classes # AXIS_TO_CLASS = { "t" : MFSelTime, "xywh" : MFSelSpace, "aspect" : MFSelSpace, "track" : MFSelTrack, "id" : MFSelID, } class MFMixIn: """Mixin class implementing Media Fragment handling.""" # TBD This implementation is pretty unsafe: we store data in # this class, but the class may be used again for opening another URL. # Need to do urllib2-based implementation. def __init__(self): self.selectors = [] def _prepare_mf_retrieval(self, url): """Split off media fragment specifier. Parses MF specifier (if any), returns url without media fragment specifier.""" baseurl, tag = urllib.splittag(url) if tag: self.selectors = self._parse_mf_tag(tag) # # The following code is for debugging: we complain if the # tag isn't a media fragment. In a production system we # would simply continue and use the normal tag handling # if not self.selectors: raise MediaFragmentError, ('Not a Media Fragment', tag) # # Let the selectors put their information in the http # headers. # for sel in self.selectors: sel.gen_http_request(self) return baseurl return url def _parse_mf_tag(self, tag): """Parse MF tag, return list of MFSelector instances.""" # First split into (prefix, value) dictionary, checking there # are no duplicates axes = {} subtaglist = tag.split('&') for subtag in subtaglist: pv = subtag.split('=') if len(pv) != 2: raise MediaFragmentError, ('Media Fragment axis not of form name=value', subtag) prefix, value = pv if prefix in axes: # raise MediaFragmentError, ('Media Fragment axis specified twice', prefix) continue axes[prefix] = value # Test that id axis is not combined with others if 'id' in axes and len(axes) != 1: raise MediaFragmentError, ('Media Fragment id must be used standalone', tag) # Instantiate the axis selectors rv = [] for prefix, value in axes.items(): if not prefix in AXIS_TO_CLASS: raise MediaFragmentError, ('Unknown media fragment prefix', prefix) klass = AXIS_TO_CLASS[prefix] inst = klass() inst.parse_mf_tag_value(prefix, value) rv.append(inst) return rv def _wrap_mf_connection(self, fp): """Wrap connection to skip unneeded media portions. MF-aware server may return a larger temporal fragment than requested, and a non-MF-aware server always does this. This wrapper skips the unneeded data. """ # # First we check whether there is any selector that wants wrapping. # Selectors want this if they can see that the madia fragment returned # needs more processing, or if they cannot be sure yet whether more # processing is needed. # need_wrap = False wrapped_fp = addrangeinfo(fp) cr_wanted = set() info = wrapped_fp.mf_range_info() for sel in self.selectors: if not sel.match_info(info): need_wrap = True cr_wanted = cr_wanted | sel.content_ranges_wanted() print >>sys.stderr, "DEBUG: needwrap=", need_wrap if not need_wrap: # All selection has been handled by the server. # Return the connection without wrapping. return fp # # Wrapping is needed. Create the wrapper, and tell it the information # that the selectors want (so it can inspect the original data # and retrieve the needed information). # fp = postproc_ffmpeg.MFPostProc(fp) fp.ensure_content_ranges(cr_wanted) # # Now we tell the selectors everything we know about the content info, # and ask them to tell the wrapper what it should select. # info = fp.mf_range_info() for sel in self.selectors: sel.match_info(info) sel.gen_selector(fp) # # All set! Tell the post-processor to start doing its thing. # fp.process() return fp def _addheader_mf(self, key, value): print >>sys.stderr, 'MF: %s=%s' % (key, value) def open_http(self, url, data=None): """Open an HTTP url, possibly with a Media Fragment specifier""" baseurl = self._prepare_mf_retrieval(url) fp = self.open_http_nofrag(baseurl, data) newfp = self._wrap_mf_connection(fp) return newfp def open_https(self, url, data=None): """Open an HTTPS url, possibly with a Media Fragment specifier""" baseurl = self._prepare_mf_retrieval(url) fp = self.open_https_nofrag(baseurl, data) newfp = self._wrap_mf_connection(fp) return newfp def open_file(self, url, data=None): """Open a file: url, possibly with a Media Fragment specifier""" assert data == None baseurl = self._prepare_mf_retrieval(url) fp = self.open_file_nofrag(baseurl) newfp = self._wrap_mf_connection(fp) return newfp def dump(self): for sel in self.selectors: sel.dump() class MFURLopener(urllib.URLopener, MFMixIn): """Glue class to combine original URLOpener with MFMixin""" # TBD Need to do this dynamically, by looking up all open_* # methods. open_http_nofrag = orig_URLopener.open_http open_https_nofrag = orig_URLopener.open_https open_file_nofrag = orig_URLopener.open_file open_http = MFMixIn.open_http open_https = MFMixIn.open_https open_file = MFMixIn.open_file def __init__(self, proxies=None, **x509): orig_URLopener.__init__(self, proxies, **x509) MFMixIn.__init__(self) class MFFancyURLopener(urllib.FancyURLopener, MFMixIn): """Glue class to combine original URLOpener with MFMixin""" # TBD Need to do this dynamically, by looking up all open_* # methods. open_http_nofrag = orig_URLopener.open_http open_https_nofrag = orig_URLopener.open_https open_file_nofrag = orig_URLopener.open_file open_http = MFMixIn.open_http open_https = MFMixIn.open_https open_file = MFMixIn.open_file def __init__(self, proxies=None, **x509): orig_URLopener.__init__(self, proxies, **x509) MFMixIn.__init__(self) def install(): urllib.URLopener = MFURLopener urllib.FancyURLopener = MFFancyURLopener def uninstall(): urllib.URLopener = orig_URLopener urllib.FancyURLopener = orig_FancyURLopener def main(): install() if sys.argv[1] == '--dump': for url in sys.argv[2:]: print >>sys.stderr, 'Opening %s:' % url conn = urllib.urlopen(url) urllib._urlopener.dump() else: urllib.main() if __name__ == "__main__": main()