/*
 * app_multiplexed.c -- creates a multiplexed document for XHTML-Print UA
 *
 * References:
 *  Appendix B.2 MIME type Application/Vnd.pwg-multiplexed
 *   http://www.ietf.org/rfc/rfc3391.txt
 *   http://www.w3.org/TR/xhtml-print/
 *
 * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */


#include <stdlib.h>
#if defined (_WIN32)
#include <io.h>
#endif
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stddef.h>
#include <ctype.h>

/* Boolean constants used for flags and int-valued success/failure returns
 * throughout this file. */
#define TRUE  1
#define FALSE 0

/* type of urls */
typedef enum {
  URL_abs_e,   	/* complete */
  URL_rel_e,	/* needs base */
  URL_frag_e,	/* portion */
  URL_any_e,	/* all of the above ? */
  URL_illegal_e	/* invalid */
} URL_type_t;

/*
 * URL manipulation functions.
 * Only the prototypes are visible here; the definitions live elsewhere.
 */

/* Convert a potentially escaped URL (escaped_p) to plain text in plain_p,
 * which holds at most max_size bytes.  add_url() in this file uses the
 * return value as the length of the converted string. */
int URL_convert_to_plain (const unsigned char *escaped_p,
			  unsigned char *plain_p,
			  int max_size);
/* Classify url_p; see URL_type_t. */
URL_type_t URL_check_url (const unsigned char *url_p);

/* Combine base (base_p) and relative (relative_p) URLs into url_p,
 * which holds at most max_size bytes. */
int URL_combine_abs_and_rel (const unsigned char *base_p,
			     const unsigned char *relative_p,
			     unsigned char *url_p,
			     int max_size);

/* Extract the base URL from url_p (plain, i.e. unescaped) into base_p,
 * which holds at most max_size bytes. */
int URL_extract_base (const unsigned char *url_p,
		      unsigned char *base_p, 
		      int max_size);


/* Smaller of two values, unless the platform already provides MIN.
 * Each argument may be evaluated twice, so avoid side effects in them. */
#ifndef MIN
#define MIN(a,b) (((a) <= (b)) ? (a) : (b)) 
#endif

/* When set, setup_content_hdr() encodes (and folds) the Content-Location URL. */
static int encodeHeaders = FALSE;
/* When set, add_url() skips a URL that is already in the reference list. */
static int OneObj = FALSE;
/* When set, add_url() forces the parent offset to 0 for children of the root object. */
static int ForceBeforeRoot = FALSE;
/* Optional base ("cid:" or a URL) that add_url() applies to non-root URLs. */
static char *defaultBaseP;
/* Not referenced in the visible part of the file -- presumably controls
 * message-number reuse when writing chunks; TODO confirm. */
static int reuseMsgNumbers = FALSE;
/* When non-NULL, emitted verbatim as a Content-Disposition header. */
static char *Content_Disposition = NULL;

/* Not referenced in the visible part of the file -- presumably the default
 * chunk size in bytes; TODO confirm. */
static int ChunkSize = 1024;

/* Per-type scanners; they operate on the global scan_obj_p (see scan_objs). */
static void scan_xhtml_file (void);
static void scan_css_file (void);
static void scan_jpeg_file (void);

/*
 * Node of a circular doubly-linked list.  A list head is itself a node
 * whose links point back at it when the list is empty (see list_init).
 */
typedef struct list_node_st {
  struct list_node_st *prev_p;  /* previous node (or the head) */
  struct list_node_st *next_p;  /* next node (or the head) */
  void * data;  /* payload; lets the list hold any data object */
} list_node_t;

/* Make lst_p an empty list: both links of the head refer to the head itself. */
void list_init(list_node_t* lst_p)
{
  lst_p->next_p = lst_p;
  lst_p->prev_p = lst_p;
}

/*
 * Link new_p into the list immediately in front of node_p.
 * Passing the list head as node_p appends new_p at the tail.
 */
void list_insert_before(list_node_t* node_p, list_node_t* new_p)
{
  list_node_t *pred_p = node_p->prev_p;

  new_p->prev_p = pred_p;
  new_p->next_p = node_p;
  pred_p->next_p = new_p;
  node_p->prev_p = new_p;
}

/*
 * Detach node_p from whatever list it is on and leave it self-linked.
 * Returns node_p so the caller can keep using it.
 */
list_node_t* list_unlink(list_node_t* node_p)
{
  list_node_t *before_p = node_p->prev_p;
  list_node_t *after_p = node_p->next_p;

  /* bridge the neighbours over the departing node */
  after_p->prev_p = before_p;
  before_p->next_p = after_p;

  /* the detached node becomes a one-element ring */
  node_p->next_p = node_p;
  node_p->prev_p = node_p;

  return (node_p);
}
/*
 * Detach and return the first node of the list headed by lst_p.
 * The list must not be empty (checked with assert).
 */
list_node_t* list_remove_front(list_node_t* lst_p)
{
  list_node_t *first_p;

  assert(lst_p->prev_p != lst_p);
  first_p = lst_p->next_p;
  return (list_unlink(first_p));
}

/* -------------------------------------- */
/* list of referenced objects */
/*
 * One referenced object (root document, style sheet, image, ...).
 * Objects are created by add_url() and reachable through the global
 * refobjs list; children of an object are also linked on its own
 * `children` list.
 */
typedef struct refobj_st {
  struct list_node_st children;		/* head of the list of child nodes (node->data is a refobj_t) */
  struct refobj_st *parent_obj_p;	/* object being scanned when this one was added, or NULL */
  int parent_offs;			/* reference offset in the parent, including the parent's content header */
  const struct extension_st *ext_p;	/* how to manage this object */
  const char *url_p;			/* url of object */
  const char *type_p;			/* not set in the visible code -- presumably the MIME type; TODO confirm */
  int scanned;				/* has been scanned ? */
  int root;				/* is root doc ? */
  unsigned char *file_p;		/* the contents */
  unsigned int file_len;		/* size of content */
  char *content_hdr_p;			/* heap copy of the MIME content header (see setup_content_hdr) */
  int content_hdr_len;			/* size of the MIME doc content header */
  int id;				/* obj id (used in the chunk header) */
  int msg;				/* not set in the visible code -- presumably the message number; TODO confirm */
  int output_offs;			/* initialised to 0 by add_url; presumably bytes already output -- TODO confirm */
  int img_hdr_size;			/* not set in the visible code -- presumably the image header size; TODO confirm */
} refobj_t;
/* the list of referenced objects (each node's data points at a refobj_t) */
list_node_t refobjs;

/* A group of URLs; not used in the visible part of the file -- presumably
 * the objects whose chunks are to be interleaved; TODO confirm. */
typedef struct interleave_group_st {
  /* unique members */
  char **urls_p;	/* array of URL strings */
  int urls_cnt;		/* number of entries in urls_p */
} interleave_group_t;
/* list of interleave groups (usage not visible here) */
list_node_t interleave_groups;

/* a message chunk */
typedef struct chunk_node_st {
  int parent_offs;	/* presumably the offset in the parent this chunk is tied to -- TODO confirm */
  int length;		/* presumably the chunk length in bytes -- TODO confirm */
} chunk_node_t;

/* a sequence of chunks belonging to one object */
typedef struct chunk_seq_st {
  refobj_t *obj_p;		/* object the chunks belong to */
  int chunks_cnt;		/* number of entries in chunks_p */
  chunk_node_t *chunks_p;	/* the chunks */
  int output_idx;		/* presumably the index of the next chunk to output -- TODO confirm */
  int output_offs;		/* presumably the output offset reached so far -- TODO confirm */
} chunk_seq_t;

/* prototypes of the chunking functions used for image types
 * (referenced from the extensions table below) */
static int image_get_parent_offs (refobj_t *obj_p);
static int image_get_chunk_size (refobj_t *obj_p);
static void image_get_chunk_seq (refobj_t *obj_p, chunk_seq_t *seq_p);

/*
 * How to scan and chunk a particular type of file.
 * Any of the function pointers may be NULL (see the extensions table);
 * scan_objs() skips scanning when `scan` is NULL.
 */
typedef struct extension_st {
  const char *ext_p;	/* ';'-separated file extensions (content hint); NULL marks the table's last entry */
  const char *type_p;	/* MIME content type,  see http://www.ietf.org/rfc/rfc1341.txt */
  void (*scan)(void);   /* scanning function; works on the global scan_obj_p */
  /* chunking functions */
  void (*get_chunk_seq)(refobj_t *obj_p, chunk_seq_t *seq_p); 
  int (*get_parent_offs)(refobj_t *obj_p);
  int (*get_chunk_size)(refobj_t *obj_p);
} extension_t;

/* MIME content types, see http://www.ietf.org/rfc/rfc1341.txt */
#define TYPE_TEXT "text/plain"
#define TYPE_CSS "text/css"
#define TYPE_XHTML "application/vnd.pwg-xhtml-print+xml"
#define TYPE_JPEG "image/jpeg"
#define TYPE_GIF "image/gif"
#define TYPE_PNG "image/png"
/* the multiplexed container type itself, see http://www.ietf.org/rfc/rfc3391.txt */
#define TYPE_MULTIPLEXED "application/vnd.pwg-multiplexed" /* http://www.ietf.org/rfc/rfc3391.txt */

/*
 * Known file types, searched in order by match_extension().
 * Each entry lists its extensions separated by ';' (matching is
 * case-sensitive, hence the lower- and upper-case spellings).
 * The final entry has a NULL extension list: it terminates the search
 * and doubles as the text/plain fallback.
 */
static const extension_t extensions[] =
{
  { ".xhtml;.html;.htm;.HTM;.HTML;.XHTML", TYPE_XHTML, scan_xhtml_file, NULL, NULL, NULL },
  { ".css;.CSS", TYPE_CSS, scan_css_file, NULL, NULL, NULL },
  { ".jpg;.jpeg;.JPG;.JPEG", TYPE_JPEG, scan_jpeg_file, image_get_chunk_seq, image_get_parent_offs, image_get_chunk_size },
  { ".gif;.GIF", TYPE_GIF, NULL, image_get_chunk_seq, image_get_parent_offs, image_get_chunk_size },
  { ".png;.PNG", TYPE_PNG, NULL, image_get_chunk_seq, image_get_parent_offs, image_get_chunk_size },
  { ".txt;.TXT", TYPE_TEXT, NULL, NULL, NULL, NULL },
  { NULL, TYPE_TEXT, NULL, NULL, NULL, NULL }
};

/*
 * Escape table indexed by byte value (0x00 - 0xFF).  Every entry is a
 * three-character escape for that byte; encodeChar() uses it for every
 * character that IS_SAFE_CHAR() rejects.  Most entries use the "=XX"
 * form, a handful use the "%XX" form instead.
 * NOTE(review): the "%XX" entries look deliberate (URL-style escapes for
 * characters such as space and '#') -- confirm before normalising them.
 */
char *CharTranslateTbl[] = {
	"=00",  /* U+0000 <control> */
	"=01",  /* U+0001 <control> */
	"=02",  /* U+0002 <control> */
	"=03",  /* U+0003 <control> */
	"=04",  /* U+0004 <control> */
	"=05",  /* U+0005 <control> */
	"=06",  /* U+0006 <control> */
	"=07",  /* U+0007 <control> */
	"=08",  /* U+0008 <control> */
	"=09",  /* U+0009 <control> */
	"=0A",  /* U+000A <control> */
	"=0B",  /* U+000B <control> */
	"=0C",  /* U+000C <control> */
	"=0D",  /* U+000D <control> */
	"=0E",  /* U+000E <control> */
	"=0F",  /* U+000F <control> */
	"=10",  /* U+0010 <control> */
	"=11",  /* U+0011 <control> */
	"=12",  /* U+0012 <control> */
	"=13",  /* U+0013 <control> */
	"=14",  /* U+0014 <control> */
	"=15",  /* U+0015 <control> */
	"=16",  /* U+0016 <control> */
	"=17",  /* U+0017 <control> */
	"=18",  /* U+0018 <control> */
	"=19",  /* U+0019 <control> */
	"=1A",  /* U+001A <control> */
	"=1B",  /* U+001B <control> */
	"=1C",  /* U+001C <control> */
	"=1D",  /* U+001D <control> */
	"=1E",  /* U+001E <control> */
	"=1F",  /* U+001F <control> */
	"%20",  /* U+0020 SPACE */
	"=21",  /* U+0021 EXCLAMATION MARK */
	"=22",  /* U+0022 QUOTATION MARK */
	"%23",  /* U+0023 NUMBER SIGN */
	"=24",  /* U+0024 DOLLAR SIGN */
	"%25",  /* U+0025 PERCENT SIGN */
	"%26",  /* U+0026 AMPERSAND */
	"%27",  /* U+0027 APOSTROPHE */
	"=28",  /* U+0028 LEFT PARENTHESIS */
	"=29",  /* U+0029 RIGHT PARENTHESIS */
	"=2A",  /* U+002A ASTERISK */
	"=2B",  /* U+002B PLUS SIGN */
	"=2C",  /* U+002C COMMA */
	"=2D",  /* U+002D HYPHEN-MINUS */
	"=2E",  /* U+002E FULL STOP */
	"=2F",  /* U+002F SOLIDUS */
	"=30",  /* U+0030 DIGIT ZERO */
	"=31",  /* U+0031 DIGIT ONE */
	"=32",  /* U+0032 DIGIT TWO */
	"=33",  /* U+0033 DIGIT THREE */
	"=34",  /* U+0034 DIGIT FOUR */
	"=35",  /* U+0035 DIGIT FIVE */
	"=36",  /* U+0036 DIGIT SIX */
	"=37",  /* U+0037 DIGIT SEVEN */
	"=38",  /* U+0038 DIGIT EIGHT */
	"=39",  /* U+0039 DIGIT NINE */
	"=3A",  /* U+003A COLON */
	"=3B",  /* U+003B SEMICOLON */
	"=3C",  /* U+003C LESS-THAN SIGN */
	"=3D",  /* U+003D EQUALS SIGN */
	"=3E",  /* U+003E GREATER-THAN SIGN */
	"=3F",  /* U+003F QUESTION MARK */
	"=40",  /* U+0040 COMMERCIAL AT */
	"=41",  /* U+0041 LATIN CAPITAL LETTER A */
	"=42",  /* U+0042 LATIN CAPITAL LETTER B */
	"=43",  /* U+0043 LATIN CAPITAL LETTER C */
	"=44",  /* U+0044 LATIN CAPITAL LETTER D */
	"=45",  /* U+0045 LATIN CAPITAL LETTER E */
	"=46",  /* U+0046 LATIN CAPITAL LETTER F */
	"=47",  /* U+0047 LATIN CAPITAL LETTER G */
	"=48",  /* U+0048 LATIN CAPITAL LETTER H */
	"=49",  /* U+0049 LATIN CAPITAL LETTER I */
	"=4A",  /* U+004A LATIN CAPITAL LETTER J */
	"=4B",  /* U+004B LATIN CAPITAL LETTER K */
	"=4C",  /* U+004C LATIN CAPITAL LETTER L */
	"=4D",  /* U+004D LATIN CAPITAL LETTER M */
	"=4E",  /* U+004E LATIN CAPITAL LETTER N */
	"=4F",  /* U+004F LATIN CAPITAL LETTER O */
	"=50",  /* U+0050 LATIN CAPITAL LETTER P */
	"=51",  /* U+0051 LATIN CAPITAL LETTER Q */
	"=52",  /* U+0052 LATIN CAPITAL LETTER R */
	"=53",  /* U+0053 LATIN CAPITAL LETTER S */
	"=54",  /* U+0054 LATIN CAPITAL LETTER T */
	"=55",  /* U+0055 LATIN CAPITAL LETTER U */
	"=56",  /* U+0056 LATIN CAPITAL LETTER V */
	"=57",  /* U+0057 LATIN CAPITAL LETTER W */
	"=58",  /* U+0058 LATIN CAPITAL LETTER X */
	"=59",  /* U+0059 LATIN CAPITAL LETTER Y */
	"=5A",  /* U+005A LATIN CAPITAL LETTER Z */
	"%5B",  /* U+005B LEFT SQUARE BRACKET */
	"%5C",  /* U+005C REVERSE SOLIDUS */
	"%5D",  /* U+005D RIGHT SQUARE BRACKET */
	"%5E",  /* U+005E CIRCUMFLEX ACCENT */
	"=5F",  /* U+005F LOW LINE */
	"=60",  /* U+0060 GRAVE ACCENT */
	"=61",  /* U+0061 LATIN SMALL LETTER A */
	"=62",  /* U+0062 LATIN SMALL LETTER B */
	"=63",  /* U+0063 LATIN SMALL LETTER C */
	"=64",  /* U+0064 LATIN SMALL LETTER D */
	"=65",  /* U+0065 LATIN SMALL LETTER E */
	"=66",  /* U+0066 LATIN SMALL LETTER F */
	"=67",  /* U+0067 LATIN SMALL LETTER G */
	"=68",  /* U+0068 LATIN SMALL LETTER H */
	"=69",  /* U+0069 LATIN SMALL LETTER I */
	"=6A",  /* U+006A LATIN SMALL LETTER J */
	"=6B",  /* U+006B LATIN SMALL LETTER K */
	"=6C",  /* U+006C LATIN SMALL LETTER L */
	"=6D",  /* U+006D LATIN SMALL LETTER M */
	"=6E",  /* U+006E LATIN SMALL LETTER N */
	"=6F",  /* U+006F LATIN SMALL LETTER O */
	"=70",  /* U+0070 LATIN SMALL LETTER P */
	"=71",  /* U+0071 LATIN SMALL LETTER Q */
	"=72",  /* U+0072 LATIN SMALL LETTER R */
	"=73",  /* U+0073 LATIN SMALL LETTER S */
	"=74",  /* U+0074 LATIN SMALL LETTER T */
	"=75",  /* U+0075 LATIN SMALL LETTER U */
	"=76",  /* U+0076 LATIN SMALL LETTER V */
	"=77",  /* U+0077 LATIN SMALL LETTER W */
	"=78",  /* U+0078 LATIN SMALL LETTER X */
	"=79",  /* U+0079 LATIN SMALL LETTER Y */
	"=7A",  /* U+007A LATIN SMALL LETTER Z */
	"%7B",  /* U+007B LEFT CURLY BRACKET */
	"%7C",  /* U+007C VERTICAL LINE */
	"%7D",  /* U+007D RIGHT CURLY BRACKET */
	"%7E",  /* U+007E TILDE */
	"=7F",  /* U+007F <control> */
	"=80",  /* U+0080 <control> */
	"=81",  /* U+0081 <control> */
	"=82",  /* U+0082 <control> */
	"=83",  /* U+0083 <control> */
	"=84",  /* U+0084 <control> */
	"=85",  /* U+0085 <control> */
	"=86",  /* U+0086 <control> */
	"=87",  /* U+0087 <control> */
	"=88",  /* U+0088 <control> */
	"=89",  /* U+0089 <control> */
	"=8A",  /* U+008A <control> */
	"=8B",  /* U+008B <control> */
	"=8C",  /* U+008C <control> */
	"=8D",  /* U+008D <control> */
	"=8E",  /* U+008E <control> */
	"=8F",  /* U+008F <control> */
	"=90",  /* U+0090 <control> */
	"=91",  /* U+0091 <control> */
	"=92",  /* U+0092 <control> */
	"=93",  /* U+0093 <control> */
	"=94",  /* U+0094 <control> */
	"=95",  /* U+0095 <control> */
	"=96",  /* U+0096 <control> */
	"=97",  /* U+0097 <control> */
	"=98",  /* U+0098 <control> */
	"=99",  /* U+0099 <control> */
	"=9A",  /* U+009A <control> */
	"=9B",  /* U+009B <control> */
	"=9C",  /* U+009C <control> */
	"=9D",  /* U+009D <control> */
	"=9E",  /* U+009E <control> */
	"=9F",  /* U+009F <control> */
	"=A0",  /* U+00A0 NO-BREAK SPACE */
	"=A1",  /* U+00A1 INVERTED EXCLAMATION MARK */
	"=A2",  /* U+00A2 CENT SIGN */
	"=A3",  /* U+00A3 POUND SIGN */
	"=A4",  /* U+00A4 CURRENCY SIGN */
	"=A5",  /* U+00A5 YEN SIGN */
	"=A6",  /* U+00A6 BROKEN BAR */
	"=A7",  /* U+00A7 SECTION SIGN */
	"=A8",  /* U+00A8 DIAERESIS */
	"=A9",  /* U+00A9 COPYRIGHT SIGN */
	"=AA",  /* U+00AA FEMININE ORDINAL INDICATOR */
	"=AB",  /* U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */
	"=AC",  /* U+00AC NOT SIGN */
	"=AD",  /* U+00AD SOFT HYPHEN */
	"=AE",  /* U+00AE REGISTERED SIGN */
	"=AF",  /* U+00AF MACRON */
	"=B0",  /* U+00B0 DEGREE SIGN */
	"=B1",  /* U+00B1 PLUS-MINUS SIGN */
	"=B2",  /* U+00B2 SUPERSCRIPT TWO */
	"=B3",  /* U+00B3 SUPERSCRIPT THREE */
	"=B4",  /* U+00B4 ACUTE ACCENT */
	"=B5",  /* U+00B5 MICRO SIGN */
	"=B6",  /* U+00B6 PILCROW SIGN */
	"=B7",  /* U+00B7 MIDDLE DOT */
	"=B8",  /* U+00B8 CEDILLA */
	"=B9",  /* U+00B9 SUPERSCRIPT ONE */
	"=BA",  /* U+00BA MASCULINE ORDINAL INDICATOR */
	"=BB",  /* U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
	"=BC",  /* U+00BC VULGAR FRACTION ONE QUARTER */
	"=BD",  /* U+00BD VULGAR FRACTION ONE HALF */
	"=BE",  /* U+00BE VULGAR FRACTION THREE QUARTERS */
	"=BF",  /* U+00BF INVERTED QUESTION MARK */
	"=C0",  /* U+00C0 LATIN CAPITAL LETTER A WITH GRAVE */
	"=C1",  /* U+00C1 LATIN CAPITAL LETTER A WITH ACUTE */
	"=C2",  /* U+00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
	"=C3",  /* U+00C3 LATIN CAPITAL LETTER A WITH TILDE */
	"=C4",  /* U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS */
	"=C5",  /* U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE */
	"=C6",  /* U+00C6 LATIN CAPITAL LETTER AE */
	"=C7",  /* U+00C7 LATIN CAPITAL LETTER C WITH CEDILLA */
	"=C8",  /* U+00C8 LATIN CAPITAL LETTER E WITH GRAVE */
	"=C9",  /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
	"=CA",  /* U+00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
	"=CB",  /* U+00CB LATIN CAPITAL LETTER E WITH DIAERESIS */
	"=CC",  /* U+00CC LATIN CAPITAL LETTER I WITH GRAVE */
	"=CD",  /* U+00CD LATIN CAPITAL LETTER I WITH ACUTE */
	"=CE",  /* U+00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
	"=CF",  /* U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS */
	"=D0",  /* U+00D0 LATIN CAPITAL LETTER ETH */
	"=D1",  /* U+00D1 LATIN CAPITAL LETTER N WITH TILDE */
	"=D2",  /* U+00D2 LATIN CAPITAL LETTER O WITH GRAVE */
	"=D3",  /* U+00D3 LATIN CAPITAL LETTER O WITH ACUTE */
	"=D4",  /* U+00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
	"=D5",  /* U+00D5 LATIN CAPITAL LETTER O WITH TILDE */
	"=D6",  /* U+00D6 LATIN CAPITAL LETTER O WITH DIAERESIS */
	"=D7",  /* U+00D7 MULTIPLICATION SIGN */
	"=D8",  /* U+00D8 LATIN CAPITAL LETTER O WITH STROKE */
	"=D9",  /* U+00D9 LATIN CAPITAL LETTER U WITH GRAVE */
	"=DA",  /* U+00DA LATIN CAPITAL LETTER U WITH ACUTE */
	"=DB",  /* U+00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
	"=DC",  /* U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS */
	"=DD",  /* U+00DD LATIN CAPITAL LETTER Y WITH ACUTE */
	"=DE",  /* U+00DE LATIN CAPITAL LETTER THORN */
	"=DF",  /* U+00DF LATIN SMALL LETTER SHARP S */
	"=E0",  /* U+00E0 LATIN SMALL LETTER A WITH GRAVE */
	"=E1",  /* U+00E1 LATIN SMALL LETTER A WITH ACUTE */
	"=E2",  /* U+00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX */
	"=E3",  /* U+00E3 LATIN SMALL LETTER A WITH TILDE */
	"=E4",  /* U+00E4 LATIN SMALL LETTER A WITH DIAERESIS */
	"=E5",  /* U+00E5 LATIN SMALL LETTER A WITH RING ABOVE */
	"=E6",  /* U+00E6 LATIN SMALL LETTER AE */
	"=E7",  /* U+00E7 LATIN SMALL LETTER C WITH CEDILLA */
	"=E8",  /* U+00E8 LATIN SMALL LETTER E WITH GRAVE */
	"=E9",  /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
	"=EA",  /* U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX */
	"=EB",  /* U+00EB LATIN SMALL LETTER E WITH DIAERESIS */
	"=EC",  /* U+00EC LATIN SMALL LETTER I WITH GRAVE */
	"=ED",  /* U+00ED LATIN SMALL LETTER I WITH ACUTE */
	"=EE",  /* U+00EE LATIN SMALL LETTER I WITH CIRCUMFLEX */
	"=EF",  /* U+00EF LATIN SMALL LETTER I WITH DIAERESIS */
	"=F0",  /* U+00F0 LATIN SMALL LETTER ETH */
	"=F1",  /* U+00F1 LATIN SMALL LETTER N WITH TILDE */
	"=F2",  /* U+00F2 LATIN SMALL LETTER O WITH GRAVE */
	"=F3",  /* U+00F3 LATIN SMALL LETTER O WITH ACUTE */
	"=F4",  /* U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX */
	"=F5",  /* U+00F5 LATIN SMALL LETTER O WITH TILDE */
	"=F6",  /* U+00F6 LATIN SMALL LETTER O WITH DIAERESIS */
	"=F7",  /* U+00F7 DIVISION SIGN */
	"=F8",  /* U+00F8 LATIN SMALL LETTER O WITH STROKE */
	"=F9",  /* U+00F9 LATIN SMALL LETTER U WITH GRAVE */
	"=FA",  /* U+00FA LATIN SMALL LETTER U WITH ACUTE */
	"=FB",  /* U+00FB LATIN SMALL LETTER U WITH CIRCUMFLEX */
	"=FC",  /* U+00FC LATIN SMALL LETTER U WITH DIAERESIS */
	"=FD",  /* U+00FD LATIN SMALL LETTER Y WITH ACUTE */
	"=FE",  /* U+00FE LATIN SMALL LETTER THORN */
	"=FF"  /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
};

/*
 * True when character c can be emitted unchanged: '!', '$', '*', '+', '-',
 * '_', an ASCII digit or an ASCII letter.  Every other character must be
 * escaped.  The argument is parenthesised so that expression arguments are
 * grouped correctly; it is still evaluated several times, so it must not
 * have side effects.
 */
#define IS_SAFE_CHAR(c) (((c) == '!') || ((c) == '$') || \
					 ((c) == '*') || ((c) == '+') || \
					 ((c) == '-') || ((c) == '_') || \
					 ((c) >= '0'  &&  (c) <= '9') || \
					 ((c) >= 'A'  &&  (c) <= 'Z') || \
					 ((c) >= 'a'  &&  (c) <= 'z') )

/*
 * Encode the single character at *str.
 *
 * On return *p points at the text to emit and the return value is its
 * length: 1 (the character itself, *p == str) when the character is safe,
 * otherwise 3 (the matching CharTranslateTbl escape).
 *
 * The byte is read as unsigned char so that values 0x80 - 0xFF index the
 * table correctly even where plain char is signed.
 */
int encodeChar(const char *str , char **p)
{
	unsigned char c = (unsigned char)*str;

	if (IS_SAFE_CHAR(c)) {
		*p = (char *)str;
		return 1;
	}

	/* translate all other characters */
	*p = CharTranslateTbl[c];
	return 3;
}
	  


/*
 * Write the encoded form of url into eurl (NUL-terminated).
 *
 * If every character of url is safe the text is copied unchanged.
 * Otherwise the output is wrapped as "=?UTF-8?Q?" ... "?=" and each unsafe
 * character is replaced by its three-character escape (see encodeChar).
 * The caller must supply a buffer large enough for the result; no bound is
 * checked here.
 */
void encodeUrl(const char *url, char *eurl)
{
	static const char prefix[] = "=?UTF-8?Q?";
	static const char suffix[] = "?=";
	const char *scan;
	char *out = eurl;
	int needsWrapper = 0;

	/* does any character need escaping? */
	for (scan = url; *scan != '\0'; scan++) {
		if (!IS_SAFE_CHAR(*scan)) {
			needsWrapper = 1;
			break;
		}
	}

	if (needsWrapper) {
		memcpy(out, prefix, sizeof(prefix) - 1);
		out += sizeof(prefix) - 1;
	}

	/* emit each character, escaped where necessary */
	for (scan = url; *scan != '\0'; scan++) {
		char *piece;
		int pieceLen = encodeChar(scan, &piece);
		int k;

		for (k = 0; k < pieceLen; k++)
			*out++ = piece[k];
	}

	if (needsWrapper) {
		memcpy(out, suffix, sizeof(suffix) - 1);
		out += sizeof(suffix) - 1;
	}

	*out = '\0';
}

/* Scratch buffer in which setup_content_hdr() assembles one MIME header. */
static char content_hdr[1024];

/* Current end of the text in content_hdr, i.e. where the next piece is appended. */
#define CONTENT (content_hdr + strlen(content_hdr))
/* When non-zero, setup_content_hdr() also emits a Content-ID header. */
int IncludeContentID = 0;
/* Not referenced in the visible part of the file; TODO confirm its use. */
int ContentID=0;

/* Room left in content_hdr, counting the terminating NUL. */
#define CONTENT_ROOM (sizeof(content_hdr) - strlen(content_hdr))

/*
 * Build the MIME content header for obj_p and store a heap copy in
 * obj_p->content_hdr_p / obj_p->content_hdr_len.
 *
 * The header holds Content-Type and Content-Location, plus Content-ID when
 * IncludeContentID is set and Content-Disposition when Content_Disposition
 * is non-NULL, and ends with an empty line.  With encodeHeaders set the
 * location is encoded by encodeUrl() and, when longer than 40 characters,
 * folded into continuation lines of at most 40 characters each.
 *
 * Every append is bounded by the size of content_hdr; a header that does
 * not fit is truncated and reported on stderr.  Running out of memory
 * terminates the program.
 */
static void setup_content_hdr (refobj_t *obj_p)
{
  char *encoded_p;
  int offset;
  int len;
  int size;

  content_hdr[0] = '\0';
  snprintf(CONTENT, CONTENT_ROOM, "Content-Type: %s\r\n", obj_p->ext_p->type_p);

  if (encodeHeaders) {
    /* worst case: 3 output chars per input char + "=?UTF-8?Q?" + "?=" + NUL */
    encoded_p = malloc(3 * strlen(obj_p->url_p) + 13);
    if (encoded_p == NULL) {
      fprintf(stderr, "setup_content_hdr: out of memory\n");
      exit(EXIT_FAILURE);
    }
    encodeUrl(obj_p->url_p, encoded_p);
    len = strlen(encoded_p);

    if (len > 40) {
      /* fold url: one continuation line per 40-character slice */
      snprintf(CONTENT, CONTENT_ROOM, "Content-Location:");
      for (offset = 0; offset < len; offset += size) {
	size = (len - offset < 40) ? (len - offset) : 40;
	snprintf(CONTENT, CONTENT_ROOM, " %.*s\r\n", size, encoded_p + offset);
      }
    } else {
      snprintf(CONTENT, CONTENT_ROOM, "Content-Location: %s\r\n", encoded_p);
    }
    free(encoded_p);
  } else { /* no encoding, send url in the clear */
    snprintf(CONTENT, CONTENT_ROOM, "Content-Location: %s\r\n", obj_p->url_p);
  }

  if (IncludeContentID)
    snprintf(CONTENT, CONTENT_ROOM, "Content-ID: <%s-%d>\r\n", obj_p->url_p, obj_p->id);

  if (Content_Disposition)
    snprintf(CONTENT, CONTENT_ROOM, "Content-Disposition: %s\r\n", Content_Disposition);

  snprintf(CONTENT, CONTENT_ROOM, "\r\n");

  /* a completely full buffer means at least one append was cut short */
  if (strlen(content_hdr) >= sizeof(content_hdr) - 1)
    fprintf(stderr, "setup_content_hdr: header for %s may be truncated\n",
	    obj_p->url_p);

  obj_p->content_hdr_len = strlen(content_hdr);
  obj_p->content_hdr_p = malloc(obj_p->content_hdr_len + 1);
  if (obj_p->content_hdr_p == NULL) {
    fprintf(stderr, "setup_content_hdr: out of memory\n");
    exit(EXIT_FAILURE);
  }
  memcpy(obj_p->content_hdr_p, content_hdr, obj_p->content_hdr_len + 1);
}

#undef CONTENT_ROOM

#undef CONTENT

/* get content type based on filename extension */
static const extension_t *match_extension (const char *filename_p)
{
  int idx;
  int len = strlen(filename_p);
  /* for each know file extension */
  for (idx = 0; extensions[idx].ext_p != NULL; ++idx) {
    const char *cur_p, *next_p;
    cur_p = extensions[idx].ext_p;
    do {
      int tlen;
      next_p = strchr(cur_p, ';');
      if (next_p == NULL)
	tlen = strlen(cur_p);
      else
	(tlen = next_p - cur_p), ++next_p;
      if (tlen < len && !strncmp(filename_p + len - tlen, cur_p, tlen))
	return (&extensions[idx]);
    } while ((cur_p = next_p) != NULL);
  }
  return (&extensions[idx]);
}

/* Start of the contents of the object currently being scanned; set by
 * scan_objs() and used by scan_css() to turn pointers into offsets. */
const char *scan_file_p;

/*
 * Load a whole file into memory.
 *
 * filename_p may contain URL escapes; it is converted with
 * URL_convert_to_plain() before being opened.  On success *data_pp receives
 * a malloc'ed buffer holding the file contents followed by one extra NUL
 * byte, *data_len_p receives the file size (without that NUL), and TRUE is
 * returned; the caller owns the buffer.  On any failure (allocation, open,
 * seek, size or short read) FALSE is returned, the outputs are left
 * untouched, and nothing is leaked.
 */
static int load_file (const char *filename_p,
		       unsigned char **data_pp,
		       unsigned int *data_len_p)
{
  FILE *file_p = NULL;
  unsigned char *data_p = NULL;
  char *fn_p;
  long file_size;
  int len = strlen(filename_p);
  int ok = FALSE;

  fn_p = malloc(len+1);
  if (fn_p == NULL)
    return (FALSE);

  /* start from a plain copy so fn_p is a valid string whatever the converter does */
  strcpy(fn_p, filename_p);
  (void)URL_convert_to_plain((unsigned char*)filename_p, (unsigned char*)fn_p, len+1); 

  /* assuming file accessible, local file system? */
  file_p = fopen(fn_p, "rb");  /* read, binary */
  if (file_p == NULL)
    goto done;

  /* get size of file */
  if (fseek(file_p, 0, SEEK_END) != 0)
    goto done;
  file_size = ftell(file_p);
  if (file_size < 0 || (unsigned long)file_size >= (unsigned int)-1)
    goto done;
  if (fseek(file_p, 0, SEEK_SET) != 0)
    goto done;

  /* allocate memory to hold it, plus a terminating NUL */
  data_p = malloc((size_t)file_size + 1);
  if (data_p == NULL)
    goto done;

  if (fread(data_p, 1, (size_t)file_size, file_p) != (size_t)file_size) {
    free(data_p);
    goto done;
  }
  data_p[file_size] = '\0';

  *data_pp = data_p;
  *data_len_p = (unsigned int)file_size;
  ok = TRUE;

done:
  /* clean up on every path */
  if (file_p != NULL)
    fclose(file_p);
  free(fn_p);

  return (ok);
}

/*
 * Advance *str_pp past any leading whitespace (as defined by isspace).
 * Stops at the first non-whitespace character, which may be the
 * terminating NUL.  The character is converted to unsigned char before it
 * is classified, because passing a negative char to isspace() is undefined.
 */
static void skip_ws (const char **str_pp)
{
  const char *str_p = *str_pp;
  while (isspace((unsigned char)*str_p))
    ++str_p;
  *str_pp = str_p;
}

/* Directory part of the root document's URL (or "./"); set by add_url()
 * for the root and prepended to relative names by build_filename(). */
static const char *filename_base_p;

/* Result buffer of build_filename(); overwritten by each call. */
static char filename_buf[256];

/* scan_obj_p: object currently being scanned (parent of any URL added
 * meanwhile); root_obj_p: the root document, set by add_url(). */
refobj_t *scan_obj_p, *root_obj_p;


/*
 * Map a URL to the name of the local file to load.
 *
 *  - "cid:xyz"       -> "xyz"
 *  - absolute URL    -> everything after its first '/' (the URL itself
 *                       when it contains no '/')
 *  - anything else   -> "<filename_base_p>/<name_p>", built in the static
 *                       filename_buf (overwritten by the next call)
 *
 * The returned pointer refers either into name_p or to filename_buf; the
 * caller must not free it.  A path too long for filename_buf is truncated
 * and reported on stderr.
 */
static const char *build_filename (const char *name_p)
{
  const char *slash_p;
  const char *base_p;
  int written;

  if (strncmp(name_p, "cid:", 4) == 0)
    return (name_p + 4);

  if (URL_abs_e == URL_check_url((unsigned char*)name_p)) {
    slash_p = strchr(name_p, '/');
    if (slash_p == NULL)
      return (name_p);		/* nothing to strip */
    return (slash_p + 1);
  }

  /* relative name: resolve against the root document's directory */
  base_p = (filename_base_p != NULL) ? filename_base_p : ".";
  written = snprintf(filename_buf, sizeof(filename_buf), "%s/%s", base_p, name_p);
  if (written < 0 || (size_t)written >= sizeof(filename_buf))
    fprintf(stderr, "build_filename: path too long, truncated: %s\n", name_p);
  return (filename_buf);
}

/* scan objects */
static void scan_objs (void)
{
  list_node_t *node_p;
  refobj_t * obj_p;
  int scanned;
 
  do {
    scanned = FALSE;
    /* traverse list of referenced objects */
    for(node_p = refobjs.next_p; node_p != &refobjs; node_p = node_p->next_p) {
      scan_obj_p = (refobj_t *)node_p->data;
      obj_p = scan_obj_p;
      scan_file_p = (const char*) scan_obj_p->file_p;
      if (obj_p->scanned) {
	     continue;
      }
      obj_p->scanned = TRUE;
      if (obj_p->ext_p->scan != NULL) {
	    fprintf(stderr, "Scanning URL: %s%s\n", obj_p->url_p, 
		        obj_p->root ? " (root)" : "");
	    (obj_p->ext_p->scan)();
	    scanned = TRUE;
	    break;
      }
    }  
  } while (scanned);
}

/*
 * Look up a referenced object by exact URL (strcmp).
 * Returns the first match in refobjs, or NULL when the URL is unknown.
 */
static refobj_t *find_url (const char *url_p)
{
  list_node_t *cur_p = refobjs.next_p;

  while (cur_p != &refobjs) {
    refobj_t *cand_p = (refobj_t *)cur_p->data;

    if (!strcmp(cand_p->url_p, url_p))
      return (cand_p);
    cur_p = cur_p->next_p;
  }
  return (NULL);
}

/* Number of objects created so far; add_url() uses ++obj_cnt as the new object's id. */
static int obj_cnt;
/* Not referenced in the visible part of the file -- presumably the current
 * message number; TODO confirm. */
static int msg_number;

/* Total size of an object as sent: its content header plus its contents. */
static int get_obj_len (refobj_t *obj_p)
{
  return (obj_p->content_hdr_len + obj_p->file_len);
}
#define MAXBUFF (4*1024)
/*
 * add_url -- register the URL text [start_p, end_p) as a referenced object.
 *
 * The URL is converted to plain text.  For the root document (root != 0)
 * its directory part becomes filename_base_p and only the file part is
 * kept as the object's URL; for any other URL the default base
 * (defaultBaseP), if any, is applied.  The file is then loaded, a refobj_t
 * is created with the object currently being scanned (scan_obj_p) as its
 * parent, and the object is appended to refobjs and to the parent's list
 * of children.
 *
 * offs is the reference position within the parent's contents; it is
 * forced to 0 when ForceBeforeRoot is set and the parent is the root.
 *
 * Nothing is added (and a message is printed) when the URL is empty or
 * too long, when it is already known and OneObj is set, or when the file
 * cannot be loaded.  The object's URL is always a heap copy owned by the
 * object.  Running out of memory terminates the program.
 */
static void add_url (const char *start_p, const char *end_p, int root, int offs)
{
  refobj_t *obj_p;
  list_node_t *child_node_p;
  list_node_t *ref_node_p;
  unsigned char *file_p;
  unsigned int file_len;
  char *url_p;
  int len;
  unsigned char buffer[MAXBUFF];
  unsigned char normalized_url[MAXBUFF]; 

  if (start_p == NULL || strlen(start_p) == 0) {
    fprintf(stderr, "Didn't add URL (no string)\n");
    return;
  }

  /* the raw text must fit in buffer together with its terminator */
  len = end_p - start_p;
  if (len < 0 || len >= MAXBUFF) {
    fprintf(stderr, "Didn't add URL (bad length %d)\n", len);
    return;
  }
  memcpy(buffer, start_p, len);
  buffer[len] = '\0';
  len = URL_convert_to_plain(buffer, normalized_url, MAXBUFF);

  if (root) {
    const char *name_p = (const char *)normalized_url;
    const char *last_slash_p = name_p;
    const char *slash_p;
    size_t name_len;

    /* find the last '/' (one in the very first position does not count) */
    while (*last_slash_p != '\0' &&
	   (slash_p = strchr(last_slash_p + 1, '/')) != NULL)
      last_slash_p = slash_p;

    if (name_p < last_slash_p) {
      char *base_p = malloc(last_slash_p - name_p + 1);
      if (base_p == NULL)
	goto oom;
      memcpy(base_p, name_p, last_slash_p - name_p);
      base_p[last_slash_p - name_p] = '\0';
      filename_base_p = base_p, name_p = last_slash_p + 1;
    } else
      filename_base_p = "./";
    
    fprintf(stderr, "Filename base: %s\n", filename_base_p);

    /* keep a heap copy: normalized_url is gone once this function returns */
    name_len = strlen(name_p);
    url_p = malloc(name_len + 1);
    if (url_p == NULL)
      goto oom;
    memcpy(url_p, name_p, name_len + 1);

  } else {
    char *temp_p;

    if (len < 0 || len >= MAXBUFF) {
      fprintf(stderr, "Didn't add URL (conversion failed)\n");
      return;
    }

    url_p = malloc(len+1);
    if (url_p == NULL)
      goto oom;
    memcpy(url_p, normalized_url, len);
    url_p[len] = '\0';

    if(defaultBaseP != NULL) {
      if(URL_illegal_e != URL_check_url((unsigned char*)url_p)) {
	len = strlen(defaultBaseP) + len + 1;
	temp_p = malloc(len);
	if (temp_p == NULL)
	  goto oom;

	/* check if this is a cid or http scheme */
	if (strcmp(defaultBaseP, "cid:") == 0) {
	  /* cid scheme, just copy the base and content id */
	  strcpy(temp_p, defaultBaseP);
	  strcpy(temp_p+4,url_p);
	} else {
	  /* http scheme */
	  len = URL_combine_abs_and_rel((unsigned char *)defaultBaseP,
					(unsigned char *)url_p, 
					(unsigned char *)temp_p, 
					len);
	}
	free(url_p);
	url_p = temp_p;
      }
    }
  }

  if (find_url(url_p) && OneObj) {
    fprintf(stderr, "URL already referenced: %s\n", url_p);
    free(url_p);
    return;
  }

  if (!load_file(build_filename(url_p), &file_p, &file_len)) {
    fprintf(stderr, "Failed to add URL: %s%s - no file.\n", url_p, root ? " (root)" : "");
    free(url_p);
    return;
  }

  /* if parent object is root object */
  if (scan_obj_p && (root_obj_p == scan_obj_p)) {
    if (ForceBeforeRoot)
      offs = 0;
  }

  /* zero-filled so that fields not set below start out as 0/NULL */
  obj_p = calloc(1, sizeof(refobj_t));
  if (obj_p == NULL)
    goto oom;
  obj_p->url_p = url_p;
  obj_p->ext_p = match_extension(obj_p->url_p);
  obj_p->scanned = FALSE;
  obj_p->root = root;
  obj_p->parent_obj_p = scan_obj_p;
  obj_p->parent_offs  = offs + (scan_obj_p != NULL ? scan_obj_p->content_hdr_len : 0);
  obj_p->file_p = file_p;
  obj_p->file_len = file_len;
  obj_p->id = ++obj_cnt;
  obj_p->output_offs = 0;

  setup_content_hdr(obj_p);

  list_init(&(obj_p->children));

  if (obj_p->parent_obj_p != NULL) {
    /* add obj to the parent's list of children */
    child_node_p = malloc(sizeof(list_node_t));
    if (child_node_p == NULL)
      goto oom;
    child_node_p->data = (void *)obj_p;
    list_insert_before(&obj_p->parent_obj_p->children, child_node_p);
  }

  fprintf(stderr, 
	  "Added URL: %s%s; type=\"%s\" (parent offset %d)\n", 
	  url_p, 
	  root ? " (root)" : "", obj_p->ext_p->type_p,
	  obj_p->parent_offs);

  if (root)
    root_obj_p = obj_p;

  /* append to the global list of referenced objects */
  ref_node_p = malloc(sizeof(list_node_t));
  if (ref_node_p == NULL)
    goto oom;
  ref_node_p->data = (void *)obj_p;

  list_insert_before(&refobjs, ref_node_p);
  return;

oom:
  fprintf(stderr, "add_url: out of memory\n");
  exit(EXIT_FAILURE);
}

/*
 * Test whether str_p points at the start tag "<tag_id_p ...>".
 *
 * The tag name must be followed by whitespace, '>' or "/>", and a closing
 * '>' must exist somewhere after str_p.  On a match TRUE is returned,
 * *tag_pp is set to str_p - 1 and *tag_len_p is chosen so that
 * *tag_pp + *tag_len_p is the character just after the closing '>'.
 * On a mismatch FALSE is returned and the outputs are untouched.
 *
 * NOTE(review): the reported tag start is one byte BEFORE the '<'
 * (match_end_tag reports the '<' itself).  Callers outside this view may
 * rely on that, so it is preserved here -- confirm whether it is intended.
 */
static int match_tag (const char *str_p,
		      const char *tag_id_p,
		      const char **tag_pp,
		      int *tag_len_p)
{
  size_t id_len = strlen(tag_id_p);
  const char *after_p;
  const char *end_p;

  if (*str_p != '<' || strncmp(str_p + 1, tag_id_p, id_len) != 0)
    return (FALSE);

  /* the name must end here; cast because isspace() needs a non-negative value */
  after_p = str_p + 1 + id_len;
  if (!isspace((unsigned char)*after_p) &&
      *after_p != '>' &&
      strncmp("/>", after_p, 2) != 0)
    return (FALSE);

  end_p = strchr(str_p, '>');
  if (end_p == NULL)
    return (FALSE);

  *tag_pp = str_p - 1;
  *tag_len_p = end_p + 1 - (str_p - 1);
  return (TRUE);
}

/*
 * Test whether str_p points at the end tag "</tag_id_p>" exactly.
 * On a match TRUE is returned, *tag_pp is set to str_p and *tag_len_p to
 * the length of the end tag; otherwise FALSE is returned and the outputs
 * are untouched.
 */
static int match_end_tag (const char *str_p,
			  const char *tag_id_p,
			  const char **tag_pp,
			  int *tag_len_p)
{
  size_t id_len = strlen(tag_id_p);

  if (strncmp(str_p, "</", 2) != 0)
    return (FALSE);
  if (strncmp(str_p + 2, tag_id_p, id_len) != 0)
    return (FALSE);
  if (str_p[id_len + 2] != '>')
    return (FALSE);

  *tag_pp = str_p;
  *tag_len_p = id_len + 3;	/* "</" + name + ">" */
  return (TRUE);
}


/*
 * Find the value of attribute attr_id_p inside the tag text
 * [tag_p, tag_p + tag_len).
 *
 * Only the first occurrence of attr_id_p is considered and it must be
 * followed directly by =" (double quotes, no spaces).  On success the
 * function returns non-zero, *attr_pp points at the first character of the
 * value and *attr_len_p is its length up to the closing quote.  Otherwise
 * it returns 0 and leaves the outputs untouched.
 *
 * The byte at tag_p[tag_len] is overwritten with NUL for the duration of
 * the search and then restored, so the buffer must be writable even though
 * it is declared const.
 */
static int find_attr (const char *attr_id_p,
		      const char *tag_p,
		      int tag_len,
		      const char **attr_pp,
		      int *attr_len_p)
{
  char *limit_p = (char*) tag_p + tag_len;
  char saved = *limit_p;
  const char *value_p;
  const char *close_p = NULL;
  int found = 0;

  *limit_p = '\0';	/* confine the string searches to the tag */

  value_p = strstr(tag_p, attr_id_p);
  if (value_p != NULL) {
    value_p += strlen(attr_id_p);
    if (value_p[0] == '=' && value_p[1] == '\"') {
      value_p += 2;
      close_p = strchr(value_p, '\"');
    }
  }

  if (close_p != NULL) {
    *attr_pp = value_p;
    *attr_len_p = close_p - value_p;
    found = 1;
  }

  *limit_p = saved;
  return (found);
}

/*
 * Find the first element named tag_id_p in str_p and report its content.
 *
 * On success TRUE is returned, *content_pp points just after the start
 * tag's '>' and *content_len is the number of bytes up to the matching
 * "</tag_id_p>".  FALSE is returned when no start tag with a following end
 * tag is found.
 *
 * The tag name now comes from tag_id_p; previously "style" was hard-coded
 * and the parameter was ignored.
 */
static int find_tag_content (const char *str_p, 
			     const char *tag_id_p,
			     const char **content_pp,
			     int *content_len)
{
  const char *tag_p;
  const char *tag_end_p;
  const char *next_p;
  int tag_len;
  int tag_end_len;

  while ((str_p = strchr(str_p, '<')) != NULL) {
    if (match_tag(str_p, tag_id_p, &tag_p, &tag_len)) {
      /* look for the first end tag of the same name after the start tag */
      next_p = str_p;
      while ((next_p = strstr(next_p, "</")) != NULL) {
	if (match_end_tag(next_p, tag_id_p, &tag_end_p, &tag_end_len)) {
	  *content_pp = tag_p + tag_len;
	  *content_len = tag_end_p - (tag_p + tag_len);
	  return (TRUE);
	}
	next_p += 1;
      }
    }
    str_p += 1;
  }
  return (FALSE);
}

/*
 * Get a quoted string.
 *
 * *in_pp must point at the opening quote (' or ").  The text up to the
 * matching closing quote is returned as a newly malloc'ed NUL-terminated
 * string with backslash escapes resolved (a backslash makes the next
 * character literal), and *in_pp is advanced past the closing quote.  The
 * caller owns the result.
 *
 * NULL is returned, and *in_pp left unchanged, when *in_pp is not at a
 * quote, when the string is not terminated before the end of the input, or
 * when memory runs out.  An empty string ("" or '') yields an empty result
 * rather than looping forever.
 */
static const char *get_qstring (const char **in_pp)
{
  const char *body_p;
  const char *scan_p;
  char *new_str_p;
  char q = **in_pp;
  size_t len = 0;
  size_t idx = 0;

  if (q != '\'' && q != '\"')
    return (NULL);

  body_p = *in_pp + 1;

  /* pass 1: measure the result and make sure the string is terminated */
  for (scan_p = body_p; *scan_p != q; ++scan_p) {
    if (*scan_p == '\\')
      ++scan_p;			/* the escaped character counts, not the backslash */
    if (*scan_p == '\0')
      return (NULL);		/* ran off the end of the input */
    ++len;
  }

  new_str_p = malloc(len + 1);
  if (new_str_p == NULL)
    return (NULL);

  /* pass 2: copy, dropping the escape backslashes */
  for (scan_p = body_p; *scan_p != q; ++scan_p) {
    if (*scan_p == '\\')
      ++scan_p;
    new_str_p[idx++] = *scan_p;
  }
  new_str_p[idx] = '\0';

  *in_pp = scan_p + 1;
  return (new_str_p);
}

/*
 * Get an unquoted URL, as found inside "url( ... )".
 *
 * Starting at *in_pp the text up to the closing ')' is collected; embedded
 * whitespace is kept and trailing whitespace before the ')' is dropped.
 * The scan also stops at the end of the input, so a missing ')' no longer
 * runs off the buffer.  *in_pp is advanced to the first character after
 * the URL text (trailing whitespace, the ')' or the terminating NUL).
 *
 * Returns a newly malloc'ed NUL-terminated copy owned by the caller, or
 * NULL when memory runs out.
 */
static const char *get_url (const char **in_pp)
{
  const char *str_p = *in_pp;
  const char *start_p = str_p;
  const char *end_p = str_p;
  char *new_str_p;

  while (*str_p != ')' && *str_p != '\0') {
    /* consume one run of non-blank characters ... */
    while (*str_p != '\0' && *str_p != ')' && !isspace((unsigned char)*str_p))
      str_p++;
    end_p = str_p;
    /* ... and the whitespace after it, which only counts if more text follows */
    while (isspace((unsigned char)*str_p))
      str_p++;
  }

  *in_pp = end_p;
  new_str_p = malloc(end_p - start_p + 1);
  if (new_str_p == NULL)
    return (NULL);
  memcpy(new_str_p, start_p, end_p - start_p);
  new_str_p[end_p - start_p] = '\0';
  return (new_str_p);
}

/*
 * Find the parenthesised argument of prop_id_p, e.g. the text between the
 * parentheses of "url(...)", inside [tag_p, tag_p + tag_len).
 *
 * Only the first occurrence of prop_id_p is considered and it must be
 * followed directly by '('.  On success the function returns non-zero,
 * *prop_pp points just after the '(' and *prop_len_p is the length up to
 * the next ')'.  Otherwise it returns 0 and leaves the outputs untouched.
 *
 * The byte at tag_p[tag_len] is overwritten with NUL for the duration of
 * the search and then restored, so the buffer must be writable even though
 * it is declared const.
 */
static int find_prop(const char *prop_id_p,
		     const char *tag_p,
		     int tag_len,
		     const char **prop_pp,
		     int *prop_len_p)
{
  char *limit_p = (char*) tag_p + tag_len;
  char saved = *limit_p;
  const char *arg_p;
  const char *close_p = NULL;
  int found = 0;

  *limit_p = '\0';	/* confine the string searches to the given range */

  arg_p = strstr(tag_p, prop_id_p);
  if (arg_p != NULL) {
    arg_p += strlen(prop_id_p);
    if (*arg_p == '(') {
      arg_p += 1;
      close_p = strchr(arg_p, ')');
    }
  }

  if (close_p != NULL) {
    *prop_pp = arg_p;
    *prop_len_p = close_p - arg_p;
    found = 1;
  }

  *limit_p = saved;
  return (found);
}

/*
 * Scan CSS text in [style_p, ref_point_p) for "url(...)" references and
 * register each one with add_url(), using the offset of ref_point_p within
 * the scanned file (scan_file_p) as the reference position.
 *
 * Both quoted and unquoted URL forms are accepted.  The string returned by
 * get_qstring()/get_url() is freed once add_url() has seen it, because
 * add_url() works on its own copy.  A match now requires the full "url("
 * inside the scanned range, and the character after the closing ')' is no
 * longer skipped.
 *
 * NOTE(review): the loop stops at the first '<', '/' or 's' character.
 * That looks like an attempt to stop at "</s" (the start of "</style>"),
 * but as written any 's' or '/' in the style text ends the scan.  The
 * condition is kept unchanged here; confirm the intent before changing it.
 */
static void scan_css (const char *style_p, const char *ref_point_p)
{
	const char *p, *url_p;
	for (p = (const char*) style_p; p < ref_point_p && (*p != '<' && *p != '/' && *p != 's'); p++) {
		if ((ref_point_p - p) >= 4 && memcmp(p, "url(", 4) == 0) {
			p += 4; /* length of "url(" */
			skip_ws(&p);
			if (*p == '\'' || *p == '\"') {
				url_p = get_qstring(&p);
			} else {
				url_p = get_url(&p);
			}
			skip_ws(&p);
			/* the loop increment steps over the ')' itself */
			if (*p == ')' && url_p != NULL)
				add_url(url_p, 
				       (url_p + strlen(url_p) + 1), 
						FALSE, 
						ref_point_p - scan_file_p);
			free((void *)url_p);
		}
	}
}


/* scan css file for external references: the complete buffer of the
 * object currently being scanned (scan_obj_p) is handed to scan_css() */
static void scan_css_file (void)
{
  const char *css_begin_p = (const char*) scan_obj_p->file_p;
  const char *css_end_p = css_begin_p + scan_obj_p->file_len;

  scan_css(css_begin_p, css_end_p);
}

/* scan JPEG content
 *
 * Steps through the marker segments of the JPEG data held by scan_obj_p
 * to estimate how many bytes precede the compressed image data.  Each
 * marker is 0xff followed by a type byte; for the types listed with a
 * length field the two bytes after the type are a big-endian length by
 * which the offset is advanced.  The walk stops after the 0xda segment
 * (start of scan in the JPEG marker table) and the offset reached is
 * stored in scan_obj_p->img_hdr_size.  If that marker is never found
 * the size is set to a fixed 1024 bytes.  The result is reported on
 * stderr.
 *
 * Every read is checked against the file length, so an empty or
 * truncated file no longer causes reads past the buffer, and the
 * stored size never exceeds the file length when the 0xda segment was
 * found.
 *
 * NOTE(review): 0xff, 0x01 and 0xd9 are treated as having a length
 * field, as before; in the JPEG marker syntax these have no payload -
 * confirm whether that matters for the estimate. */
static void scan_jpeg_file (void)
{
  const unsigned char *data_p = scan_obj_p->file_p;
  unsigned int len = scan_obj_p->file_len;
  unsigned int offset = 0;
  int done = FALSE;

  while (!done && offset < len) {
    unsigned char type;
    int is_sos = FALSE;

    if (data_p[offset] != 0xff) {
      fprintf(stderr,
              "scan_jpeg_file: %s - unexpected byte 0x%02x at offset %u -- looking for marker\n",
              scan_obj_p->url_p,
              data_p[offset],
              offset);
      ++offset;
      continue;
    }

    /* the marker type follows the 0xff prefix; stop if it is missing */
    if (++offset >= len)
      break;
    type = data_p[offset++];

    switch (type) {
      case 0xda:
        is_sos = TRUE;
        /* FALLTHROUGH - this segment has a length field as well */

      case 0xc0:
      case 0xc1:
      case 0xc2:
      case 0xc3:
      case 0xc5:
      case 0xc6:
      case 0xc7:
      case 0xc9:
      case 0xca:
      case 0xcb:
      case 0xcd:
      case 0xce:
      case 0xcf:

        /* app markers */
      case 0xe0:
      case 0xe1:
      case 0xe2:
      case 0xe3:
      case 0xe4:
      case 0xe5:
      case 0xe6:
      case 0xe7:
      case 0xe8:
      case 0xe9:
      case 0xea:
      case 0xeb:
      case 0xec:
      case 0xed:
      case 0xee:
      case 0xef:
        /* comment */
      case 0xff:
        /* DAC */
      case 0xcc:
      case 0xde:
      case 0xc4:
      case 0x01:
      case 0xd9:
      case 0xdb:
      case 0xfe:
        if (len - offset < 2) {
          /* the length field itself is cut off: nothing left to scan */
          offset = len;
        }
        else {
          offset += ((unsigned int) data_p[offset] << 8) | data_p[offset + 1];
          done = is_sos;
        }
        break;

        /* markers that are skipped without reading a length */
      case 0xf0:
      case 0xf1:
      case 0xf2:
      case 0xf3:
      case 0xf4:
      case 0xf5:
      case 0xf6:
      case 0xf7:
      case 0xf8:
      case 0xf9:
      case 0xfa:
      case 0xfb:
      case 0xfc:
      case 0xfd:
      case 0xc8:
      case 0xd8:
        break;

      case 0xdc:
      case 0xdd:
        offset += 4;
        break;
      case 0xdf:
        offset += 3;
        break;

      case 0x00:
        break;
      default:
        break;
    }
  }

  if (done)
    scan_obj_p->img_hdr_size = (offset < len) ? offset : len;
  else
    scan_obj_p->img_hdr_size = 1024;

  fprintf(stderr, "%s : image hdr size estimated %d bytes\n",
          scan_obj_p->url_p, scan_obj_p->img_hdr_size);
}

/* scan root xhtml for referenced data -
 * images/objects and external style sheets
 *
 * Pass 1 runs scan_css() over the content of every <style> element
 * reported by find_tag_content().  Pass 2 walks all '<' characters in
 * the document: for a start tag other than <style>, a url(...) inside
 * its style attribute is registered; then img/src, object/data and
 * link/href are registered with add_url(), and an absolute base/href
 * replaces defaultBaseP with its base part.
 *
 * The text at str_p is modified temporarily (a NUL is written at the
 * end of each <style> content and restored), so it must be writable.
 *
 * Compared with the previous version: the temporary copy of the base
 * href is freed, both allocations in that branch are checked, the scan
 * for the closing '>' stops at the end of the string (an unterminated
 * tag ends the scan), and stores to variables that were never read
 * afterwards have been dropped. */
static void scan_xhtml (const char *str_p)
{
  char *tag_p;
  char *next_p;
  char *p;
  int tag_len;
  int offset;

  /* we need to find <style> </style> tags and scan
   * within that area for @import followed by the URL */
  for (next_p = (char*) str_p; find_tag_content(next_p, "style", &tag_p, &tag_len); next_p = tag_p + tag_len)
  {
    assert(tag_p[tag_len] == '<');
    /* do a simplistic search for url */
    tag_p[tag_len] = '\0';
    scan_css(tag_p, tag_p + tag_len + 9); /* 9 is for "</style> " */
    tag_p[tag_len] = '<';
  }

  while (str_p != NULL && *str_p != '\0') {
    if ((str_p = strchr(str_p, '<')) == NULL)
      break;

    /* check each start tag for a style property containing a url */
    if ((*(str_p+1) != '/') && (*(str_p+1) != '?') && (*(str_p+1) != '!')) {
      /* this is a start tag, not an end tag or processing instruction */
      tag_p = (char *)(str_p+1);
      skip_ws(&tag_p);
      /* don't look in style elements, that's been done already */
      if (strncmp(tag_p, "style", 5) != 0) {
        for (tag_len = 0, p = tag_p; *p != '>' && *p != '\0'; p++)
          tag_len++;
        /* no '>' up to the end of the document: nothing left to parse */
        if (*p == '\0')
          break;
        offset = tag_p + tag_len - scan_file_p;
        if (find_attr("style", tag_p, tag_len, &tag_p, &tag_len)) {
          if (find_prop("url", tag_p, tag_len, &tag_p, &tag_len)) {
            add_url(tag_p, tag_p + tag_len, FALSE, offset);
          }
        }
      }
    }

    if (match_tag(str_p, "img", &tag_p, &tag_len)) {
      offset = tag_p + tag_len - scan_file_p;
      if (find_attr("src", tag_p, tag_len, &tag_p, &tag_len))
        add_url(tag_p, tag_p + tag_len, FALSE, offset);
      str_p = tag_p + tag_len;
    }
    else if (match_tag(str_p, "object", &tag_p, &tag_len)) {
      offset = tag_p + tag_len - scan_file_p;
      if (find_attr("data", tag_p, tag_len, &tag_p, &tag_len))
        add_url(tag_p, tag_p + tag_len, FALSE, offset);
      str_p = tag_p + tag_len;
    }
    else if (match_tag(str_p, "link", &tag_p, &tag_len)) {
      offset = tag_p + tag_len - scan_file_p;
      if (find_attr("href", tag_p, tag_len, &tag_p, &tag_len))
        add_url(tag_p, tag_p + tag_len, FALSE, offset);
      str_p = tag_p + tag_len;
    }
    else if (match_tag(str_p, "base", &tag_p, &tag_len)) {
      if (find_attr("href", tag_p, tag_len, &tag_p, &tag_len)) {
        /* NUL-terminated copy of the attribute value for the URL helpers */
        char *url_p = malloc(tag_len+1);
        if (url_p != NULL) {
          memcpy(url_p, tag_p, tag_len);
          url_p[tag_len] = '\0';
          if (URL_abs_e == URL_check_url((unsigned char*)url_p)) {
            char *temp_p = malloc(tag_len + 1);
            if (temp_p != NULL) {
              int len = URL_extract_base((unsigned char*)url_p, (unsigned char*)temp_p, tag_len+1);
              if (len >= 0) {
                /* the new base takes over from the old one */
                free(defaultBaseP);
                defaultBaseP = temp_p;
              }
              else {
                free(temp_p);
              }
            }
          }
          free(url_p);
        }
      }
      str_p = tag_p + tag_len;
    }
    else
      str_p = strchr(str_p, '>');
  }
}

/* scanner entry for xhtml objects: runs scan_xhtml() over the buffer
 * of the object currently being scanned (scan_obj_p) to pick up
 * images/objects and external style sheets */
static void scan_xhtml_file (void)
{
  const char *doc_p = (const char*) scan_obj_p->file_p;

  scan_xhtml(doc_p);
}

/* non-zero if the URL of obj_p is one of the URLs listed in group_p */
static int is_in_group (interleave_group_t *group_p, refobj_t *obj_p)
{
  int idx = 0;

  /* advance until a URL compares equal or the list is exhausted */
  while (idx < group_p->urls_cnt &&
         strcmp(group_p->urls_p[idx], obj_p->url_p) != 0)
    idx++;

  return (idx < group_p->urls_cnt);
}

/* look up the referenced object whose URL equals the i-th URL of
 * group_p; returns NULL if no object in refobjs has that URL */
static refobj_t *get_group_obj (interleave_group_t *group_p, int i)
{
  list_node_t *cur_p = refobjs.next_p;

  /* refobjs is circular: the walk ends when it is back at the head */
  while (cur_p != &refobjs) {
    refobj_t *candidate_p = (refobj_t*)cur_p->data;

    if (strcmp(candidate_p->url_p, group_p->urls_p[i]) == 0)
      return (candidate_p);
    cur_p = cur_p->next_p;
  }

  return (NULL);
}

/* determine the interleave chunk size for an object
 *
 * Looks for the first interleave group that lists the URL of obj_p.
 * If there is one, the chunk size is ChunkSize scaled by the ratio of
 * this object's size to the smallest object of the group (sizes are
 * taken without the image header), so all members of the group need
 * about the same number of chunks.  The result is stored in
 * *chunk_size_p and TRUE is returned.
 *
 * FALSE is returned, and *chunk_size_p is not written, when the object
 * is in no group or when no usable chunk size exists: the smallest
 * size is not positive, or the scaled size is not positive.  The
 * previous version divided by zero in the first case (also when the
 * first URL of the group had no loaded object).
 *
 * The scaling is done in long long because size * ChunkSize can exceed
 * the int range for large objects.  A result above the object's own
 * size is limited to that size. */
static int query_interleave_size (refobj_t *obj_p, int *chunk_size_p)
{
  list_node_t *node_p;

  /* find the object URL in an interleave group */
  for (node_p = interleave_groups.next_p;
       node_p != &interleave_groups;
       node_p = node_p->next_p) {
    interleave_group_t *group_p = (interleave_group_t*)node_p->data;
    int smallest_size = 0;
    int have_size = FALSE;
    int obj_size;
    long long scaled;
    int i;

    if (!is_in_group(group_p, obj_p))
      continue;

    /* determine chunk size based on size
     * relative to smallest object */
    for (i = 0; i < group_p->urls_cnt; i++) {
      refobj_t *group_obj_p = get_group_obj(group_p, i);

      if (group_obj_p != NULL) {
        int size = get_obj_len(group_obj_p) - group_obj_p->img_hdr_size;
        if (!have_size || size < smallest_size) {
          smallest_size = size;
          have_size = TRUE;
        }
      }
    }

    if (smallest_size <= 0)
      return (FALSE);

    obj_size = get_obj_len(obj_p) - obj_p->img_hdr_size;
    scaled = (long long) obj_size * ChunkSize / smallest_size;
    /* a chunk never needs to be larger than the data it is cut from */
    if (scaled > obj_size)
      scaled = obj_size;
    if (scaled <= 0)
      return (FALSE);

    *chunk_size_p = (int) scaled;
    return (TRUE);
  }

  return (FALSE);
}

/* build the chunk sequence for an image
 *
 * Chunk 0 always carries the content header plus the image header and
 * is placed at the object's own parent offset.  The rest of the data is
 * placed 8 KiB further into the parent (limited to the parent's
 * length): as one chunk when the object is in no interleave group,
 * otherwise as pieces of at most the size reported by
 * query_interleave_size(). */
static void image_get_chunk_seq (refobj_t *obj_p, chunk_seq_t *seq_p)
{
  int chunk_size;
  int body_offs = MIN(obj_p->parent_offs + 8 * 1024, get_obj_len(obj_p->parent_obj_p));
  chunk_node_t *node_p;

  if (query_interleave_size(obj_p, &chunk_size)) {
    /* interleaved: header chunk followed by ceil(size / chunk_size) pieces */
    int size = get_obj_len(obj_p) - obj_p->img_hdr_size - obj_p->content_hdr_len;
    int piece_total = (size + chunk_size - 1) / chunk_size + 1;
    int idx = 1;

    seq_p->chunks_p = (chunk_node_t*) malloc(sizeof(chunk_node_t) * piece_total);
    assert(seq_p->chunks_p);
    seq_p->chunks_cnt = piece_total;

    node_p = &seq_p->chunks_p[0];
    node_p->parent_offs = obj_p->parent_offs;
    node_p->length = obj_p->img_hdr_size + obj_p->content_hdr_len;

    while (idx < piece_total) {
      node_p = &seq_p->chunks_p[idx++];
      node_p->parent_offs = body_offs;
      node_p->length = MIN(chunk_size, size);
      size -= node_p->length;
    }

    /* every byte of the body must have been handed out */
    assert(size == 0);
    return;
  }

  /* not interleaved: header chunk and one chunk for everything else */
  seq_p->chunks_p = (chunk_node_t*) malloc(sizeof(chunk_node_t) * 2);
  assert(seq_p->chunks_p);
  seq_p->chunks_cnt = 2;

  node_p = &seq_p->chunks_p[0];
  node_p->parent_offs = obj_p->parent_offs;
  node_p->length = obj_p->img_hdr_size + obj_p->content_hdr_len;

  node_p = &seq_p->chunks_p[1];
  node_p->parent_offs = body_offs;
  node_p->length = get_obj_len(obj_p) - seq_p->chunks_p[0].length;
}

/* get parent offset for images
 *
 * While the headers (content header plus image header) have not been
 * written completely, the image is due at the offset of its reference
 * in the parent.  Once they are out, the remainder is due 4 KiB later
 * in the parent.  Returns -1 when the whole object has been output. */
static int image_get_parent_offs (refobj_t *obj_p)
{
  /* nothing left to send */
  if (!(obj_p->output_offs < get_obj_len(obj_p)))
    return (-1);

  /* still inside the headers: stay close to the reference */
  if (obj_p->output_offs < obj_p->img_hdr_size + obj_p->content_hdr_len)
    return (obj_p->parent_offs);

  return (obj_p->parent_offs + 4 * 1024);
}

/* size of the next chunk of an image: what is left of the headers
 * (content header plus image header) while those are being written,
 * but never more than what is left of the object; afterwards all
 * remaining data */
static int image_get_chunk_size (refobj_t *obj_p)
{
  const int in_headers =
    obj_p->output_offs < obj_p->img_hdr_size + obj_p->content_hdr_len;

  if (!in_headers)
    return (get_obj_len(obj_p) - obj_p->output_offs);

  return (MIN(obj_p->img_hdr_size + obj_p->content_hdr_len - obj_p->output_offs,
              get_obj_len(obj_p) - obj_p->output_offs));
}

/* find the child of obj_p that is due first
 *
 * Every child that still has data to output reports the parent offset
 * at which it wants to appear, either through the get_parent_offs hook
 * of its type or, without a hook, as its parent_offs.  The child with
 * the smallest non-negative offset is returned (NULL if there is none).
 * *child_offs_p receives that offset plus the parent's content header
 * length, or 0 when the offset is not positive or ForceBeforeRoot is
 * set.  The offs parameter is not used. */
static refobj_t *find_first_child (refobj_t *obj_p, int offs, int *child_offs_p)
{
  refobj_t *best_p = NULL;
  int best_offs = -1;
  list_node_t *cur_p;

  for (cur_p = obj_p->children.next_p;
       cur_p != &obj_p->children;
       cur_p = cur_p->next_p) {
    refobj_t *kid_p = (refobj_t *)cur_p->data;
    int kid_offs;

    /* completely written children are out of the running */
    if (kid_p->output_offs == get_obj_len(kid_p))
      continue;

    if (kid_p->ext_p->get_parent_offs == NULL) {
      if (kid_p->output_offs < get_obj_len(kid_p))
        kid_offs = kid_p->parent_offs;
      else
        kid_offs = -1;
    }
    else
      kid_offs = (kid_p->ext_p->get_parent_offs)(kid_p);

    if (kid_offs < 0)
      continue;

    if (best_p == NULL || kid_offs < best_offs) {
      best_p = kid_p;
      best_offs = kid_offs;
    }
  }

  if (ForceBeforeRoot)
    best_offs = 0;

  if (best_offs > 0)
    *child_offs_p = obj_p->content_hdr_len + best_offs;
  else
    *child_offs_p = 0;

  return (best_p);
}

/* output a chunk
 *
 * Writes one "CHK <number> <size> MORE|LAST" line followed by
 * chunk_size bytes of the object - first whatever is left of its
 * content header, then file data - and a closing CRLF.  output_offs of
 * the object is advanced by the bytes written.  An object gets the
 * next message number when its first chunk is written, and the counter
 * is taken back when its last chunk is written. */
static void output_multiplex_chk (FILE *out_p, refobj_t *obj_p, int chunk_size)
{
  int is_last;

  if (obj_p->output_offs == 0)
    obj_p->msg = ++msg_number;

  /* the chunk is the last one if it reaches the end of the object */
  is_last = !(obj_p->output_offs + chunk_size < get_obj_len(obj_p));

  fprintf(out_p, "CHK %d %d %s\r\n",
          (reuseMsgNumbers ? obj_p->msg : obj_p->id),
          chunk_size,
          is_last ? "LAST" : "MORE");

  if (is_last)
    --msg_number;

  /* content header part of the chunk, if any of it is still pending */
  if (obj_p->output_offs < obj_p->content_hdr_len) {
    int hdr_bytes = MIN(obj_p->content_hdr_len - obj_p->output_offs, chunk_size);

    fwrite(obj_p->content_hdr_p + obj_p->output_offs, 1, hdr_bytes, out_p);
    obj_p->output_offs += hdr_bytes;
    chunk_size -= hdr_bytes;
  }

  /* file data part; output_offs counts the content header as well */
  if (chunk_size > 0) {
    assert(chunk_size == MIN(chunk_size, get_obj_len(obj_p) - obj_p->output_offs));
    fwrite(obj_p->file_p + obj_p->output_offs - obj_p->content_hdr_len, 1, chunk_size, out_p);
    obj_p->output_offs += chunk_size;
  }

  fprintf(out_p, "\r\n");
}

/* list of chunk sequences, one per referenced object; filled by
 * prep_chunk_seqs() and emptied by destroy_chunk_seqs() */
static list_node_t chunk_seqs;

/* create a chunk sequence for every object in refobjs
 *
 * Object types with a get_chunk_seq hook build their own sequence; all
 * others get a single chunk covering the whole object at its parent
 * offset.  The chunk lengths of a sequence must add up to the object
 * length.  The sequences are appended to chunk_seqs in refobjs order. */
static void prep_chunk_seqs (void)
{
  list_node_t *cur_p;
  list_node_t *nu_seq;
  chunk_seq_t *seq_p;
  int idx, size;

  list_init((&(chunk_seqs)));

  cur_p = refobjs.next_p;
  while (cur_p != &refobjs) {
    refobj_t *obj_p = (refobj_t*)cur_p->data;

    seq_p = (chunk_seq_t*) malloc(sizeof(chunk_seq_t));
    assert(seq_p);

    seq_p->obj_p = obj_p;
    seq_p->output_idx = 0;
    seq_p->output_offs = 0;

    if (obj_p->ext_p->get_chunk_seq == NULL) {
      /* default: the object goes out in one piece */
      seq_p->chunks_p = (chunk_node_t*) malloc(sizeof(chunk_node_t) * 1);
      assert(seq_p->chunks_p);
      seq_p->chunks_cnt = 1;
      seq_p->chunks_p[0].parent_offs = obj_p->parent_offs;
      seq_p->chunks_p[0].length      = get_obj_len(obj_p);
    }
    else {
      (obj_p->ext_p->get_chunk_seq)(obj_p, seq_p);
    }

    /* the chunks have to cover the object exactly */
    size = 0;
    for (idx = 0; idx < seq_p->chunks_cnt; idx++)
      size += seq_p->chunks_p[idx].length;
    assert(size == get_obj_len(seq_p->obj_p));

    nu_seq = (list_node_t *)malloc(sizeof(list_node_t));
    assert(nu_seq);
    nu_seq->data = (void *)seq_p;
    list_insert_before(&chunk_seqs, nu_seq);

    cur_p = cur_p->next_p;
  }
}

/* release everything prep_chunk_seqs() put on chunk_seqs: each list
 * node, its chunk sequence and the sequence's chunk array */
static void destroy_chunk_seqs (void)
{
  list_node_t *node_p;
  chunk_seq_t *seq_p;

  for (;;) {
    /* the list is empty once the head points back at itself */
    if (chunk_seqs.prev_p == &chunk_seqs)
      break;

    node_p = chunk_seqs.next_p;
    node_p = list_unlink(node_p);
    seq_p = (chunk_seq_t *)node_p->data;
    free(node_p);

    if (seq_p == NULL)
      continue;
    free(seq_p->chunks_p);
    free(seq_p);
  }
}

/* smallest parent offset, not below old_offs, at which a pending chunk
 * of a child of obj_p is due; the length of obj_p if there is none.
 * Only the next unwritten chunk of each child sequence is considered. */
static int find_next_child_offset (refobj_t *obj_p, int old_offs)
{
  int nearest = get_obj_len(obj_p);
  list_node_t *cur_p;

  for (cur_p = chunk_seqs.next_p; cur_p != &chunk_seqs; cur_p = cur_p->next_p) {
    chunk_seq_t *seq_p = (chunk_seq_t *)cur_p->data;
    int due_offs;

    /* other objects' children and finished sequences do not count */
    if (seq_p->obj_p->parent_obj_p != obj_p)
      continue;
    if (!(seq_p->output_idx < seq_p->chunks_cnt))
      continue;

    due_offs = seq_p->chunks_p[seq_p->output_idx].parent_offs;
    if (due_offs >= old_offs && due_offs < nearest)
      nearest = due_offs;
  }

  return (nearest);
}

/* find a child chunk that is due at parent offset offs
 *
 * For each chunk sequence whose object is a child of parent_p, the
 * first chunk placed at or after offs is located and then up to
 * offs_idx further chunks placed exactly at offs are skipped.  If the
 * chunk reached has not been written yet (its index is not below the
 * sequence's output_idx) and is placed at offs, the sequence and the
 * chunk are returned through v_seq_p / v_chunk_p and the result is
 * TRUE.  FALSE means no sequence has such a chunk; the outputs are
 * left unchanged then. */
static int find_child_chunk (refobj_t *parent_p,
                             int offs,
                             int offs_idx,
                             chunk_seq_t **v_seq_p,
                             chunk_node_t **v_chunk_p)
{
  list_node_t *cur_p;

  for (cur_p = chunk_seqs.next_p; cur_p != &chunk_seqs; cur_p = cur_p->next_p) {
    chunk_seq_t *seq_p = (chunk_seq_t *)cur_p->data;
    int pos;
    int skip;

    if (seq_p->obj_p->parent_obj_p != parent_p)
      continue;

    /* first chunk that is not placed before offs */
    pos = 0;
    while (pos < seq_p->chunks_cnt && seq_p->chunks_p[pos].parent_offs < offs)
      pos++;

    /* step over offs_idx chunks that sit exactly at offs */
    skip = offs_idx;
    while (skip > 0 && pos < seq_p->chunks_cnt &&
           seq_p->chunks_p[pos].parent_offs == offs) {
      pos++;
      skip--;
    }

    if (pos < seq_p->output_idx)
      continue;
    if (pos < seq_p->chunks_cnt && seq_p->chunks_p[pos].parent_offs == offs) {
      *v_seq_p = seq_p;
      *v_chunk_p = &seq_p->chunks_p[pos];
      return (TRUE);
    }
  }

  return (FALSE);
}

/* output up to length bytes of an object, interleaved with its children
 *
 * Starting at the object's current output offset, the object's own
 * data is written as chunks up to the next offset at which a child
 * chunk is due (or up to the end of the requested range).  When a
 * child chunk is due at the current offset, that child is output
 * recursively for the length of its chunk and its sequence moves on to
 * the next chunk.  The function returns when the requested range has
 * been written or when neither own data nor a child chunk can be
 * output. */
static void output_multiplex_obj (FILE *out_p,
                                  refobj_t *obj_p,
                                  int length)
{
  int offs = obj_p->output_offs;
  int end_offs = offs + length;
  int offs_idx = 0;
  int next_offs;
  chunk_seq_t *seq_p;
  chunk_node_t *chunk_p;

  for (;;) {
    next_offs = find_next_child_offset(obj_p, offs);

    if (offs < next_offs) {
      /* own data comes first */
      if (offs == end_offs)
        return;
      if (next_offs > end_offs)
        next_offs = end_offs;

      output_multiplex_chk(out_p, obj_p, next_offs - offs);
      offs = next_offs;
      offs_idx = 0;
      continue;
    }

    /* attempt to find next chunk, retrying once one position further */
    if (!find_child_chunk(obj_p, offs, offs_idx, &seq_p, &chunk_p)) {
      offs_idx++;
      if (!find_child_chunk(obj_p, offs, offs_idx, &seq_p, &chunk_p))
        return;
    }

    output_multiplex_obj(out_p, seq_p->obj_p, chunk_p->length);
    seq_p->output_idx++;
  }
}

/* output multiplexed
 *
 * Writes the Content-Type header of the multiplexed document (with the
 * type of the root object as parameter) and an empty line, then the
 * root object with all its children as chunks, and finally the closing
 * "CHK 0 0 LAST" line. */
static void output_multiplex (FILE *out_p)
{
  const char *root_type_p = root_obj_p->ext_p->type_p;

  /* output header */
  fprintf(out_p, "Content-Type: %s; type=%s\r\n", TYPE_MULTIPLEXED, root_type_p);
  fputs("\r\n", out_p);

  /* output body */
  prep_chunk_seqs();
  output_multiplex_obj(out_p, root_obj_p, get_obj_len(root_obj_p));
  destroy_chunk_seqs();

  fputs("CHK 0 0 LAST\r\n", out_p);
}

/* match assignment option
 *
 * Looks for the first argument (argv_p[0] is skipped) that starts with
 * opt_p, e.g. "-base=".  If there is one, it is removed from argv_p by
 * moving the following arguments down, *argc_p is decremented and a
 * pointer to the text after the prefix is returned.  NULL means no
 * argument matched; argv_p and *argc_p are unchanged then. */
static const char *match_opt_assign (int *argc_p,
                                     char *argv_p[],
                                     const char *opt_p)
{
  const int arg_total = *argc_p;
  const char *value_p;
  int hit = 1;
  int next;

  while (hit < arg_total &&
         strncmp(argv_p[hit], opt_p, strlen(opt_p)) != 0)
    hit++;

  if (hit >= arg_total)
    return (NULL);

  value_p = argv_p[hit] + strlen(opt_p);

  /* close the gap left by the consumed argument */
  for (next = hit + 1; next < arg_total; next++)
    argv_p[next - 1] = argv_p[next];
  *argc_p = arg_total - 1;

  return (value_p);
}

/* match flag option
 *
 * Looks for the first argument (argv_p[0] is skipped) that is exactly
 * equal to opt_p, e.g. "-cid".  If there is one, it is removed from
 * argv_p by moving the following arguments down, *argc_p is
 * decremented and 1 is returned.  Otherwise 0 is returned and nothing
 * is changed. */
static int find_opt (int *argc_p, char *argv_p[], const char *opt_p)
{
  const int argc = *argc_p;
  int found = 0;
  int i;

  /* on a match the loop ends with i one past the matching argument */
  for (i = 1; i < argc && !found; i++)
    found = (strcmp(argv_p[i], opt_p) == 0);

  if (found) {
    for (; i < argc; i++)
      argv_p[i-1] = argv_p[i];
    *argc_p = argc - 1;
  }

  return (found);
}

/* init - reset the per-document state: no objects are known, nothing
 * is being scanned and the message counter starts at zero */
static void init (void)
{
  /* counters */
  obj_cnt = 0;
  msg_number = 0;

  /* current document and scan position */
  root_obj_p = NULL;
  scan_obj_p = NULL;
  scan_file_p = NULL;
  filename_base_p = NULL;

  /* list of referenced objects */
  list_init(&(refobjs));
}

/* cleanup
 *
 * Frees every referenced object (its list node, file buffer, content
 * header and the object itself), every interleave group (its list
 * node, the URL text buffer that urls_p[0] points to, the URL array
 * and the group itself) and the default base URL.
 *
 * defaultBaseP is reset to NULL after it has been freed so that a
 * later free(defaultBaseP), as done in scan_xhtml() when a <base>
 * element is found, cannot free the same block twice.
 *
 * NOTE(review): main() calls this after each input file, so the
 * interleave groups given on the command line are gone after the first
 * file - confirm that this is intended. */
static void cleanup (void)
{
  list_node_t *node_p;
  refobj_t *obj_p;
  interleave_group_t *group_p;

  while (refobjs.next_p != &refobjs) {
    node_p = refobjs.next_p;
    list_unlink(node_p);
    obj_p = (refobj_t*)node_p->data;
    free(node_p);
    free(obj_p->file_p);
    free(obj_p->content_hdr_p);
    free(obj_p);
  }

  while (interleave_groups.next_p != &interleave_groups) {
    node_p = interleave_groups.next_p;
    list_unlink(node_p);
    group_p = (interleave_group_t *)node_p->data;
    free(node_p);
    /* all URLs of a group share one buffer, which urls_p[0] points to */
    free(group_p->urls_p[0]);
    free(group_p->urls_p);
    free(group_p);
  }

  /* free(NULL) is a no-op, so no guard is needed */
  free(defaultBaseP);
  defaultBaseP = NULL;
}

/* create an interleave group from a comma separated list of URLs
 *
 * The list is split at every ','; empty items are kept.  All URL
 * strings are stored NUL-terminated, one after the other, in a single
 * buffer, and urls_p[] of the new group points into that buffer (so
 * urls_p[0] is the start of the buffer).  The group is appended to
 * interleave_groups.
 *
 * All four allocations are now checked with assert(), as the first two
 * already were, and the items are copied with memcpy() since their
 * exact length is known. */
static void create_interleave_group (const char *urls_p)
{
  interleave_group_t *group_p;
  list_node_t *node_p;
  const char *str_p, *next_p;
  char *buf_p;
  int cnt;
  int len;

  /* count the number of URLs and string length */
  cnt = 0;
  len = 0;
  for (str_p = urls_p; str_p != NULL;
       str_p = (next_p != NULL) ? next_p + 1 : NULL) {
    next_p = strchr(str_p, ',');
    if (next_p != NULL)
      len += (next_p - str_p) + 1;
    else
      len += strlen(str_p) + 1;
    cnt++;
  }

  group_p = (interleave_group_t*) malloc(sizeof(interleave_group_t));
  assert(group_p);
  group_p->urls_cnt = cnt;
  group_p->urls_p = (char**) malloc(sizeof(const char*) * cnt);
  assert(group_p->urls_p);
  buf_p = (char*) malloc(len);
  assert(buf_p);

  /* copy the items; len is the item length including its terminator */
  cnt = 0;
  for (str_p = urls_p; str_p != NULL;
       str_p = (next_p != NULL) ? next_p + 1 : NULL) {
    next_p = strchr(str_p, ',');
    if (next_p != NULL)
      len = (next_p - str_p) + 1;
    else
      len = strlen(str_p) + 1;

    group_p->urls_p[cnt] = buf_p;
    memcpy(buf_p, str_p, len - 1);
    buf_p[len - 1] = '\0';

    buf_p += len;
    cnt++;
  }

  node_p = (list_node_t*)malloc(sizeof(list_node_t));
  assert(node_p);
  node_p->data = (void *)group_p;
  list_insert_before(&interleave_groups, node_p);
}

/* main */
int main (int argc, char *argv_p[])
{
  const char *base_opt_p;
  const char *output_prefix_p;
  int i, base_len;
  int to_stdout = TRUE;
  int base_on = TRUE;
  int content_disp_option = FALSE;
  const char *group_p;



  list_init((&(interleave_groups)));

  defaultBaseP = NULL;
  if((base_opt_p = match_opt_assign(&argc, argv_p, "-base=")) != NULL) {
    base_len = strlen(base_opt_p) + 1;
    if(URL_abs_e != URL_check_url((unsigned char*)base_opt_p)) {
      fprintf(stderr, "Failed to set base uri (%s)\n", base_opt_p);
      base_opt_p = NULL;
    }
  }

  if(find_opt(&argc, argv_p, "-encode"))
	  encodeHeaders = TRUE;


  Content_Disposition = (char *)match_opt_assign(&argc, argv_p, "-content-disp=");

  if (find_opt(&argc, argv_p, "-cid"))
	  IncludeContentID = 1;
 
  reuseMsgNumbers = find_opt(&argc, argv_p, "-reusemsg");

  ForceBeforeRoot = find_opt(&argc, argv_p, "-force-before-root");
  OneObj = find_opt(&argc, argv_p, "-one-obj");

  output_prefix_p = match_opt_assign(&argc, argv_p, "-output-prefix=");


  /* extract interleave groups */
  while ((group_p = match_opt_assign(&argc, argv_p, "-interleave=")) != NULL) {
      /* group should be a comma seperated list of objects */
      create_interleave_group(group_p);
  }

  to_stdout = (argc == 2);
  if (argc > 1 && !strcmp(argv_p[argc - 1], "-"))
    --argc, to_stdout = TRUE;

  for (i = 1; i < argc; i++) {
    init();
    add_url(argv_p[i], argv_p[i] + strlen(argv_p[i]), TRUE, 0);

    if(base_opt_p) {
      defaultBaseP = malloc(base_len);
      URL_extract_base((unsigned char*)base_opt_p, 
		       (unsigned char*)defaultBaseP, base_len);
    }

    if (root_obj_p) {
      FILE *out_p = stdout;
      scan_objs();
      if (!to_stdout) {
		char output_name[512];
		sprintf(output_name, "%s%s%s.%s", 
				output_prefix_p ? output_prefix_p : "", 
				(output_prefix_p && output_prefix_p[strlen(output_prefix_p) - 1] != '/') ? "/" : "",
				argv_p[i],  "mx");
		if ((out_p = fopen(output_name, "wb")) == NULL)
			fprintf(stderr, "Failed to open output file (%s)\n", output_name);
	  }

     if (out_p) {
#if defined (_WIN32)
		int result;

		result = _setmode(_fileno(stdout), _O_BINARY);
		if(result == -1)
			perror("Cannot set binary mode, using default text mode for stdout.");  
#endif
		output_multiplex(out_p);

		fflush(out_p);
		if (!to_stdout)
		  fclose(out_p);
	 }
   }
    else
      fprintf(stderr, "No root obj?! (%s)\n", argv_p[i]);

    if(defaultBaseP) {
      free(defaultBaseP);
      defaultBaseP = NULL;
    }
    cleanup();
  }

  return (0);
}

/* one named character entity: the name as written between '&' and ';'
 * and the code point it stands for */
struct EntityData {
	char * entity_name;
	int codept;
};

/* Named character entities known to lookupCharRef().
 * The table MUST stay sorted by entity_name in strcmp() order, because
 * lookupCharRef() does a binary search on it.  Two corrections against
 * the previous table: "lt" is 60 (U+003C, it was 38, the code of "amp")
 * and "sup" now sorts in front of "sup1". */
struct EntityData EntityDataLookupTable[] = {
	"aacute",	225,	//  latin small letter a with acute, U+00E1 ISOlat1
	"acirc",	226,	//  latin small letter a with circumflex, U+00E2 ISOlat1
	"acute",	180,	//  acute accent = spacing acute, U+00B4 ISOdia
	"aelig",	230,	//  latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
	"agrave",	224,	//  latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
	"alefsym",	8501,	//  alef symbol = first transfinite cardinal, U+2135 NEW
	"alpha",	945,	//  greek small letter alpha, U+03B1 ISOgrk3
	"amp",		 38,	//  ampersand, U+0026 ISOnum
	"and",		8743,	//  logical and = wedge, U+2227 ISOtech
	"ang",		8736,	//  angle, U+2220 ISOamso
	"apos",		 39,	//  apostrophe = APL quote, U+0027 ISOnum
	"aring",	229,	//  latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
	"asymp",	8776,	//  almost equal to = asymptotic
	"atilde",	 227,	//  latin small letter a with tilde, U+00E3 ISOlat1
	"auml",		228,	//  latin small letter a with diaeresis, U+00E4 ISOlat1
	"bdquo",	8222,	//  double low-9 quotation mark, U+201E NEW
	"beta",		946,	//  greek small letter beta, U+03B2 ISOgrk3
	"brvbar",	166,	//  broken bar = broken vertical bar, U+00A6 ISOnum
	"bull",		8226,	//  bullet = black small circle, U+2022 ISOpub
	"cap",		8745,	//  intersection = cap, U+2229 ISOtech
	"ccedil",	231,	//  latin small letter c with cedilla, U+00E7 ISOlat1
	"cedil",	184,	//  cedilla = spacing cedilla, U+00B8 ISOdia
	"cent",		162,	//  cent sign, U+00A2 ISOnum
	"chi",		967,	//  greek small letter chi, U+03C7 ISOgrk3
	"circ",		710,	//  modifier letter circumflex accent, U+02C6 ISOpub
	"clubs",	9827,	//  black club suit = shamrock, U+2663 ISOpub
	"cong",		8773,	//  approximately equal to, U+2245 ISOtech
	"copy",		169,	//  copyright sign, U+00A9 ISOnum
	"crarr",	8629,	//  downwards arrow with corner leftwards = carriage return, U+21B5 NEW
	"cup",		8746,	//  union = cup, U+222A ISOtech
	"curren",	164,	//  currency sign, U+00A4 ISOnum
	"dArr",		8659,	//  downwards double arrow, U+21D3 ISOamsa
	"dagger",	8224,	//  dagger, U+2020 ISOpub
	"darr",		8595,	//  downwards arrow, U+2193 ISOnum
	"deg",		176,	//  degree sign, U+00B0 ISOnum
	"delta",	948,	//  greek small letter delta, U+03B4 ISOgrk3
	"diams",	9830,	//  black diamond suit, U+2666 ISOpub
	"divide",	247,	//  division sign, U+00F7 ISOnum
	"eacute",	233,	//  latin small letter e with acute, U+00E9 ISOlat1
	"ecirc",	234,	//  latin small letter e with circumflex, U+00EA ISOlat1
	"egrave",	232,	//  latin small letter e with grave, U+00E8 ISOlat1
	"empty",	8709,	//  empty set = null set, U+2205 ISOamso
	"emsp",		8195,	//  em space, U+2003 ISOpub
	"ensp",		8194,	//  en space, U+2002 ISOpub
	"epsilon",	949,	//  greek small letter epsilon, U+03B5 ISOgrk3
	"equiv",	8801,	//  identical to, U+2261 ISOtech
	"eta",		951,	//  greek small letter eta, U+03B7 ISOgrk3
	"eth",		240,	//  latin small letter eth, U+00F0 ISOlat1
	"euml",		235,	//  latin small letter e with diaeresis, U+00EB ISOlat1
	"euro",		8364,	//  euro sign, U+20AC NEW
	"exist",	8707,	//  there exists, U+2203 ISOtech
	"fnof",		402,	//  latin small letter f with hook =
	"forall",	8704,	//  for all, U+2200 ISOtech
	"frac12",	189,	//  vulgar fraction one half = fraction one half, U+00BD ISOnum
	"frac14",	188,	//  vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
	"frac34",	190,	//  vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
	"frasl",	8260,	//  fraction slash, U+2044 NEW
	"gamma",	947,	//  greek small letter gamma, U+03B3 ISOgrk3
	"ge",		8805,	//  greater-than or equal to, U+2265 ISOtech
	"gt",		 62,	//  greater-than sign, U+003E ISOnum
	"hArr",		8660,	//  left right double arrow, U+21D4 ISOamsa
	"harr",		8596,	//  left right arrow, U+2194 ISOamsa
	"hearts",	9829,	//  black heart suit = valentine, U+2665 ISOpub
	"hellip",	8230,	//  horizontal ellipsis = three dot leader, U+2026 ISOpub
	"iacute",	237,	//  latin small letter i with acute, U+00ED ISOlat1
	"icirc",	238,	//  latin small letter i with circumflex, U+00EE ISOlat1
	"iexcl",	161,	//  inverted exclamation mark, U+00A1 ISOnum
	"igrave",	236,	//  latin small letter i with grave, U+00EC ISOlat1
	"image",	8465,	//  black-letter capital I = imaginary part, U+2111 ISOamso
	"infin",	8734,	//  infinity, U+221E ISOtech
	"int",		8747,	//  integral, U+222B ISOtech
	"iota",		953,	//  greek small letter iota, U+03B9 ISOgrk3
	"iquest",	191,	//  inverted question mark = turned question mark, U+00BF ISOnum
	"isin",		8712,	//  element of, U+2208 ISOtech
	"iuml",		239,	//  latin small letter i with diaeresis, U+00EF ISOlat1
	"kappa",	954,	//  greek small letter kappa, U+03BA ISOgrk3
	"lArr",		8656,	//  leftwards double arrow, U+21D0 ISOtech
	"lambda",	955,	//  greek small letter lamda, U+03BB ISOgrk3
	"lang",		9001,	//  left-pointing angle bracket =
	"laquo",	171,	//  left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
	"larr",		8592,	//  leftwards arrow, U+2190 ISOnum
	"lceil",	8968,	//  left ceiling = APL upstile, U+2308 ISOamsc
	"ldquo",	8220,	//  left double quotation mark, U+201C ISOnum
	"le",		8804,	//  less-than or equal to, U+2264 ISOtech
	"lfloor",	8970,	//  left floor = APL downstile, U+230A ISOamsc
	"lowast",	8727,	//  asterisk operator, U+2217 ISOtech
	"loz",		9674,	//  lozenge, U+25CA ISOpub
	"lrm",		8206,	//  left-to-right mark, U+200E NEW RFC 2070
	"lsaquo",	8249,	//  single left-pointing angle quotation mark, U+2039 ISO proposed
	"lsquo",	8216,	//  left single quotation mark, U+2018 ISOnum
	"lt",		 60,	//  less-than sign, U+003C ISOnum
	"macr",		175,	//  macron = spacing macron = overline = APL overbar, U+00AF ISOdia
	"mdash",	8212,	//  em dash, U+2014 ISOpub
	"micro",	181,	//  micro sign, U+00B5 ISOnum
	"middot",	183,	//  middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
	"minus",	8722,	//  minus sign, U+2212 ISOtech
	"mu",		956,	//  greek small letter mu, U+03BC ISOgrk3
	"nabla",	8711,	//  nabla = backward difference, U+2207 ISOtech
	"nbsp",		160,	//  no-break space = non-breaking space, U+00A0 ISOnum
	"ndash",	8211,	//  en dash, U+2013 ISOpub
	"ne",		8800,	//  not equal to, U+2260 ISOtech
	"ni",		8715,	//  contains as member, U+220B ISOtech
	"not",		172,	//  not sign = angled dash, U+00AC ISOnum
	"notin",	8713,	//  not an element of, U+2209 ISOtech
	"nsub",		8836,	//  not a subset of, U+2284 ISOamsn
	"ntilde",	241,	//  latin small letter n with tilde, U+00F1 ISOlat1
	"nu",		957,	//  greek small letter nu, U+03BD ISOgrk3
	"oacute",	243,	//  latin small letter o with acute, U+00F3 ISOlat1
	"ocirc",	244,	//  latin small letter o with circumflex, U+00F4 ISOlat1
	"oelig",	339,	//  latin small ligature oe, U+0153 ISOlat2
	"ograve",	242,	//  latin small letter o with grave, U+00F2 ISOlat1
	"oline",	8254,	//  overline = spacing overscore, U+203E NEW
	"omega",	969,	//  greek small letter omega, U+03C9 ISOgrk3
	"omicron",	959,	//  greek small letter omicron, U+03BF NEW
	"oplus",	8853,	//  circled plus = direct sum, U+2295 ISOamsb
	"or",		8744,	//  logical or = vee, U+2228 ISOtech
	"ordf",		170,	//  feminine ordinal indicator, U+00AA ISOnum
	"ordm",		186,	//  masculine ordinal indicator, U+00BA ISOnum
	"oslash",	248,	//  latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
	"otilde",	245,	//  latin small letter o with tilde, U+00F5 ISOlat1
	"otimes",	8855,	//  circled times = vector
	"ouml",		246,	//  latin small letter o with diaeresis, U+00F6 ISOlat1
	"para",		182,	//  pilcrow sign = paragraph sign, U+00B6 ISOnum
	"part",		8706,	//  partial differential, U+2202 ISOtech
	"permil",	8240,	//  per mille sign, U+2030 ISOtech
	"perp",		8869,	//  up tack = orthogonal to =
	"phi",		966,	//  greek small letter phi, U+03C6 ISOgrk3
	"pi",		960,	//  greek small letter pi, U+03C0 ISOgrk3
	"piv",		982,	//  greek pi symbol, U+03D6 ISOgrk3
	"plusmn",	177,	//  plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
	"pound",	163,	//  pound sign, U+00A3 ISOnum
	"prime",	8242,	//  prime = minutes = feet, U+2032 ISOtech
	"prod",		8719,	//  n-ary product = product sign, U+220F ISOamsb
	"prop",		8733,	//  proportional to, U+221D ISOtech
	"psi",		968,	//  greek small letter psi, U+03C8 ISOgrk3
	"quot",		 34,	//  quotation mark, U+0022 ISOnum
	"rArr",		8658,	//  rightwards double arrow, U+21D2 ISOtech
	"radic",	8730,	//  square root = radical sign, U+221A ISOtech
	"rang",		9002,	//  right-pointing angle bracket =
	"raquo",	187,	//  right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
	"rarr",		8594,	//  rightwards arrow, U+2192 ISOnum
	"rceil",	8969,	//  right ceiling, U+2309 ISOamsc
	"rdquo",	8221,	//  right double quotation mark, U+201D ISOnum
	"real",		8476,	//  black-letter capital R = real part symbol, U+211C ISOamso
	"reg",		174,	//  registered sign = registered trade mark sign, U+00AE ISOnum
	"rfloor",	8971,	//  right floor, U+230B ISOamsc
	"rho",		961,	//  greek small letter rho, U+03C1 ISOgrk3
	"rlm",		8207,	//  right-to-left mark, U+200F NEW RFC 2070
	"rsaquo",	8250,	//  single right-pointing angle quotation mark, U+203A ISO proposed
	"rsquo",	8217,	//  right single quotation mark, U+2019 ISOnum
	"sbquo",	8218,	//  single low-9 quotation mark, U+201A NEW
	"scaron",	353,	//  latin small letter s with caron, U+0161 ISOlat2
	"sdot",		8901,	//  dot operator, U+22C5 ISOamsb
	"sect",		167,	//  section sign, U+00A7 ISOnum
	"shy",		173,	//  soft hyphen = discretionary hyphen, U+00AD ISOnum
	"sigma",	963,	//  greek small letter sigma, U+03C3 ISOgrk3
	"sigmaf",	962,	//  greek small letter final sigma,U+03C2 ISOgrk3
	"sim",		8764,	//  tilde operator = varies with = similar
	"spades",	9824,	//  black spade suit, U+2660 ISOpub
	"sub",		8834,	//  subset of, U+2282 ISOtech
	"sube",		8838,	//  subset of or equal to, U+2286 ISOtech
	"sum",		8721,	//  n-ary summation, U+2211 ISOamsb
	"sup",		8835,	//  superset of, U+2283 ISOtech
	"sup1",		185,	//  superscript one = superscript digit one, U+00B9 ISOnum
	"sup2",		178,	//  superscript two = superscript digit two = squared, U+00B2 ISOnum
	"sup3",		179,	//  superscript three = superscript digit three = cubed, U+00B3 ISOnum
	"supe",		8839,	//  superset of or equal to, U+2287 ISOtech
	"szlig",	223,	//  latin small letter sharp s = ess-zed, U+00DF ISOlat1
	"tau",		964,	//  greek small letter tau, U+03C4 ISOgrk3
	"there4",	8756,	//  therefore, U+2234 ISOtech
	"theta",	952,	//  greek small letter theta, U+03B8 ISOgrk3
	"thetasym",	977,	//  greek theta symbol, U+03D1 NEW
	"thinsp",	8201,	//  thin space, U+2009 ISOpub
	"thorn",	254,	//  latin small letter thorn, U+00FE ISOlat1
	"tilde",	732,	//  small tilde, U+02DC ISOdia
	"times",	215,	//  multiplication sign, U+00D7 ISOnum
	"trade",	8482,	//  trade mark sign, U+2122 ISOnum
	"uArr",		8657,	//  upwards double arrow, U+21D1 ISOamsa
	"uacute",	250,	//  latin small letter u with acute, U+00FA ISOlat1
	"uarr",		8593,	//  upwards arrow, U+2191
	"ucirc",	251,	//  latin small letter u with circumflex, U+00FB ISOlat1
	"ugrave",	249,	//  latin small letter u with grave, U+00F9 ISOlat1
	"uml",		168,	//  diaeresis = spacing diaeresis, U+00A8 ISOdia
	"upsih",	978,	//  greek upsilon with hook symbol,U+03D2 NEW
	"upsilon",	965,	//  greek small letter upsilon, U+03C5 ISOgrk3
	"uuml",		252,	//  latin small letter u with diaeresis, U+00FC ISOlat1
	"weierp",	8472,	//  script capital P = power set = Weierstrass p, U+2118 ISOamso
	"xi",		958,	//  greek small letter xi, U+03BE ISOgrk3
	"yacute",	253,	//  latin small letter y with acute, U+00FD ISOlat1
	"yen",		165,	//  yen sign = yuan sign, U+00A5 ISOnum
	"yuml",		255,	//  latin small letter y with diaeresis, U+00FF ISOlat1
	"zeta",		950,	//  greek small letter zeta, U+03B6 ISOgrk3
	"zwj",		8205,	//  zero width joiner, U+200D NEW RFC 2070
	"zwnj",		8204	//  zero width non-joiner, U+200C NEW RFC 2070
};
/* pointer to the last element of array v */
#define END(v) (v-1 + sizeof v / sizeof v[0])

/* look up a named character entity
 *
 * ref points at the entity name, i.e. at the character after the '&';
 * the name ends at the ';'.  Returns the code point of the entity, or
 * -1 if the name is not in EntityDataLookupTable, is empty, or is not
 * terminated by ';' before the end of the string.
 *
 * The name has to match a table entry completely.  The previous
 * version compared only the first strlen(name) characters, so that
 * e.g. "pi;" could return the code of "piv" and ";" matched whatever
 * entry the search looked at first. */
int lookupCharRef(unsigned char *ref) /* compare chars starting at ref until the ';' char */
{
	const char *name = (const char *)ref;
	size_t len = 0;
	size_t low = 0;
	size_t high = sizeof EntityDataLookupTable / sizeof EntityDataLookupTable[0];

	/* length of the name; give up if the ';' is missing */
	while (name[len] != ';') {
		if (name[len] == '\0')
			return -1;
		len++;
	}
	if (len == 0)
		return -1;

	// binary search in table, over the half-open range [low, high)
	while (low < high) {
		size_t mid = low + (high - low)/2;
		const char *entry = EntityDataLookupTable[mid].entity_name;
		int comparison = strncmp(entry, name, len);

		/* equal so far but the entry goes on: it sorts after the name */
		if (comparison == 0 && entry[len] != '\0')
			comparison = 1;

		if (comparison == 0)
			return EntityDataLookupTable[mid].codept;
		else if (comparison < 0)
			low = mid + 1;
		else
			high = mid;
	}
	return -1; // unknown entity
}


/*
 * hex2nib -- convert one hexadecimal digit character to its 4-bit value
 *
 * Return: 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for any other byte.
 */
int hex2nib(const unsigned char c) 
{
  if(c >= '0' && c <= '9')
    return (c - '0');

  if(c >= 'a' && c <= 'f')
    return (10 + (c - 'a'));

  if(c >= 'A' && c <= 'F')
    return (10 + (c - 'A'));

  /* Not a hexadecimal digit. */
  return (-1);
}

/*
 * URL_convert_to_plain -- convert a potentially escaped URL to plain text
 *
 * Decodes, in a single pass over the whole string:
 *   - "%HH" escapes (two hexadecimal digits),
 *   - numeric character references "&#NNN;" and "&#xHHH;",
 *   - named character references "&name;" (via lookupCharRef).
 * All other bytes are copied unchanged.
 *
 * Params: escaped_p  NUL-terminated input.
 *         plain_p    output buffer; a terminating NUL is stored at
 *                    plain_p[<returned length>], which can be
 *                    plain_p[max_size], so the buffer needs max_size + 1
 *                    bytes.
 *         max_size   maximum number of decoded bytes to produce.
 *
 * Return: Number of decoded bytes.  0 (with an empty output string) if the
 *         input contains a malformed escape or an unknown reference.
 *
 * NOTE: each reference produces exactly one output byte, so code points
 *       above 255 are truncated to their low 8 bits.
 */
int URL_convert_to_plain (const unsigned char *escaped_p,
			  unsigned char *plain_p,
			  int max_size)
{
  const unsigned char *e = escaped_p;
  unsigned char *p = plain_p;
  int high, low;
  int code, ndigits;
  int url_len = 0;

  assert(escaped_p && plain_p && max_size);

  while(*e != '\0') {
    if(*e == '%') {
      /* % HEX HEX -- the second digit is only read if the first is valid,
	 so a truncated escape never reads past the terminating NUL. */
      high = hex2nib(e[1]);
      if(high == -1) {
	url_len = 0;
	break;
      }

      low = hex2nib(e[2]);
      if(low == -1) {
	url_len = 0;
	break;
      }

      *p++ = (unsigned char)((high << 4) | low);
      url_len++;
      e += 3;
    }
    else if(*e == '&') {
      /* Either a numeric or a named character reference. */
      e++;
      if(*e == '#') {
	/* Numeric reference: accumulate the code point. */
	code = 0;
	ndigits = 0;
	e++;
	if(*e == 'x' || *e == 'X') {
	  e++;
	  while((low = hex2nib(*e)) != -1) {
	    /* Stop accumulating past the Unicode range to avoid overflow. */
	    if(code <= 0x10FFFF)
	      code = (code << 4) | low;
	    ndigits++;
	    e++;
	  }
	}
	else {
	  while(isdigit(*e)) {
	    if(code <= 0x10FFFF)
	      code = (code * 10) + (*e - '0');
	    ndigits++;
	    e++;
	  }
	}

	/* No digits at all, or not a Unicode code point. */
	if(ndigits == 0 || code > 0x10FFFF) {
	  url_len = 0;
	  break;
	}
      }
      else {
	/* Named reference.  The name must be terminated by ';'; check here
	   so the lookup never scans beyond the end of the string. */
	if(strchr((const char *)e, ';') == NULL) {
	  url_len = 0;
	  break;
	}
	code = lookupCharRef((unsigned char *)e);
	if(code == -1) {
	  url_len = 0;
	  break;
	}
      }

      *p++ = (unsigned char)code;
      url_len++;

      /* Move to the ending ';' and step past it if present. */
      while(*e && (*e != ';'))
	e++;
      if(*e)
	e++;
    }
    else {
      *p++ = *e++;
      url_len++;
    }

    /* Reached maximum capacity, so just stop here. */
    if((p - plain_p) >= max_size) {
      url_len = max_size;
      break;
    }
  }

  plain_p[url_len] = '\0';

  return (url_len);
}

typedef struct url_components_t {
  unsigned char *scheme_p;     /* Scheme component    */
  unsigned char *auth_p;       /* Authority component */   
  unsigned char *path_p;       /* Path component      */
  unsigned char *query_p;      /* Query component     */
  unsigned char *frag_p;       /* Fragment            */
  unsigned char *end_p;        /* End pointer         */
  unsigned char *last_slash_p; /* Last path segment   */
  unsigned char *opaque_p;     /* Opaque part         */
} URL_components_t;

/*
 * Valid url characters: every byte from '*' (0x2A) through '~' (0x7E), plus
 * '!', '#', '$', '%' and '&'.
 * The argument is evaluated several times, so it must have no side effects.
 */
#define IS_URL_CHARS(ch) \
	(((ch) >= '*' && (ch) <= '~') || \
	 (ch) == '!' || (ch) == '#' || (ch) == '$' || (ch) == '%' || (ch) == '&')

/*
 * Look for ".." or "." as a complete segment.
 *
 * Scans the bytes from ss_p through end_p (inclusive) and leaves the result
 * in the caller's local dot_count, using the caller's local dot_p as cursor:
 *   - up to two leading '.' are counted;
 *   - scanning stops at the first other byte (or at a third '.'); unless
 *     that byte is '/', dot_count is reset to 0.
 * So dot_count ends up as 1 for ".", 2 for ".." and 0 for anything else.
 */
#define URL_COUNT_DOTS(ss_p,end_p) \
dot_count = 0; \
dot_p = (ss_p); \
while(dot_p <= (end_p)) { \
  if(*dot_p == '.' && dot_count < 2) { \
    dot_count++; \
    dot_p++; \
    continue; \
  } \
  if(*dot_p != '/') \
    dot_count = 0; \
  break; \
}

/*
 * assign_components -- decompose a url into its components
 *
 * Params: comps  receives pointers into url_p for every component found.
 *                Only the fields of components that are present are
 *                written, and some fields are tested before being set, so
 *                the caller must zero the structure first.
 *         url_p  NUL-terminated url to parse.
 *
 * Return: URL_illegal_e if a byte outside IS_URL_CHARS is found, otherwise
 *         URL_any_e (use check_type() to classify the url).
 */
URL_type_t assign_components(URL_components_t *comps, const unsigned char *url_p)
{
  unsigned char *p = (unsigned char *)url_p;

  assert(comps && url_p);

  /* Check for a scheme: a letter followed by letters, digits, '+', '-' or
     '.', terminated by ':'. */
  if(isalpha(*p)) {
    while(*p != '\0') {
      if(*p == ':') {
	comps->scheme_p = p;
	p++;
	break;
      }

      if(!isalnum(*p) && *p != '+' && *p != '-' && *p != '.') {
	p = (unsigned char *)url_p;
	break;
      }
      p++;
    }
  }

  /* No scheme exists, reset pointer to beginning. */
  if(!comps->scheme_p)
    p = (unsigned char *)url_p;

  /* Check for authority or path. */
  if(p[0] == '/') {
    if(p[1] == '/') {
      comps->auth_p = p;
      p+=2;
    }
    else {
      comps->path_p = p++;
    }
  }
  else {
    /* If the absolute uri has an opaque part, put all bytes up to the first '/'
       begin path marker into the opaque part.  Every byte, including the
       first one, must be a valid url character.
     */
    if(comps->scheme_p && *p != '\0') {
      comps->opaque_p = p;
      while(*p != '\0') {
	if(!IS_URL_CHARS(*p))
	  return (URL_illegal_e);
	if(*p == '/')
	  break;
	p++;
      }
    }
    else if(*p != '?' && *p != '#' && *p != '\0') {
      if(!IS_URL_CHARS(*p))
        return (URL_illegal_e);
      comps->path_p = p;
    }
  }

  /* Check for path/query/fragment. */
  while(*p != '\0') {
    if(!IS_URL_CHARS(*p))
      return (URL_illegal_e);

    switch(*p) {
      case '/':
	/* A '/' inside the query or fragment is data, not a path
	   delimiter, so it must not move the path markers. */
	if(!comps->query_p && !comps->frag_p) {
	  if(!comps->path_p)
	    comps->path_p = p;
	  else
	    comps->last_slash_p = p;
	}
	break;
      case '?':
	if(!comps->query_p && !comps->frag_p)
	  comps->query_p = p;
	break;
      case '#':
	if(!comps->frag_p)
	  comps->frag_p = p;
	break;
      default:
	break;
    }
    p++;
  }

  comps->end_p = p;
  return (URL_any_e);
}

/*
 * check_type -- classify a url from its already parsed components
 *
 * Return: URL_any_e  for an empty url,
 *         URL_abs_e  when a scheme is present,
 *         URL_frag_e when there is a fragment but neither authority nor path,
 *         URL_rel_e  otherwise.
 */
URL_type_t check_type(URL_components_t *comps, const unsigned char *url_p) 
{
  assert(comps && url_p);

  if(*url_p == '\0')
    return (URL_any_e);

  if(comps->scheme_p)
    return (URL_abs_e);

  if(comps->frag_p && !(comps->auth_p || comps->path_p))
    return (URL_frag_e);

  return (URL_rel_e);
}


/*
 * URL_check_url -- Check the type of the URL
 *
 * Parses url_p into its components and classifies the result.
 *
 * Examples of absolute URLs:
 *   "http://example.com/some/text"
 * Examples of relative URLs:
 *   "../images/photo-1.jpg"
 *   ".././images/photo-1.jpg"
 *
 * Return: URL_illegal_e if the url could not be parsed, otherwise the type
 *         reported by check_type().
 */
URL_type_t URL_check_url (const unsigned char *url_p)
{
  URL_components_t parsed;

  assert(url_p);

  memset(&parsed, 0, sizeof(parsed));

  if(assign_components(&parsed, url_p) == URL_illegal_e)
    return (URL_illegal_e);

  return (check_type(&parsed, (unsigned char *)url_p));
}

/*
 * resolve_relative_path_reference
 *
 * Purpose: Handle rules 6(c-h) of RFC 2396.
 *          c) Remove occurrences of "./" where "." is a complete path segment.
 *          d) Remove trailing complete "." segments.
 *          e) Remove "<segment>/../" segments.
 *          f) Remove trailing complete "<segment>/.." segments.
 *          g) If buffer still begins with "..", then we will just accept it.
 *             This is an error condition, but this is a viable option.
 *          h) The remaining buffer is the new URI's path component.
 *
 * Params: start and end point to the beginning and ending path URI buffer
 *         inclusively.
 *
 * Return: Number of bytes removed from the buffer.
 */
int resolve_relative_path_reference(unsigned char *start, unsigned char *end)
{
  unsigned char *dot_p; /* Dot counter pointer.          */
  unsigned char *cur_p; /* Current pointer.              */
  unsigned char *ss_p;  /* Start segment pointer.        */
  unsigned char *ps_p;  /* Prev segment pointer.         */
  unsigned char *end_p; /* Running end pointer.          */
  int dot_count = 0;     /* How many dot have been found. */
  int other_found = 0;

  assert(start && end && (start <= end) && (*start == '/'));

  ps_p = ss_p = cur_p = start;
  end_p = end;
  cur_p++;
  while(cur_p <= end_p) {
    if (*cur_p == '/') {
      other_found = 0;
      /* Found start of a segment. */
      if(dot_count == 1 || (dot_count == 2 && ps_p != ss_p)) {
	if(dot_count == 1) {
	  /* Remove all occurences of "./", where "." is a complete path segment. */
	  memmove(ss_p, cur_p, (end_p - cur_p)+1);
	  end_p -= 2;
	  cur_p = ss_p;
	}
	else {
	  URL_COUNT_DOTS(ps_p+1, ss_p);
	  if(dot_count != 2) {
	    /* Remove <segment>/../, where <segment> != "..". */
	    memmove(ps_p, cur_p, (end_p - cur_p)+1);
	    end_p -= (cur_p - ps_p);
	    ss_p = ps_p;
	    cur_p = ss_p;

	    /* Find the previous segment. */
	    while(ps_p > start)
	      if(*--ps_p == '/')
		break;
	  }
	  else {
	    ps_p = ss_p;
	    ss_p = cur_p;
	  }
	}
	cur_p++;
      }
      else {
	/* Set the previous segment pointer and continue. */
	ps_p = ss_p;
	ss_p = cur_p++;
      }
      dot_count = 0;
    }
    else if(*cur_p == '.') {
      if(!other_found)
	dot_count++;
      cur_p++;
    }
    else {
      other_found = 1;
      dot_count = 0;
      cur_p++;
    }
  }

  URL_COUNT_DOTS(ss_p+1, end_p);
  if(dot_count == 1) {
    /* Remove "." if at end. */
    end_p -= (end_p - ss_p);
  }
  else if(dot_count == 2 && ps_p) {
    URL_COUNT_DOTS(ps_p+1, ss_p);
    /* Remove <segment>/.. if a end and where <segment> != "..". */
    if(dot_count != 2)
      end_p -= (end_p - ps_p);
  }

  /* Remove all segments of ".." at the beginning. */
  ss_p = cur_p = start;
  cur_p++;
  while(cur_p <= end_p) {
    if (*cur_p == '/') {
      URL_COUNT_DOTS(ss_p+1, cur_p);
      if(dot_count == 2) {
	memmove(ss_p, cur_p, (end_p - cur_p)+1);
	cur_p = ss_p + 1;
	end_p -= 3;
      }
      else {
	break;
      }
    }
    else {
      cur_p++;
    }
  }

  /* Return the byte count that we removed. */
  return (end - end_p);
}

/*
 * add_component 
 * Purpose: Copy the bytes [start, end) to the current output position.  If
 *          that would make the output longer than the maximum buffer size,
 *          only the bytes that still fit are copied.
 *
 * Params: start    first byte to copy.
 *         end      one past the last byte to copy (start <= end).
 *         current  current output position.
 *         curlen   number of bytes already in the output buffer.
 *         maxlen   maximum number of bytes the output buffer may hold.
 *
 * Return: Number of bytes copied (0 if nothing fits or nothing was given).
 */
int add_component(const unsigned char *start, 
		   const unsigned char *end, 
		   unsigned char *current,
		   int curlen, 
		   int maxlen)
{
  int url_len;
  int room;

  assert(start && end && (start <= end) && current);

  /* Space left in the output; never negative, even if the caller has
     already overshot maxlen, so memcpy never sees a negative length. */
  room = maxlen - curlen;
  if(room < 0)
    room = 0;

  url_len = (int)(end - start);
  if(url_len > room)
    url_len = room;

  if(url_len > 0)
    memcpy(current, start, (size_t)url_len);

  return (url_len);
}

/*
 * Append the bytes [start_p, end_p) to the url being built.
 * Expands to several statements and relies on the caller's locals:
 *   up         current output position (advanced by the bytes copied),
 *   total_len  bytes written so far (increased by the bytes copied),
 *   url_len    receives the number of bytes copied by this call,
 *   max_size   capacity passed on to add_component().
 * Always use it inside braces when it is the body of an if/else.
 */
#define ADD_COMP(start_p,end_p) \
  url_len = add_component((unsigned char*)(start_p), \
                          (unsigned char*)(end_p), \
                          up, total_len, max_size); \
  up += url_len; \
  total_len += url_len;


/* Return the end (exclusive) of the path component of a parsed url: the
   start of the query if any, else the start of the fragment if any, else
   the end of the string. */
static unsigned char *url_path_end(const URL_components_t *comps)
{
  if(comps->query_p)
    return (comps->query_p);
  if(comps->frag_p)
    return (comps->frag_p);
  return (comps->end_p);
}

/* Return the end (exclusive) of the path with its last segment removed,
   i.e. one past the right-most '/' of the path component (RFC 2396, 5.2
   step 6a).  If the path holds no '/', the whole path is excluded. */
static unsigned char *url_merge_end(const URL_components_t *comps)
{
  unsigned char *p = url_path_end(comps);

  while(p > comps->path_p) {
    if(*--p == '/')
      return (p + 1);
  }
  return (comps->path_p);
}

/*
 * URL_combine_abs_and_rel
 * Combine base (base_p) and relative (relative_p) URLs into url_p. 
 *
 * If relative_p is itself absolute, if base_p is not absolute, or if
 * relative_p refers to the current document (fragment only or empty), the
 * relative url is copied unchanged.
 *
 * Params: url_p     output buffer; a terminating NUL is written after the
 *                   result, which can be at url_p[max_size], so the buffer
 *                   needs max_size + 1 bytes.
 *         max_size  maximum number of bytes of url to produce; longer
 *                   results are truncated.
 *
 * Return: Length of the combined uri, 0 if no length -1 if error.  
 */
int URL_combine_abs_and_rel (const unsigned char *base_p,
			     const unsigned char *relative_p,
			     unsigned char *url_p,
			     int max_size)
{
  URL_components_t base_comps;
  URL_components_t rel_comps;
  unsigned char *up = url_p;
  unsigned char *temp_p, *end_p;
  unsigned char slash_p[] = "/";
  int url_len, total_len;
  URL_type_t url_type;

  assert(base_p && relative_p && url_p);

  memset(&base_comps, 0, sizeof(base_comps));
  memset(&rel_comps, 0, sizeof(rel_comps));
  total_len = url_len = 0;
  *up = '\0';
  
  /* Parse URI base & reference into the potential four components and fragment identifier. */
  if( (URL_illegal_e == assign_components(&base_comps, base_p)) ||
      (URL_illegal_e == assign_components(&rel_comps, relative_p)) ||
      (URL_illegal_e == (url_type = check_type(&rel_comps, (unsigned char*)relative_p))))
    return (-1);

  /* Verify that we can combine the two urls. */
  if((url_type == URL_abs_e) || (URL_abs_e != check_type(&base_comps, (unsigned char *)base_p))) {
    ADD_COMP(relative_p, rel_comps.end_p);
    *up = '\0';
    return (total_len);
  }

  /* Is a reference to the current document? This will return the fragment
     identifier or a zero length uri. 
   */
  if( !rel_comps.scheme_p && !rel_comps.path_p && !rel_comps.auth_p  &&
      !rel_comps.opaque_p && !rel_comps.query_p) 
  { 
    ADD_COMP(relative_p, rel_comps.end_p);
    *up = '\0';
    return (total_len);
  }

  /* The scheme, including its ':', always comes from the base. */
  ADD_COMP(base_p, base_comps.scheme_p+1);

  /* If the reference has no authority component, inherit the base's. */
  if(!rel_comps.auth_p) {

    temp_p = (base_comps.auth_p ? base_comps.auth_p : base_comps.opaque_p);
    if(temp_p) {
      end_p = (base_comps.path_p ? 
	       base_comps.path_p : 
	       url_path_end(&base_comps));
      ADD_COMP(temp_p, end_p);
    }

    /* A reference path that does not start with '/' is relative and needs
       to be merged with the base URI's path. */
    if(rel_comps.path_p && *rel_comps.path_p != '/') {
      temp_p = up;

      /* Add the path delimiter if the base path does not provide it. */
      if(!(base_comps.path_p && *base_comps.path_p == '/')) {
	ADD_COMP(slash_p, (slash_p+1));
      }

      /* All but the last segment is copied. So, include the last slash, but
	 no more.  The right-most '/' is located directly in the base path
	 so that a single-segment path ("/a") is reduced to "/".
       */
      if(base_comps.path_p) {
	end_p = url_merge_end(&base_comps);
	ADD_COMP(base_comps.path_p, end_p);
      }

      /* The reference path component is appended. */
      end_p = url_path_end(&rel_comps);
      ADD_COMP(rel_comps.path_p, end_p);

      /* Resolve "." and ".." segments in place.  Skip this when truncation
	 left nothing to work on. */
      if(up > temp_p && *temp_p == '/') {
	url_len = resolve_relative_path_reference(temp_p, up-1);
	total_len-= url_len;
	up-= url_len;
      }

      /* The remaining buffer is the new URI's path component. */
    }
  }

  /* Add the authority component if it exists. */
  if(rel_comps.auth_p) {
    end_p = (rel_comps.path_p ? 
	     rel_comps.path_p : 
	     url_path_end(&rel_comps));
    ADD_COMP(rel_comps.auth_p, end_p);
  }

  /* Add the relative path if it is an absolute path. */
  if(rel_comps.path_p && *rel_comps.path_p == '/') {
    end_p = url_path_end(&rel_comps);
    ADD_COMP(rel_comps.path_p, end_p);
  }

  /* Add the relative query, excluding any fragment.
     Add the base path (minus its last segment) if the relative path does
     not exist.  Add the path delimiter if it does not exist.
   */
  if(rel_comps.query_p) {
    if(!rel_comps.path_p && base_comps.path_p) {
      if(*base_comps.path_p != '/') {
	ADD_COMP(slash_p, (slash_p+1));
      }
      end_p = url_merge_end(&base_comps);
      ADD_COMP(base_comps.path_p, end_p);
    }
    end_p = rel_comps.frag_p ? rel_comps.frag_p : rel_comps.end_p;
    ADD_COMP(rel_comps.query_p, end_p);
  }

  /* Add the relative fragment. */
  if(rel_comps.frag_p) {
    ADD_COMP(rel_comps.frag_p, rel_comps.end_p);
  }

  *up = '\0';
  return (total_len);
}

/*
 * URL_extract_base -- Extract base URL from url_p
 *
 * Copies url_p, up to but excluding its query and fragment, into base_p.
 * Only absolute urls have a base.  The buffers may overlap at the beginning
 * (url_p == base_p), which is why memmove() is used for the copy.
 *
 * Params: base_p    output buffer; the terminating NUL can be written at
 *                   base_p[max_size], so it needs max_size + 1 bytes.
 *         max_size  maximum number of url bytes to copy.
 *
 * Return: Length of the extracted base uri, zero if there is none or on
 *         error (base_p is left untouched in that case).
 */
int URL_extract_base (const unsigned char *url_p,
		      unsigned char *base_p, 
		      int max_size)
{
  const unsigned char *end_p;
  URL_components_t base_comps;
  int url_len;

  assert(url_p && base_p && max_size);

  /* A negative capacity can hold nothing. */
  if(max_size < 0)
    return (0);

  memset(&base_comps, 0, sizeof(base_comps));

  if(assign_components(&base_comps, url_p) == URL_illegal_e)
    return (0);

  if(check_type(&base_comps, (unsigned char *)url_p) != URL_abs_e) 
    return (0);

  /* The base ends where the query starts, else where the fragment starts,
     else at the end of the url. */
  end_p = (base_comps.query_p ? base_comps.query_p :
          (base_comps.frag_p ? base_comps.frag_p : base_comps.end_p));
  url_len = (int)(end_p - url_p);
  if(url_len > max_size) 
    url_len = max_size;

  memmove(base_p, url_p, (size_t)url_len); 
  base_p[url_len] = '\0';

  return (url_len);
}