/* tags.c -- recognize HTML tags
(c) 1998-2000 (W3C) MIT, INRIA, Keio University
See tidy.c for the copyright notice.
The HTML tags are stored as 8 bit ASCII strings.
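Use FindTag() to find the tag for a node by its element name (earlier
notes refer to a lookupw() for wide char strings; no such function is
defined here).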
*/
#include "platform.h" /* platform independent stuff */
#include "html.h" /* to pull in definition of nodes */
/* number of buckets in hashtab (see below); hash() reduces modulo this */
#define HASHSIZE 357
/* defined in another file; when set, FindTag() gives every node xml_tags */
extern Bool XmlTags;
/*
 Cached dictionary entries for individual elements.
 InitTags() fills each of these in by name using lookup(), so they
 point at entries owned by hashtab; FreeTags() releases those entries.
*/
Dict *tag_html;
Dict *tag_head;
Dict *tag_title;
Dict *tag_base;
Dict *tag_meta;
Dict *tag_body;
Dict *tag_frameset;
Dict *tag_frame;
Dict *tag_noframes;
Dict *tag_hr;
Dict *tag_h1;
Dict *tag_h2;
Dict *tag_pre;
Dict *tag_listing;
Dict *tag_p;
Dict *tag_ul;
Dict *tag_ol;
Dict *tag_dl;
Dict *tag_dir;
Dict *tag_li;
Dict *tag_dt;
Dict *tag_dd;
Dict *tag_td;
Dict *tag_th;
Dict *tag_tr;
Dict *tag_col;
Dict *tag_br;
Dict *tag_a;
Dict *tag_link;
Dict *tag_b;
Dict *tag_i;
Dict *tag_strong;
Dict *tag_em;
Dict *tag_big;
Dict *tag_small;
Dict *tag_param;
Dict *tag_option;
Dict *tag_optgroup;
Dict *tag_img;
Dict *tag_map;
Dict *tag_area;
Dict *tag_nobr;
Dict *tag_wbr;
Dict *tag_font;
Dict *tag_layer;
Dict *tag_spacer;
Dict *tag_center;
Dict *tag_style;
Dict *tag_script;
Dict *tag_noscript;
Dict *tag_table;
Dict *tag_caption;
Dict *tag_form;
Dict *tag_textarea;
Dict *tag_blockquote;
Dict *tag_applet;
Dict *tag_object;
Dict *tag_div;
Dict *tag_span;
/* dummy entry for xml tags: allocated by InitTags(), not stored in hashtab */
Dict *xml_tags; /* dummy for xml tags */
/*
 Hash table of tag definitions, indexed by hash(name).
 Entries that share a bucket are chained through Dict.next.
*/
static Dict *hashtab[HASHSIZE];
/*
 Built-in tag definitions; InitTags() passes each row to install().

 name     - element name used as the hash table key
 versions - VERS_* bit flags for the element
 model    - CM_* bit flags for the element
 parser   - parser routine for the element's content, or null
 chkattrs - attribute checking routine, or null

 Each name should be listed exactly once: install() does not add a
 second entry for a repeated name, it merges the row into the
 existing one.
*/
static struct tag
{
    char *name;
    unsigned versions;
    unsigned model;
    Parser *parser;
    CheckAttribs *chkattrs;
} tags[] =
{
    {"html", (VERS_ALL|VERS_FRAMES), (CM_HTML|CM_OPT|CM_OMITST), ParseHTML, CheckHTML},
    {"head", (VERS_ALL|VERS_FRAMES), (CM_HTML|CM_OPT|CM_OMITST), ParseHead, null},
    {"title", (VERS_ALL|VERS_FRAMES), CM_HEAD, ParseTitle, null},
    {"base", (VERS_ALL|VERS_FRAMES), (CM_HEAD|CM_EMPTY), null, null},
    {"link", (VERS_ALL|VERS_FRAMES), (CM_HEAD|CM_EMPTY), null, CheckLINK},
    {"meta", (VERS_ALL|VERS_FRAMES), (CM_HEAD|CM_EMPTY), null, null},
    {"style", (VERS_FROM32|VERS_FRAMES), CM_HEAD, ParseScript, CheckSTYLE},
    {"script", (VERS_FROM32|VERS_FRAMES), (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), ParseScript, CheckSCRIPT},
    {"server", VERS_NETSCAPE, (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), ParseScript, null},
    {"body", VERS_ALL, (CM_HTML|CM_OPT|CM_OMITST), ParseBody, null},
    {"frameset", VERS_FRAMES, (CM_HTML|CM_FRAMES), ParseFrameSet, null},
    {"p", VERS_ALL, (CM_BLOCK|CM_OPT), ParseInline, null},
    {"h1", VERS_ALL, (CM_BLOCK|CM_HEADING), ParseInline, null},
    {"h2", VERS_ALL, (CM_BLOCK|CM_HEADING), ParseInline, null},
    {"h3", VERS_ALL, (CM_BLOCK|CM_HEADING), ParseInline, null},
    {"h4", VERS_ALL, (CM_BLOCK|CM_HEADING), ParseInline, null},
    {"h5", VERS_ALL, (CM_BLOCK|CM_HEADING), ParseInline, null},
    {"h6", VERS_ALL, (CM_BLOCK|CM_HEADING), ParseInline, null},
    {"ul", VERS_ALL, CM_BLOCK, ParseList, null},
    {"ol", VERS_ALL, CM_BLOCK, ParseList, null},
    {"dl", VERS_ALL, CM_BLOCK, ParseDefList, null},
    {"dir", VERS_LOOSE, (CM_BLOCK|CM_OBSOLETE), ParseList, null},
    {"menu", VERS_LOOSE, (CM_BLOCK|CM_OBSOLETE), ParseList, null},
    {"pre", VERS_ALL, CM_BLOCK, ParsePre, null},
    {"listing", VERS_ALL, (CM_BLOCK|CM_OBSOLETE), ParsePre, null},
    {"xmp", VERS_ALL, (CM_BLOCK|CM_OBSOLETE), ParsePre, null},
    {"plaintext", VERS_ALL, (CM_BLOCK|CM_OBSOLETE), ParsePre, null},
    {"address", VERS_ALL, CM_BLOCK, ParseBlock, null},
    {"blockquote", VERS_ALL, CM_BLOCK, ParseBlock, null},
    {"form", VERS_ALL, CM_BLOCK, ParseBlock, null},
    {"isindex", VERS_LOOSE, (CM_BLOCK|CM_EMPTY), null, null},
    {"fieldset", VERS_HTML40, CM_BLOCK, ParseBlock, null},
    {"table", VERS_FROM32, CM_BLOCK, ParseTableTag, CheckTABLE},
    {"hr", VERS_ALL, (CM_BLOCK|CM_EMPTY), null, CheckHR},
    {"div", VERS_FROM32, CM_BLOCK, ParseBlock, null},
    {"multicol", VERS_NETSCAPE, CM_BLOCK, ParseBlock, null},
    {"nosave", VERS_NETSCAPE, CM_BLOCK, ParseBlock, null},
    {"layer", VERS_NETSCAPE, CM_BLOCK, ParseBlock, null},
    {"ilayer", VERS_NETSCAPE, CM_INLINE, ParseInline, null},
    {"nolayer", VERS_NETSCAPE, (CM_BLOCK|CM_INLINE|CM_MIXED), ParseBlock, null},
    {"align", VERS_NETSCAPE, CM_BLOCK, ParseBlock, null},
    {"center", VERS_LOOSE, CM_BLOCK, ParseBlock, null},
    {"ins", VERS_HTML40, (CM_INLINE|CM_BLOCK|CM_MIXED), ParseInline, null},
    {"del", VERS_HTML40, (CM_INLINE|CM_BLOCK|CM_MIXED), ParseInline, null},
    {"li", VERS_ALL, (CM_LIST|CM_OPT|CM_NO_INDENT), ParseBlock, null},
    {"dt", VERS_ALL, (CM_DEFLIST|CM_OPT|CM_NO_INDENT), ParseInline, null},
    {"dd", VERS_ALL, (CM_DEFLIST|CM_OPT|CM_NO_INDENT), ParseBlock, null},
    {"caption", VERS_FROM32, CM_TABLE, ParseInline, CheckCaption},
    {"colgroup", VERS_HTML40, (CM_TABLE|CM_OPT), ParseColGroup, null},
    {"col", VERS_HTML40, (CM_TABLE|CM_EMPTY), null, null},
    {"thead", VERS_HTML40, (CM_TABLE|CM_ROWGRP|CM_OPT), ParseRowGroup, null},
    {"tfoot", VERS_HTML40, (CM_TABLE|CM_ROWGRP|CM_OPT), ParseRowGroup, null},
    {"tbody", VERS_HTML40, (CM_TABLE|CM_ROWGRP|CM_OPT), ParseRowGroup, null},
    {"tr", VERS_FROM32, (CM_TABLE|CM_OPT), ParseRow, null},
    {"td", VERS_FROM32, (CM_ROW|CM_OPT|CM_NO_INDENT), ParseBlock, CheckTableCell},
    {"th", VERS_FROM32, (CM_ROW|CM_OPT|CM_NO_INDENT), ParseBlock, CheckTableCell},
    {"q", VERS_HTML40, CM_INLINE, ParseInline, null},
    {"a", VERS_ALL, CM_INLINE, ParseInline, CheckAnchor},
    {"br", VERS_ALL, (CM_INLINE|CM_EMPTY), null, null},
    {"img", VERS_ALL, (CM_INLINE|CM_IMG|CM_EMPTY), null, CheckIMG},
    {"object", VERS_HTML40, (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), ParseBlock, null},
    {"applet", VERS_LOOSE, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), ParseBlock, null},
    {"servlet", VERS_SUN, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), ParseBlock, null},
    {"param", VERS_FROM32, (CM_INLINE|CM_EMPTY), null, null},
    {"embed", VERS_NETSCAPE, (CM_INLINE|CM_IMG|CM_EMPTY), null, null},
    {"noembed", VERS_NETSCAPE, CM_INLINE, ParseInline, null},
    {"iframe", VERS_HTML40_LOOSE, CM_INLINE, ParseBlock, null},
    {"frame", VERS_FRAMES, (CM_FRAMES|CM_EMPTY), null, null},
    {"noframes", VERS_IFRAMES, (CM_BLOCK|CM_FRAMES), ParseNoFrames, null},
    {"noscript", (VERS_FRAMES|VERS_HTML40), (CM_BLOCK|CM_INLINE|CM_MIXED), ParseBlock, null},
    {"b", VERS_ALL, CM_INLINE, ParseInline, null},
    {"i", VERS_ALL, CM_INLINE, ParseInline, null},
    {"u", VERS_LOOSE, CM_INLINE, ParseInline, null},
    {"tt", VERS_ALL, CM_INLINE, ParseInline, null},
    {"s", VERS_LOOSE, CM_INLINE, ParseInline, null},
    {"strike", VERS_LOOSE, CM_INLINE, ParseInline, null},
    {"big", VERS_FROM32, CM_INLINE, ParseInline, null},
    {"small", VERS_FROM32, CM_INLINE, ParseInline, null},
    {"sub", VERS_FROM32, CM_INLINE, ParseInline, null},
    {"sup", VERS_FROM32, CM_INLINE, ParseInline, null},
    {"em", VERS_ALL, CM_INLINE, ParseInline, null},
    {"strong", VERS_ALL, CM_INLINE, ParseInline, null},
    {"dfn", VERS_ALL, CM_INLINE, ParseInline, null},
    {"code", VERS_ALL, CM_INLINE, ParseInline, null},
    {"samp", VERS_ALL, CM_INLINE, ParseInline, null},
    {"kbd", VERS_ALL, CM_INLINE, ParseInline, null},
    {"var", VERS_ALL, CM_INLINE, ParseInline, null},
    {"cite", VERS_ALL, CM_INLINE, ParseInline, null},
    {"abbr", VERS_HTML40, CM_INLINE, ParseInline, null},
    {"acronym", VERS_HTML40, CM_INLINE, ParseInline, null},
    {"span", VERS_FROM32, CM_INLINE, ParseInline, null},
    {"blink", VERS_PROPRIETARY, CM_INLINE, ParseInline, null},
    {"nobr", VERS_PROPRIETARY, CM_INLINE, ParseInline, null},
    {"wbr", VERS_PROPRIETARY, (CM_INLINE|CM_EMPTY), null, null},
    {"marquee", VERS_MICROSOFT, (CM_INLINE|CM_OPT), ParseInline, null},
    {"bgsound", VERS_MICROSOFT, (CM_HEAD|CM_EMPTY), null, null},
    {"comment", VERS_MICROSOFT, CM_INLINE, ParseInline, null},
    {"spacer", VERS_NETSCAPE, (CM_INLINE|CM_EMPTY), null, null},
    {"keygen", VERS_NETSCAPE, (CM_INLINE|CM_EMPTY), null, null},
    {"map", VERS_FROM32, CM_INLINE, ParseBlock, CheckMap},
    {"area", VERS_ALL, (CM_BLOCK|CM_EMPTY), null, CheckAREA},
    {"input", VERS_ALL, (CM_INLINE|CM_IMG|CM_EMPTY), null, null},
    {"select", VERS_ALL, (CM_INLINE|CM_FIELD), ParseSelect, null},
    {"option", VERS_ALL, (CM_FIELD|CM_OPT), ParseText, null},
    {"optgroup", VERS_HTML40, (CM_FIELD|CM_OPT), ParseOptGroup, null},
    {"textarea", VERS_ALL, (CM_INLINE|CM_FIELD), ParseText, null},
    {"label", VERS_HTML40, CM_INLINE, ParseInline, null},
    {"legend", VERS_HTML40, CM_INLINE, ParseInline, null},
    {"button", VERS_HTML40, CM_INLINE, ParseInline, null},
    {"basefont", VERS_LOOSE, (CM_INLINE|CM_EMPTY), null, null},
    {"font", VERS_LOOSE, CM_INLINE, ParseInline, null},
    {"bdo", VERS_HTML40, CM_INLINE, ParseInline, null},
    /* this must be the final entry: InitTags() stops at the null name */
    {null, 0, 0, 0, 0}
};
/*
 Choose what version to use for a new doctype.
 Returns the first of VERS_HTML20, VERS_HTML32, VERS_HTML40_STRICT,
 VERS_HTML40_LOOSE and VERS_FRAMES that has a bit in common with
 lexer->versions, or VERS_UNKNOWN when none of them does.
*/
int HTMLVersion(Lexer *lexer)
{
    /* candidates, most preferred first */
    const int preference[] =
    {
        VERS_HTML20,
        VERS_HTML32,
        VERS_HTML40_STRICT,
        VERS_HTML40_LOOSE,
        VERS_FRAMES
    };
    uint allowed = lexer->versions;
    uint i;

    for (i = 0; i < sizeof(preference)/sizeof(preference[0]); ++i)
    {
        if (allowed & preference[i])
            return preference[i];
    }

    return VERS_UNKNOWN;
}
/*
 Map a NUL-terminated name to a bucket index in the range
 0 .. HASHSIZE-1. Characters are folded in from left to right,
 multiplying the running value by 31 before adding each one.
*/
static unsigned hash(char *s)
{
    unsigned accum = 0;
    char *cursor = s;

    while (*cursor != '\0')
    {
        accum = 31*accum + *cursor;
        ++cursor;
    }

    return accum % HASHSIZE;
}
/*
 Search the hash table for the entry named s.
 Walks the chain in bucket hash(s) and returns the first entry whose
 name compares equal under wstrcmp(), or null if there is none.
*/
static Dict *lookup(char *s)
{
    Dict *entry = hashtab[hash(s)];

    while (entry != null)
    {
        if (wstrcmp(s, entry->name) == 0)
            break;

        entry = entry->next;
    }

    return entry;
}
/*
 Add a tag definition to the hash table, or update the one that is
 already stored under the same name.

 A new entry gets its own copy of name (wstrdup) and is linked in at
 the head of its bucket. For an existing entry versions, parser and
 chkattrs are overwritten, while the model flags are OR-ed into the
 flags already present.

 Returns the entry, or null if a new entry was needed and either
 allocation returned null; in that case the table is left unchanged
 and nothing is leaked.
*/
static Dict *install(char *name, uint versions, uint model,
                     Parser *parser, CheckAttribs *chkattrs)
{
    Dict *np;
    unsigned hashval;

    if ((np = lookup(name)) == null)
    {
        np = (Dict *)MemAlloc(sizeof(*np));

        if (np == null)
            return null;

        np->name = wstrdup(name);

        if (np->name == null)
        {
            /* release the half-built entry instead of leaking it */
            MemFree(np);
            return null;
        }

        hashval = hash(name);
        np->next = hashtab[hashval];
        np->model = 0;  /* so the |= below starts from no flags */
        hashtab[hashval] = np;
    }

    np->versions = versions;
    np->model |= model;
    np->parser = parser;
    np->chkattrs = chkattrs;
    return np;
}
/*
 Public interface for finding a tag by name.
 When XmlTags is set, node->tag is pointed at the xml_tags dummy
 entry without any lookup. Otherwise node->element is looked up in
 the hash table. Returns yes if node->tag was set; returns no, with
 node->tag untouched, if the node has no element name or the name is
 not in the table.
*/
Bool FindTag(Node *node)
{
    Dict *found;

    if (XmlTags)
    {
        node->tag = xml_tags;
        return yes;
    }

    if (!node->element)
        return no;

    found = lookup(node->element);

    if (!found)
        return no;

    node->tag = found;
    return yes;
}
/*
 Return the parser stored for node's element name, or null when the
 node has no element name or the name is not in the hash table.
 XmlTags is not consulted here.
*/
Parser *FindParser(Node *node)
{
    Dict *found = null;

    if (node->element)
        found = lookup(node->element);

    if (found)
        return found->parser;

    return null;
}
void DefineEmptyTag(char *name)
{
install(name, VERS_PROPRIETARY, (CM_EMPTY|CM_NO_INDENT|CM_NEW), ParseBlock, null);
}
void DefineInlineTag(char *name)
{
install(name, VERS_PROPRIETARY, (CM_INLINE|CM_NO_INDENT|CM_NEW), ParseBlock, null);
}
void DefineBlockTag(char *name)
{
install(name, VERS_PROPRIETARY, (CM_BLOCK|CM_NO_INDENT|CM_NEW), ParseBlock, null);
}
void DefinePreTag(char *name)
{
install(name, VERS_PROPRIETARY, (CM_BLOCK|CM_NO_INDENT|CM_NEW), ParsePre, null);
}
/*
 Build the tag dictionary.
 Installs every row of the tags[] table, caches the entries for
 individual elements in the tag_* globals, and allocates the
 xml_tags dummy entry that FindTag() hands out when XmlTags is set.
 A tag_* global ends up null if its name is not in the table.
 FreeTags() releases what is allocated here.
*/
void InitTags(void)
{
    struct tag *tp;

    /* the table ends with a row whose name is null */
    for (tp = tags; tp->name != null; ++tp)
        install(tp->name, tp->versions, tp->model, tp->parser, tp->chkattrs);

    tag_html = lookup("html");
    tag_head = lookup("head");
    tag_body = lookup("body");
    tag_frameset = lookup("frameset");
    tag_frame = lookup("frame");
    tag_noframes = lookup("noframes");
    tag_meta = lookup("meta");
    tag_title = lookup("title");
    tag_base = lookup("base");
    tag_hr = lookup("hr");
    tag_pre = lookup("pre");
    tag_listing = lookup("listing");
    tag_h1 = lookup("h1");
    tag_h2 = lookup("h2");
    tag_p = lookup("p");
    tag_ul = lookup("ul");
    tag_ol = lookup("ol");
    tag_dir = lookup("dir");
    tag_li = lookup("li");
    tag_dl = lookup("dl");
    tag_dt = lookup("dt");
    tag_dd = lookup("dd");
    tag_td = lookup("td");
    tag_th = lookup("th");
    tag_tr = lookup("tr");
    tag_col = lookup("col");
    tag_br = lookup("br");
    tag_a = lookup("a");
    tag_link = lookup("link");
    tag_b = lookup("b");
    tag_i = lookup("i");
    tag_strong = lookup("strong");
    tag_em = lookup("em");
    tag_big = lookup("big");
    tag_small = lookup("small");
    tag_param = lookup("param");
    tag_option = lookup("option");
    tag_optgroup = lookup("optgroup");
    tag_img = lookup("img");
    tag_map = lookup("map");
    tag_area = lookup("area");
    tag_nobr = lookup("nobr");
    tag_wbr = lookup("wbr");
    tag_font = lookup("font");
    tag_spacer = lookup("spacer");
    tag_layer = lookup("layer");
    tag_center = lookup("center");
    tag_style = lookup("style");
    tag_script = lookup("script");
    tag_noscript = lookup("noscript");
    tag_table = lookup("table");
    tag_caption = lookup("caption");
    tag_form = lookup("form");
    tag_textarea = lookup("textarea");
    tag_blockquote = lookup("blockquote");
    tag_applet = lookup("applet");
    tag_object = lookup("object");
    tag_div = lookup("div");
    tag_span = lookup("span");

    /* create dummy entry for all xml tags; it is not put in hashtab */
    xml_tags = (Dict *)MemAlloc(sizeof(*xml_tags));
    xml_tags->next = null;  /* not chained to anything; don't leave the link uninitialized */
    xml_tags->name = null;
    xml_tags->versions = VERS_ALL;
    xml_tags->model = CM_BLOCK;
    xml_tags->parser = null;
    xml_tags->chkattrs = null;
}
/*
 Release everything InitTags() and install() allocated: the xml_tags
 dummy entry and every hash table entry together with its name.
 All buckets are reset to null and xml_tags is cleared, so calling
 this a second time does not free anything twice.

 The tag_* globals are not reset here; they pointed at hash table
 entries and must not be used again until InitTags() has been re-run.
*/
void FreeTags(void)
{
    Dict *entry, *following;
    int i;

    if (xml_tags != null)
    {
        MemFree(xml_tags);
        xml_tags = null;  /* FindTag() must not hand out the freed entry */
    }

    for (i = 0; i < HASHSIZE; ++i)
    {
        entry = hashtab[i];

        while (entry)
        {
            /* fetch the link before the entry itself is freed */
            following = entry->next;
            MemFree(entry->name);
            MemFree(entry);
            entry = following;
        }

        hashtab[i] = null;
    }
}