/*
localize.c
(c) 1998-2000 (W3C) MIT, INRIA, Keio University
See tidy.c for the copyright notice.
You should only need to edit this file and tidy.c
to localize HTML tidy.
*/
#include "platform.h"
#include "html.h"
/* used to point to Web Accessibility Guidelines */
#define ACCESS_URL "http://www.w3.org/WAI/GL"
/* release date string; ShowVersion() prints it after "HTML Tidy release date: " */
char *release_date = "4th August 2000";
/*
 File name that ReportPosition() prints ahead of the line and column
 when Emacs-style output is selected. It is not assigned anywhere in
 this part of the file -- presumably set by code further down; verify.
*/
static char *currentFile; /* sasdjb 01May00 for GNU Emacs error parsing */
/*
 Counter defined in another translation unit; ReportUnknownOption()
 and ReportBadArgument() each add one to it per reported problem.
*/
extern uint optionerrors;
/*
 Central output routine: the reporting functions in this file
 hand their text to tidy_out() rather than calling printf-style
 functions on the stream themselves. That makes it the natural
 place to hook when embedding Tidy in a GUI application.

 fp  - stream the formatted text is written to
 msg - printf-style format string
 ... - arguments consumed by the conversions in msg
*/
void tidy_out(FILE *fp, const char* msg, ...)
{
    va_list ap;

    va_start(ap, msg);
    vfprintf(fp, msg, ap);
    va_end(ap);
}
/*
 Tells the user, on stderr, that Tidy is reading its input
 from standard input.
*/
void ReadingFromStdin(void)
{
    /* fixed text with no conversions, so fputs writes the same bytes */
    fputs("Reading markup from standard input ...\n", stderr);
}
/*
 Writes the two-line version banner to fp: the release date
 followed by a pointer to the project's web page.
*/
void ShowVersion(FILE *fp)
{
    tidy_out(fp,
             "HTML Tidy release date: %s\n"
             "See http://www.w3.org/People/Raggett for details\n",
             release_date);
}
/*
 Reports on fp that the named file could not be opened.
 The file name is shown inside double quotes.
*/
void FileError(FILE *fp, const char *file)
{
    tidy_out(fp,
             "Can't open \"%s\"\n",
             file);
}
/*
 Writes a short description of a node to the lexer's error stream:

   start tag     ->  <name>
   end tag       ->  </name>
   doctype       ->  <!DOCTYPE>
   text node     ->  plain text
   anything else ->  the bare element name

 A null node writes nothing, so callers may pass an optional node
 without checking it first.
*/
static void ReportTag(Lexer *lexer, Node *tag)
{
    if (!tag)
        return;

    if (tag->type == StartTag)
        tidy_out(lexer->errout, "<%s>", tag->element);
    else if (tag->type == EndTag)
        /* the leading "</" was missing, so end tags printed as "name>" */
        tidy_out(lexer->errout, "</%s>", tag->element);
    else if (tag->type == DocTypeTag)
        /* this used to be an empty string, leaving a hole in the message */
        tidy_out(lexer->errout, "<!DOCTYPE>");
    else if (tag->type == TextNode)
        tidy_out(lexer->errout, "plain text");
    else
        tidy_out(lexer->errout, "%s", tag->element);
}
/*
 Counts and reports an option name that was not recognised.
 No lexer exists yet when this runs, so the message goes
 straight to stderr instead of a lexer's error stream.
*/
void ReportUnknownOption(char *option)
{
    ++optionerrors;

    fprintf(stderr,
            "Warning - unknown option: %s\n",
            option);
}
/*
 Counts and reports an option whose argument is missing or
 malformed. No lexer exists yet when this runs, so the message
 goes straight to stderr instead of a lexer's error stream.
*/
void ReportBadArgument(char *option)
{
    ++optionerrors;

    fprintf(stderr,
            "Warning - missing or malformed argument for option: %s\n",
            option);
}
/*
 Writes the decimal representation of n into str as a
 NUL-terminated string. Negative values get a leading '-'.

 n   - value to convert
 str - destination; the caller must provide room for every digit,
       an optional sign and the terminating NUL (12 bytes is
       enough for a 32-bit int)

 The previous version took n % 10 on the signed value, so a
 negative n produced characters below '0' instead of digits.
*/
static void NtoS(int n, char *str)
{
    char digits[40];
    unsigned int magnitude;
    int count = 0;
    int pos = 0;

    /* negate in unsigned arithmetic so the most negative int is safe */
    magnitude = (n < 0) ? 0u - (unsigned int)n : (unsigned int)n;

    /* collect digits least-significant first; zero still yields "0" */
    do
    {
        digits[count++] = (char)('0' + (magnitude % 10u));
        magnitude /= 10u;
    }
    while (magnitude != 0u);

    if (n < 0)
        str[pos++] = '-';

    /* copy the digits out in most-significant-first order */
    while (count > 0)
        str[pos++] = digits[--count];

    str[pos] = '\0';
}
/*
 Writes the lexer's current line and column to its error stream
 as the prefix of a diagnostic.

 Traditional format:  line L column C - 
 Emacs format:        file:L:C: 
 The Emacs form is used when the Emacs flag is set, so that
 GNU Emacs can parse the position.
*/
static void ReportPosition(Lexer *lexer)
{
    if (!Emacs)
    {
        /* traditional format */
        tidy_out(lexer->errout, "line %d", lexer->lines);
        tidy_out(lexer->errout, " column %d - ", lexer->columns);
        return;
    }

    /* formatting parsable by GNU Emacs */
    tidy_out(lexer->errout, "%s", currentFile);
    tidy_out(lexer->errout, ":%d:", lexer->lines);
    tidy_out(lexer->errout, "%d: ", lexer->columns);
}
/*
 Records a character-encoding warning and, when warnings are
 being shown, prints it with the current position.

 lexer - lexer whose warning count, badChars flags and error
         stream are updated
 code  - kind of problem; only WINDOWS_CHARS produces message text
 c     - offending character code, printed in decimal

 The warning counter is bumped whether or not anything is printed.
*/
void ReportEncodingError(Lexer *lexer, uint code, uint c)
{
    char digits[32];

    lexer->warnings++;

    if (!ShowWarnings)
        return;

    ReportPosition(lexer);

    if (code == WINDOWS_CHARS)
    {
        lexer->badChars |= WINDOWS_CHARS;
        NtoS(c, digits);
        tidy_out(lexer->errout, "Warning: replacing illegal character code %s", digits);
    }

    tidy_out(lexer->errout, "\n");
}
/*
 Records an entity-related warning and, when warnings are being
 shown, prints it with the current position.

 lexer  - lexer whose warning count and error stream are used
 code   - MISSING_SEMICOLON, UNKNOWN_ENTITY or UNESCAPED_AMPERSAND;
          any other value prints just the position and a newline
 entity - entity text quoted in the first two messages
 c      - not used by this routine

 The warning counter is bumped whether or not anything is printed.
*/
void ReportEntityError(Lexer *lexer, uint code, char *entity, int c)
{
    lexer->warnings++;

    if (!ShowWarnings)
        return;

    ReportPosition(lexer);

    if (code == MISSING_SEMICOLON)
    {
        tidy_out(lexer->errout, "Warning: entity \"%s\" doesn't end in ';'", entity);
    }
    else if (code == UNKNOWN_ENTITY)
    {
        tidy_out(lexer->errout, "Warning: unescaped & or unknown entity \"%s\"", entity);
    }
    else if (code == UNESCAPED_AMPERSAND)
    {
        /* the advice has to name the escaped form; it used to read "written as &" */
        tidy_out(lexer->errout, "Warning: unescaped & which should be written as &amp;");
    }

    tidy_out(lexer->errout, "\n");
}
/*
 Records an attribute-related problem and prints a one-line
 diagnostic for it.

 lexer - lexer whose counters, position and error stream are used
 node  - element the attribute belongs to; shown via ReportTag()
 attr  - attribute name or value quoted in some of the messages
 code  - which problem occurred

 Every call bumps the warning counter. Nothing is printed once more
 than 6 errors have been counted. UNEXPECTED_GT is an error rather
 than a warning: it bumps the error counter and is printed even
 when warnings are switched off. A code that matches none of the
 branches prints only the position and a newline.
*/
void ReportAttrError(Lexer *lexer, Node *node, char *attr, uint code)
{
    lexer->warnings++;

    /* keep quiet after 6 errors */
    if (lexer->errors > 6)
        return;

    if (!ShowWarnings)
    {
        /* with warnings off, only the one real error gets through */
        if (code == UNEXPECTED_GT)
        {
            ReportPosition(lexer);
            tidy_out(lexer->errout, "Error: ");
            ReportTag(lexer, node);
            tidy_out(lexer->errout, " missing '>' for end of tag\n");
            lexer->errors++;
        }

        return;
    }

    /* on end of file adjust reported position to end of input */
    if (code == UNEXPECTED_END_OF_FILE)
    {
        lexer->lines = lexer->in->curline;
        lexer->columns = lexer->in->curcol;
    }

    ReportPosition(lexer);

    if (code == UNKNOWN_ATTRIBUTE)
    {
        tidy_out(lexer->errout, "Warning: unknown attribute \"%s\"", attr);
    }
    else if (code == MISSING_ATTRIBUTE)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " lacks \"%s\" attribute", attr);
    }
    else if (code == MISSING_ATTR_VALUE)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " attribute \"%s\" lacks value", attr);
    }
    else if (code == MISSING_IMAGEMAP)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " should use client-side image map");
        /* remembered in the lexer's badAccess flags */
        lexer->badAccess |= MISSING_IMAGE_MAP;
    }
    else if (code == BAD_ATTRIBUTE_VALUE)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " unknown attribute value \"%s\"", attr);
    }
    else if (code == XML_ATTRIBUTE_VALUE)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " has XML attribute \"%s\"", attr);
    }
    else if (code == UNEXPECTED_GT)
    {
        tidy_out(lexer->errout, "Error: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " missing '>' for end of tag");
        lexer->errors++;
    }
    else if (code == UNEXPECTED_QUOTEMARK)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " unexpected or duplicate quote mark");
    }
    else if (code == REPEATED_ATTRIBUTE)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " repeated attribute \"%s\"", attr);
    }
    else if (code == PROPRIETARY_ATTR_VALUE)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " proprietary attribute value \"%s\"", attr);
    }
    else if (code == UNEXPECTED_END_OF_FILE)
    {
        tidy_out(lexer->errout, "Warning: end of file while parsing attributes");
    }
    else if (code == ID_NAME_MISMATCH)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " id and name attribute value mismatch");
    }

    tidy_out(lexer->errout, "\n");
}
/*
 Records a markup warning and, when warnings are being shown,
 prints a one-line diagnostic for it.

 lexer   - lexer whose counters, position and error stream are used
 element - first node involved; which role it plays depends on code
           (several branches dereference it without a null check,
           so callers must supply it for those codes)
 node    - second node involved; likewise depends on code
 code    - which warning to report

 Every call bumps the warning counter. Nothing is printed once more
 than 6 errors have been counted, or when warnings are switched off.
 A code that matches none of the branches prints only the position
 and a newline.

 Fixes relative to the previous text of this routine:
  - end tags are written as "</name>" (the "</" had been lost)
  - the DOCTYPE_AFTER_TAGS message names <!DOCTYPE> again
  - the USING_BR_INPLACE_OF literal was split across two source
    lines, which does not compile; it is one literal naming <br>
*/
void ReportWarning(Lexer *lexer, Node *element, Node *node, uint code)
{
    lexer->warnings++;

    /* keep quiet after 6 errors */
    if (lexer->errors > 6)
        return;

    if (!ShowWarnings)
        return;

    /* on end of file adjust reported position to end of input */
    if (code == UNEXPECTED_END_OF_FILE)
    {
        lexer->lines = lexer->in->curline;
        lexer->columns = lexer->in->curcol;
    }

    ReportPosition(lexer);

    if (code == MISSING_ENDTAG_FOR)
    {
        tidy_out(lexer->errout, "Warning: missing </%s>", element->element);
    }
    else if (code == MISSING_ENDTAG_BEFORE)
    {
        tidy_out(lexer->errout, "Warning: missing </%s> before ", element->element);
        ReportTag(lexer, node);
    }
    else if (code == DISCARDING_UNEXPECTED)
    {
        tidy_out(lexer->errout, "Warning: discarding unexpected ");
        ReportTag(lexer, node);
    }
    else if (code == NESTED_EMPHASIS)
    {
        tidy_out(lexer->errout, "Warning: nested emphasis ");
        ReportTag(lexer, node);
    }
    else if (code == COERCE_TO_ENDTAG)
    {
        tidy_out(lexer->errout, "Warning: <%s> is probably intended as </%s>",
                 node->element, node->element);
    }
    else if (code == NON_MATCHING_ENDTAG)
    {
        tidy_out(lexer->errout, "Warning: replacing unexpected ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " by </%s>", element->element);
    }
    else if (code == TAG_NOT_ALLOWED_IN)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " isn't allowed in <%s> elements", element->element);
    }
    else if (code == DOCTYPE_AFTER_TAGS)
    {
        tidy_out(lexer->errout, "Warning: <!DOCTYPE> isn't allowed after elements");
    }
    else if (code == MISSING_STARTTAG)
    {
        tidy_out(lexer->errout, "Warning: missing <%s>", node->element);
    }
    else if (code == UNEXPECTED_ENDTAG)
    {
        tidy_out(lexer->errout, "Warning: unexpected </%s>", node->element);

        /* the enclosing element is optional for this code */
        if (element)
            tidy_out(lexer->errout, " in <%s>", element->element);
    }
    else if (code == TOO_MANY_ELEMENTS)
    {
        tidy_out(lexer->errout, "Warning: too many %s elements", node->element);

        /* the enclosing element is optional for this code */
        if (element)
            tidy_out(lexer->errout, " in <%s>", element->element);
    }
    else if (code == USING_BR_INPLACE_OF)
    {
        tidy_out(lexer->errout, "Warning: using <br> in place of ");
        ReportTag(lexer, node);
    }
    else if (code == INSERTING_TAG)
    {
        tidy_out(lexer->errout, "Warning: inserting implicit <%s>", node->element);
    }
    else if (code == CANT_BE_NESTED)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " can't be nested");
    }
    else if (code == PROPRIETARY_ELEMENT)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " is not approved by W3C");

        /* remember which proprietary layout element was seen */
        if (node->tag == tag_layer)
            lexer->badLayout |= USING_LAYER;
        else if (node->tag == tag_spacer)
            lexer->badLayout |= USING_SPACER;
        else if (node->tag == tag_nobr)
            lexer->badLayout |= USING_NOBR;
    }
    else if (code == OBSOLETE_ELEMENT)
    {
        /* only call it obsolete when the tag's content model says so */
        if (element->tag && (element->tag->model & CM_OBSOLETE))
            tidy_out(lexer->errout, "Warning: replacing obsolete element ");
        else
            tidy_out(lexer->errout, "Warning: replacing element ");

        ReportTag(lexer, element);
        tidy_out(lexer->errout, " by ");
        ReportTag(lexer, node);
    }
    else if (code == TRIM_EMPTY_ELEMENT)
    {
        tidy_out(lexer->errout, "Warning: trimming empty ");
        ReportTag(lexer, element);
    }
    else if (code == MISSING_TITLE_ELEMENT)
    {
        tidy_out(lexer->errout, "Warning: inserting missing 'title' element");
    }
    else if (code == ILLEGAL_NESTING)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, element);
        tidy_out(lexer->errout, " shouldn't be nested");
    }
    else if (code == NOFRAMES_CONTENT)
    {
        tidy_out(lexer->errout, "Warning: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " not inside 'noframes' element");
    }
    else if (code == INCONSISTENT_VERSION)
    {
        tidy_out(lexer->errout, "Warning: html doctype doesn't match content");
    }
    else if (code == MALFORMED_DOCTYPE)
    {
        tidy_out(lexer->errout, "Warning: expected \"html PUBLIC\" or \"html SYSTEM\"");
    }
    else if (code == CONTENT_AFTER_BODY)
    {
        tidy_out(lexer->errout, "Warning: content occurs after end of body");
    }
    else if (code == MALFORMED_COMMENT)
    {
        tidy_out(lexer->errout, "Warning: adjacent hyphens within comment");
    }
    else if (code == BAD_COMMENT_CHARS)
    {
        tidy_out(lexer->errout, "Warning: expecting -- or >");
    }
    else if (code == BAD_XML_COMMENT)
    {
        tidy_out(lexer->errout, "Warning: XML comments can't contain --");
    }
    else if (code == BAD_CDATA_CONTENT)
    {
        tidy_out(lexer->errout, "Warning: '<' + '/' + letter not allowed here");
    }
    else if (code == INCONSISTENT_NAMESPACE)
    {
        tidy_out(lexer->errout, "Warning: html namespace doesn't match content");
    }
    else if (code == DTYPE_NOT_UPPER_CASE)
    {
        tidy_out(lexer->errout, "Warning: SYSTEM, PUBLIC, W3C, DTD, EN must be upper case");
    }
    else if (code == UNEXPECTED_END_OF_FILE)
    {
        tidy_out(lexer->errout, "Warning: unexpected end of file");
        ReportTag(lexer, element);
    }

    tidy_out(lexer->errout, "\n");
}
/*
 Records an error and prints a one-line diagnostic for it.

 lexer   - lexer whose counters, position and error stream are used
 element - enclosing element; optional, only read for UNEXPECTED_ENDTAG
 node    - node the message is about; read for UNKNOWN_ELEMENT and
           UNEXPECTED_ENDTAG
 code    - which error to report

 Every call bumps the warning counter. Once more than 6 errors have
 been counted the routine returns without printing or counting
 another error. Unlike the warning routines, the output does not
 depend on ShowWarnings. A code that matches none of the branches
 prints only the position and a newline.

 The UNEXPECTED_ENDTAG text now shows the end tag as "</name>";
 the "</" had been lost from the format string.
*/
void ReportError(Lexer *lexer, Node *element, Node *node, uint code)
{
    lexer->warnings++;

    /* keep quiet after 6 errors */
    if (lexer->errors > 6)
        return;

    lexer->errors++;
    ReportPosition(lexer);

    if (code == SUSPECTED_MISSING_QUOTE)
    {
        tidy_out(lexer->errout, "Error: missing quotemark for attribute value");
    }
    else if (code == DUPLICATE_FRAMESET)
    {
        tidy_out(lexer->errout, "Error: repeated FRAMESET element");
    }
    else if (code == UNKNOWN_ELEMENT)
    {
        tidy_out(lexer->errout, "Error: ");
        ReportTag(lexer, node);
        tidy_out(lexer->errout, " is not recognized!");
    }
    else if (code == UNEXPECTED_ENDTAG) /* generated by XML docs */
    {
        tidy_out(lexer->errout, "Warning: unexpected </%s>", node->element);

        if (element)
            tidy_out(lexer->errout, " in <%s>", element->element);
    }

    tidy_out(lexer->errout, "\n");
}
void ErrorSummary(Lexer *lexer)
{
/* adjust badAccess to that its null if frames are ok */
if (lexer->badAccess & (USING_FRAMES | USING_NOFRAMES))
{
if (!((lexer->badAccess & USING_FRAMES) && !(lexer->badAccess & USING_NOFRAMES)))
lexer->badAccess &= ~(USING_FRAMES | USING_NOFRAMES);
}
if (lexer->badChars)
{
if (lexer->badChars & WINDOWS_CHARS)
{
tidy_out(lexer->errout, "Characters codes for the Microsoft Windows fonts in the range\n");
tidy_out(lexer->errout, "128 - 159 may not be recognized on other platforms. You are\n");
tidy_out(lexer->errout, "instead recommended to use named entities, e.g. ™ rather\n");
tidy_out(lexer->errout, "than Windows character code 153 (0x2122 in Unicode). Note that\n");
tidy_out(lexer->errout, "as of February 1998 few browsers support the new entities.\n\n");
}
}
if (lexer->badForm)
{
tidy_out(lexer->errout, "You may need to move one or both of the