# GRC changes
# NOTE(review): several entries below appear to have lost the tag name they
# referred to; they are kept exactly as found.
# 1 - handle comments not on line boundaries
# 2 - handle and symbols in (etc) and handle
# 3 - handle
# 4 - handle
# 5 - handle
# 6 - handle
# 7 - handle tags within tags (can't make work)
# 8 - fix <code_example> to handle "immediate" form
# 9 - add a load more tags to explicitly ignore
# 10 - add forward references, add <newterm>, add footnotes
# 11 - add alphabetic list type and handle unrecognised list types
# 12 - add space when using getline to handle long tag arguments
# 13 - handle figure tags
# 14 - handle <argument> <literal> <syntax>
# 15 - add example tags and <line_art>
# 16 - add contents support
# tomc@osi.curtin.edu.au (Tom Crawley) changes
# 17 - handle <include>

#
# Initialise the global state shared by the rules and functions below.
#
BEGIN {
    EOF = 0;                    # set by read_line at end of input, reset by do_file

    # Text collected while processing and emitted by the END rule.
    title_page = "";
    copyright_page = "";        # was "copyright", a name nothing else reads
    footnotes = "";

    # Parser state flags used by do_line.
    in_comment = 0;
    in_code_example = 0;
    on_title_page = 0;          # output is diverted into title_page while set
    on_copyright_page = 0;      # output is diverted into copyright_page while set
    nlists = 0;                 # index of the innermost open list in list_type[]

    IGNORECASE = 1;             # gawk: string comparisons and regexps ignore case

    product = "";

    # Table of contents: built up by do_line, printed by END only when
    # contents_needed has been set.
    contents_needed = 0;
    contents = "<h1><a name=\"ContentsListing\">Contents</a></h1>\n";
    contents_level = 0;         # number of contents lists currently open
    contents_list_type = "ul";
}

#
# Emit everything that was collected rather than printed immediately:
# the title page, the copyright page, the contents listing and the footnotes.
#
END {
    if (title_page != "")
        print title_page;
    if (copyright_page != "")
        print copyright_page;

    # Close every contents list that is still open.
    for ( ; contents_level > 0; contents_level--) {
        contents = contents "</" contents_list_type ">\n";
    }
    if (contents_needed)
        print contents;

    if (footnotes != "") {
        print "<h1>Footnotes</h1>";
        print "<dl>";
        print footnotes;
        print "</dl>";
    }
}

#
# The main function has degenerated into a call to the recursive
# function do_file.
# NOTE(review): the line breaks of this part of the file appear to have been
# lost: each physical line holds many statements, and awk treats everything
# after a '#' on a physical line as a comment.  Several string literals further
# down also look damaged (they are empty or span physical lines).  The code is
# left untouched here.  In order, the pieces are:
#   - the main rule, which calls do_file("") and then exits with status 0;
#   - do_line(filename): scans the current record for <tag> or <tag>(argument)
#     markup, appends the converted text to oline, and finally prints oline or
#     stores it in title_page / copyright_page;
#   - read_line(filename): getline wrapper that sets EOF at end of input and
#     exits with status 1 after a read error;
#   - do_file(filename): runs do_line over every line of one file, closes the
#     file if it was named, and resets the shared EOF flag.
# NOTE(review): do_line keeps its working state (line, oline, tag, text, ...)
# in globals, yet its "include" branch re-enters do_line through do_file --
# confirm that the rest of the including line is not lost.
# NOTE(review): the first "table" branch iterates table_data but deletes from
# table, and the second 'tag == "table"' branch can never be reached.
# NOTE(review): read_line reports its error with a plain print, i.e. on
# standard output together with the generated HTML.
# { do_file(""); exit 0; } # # The real work is done in the do_line function # filename is used to pass the file name to the recursive do_file function # function do_line(filename) { oline = ""; line = $0; if(line == "") # skip empty line return; # optimisation -- ignore complete line if it starts in a comment and does not end the comment if (in_comment) { if (index(tolower(line), "<endcomment>") == 0) return; } while (line != "") { lab = index(line, "<"); if (lab == 0) break; rab = index(substr(line, lab + 1, length(line) - lab), ">") + lab; if (lab > 1 && !in_comment) { text = substr(line, 1, lab - 1); if (in_code_example == 0) gsub("\\\\[A-Za-z0-9_]*", " ", text); oline = oline text; text = ""; } tag = tolower(substr(line, lab + 1, rab - lab - 1)); line = substr(line, rab + 1, length(line) - rab); if (tag == "") continue; if (substr(line, 1, 1) == "(") { balance = 1; text = ""; line = substr(line, 2, length(line) - 1); rp = index(line, ")"); lp = index(line, "("); if (rp == 0 || (lp > 0 && lp < rp)) { parens = 1; while (parens > 0) { if (line == "") { read_line(filename); if (EOF) return; line = " " $0; } c = substr(line, 1, 1); if (c == ")") { if (parens == 1) { line = substr(line, 2, length(line) - 1); break; } parens--; } else if (c == "(") { parens++; } text = text c; line = substr(line, 2, length(line) - 1); } } else { text = substr(line, 1, rp - 1); line = substr(line, rp + 1, length(line) - rp); } } else { text = ""; } if (in_comment && tag != "endcomment") { } else if (substr(tag,1,1) == "/" || substr(tag,1,2) == "a ") { # these have already been converted to html oline = oline "<" tag ">"; } else if (tag == "p" || tag == "b" || tag == "/b") { oline = oline "<" tag ">"; } else if (tag == "emphasis") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); if (parts[2] == "italic") { oline = oline "<em>"; line = parts[1] "</em>" line; } else if (parts[2] == "bold") { oline = oline "<b>"; line = parts[1] "</b>" line; } else { oline = 
oline "<em>"; line = text "</em>" line; } } else { oline = oline "<em>"; line = text "</em>" line; } } else if (tag == "le") { if (list_type[nlists] == "ul" || list_type[nlists] == "ol") oline = oline "<li>"; else oline = oline "<dt>"; } else if (tag == "keep") { line = text " " line; } else if (tag == "center_line") { line = text "<p>" line ; } else if (tag == "ultrix_keyword") { oline = oline "<b>"; line = text "</b>" line; } else if (tag == "chapter" || tag == "appendix") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "<h1><a name=\"" parts[2] "\"> "; line = parts[1] "</a></h1>" line; symbol[parts[2]] = "<a href=\"#" parts[2] "\">" parts[1] "</a>"; for ( ; contents_level < 1; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 1; contents_level--) { contents = contents "</" contents_list_type ">\n"; } contents = contents "<li><a href=\"#" parts[2] "\">" parts[1] "</a>\n"; } else { oline = oline "<h1>"; line = text "</h1>" line; for ( ; contents_level < 1; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 1; contents_level--) { contents = contents "</" contents_list_type ">\n"; } contents = contents "<li>" text "\n"; } } else if (tag == "head1") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "<h2><a name=\"" parts[2] "\"> "; line = parts[1] "</a></h2>" line; symbol[parts[2]] = "<a href=\"#" parts[2] "\">" parts[1] "</a>"; for ( ; contents_level < 2; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 2; contents_level--) { contents = contents "</" contents_list_type ">\n"; } contents = contents "<li><a href=\"#" parts[2] "\">" parts[1] "</a>\n"; } else { oline = oline "<h2>"; line = text "</h2>" line; for ( ; contents_level < 2; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 2; contents_level--) { 
contents = contents "</" contents_list_type ">\n"; } contents = contents "<li>" text "\n"; } } else if (tag == "head2") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "<h3><a name=\"" parts[2] "\"> "; line = parts[1] "</a></h3>" line; symbol[parts[2]] = "<a href=\"#" parts[2] "\">" parts[1] "</a>"; for ( ; contents_level < 3; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 3; contents_level--) { contents = contents "</" contents_list_type ">\n"; } contents = contents "<li><a href=\"#" parts[2] "\">" parts[1] "</a>\n"; } else { oline = oline "<h3>"; line = text "</h3>" line; for ( ; contents_level < 3; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 3; contents_level--) { contents = contents "</" contents_list_type ">\n"; } contents = contents "<li>" text "\n"; } } else if (tag == "head3") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "<h4><a name=\"" parts[2] "\"> "; line = parts[1] "</a></h4>" line; symbol[parts[2]] = "<a href=\"#" parts[2] "\">" parts[1] "</a>"; # for ( ; contents_level < 4; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 4; contents_level--) { # contents = contents "</" contents_list_type ">\n"; # } # contents = contents "<li><a href=\"#" parts[2] "\">" parts[1] "</a>\n"; } else { oline = oline "<h4>"; line = text "</h4>" line; # for ( ; contents_level < 4; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 4; contents_level--) { # contents = contents "</" contents_list_type ">\n"; # } # contents = contents "<li>" text "\n"; } } else if (tag == "product") { product = text " "; } else if (tag == "title") { print "<title>" product text ""; print "

" product text "

"; for ( ; contents_level < 1; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 1; contents_level--) { contents = contents "\n"; } contents = contents "
  • " product text "\n"; } else if (tag == "list") { text = tolower(text); if (text == "unnumbered") { list_type[++nlists] = "ul"; } else if (text == "numbered" || text == "alphabetic") { list_type[++nlists] = "ol"; } else if (text == "simple") { list_type[++nlists] = "dl"; } else { oline = oline "" # unrecognised list types are treated as unordered list_type[++nlists] = "ul"; } oline = oline "<" list_type[nlists] ">"; } else if (tag == "endlist") { oline = oline ""; } else if (tag == "comment") { if (text == "") in_comment = 1; } else if (tag == "endcomment") { in_comment = 0; } else if (tag == "include") { do_file(text); } else if (tag == "code_example") { if (text != "") { oline = oline ""; line = text "" line; } else { oline = oline "
    ";
                        # While in_code_example is set, do_line skips its
                        # gsub() of backslash-prefixed words and prints even
                        # an empty oline.
                        in_code_example = 1;
                    }
                # line_art, syntax and exc set in_code_example as well.
                } else if (tag == "line_art" || tag == "syntax" || tag == "exc") {
                    oline = oline "
    ";
                    in_code_example = 1;
                # Any of these end tags clears in_code_example again.
                } else if (tag == "endcode_example" || tag == "endsyntax" || tag == "extext" || tag == "endline_art") {
                    oline = oline "
    "; in_code_example = 0; } else if (tag == "title_page") { on_title_page = 1; title_page = "

    Title Page

    \n"; } else if (tag == "endtitle_page") { on_title_page = 0; print "To see the title page, click here.\n"; } else if (tag == "copyright_page") { on_copyright_page = 1; copyright_page = "

    Copyright Information

    \n"; } else if (tag == "endcopyright_page") { on_copyright_page = 0; print "To see the copyright information, click here.

    \n"; } else if (tag == "note" || tag == "example_sequence") { oline = oline "

    "; if (text != "") line = text "

    " line; else line = "Note: " line; } else if (tag == "endnote" || tag == "endexample_sequence") { if (in_code_example) { oline = oline "

    "; in_code_example = 0; } oline = oline ""; } else if (tag == "table") { for (idx in table_data) delete table[idx]; } else if (tag == "table_heads") { } else if (tag == "table_row") { } else if (tag == "table") { } else if (tag == "table_end") { } else if (tag == "table_attributes") { } else if (tag == "table_setup") { } else if (tag == "table_row_break") { } else if (tag == "table_key") { } else if (tag == "endtable_key") { } else if (tag == "table_unit_heads") { } else if (tag == "table_unit") { } else if (tag == "endtable_unit") { } else if (tag == "endtable") { } else if (tag == "define_symbol" || tag == "define_book_name") { n = split(text, parts, "\\"); if (n > 1) { parts[1] = tolower(parts[1]); symbol[parts[1]] = parts[2]; } } else if (tag == "reference") { split(text, parts, "\\"); parts[1] = tolower(parts[1]); if (parts[1] in symbol) { line = symbol[parts[1]] line; } else { # we haven't seen the symbol yet -- assume it is a forward reference oline = oline "click here" } } else if (tag == "set_online_topic") { } else if (tag == "define") { } else if (tag == "document_attributes") { } else if (tag == "enddocument_attributes") { } else if (tag == "set_page_numbering") { } else if (tag == "revision") { } else if (tag == "dec_security") { oline = oline "Digital security classification is " text ".

    " } else if (tag == "front_matter") { } else if (tag == "preface") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "

    Preface

    "; symbol[parts[2]] = "Preface"; for ( ; contents_level < 2; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 2; contents_level--) { contents = contents "\n"; } contents = contents "
  • Preface\n"; } else { oline = oline "

    Preface

    "; for ( ; contents_level < 2; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 2; contents_level--) { contents = contents "\n"; } contents = contents "
  • Preface\n"; } } else if (tag == "subhead1") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "
    "; line = parts[1] "
    " line; symbol[parts[2]] = "" parts[1] ""; # for ( ; contents_level < 5; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 5; contents_level--) { # contents = contents "\n"; # } # contents = contents "
  • " parts[1] "\n"; } else { oline = oline "
    "; line = text "
    " line; # for ( ; contents_level < 5; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 5; contents_level--) { # contents = contents "\n"; # } # contents = contents "
  • " text "\n"; } } else if (tag == "subhead2") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "
    "; line = parts[1] "
    " line; symbol[parts[2]] = "" parts[1] ""; # for ( ; contents_level < 6; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 6; contents_level--) { # contents = contents "\n"; # } # contents = contents "
  • " parts[1] "\n"; } else { oline = oline "
    "; line = text "
    " line; # for ( ; contents_level < 6; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 6; contents_level--) { # contents = contents "\n"; # } # contents = contents "
  • " text "\n"; } } else if (tag == "mark") { } else if (tag == "endmark") { } else if (tag == "mark_deletion") { } else if (tag == "endpreface") { } else if (tag == "endfront_matter") { } else if (tag == "running_title") { } else if (tag == "quote") { if (text != "") { line = "\"" text "\"" line; } else { oline = oline "
    " } } else if (tag == "endquote") { oline = oline "
    " } else if (tag == "keyword" || tag == "newterm") { oline = oline ""; line = text "" line; } else if (tag == "lmf") { } else if (tag == "lmf_producer") { } else if (tag == "lmf_product") { } else if (tag == "lmf_release_date") { } else if (tag == "lmf_version_number") { } else if (tag == "lmf_altname") { } else if (tag == "lmf_info") { } else if (tag == "endlmf") { } else if (tag == "date") { } else if (tag == "signatures") { } else if (tag == "byline") { gsub("\\\\","

    ",text); oline = oline "

    "; line = text "

    " line; } else if (tag == "print_date") { oline = oline "Prepared: "; line = text "

    " line; } else if (tag == "copyright_date") { n = split(text, parts, "\\"); oline = oline "Copyright "; line = parts[1] ", " parts[2] line; } else if (tag == "tbs") { oline = oline "TBS" } else if (tag == "cp") { } else if (tag == "footnote") { n = split(text, parts, "\\"); parts[1] = tolower(parts[1]); foot_seq[parts[1]]++; footnotes = footnotes "

    Footnote " parts[1] "\n"; footnotes = footnotes "
    " parts[2] "\n"; oline = oline " footnote " parts[1] ""; } else if (tag == "footref") { text = tolower(text); oline = oline " footnote " text ""; } else if (tag == "figure") { figure_filespec=""; if (text != "") { n = split(text, parts, "\\"); if (n > 1) { parts[2] = tolower(parts[2]); oline = oline "
    "; line = parts[1] "
    " line; figure_filespec = parts[2] ".gif"; symbol[parts[2]] = "" parts[1] ""; # for ( ; contents_level < 6; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 6; contents_level--) { # contents = contents "\n"; # } # contents = contents "
  • " parts[1] "\n"; } else { oline = oline "
    "; line = text "
    " line; # for ( ; contents_level < 6; contents_level++) { # contents = contents "<" contents_list_type ">\n"; # } # for ( ; contents_level > 6; contents_level--) { # contents = contents "\n"; # } # contents = contents "
  • " text "\n"; } } } else if (tag == "figure_attributes") { } else if (tag == "figure_file") { n = split(text, parts, "\\"); parts[1] = tolower(parts[1]); if (parts[1] == "html") figure_filespec = parts[2]; } else if (tag == "endfigure") { if (figure_filespec != "") oline = oline "

    " } else if (tag == "online_chunk") { } else if (tag == "online_popup") { } else if (tag == "endonline_popup") { } else if (tag == "argument") { oline = oline ""; line = text "" line; } else if (tag == "literal") { gsub("&","\\&",text); gsub(">","\\>",text); gsub("<","\\<",text); oline = oline text; } else if (tag == "argdeflist") { oline = oline "

    "; } else if (tag == "argitem") { oline = oline "
    "; line = text line; } else if (tag == "argdef") { oline = oline "
    "; } else if (tag == "endargdeflist") { oline = oline "
    "; } else if (tag == "examples_intro") { } else if (tag == "hellipsis") { oline = oline " ... " } else if (tag == "ellipsis") { oline = oline "

    .

    .

    .

    " } else if (tag == "issue") { } else if (tag == "endappendix") { } else if (tag == "set_appendix_letter") { } else if (tag == "abstract") { oline = oline "

    Abstract

    "; for ( ; contents_level < 2; contents_level++) { contents = contents "<" contents_list_type ">\n"; } for ( ; contents_level > 2; contents_level--) { contents = contents "\n"; } contents = contents "
  • Abstract\n"; } else if (tag == "endabstract") { } else if (tag == "contents_file") { print "To see the contents listing, click here.\n"; contents_needed = 1; } else { gsub(">","\\>",tag); gsub("<","\\<",tag); gsub(">","\\>",text); gsub("<","\\<",text); oline = oline "" } } if (line != "" && !in_comment) { if (in_code_example == 0) gsub("\\\\[A-Za-z0-9_]*", " ", line); oline = oline line; } if (oline != "" || in_code_example == 1) { if (on_title_page) { title_page = title_page oline "\n"; } else if (on_copyright_page) { copyright_page = copyright_page oline "\n"; } else print oline; } return; } # # Read a line from the current input file. # Sets EOF when appropriate # function read_line(filename, result, name) { # result and name are locals if (filename == "") result = getline; else result = getline < filename; if (result < 0) { name = (filename == "")? "standard input": filename; print "ERROR reading from", name; exit 1; } else if (result == 0) EOF = 1; } # # Read in a file. # function do_file(filename) { if (filename == "") # Will already be in $0 do_line(filename); while (!EOF) { read_line(filename); if (!EOF) do_line(filename); } if (filename != "") close(filename); EOF = 0; # EOF is shared by all files }