<?xml version="1.0"?>
<!DOCTYPE spec SYSTEM "../schema/xsl-query.dtd" [
<!ENTITY date.year "2004">
<!ENTITY date.month "October">
<!ENTITY date.MM "10">
<!ENTITY date.day "29">
<!ENTITY date.DD "&date.day;">
<!ENTITY doc.date "&date.year;&date.MM;&date.DD;">
<!ENTITY doc.prefix "WD-xslt-xquery-serialization">
<!ENTITY url.external "http://www.w3.org/TR/&date.year;/&doc.prefix;-&doc.date;/">
<!ENTITY url.this "&url.external;">

<!ENTITY tm    "&#x2122;">
<!ENTITY mdash "&#x2014;">
<!ENTITY rsquo "&#x2019;">
<!ENTITY ldquo "&#x201C;">
<!ENTITY rdquo "&#x201D;">
]>
<spec w3c-doctype="wd">

<header>
  <title>XSLT 2.0 and XQuery 1.0 Serialization</title>
  <version/>
  <w3c-designation>&doc.prefix;-&doc.date;</w3c-designation>
  <w3c-doctype>W3C Working Draft</w3c-doctype>
  <pubdate>
    <day>&date.day;</day>
    <month>&date.month;</month>
    <year>&date.year;</year>
  </pubdate>
  <publoc>
     <loc href="&url.this;">&url.this;</loc>
  </publoc>
  <altlocs>
    <loc href="&url.this;serialization.xml">XML</loc>
  </altlocs>
  <latestloc>
    <loc href="http://www.w3.org/TR/xslt-xquery-serialization/">http://www.w3.org/TR/xslt-xquery-serialization/</loc>
  </latestloc>
  <prevlocs>
    <loc href="http://www.w3.org/TR/2004/WD-xslt-xquery-serialization-20040723/">http://www.w3.org/TR/2004/WD-xslt-xquery-serialization-20040723/</loc>
    <loc href="http://www.w3.org/TR/2003/WD-xslt-xquery-serialization-20031112/">http://www.w3.org/TR/2003/WD-xslt-xquery-serialization-20031112/</loc>
    <loc href="http://www.w3.org/TR/2003/WD-xslt-xquery-serialization-20030502/">http://www.w3.org/TR/2003/WD-xslt-xquery-serialization-20030502/</loc>
  </prevlocs>
  <authlist>
    <author>
      <name>Michael Kay</name>
      <affiliation>
        <phrase diff="add">Saxonica</phrase>
        <phrase diff="chg">(formerly of Software AG)</phrase>
      </affiliation>
      <email href="http://www.saxonica.com"><phrase diff="add" at="E">http://www.saxonica.com</phrase><phrase diff="del" at="E">Michael.Kay@softwareag.com</phrase></email>
    </author>
    <author>
      <name>Norman Walsh</name>
      <affiliation>Sun Microsystems</affiliation>
      <email href="mailto:Norman.Walsh@Sun.COM">Norman.Walsh@Sun.COM</email>
    </author>
    <author diff="add" at="B">
      <name>Henry Zongaro</name>
      <affiliation>IBM</affiliation>
      <email href="mailto:zongaro@ca.ibm.com">zongaro@ca.ibm.com</email>
    </author>
  </authlist>

<status>
<p><emph>This section describes the status of this document at the
time of its publication. Other documents may supersede this document.
A list of current W3C publications and the latest revision of this
technical report can be found in the <loc
href="http://www.w3.org/TR/">W3C technical reports index</loc> at
http://www.w3.org/TR/.</emph></p>


<!-- *********************************************************************** -->
<!-- THE FOLLOWING PARAGRAPH MUST BE COMMENTED OUT FOR PUBLICATION OF THE    -->
<!-- PUBLIC WORKING DRAFTS                                                   -->
<!-- *********************************************************************** -->
<!--
<p>This is a <emph>private</emph> Working Draft of this document for
review by members of the XML Query Working Group and the XSLT Working
Group. It is subject to review by those two Working Groups before any
publication as a Public Working Draft of the W3C.</p>
-->
 
<!-- *********************************************************************** -->
<!-- THE FOLLOWING PARAGRAPH MUST BE RESTORED FOR PUBLICATION OF THE         -->
<!-- PUBLIC WORKING DRAFTS                                                   -->
<!-- *********************************************************************** -->
<p>This is a Public Working Draft for review by W3C Members and other
interested parties. Publication as a Working Draft does not imply
endorsement by the W3C Membership. This is a draft document and may be
updated, replaced or obsoleted by other documents at any time. It is
inappropriate to cite this document as other than work in progress.
</p>

<p>This document describes how <bibref ref="XSLT2"/>, <bibref
ref="XQuery"/>
<phrase diff="add" at="G">and other related XML standards</phrase>
convert an instance of the <bibref ref="DataModel"/>
into a sequence of octets.
<phrase diff="del" at="G">This material has
been moved out of the XSLT draft and into a separate document so that
it can be shared by both the named specifications and possibly other
specifications as well.</phrase></p>

<p>This draft includes many corrections and changes based on member-only
and public comments on the
<loc href="http://www.w3.org/TR/2003/WD-xslt-xquery-serialization-20031112/">
Last Call Working Draft</loc>
(http://www.w3.org/TR/2003/WD-xslt-xquery-serialization-20031112/).
The XML Query and XSL WGs wish to thank the people who have sent in comments
for their close reading of the document.</p>

<p>This draft reflects decisions taken up to and including the joint
teleconference meeting 209 of the XSL and XML Query Working Groups of
21 September 2004.  These decisions
are recorded in the Last Call
<loc href="http://www.w3.org/2004/10/xquery-serialization-issues.html">issues
list</loc>
(http://www.w3.org/2004/10/xquery-serialization-issues.html).  However, some
of these decisions may not yet be reflected in this document.</p>

<p>XSLT 2.0 and XQuery 1.0 Serialization has been defined jointly by
the <loc href="http://www.w3.org/Style/XSL/">XSL Working Group</loc> and
the <loc href="http://www.w3.org/XML/Query">XML Query Working Group</loc>
(both part of the <loc href="http://www.w3.org/XML/Activity.html">XML Activity</loc>).
</p>

<!-- *********************************************************************** -->
<!-- THE FOLLOWING PARAGRAPH MUST BE RESTORED FOR PUBLICATION OF LAST CALL   -->
<!-- PUBLIC WORKING DRAFTS                                                   -->
<!-- *********************************************************************** -->
<!--
<p diff="chg" at="C">This is a
<loc href="http://www.w3.org/2003/06/Process-20030618/tr.html#last-call">Last
 Call Working Draft</loc>.
Comments on this document are due on 15 February 2004. Comments should
be sent to the W3C mailing list
<loc href="mailto:public-qt-comments@w3.org">public-qt-comments@w3.org</loc>
(archived at
<loc href="http://lists.w3.org/Archives/Public/public-qt-comments/">http://lists.w3.org/Archives/Public/public-qt-comments/</loc>)
with &ldquo;[Serial]&rdquo; at the beginning of the Subject field.</p>
-->

<p>Public comments on this document and its open issues are invited.
Comments should be sent to the W3C XSLT/XPath/XQuery mailing list,
<loc href="mailto:public-qt-comments@w3.org">public-qt-comments@w3.org</loc>
(archived at
<loc href="http://lists.w3.org/Archives/Public/public-qt-comments/">http://lists.w3.org/Archives/Public/public-qt-comments/</loc>),
with &ldquo;[Serial]&rdquo; at the beginning of the subject field.
</p>

<p>The patent policy for this document is the <loc
href="http://www.w3.org/Consortium/Patent-Policy-20040205/">5 February
2004 W3C Patent Policy</loc>. 
Patent disclosures relevant
to this specification may be found on the <loc
href="http://www.w3.org/2002/08/xmlquery-IPR-statements">XML Query
Working Group's patent disclosure page</loc> and the <loc
href="http://www.w3.org/Style/XSL/Disclosures">XSL Working Group's
patent disclosure page</loc>. An individual who has actual knowledge of
a patent which the individual believes contains Essential Claim(s) with
respect to this specification should disclose the information in
accordance with <loc
href="http://www.w3.org/Consortium/Patent-Policy-20040205/#sec-Disclosure">section
6 of the W3C Patent Policy</loc>.
</p>
</status>

<abstract>
<p>This document defines serialization for the <bibref ref="XSLT2"/> and
<bibref ref="XQuery"/> specifications
and any other specifications that reference it.</p>
</abstract>

<langusage>
<language id="en">English</language>
</langusage>

<revisiondesc>
<p>See the CVS changelog.</p>
</revisiondesc>
</header>

<body>

<div1 id="intro">
<head>Introduction</head>

<p>This document defines serialization of the W3C XQuery 1.0 and XPath 2.0 Data Model,
which is the data model of at least <bibref ref="XPath2"/>,
<bibref ref="XSLT2"/>, and
<bibref ref="XQuery"/>, and any other specifications that reference it.</p>

<!-- Deleted in response to qt-2004Feb0264-01 and qt-2004Feb1195-01 -->
<ednote diff="del" at="F"><edtext>This material has been moved out
of the XSLT draft and
into a separate document. The Working Groups also considered moving
this material directly into the Data Model document, but elected to
keep it separate for the moment, principally in order to advance the
Data Model to Last Call. In the future, this material may be moved
into the Data Model. The Working Groups solicit public opinion about
which alternative is superior.</edtext></ednote>

<p>Serialization is the process of converting an instance of the
<bibref ref="DataModel"/> into a sequence of octets. Serialization is
well-defined for most data model instances.</p>

<ednote><edtext>The document assumes the reader already knows
generally what serialization is. A brief explanation will be added,
especially to disabuse any reader who thinks it might mean Java (or
.NET) serialization.</edtext>
</ednote>

<ednote><edtext>The editor has yet to align the description of
serialization errors with the description of errors in related
specifications.  That will be done in a future public working
draft.</edtext></ednote>

<div2 id="terminology">
<head>Terminology</head>
<p diff="add" at="B">In this specification,
<phrase diff="add" at="G">where they appear in upper case,</phrase>
the words "MUST", "MUST NOT",
"SHOULD", "SHOULD NOT", "MAY", "REQUIRED", and
"RECOMMENDED" are to be interpreted as described in
<bibref ref="RFC2119"/>.</p>

<p diff="add" at="G"><termdef id="serializer" term="serializer">As is indicated in
<specref ref="conformance"/>, conformance criteria for serialization
are determined by other specifications that refer to this specification.
A <term>serializer</term> is software that implements some or all of the
requirements of this specification in accordance with such conformance
criteria.</termdef>  A serializer is not REQUIRED to directly provide a
programming interface that permits a user to set serialization parameters
or to provide an input sequence for serialization.</p>

<p diff="add" at="G">
<termdef id="impdef" term="implementation-defined"><term>Implementation-defined</term> indicates an
aspect that MAY differ between
<termref def="serializer">serializers</termref>, but whose actual
behaviour MUST be specified either by another specification that sets
conformance criteria for serialization (see <specref ref="conformance"/>)
or in documentation that accompanies the
<termref def="serializer">serializer</termref>.</termdef></p>

<p diff="add" at="G">
<termdef id="impdep" term="implementation-dependent"><term>Implementation-dependent</term> indicates an
aspect that MAY differ between
<termref def="serializer">serializers</termref>, and whose actual
behaviour is not REQUIRED to be specified either by another specification
that sets conformance criteria for serialization (see
<specref ref="conformance"/>) or in documentation that accompanies the
<termref def="serializer">serializer</termref>.</termdef></p>

<p diff="add" at="G">
<termdef id="serial-err" term="serialization error">In some instances, the
sequence that is input to serialization cannot be successfully converted
into a sequence of octets given the set of serialization parameter
(<specref ref="serparam"/>) values specified.  A
<term>serialization error</term> is said to occur in such an instance.</termdef>
In some cases, a <termref def="serializer">serializer</termref> is
REQUIRED to signal such an error.
What it means to signal a serialization error is determined by the
relevant conformance criteria (<specref ref="conformance"/>) to which
the <termref def="serializer">serializer</termref> conforms.  In other cases,
there is an <termref def="impdef">implementation-defined</termref> choice
between signalling a serialization error and performing a recovery action.
Such a recovery action will allow a
<termref def="serializer">serializer</termref> to produce a sequence of
octets that might not fully reflect the usual requirements of the
parameter settings that are in effect.
</p>
</div2>
</div1>

<div1 id="serdm">
<!-- Title changed in response to qt-2004Feb0265-01 -->
<head>Sequence Normalization</head>

<p diff="del" at="C">The XQuery 1.0 and XPath 2.0 Data Model is richer and less
constrained than XML. There are valid instances of the data model
that have no direct analog in XML. In particular, instances of the data model
can contain typed values, sequences, and sequences of typed
values. And whereas XML deals only with <quote>documents</quote>, instances
of the data model can have as their root any node type, simple value, or
sequence and may even be empty.</p>

<p diff="del" at="C">This section describes how to convert an arbitrary
instance of the data model
into one of several simplified forms. We then describe how
these forms are serialized. This greatly simplifies the sections
which follow. A <termref def="serializer">serializer</termref> is not
REQUIRED to implement
serialization of arbitrary instances of the data model in this way, provided
it produces the same results as this conceptual model.
</p>

<olist diff="del" at="C">
<item><p>If the instance of the data model contains any
<phrase diff="del" at="B">typed or untyped</phrase>
<phrase diff="add" at="B">atomic</phrase>
values,
<!-- Deleted in response to qt-2004Feb1037-01 -->
<phrase diff="del" at="E">or sequences that contain
<phrase diff="del" at="B">typed or untyped</phrase>
<phrase diff="add" at="B">atomic</phrase>
values,</phrase>
convert
them to strings: obtain the lexical representation of each value by
casting it to an <code>xs:string</code> and replace the value with its
string representation.  <phrase diff="add" at="B">If the value cannot
be cast to <code>xs:string</code>, serialization of the instance of
the data model is undefined.</phrase></p>
</item>

<item><p>If adjacent strings occur in a sequence, replace both values
with their concatenation separated by a single space.</p>
</item>

<item><p>If empty sequences occur, replace them with the empty string.</p>
</item>

<item>
<p>To complete the simplification, perform the following steps
<phrase diff="del" at="B">interactively</phrase>
<phrase diff="add" at="B">iteratively</phrase>
until a simplest form is reached:</p>

<olist>
<item><p>If the instance of the data model has as its root an attribute or
namespace node, <phrase diff="del" at="B">or a QName value,</phrase>
or if it has as its root a sequence
which contains one of these items, serialization is undefined.</p>
</item>

<item><p>If the instance of the data model has as its root
<phrase diff="del" at="B">a single document
node, or an element, processing instruction, comment, or text node, or</phrase>
a sequence of only element, processing instruction, comment, and text nodes,
it is already in its simplest form.</p>
</item>

<item><p>If the instance of the data model has as its root a sequence of
document nodes, or a sequence which contains document nodes, replace
each document node with its children in document order.</p>
</item>

<item><p>If the instance of the data model has as its root a string value, or
a sequence which contains one or more string values, replace each
string value with a text node that contains the same string.</p>
</item>
</olist>
</item>
</olist>

<p diff="del" at="C">If there are any remaining string values among the children of elements in
the instance of the data model, replace them with text nodes that contain the same
string values and merge adjacent text nodes.</p>

<p diff="add" at="D">
An instance of the data model that is input to the serialization
process is a sequence.

<!-- Added in response to qt-2004Feb0921-01 -->
<phrase diff="add" at="F">Prior to serializing a sequence using any of
the output methods whose behavior is specified by this document
(<specref ref="serparam"/>)</phrase>
<phrase diff="chg" at="F">the</phrase>

<termref def="serializer">serializer</termref> MUST first

<!-- Reworded in response to qt-2004Feb0266-01 -->
<phrase diff="del" at="G">place that input sequence into a normalized
form</phrase>
<phrase diff="add" at="G">compute a normalized sequence</phrase>

for serialization; it
is the normalized sequence that is actually serialized.

<!-- Added in response to qt-2004Feb0265-01 -->
<phrase diff="add" at="G">The purpose of this sequence normalization step is
to create a sequence that can be serialized as a
well-formed XML document or external general parsed entity, that
also reflects the content of the input sequence to the extent
possible.</phrase>
</p>

<p>The normalized

<!-- Use "sequence" instead of "instance of data model" - qt-2004Feb1204-01 -->
<phrase diff="del" at="G">form</phrase>
<phrase diff="add" at="G">sequence</phrase>

for serialization is constructed by applying all
of the following rules in order, with the initial sequence being
input to the first step, and the sequence that results from any
step being used as input to the subsequent step.

<!-- Added in response to qt-2004Feb0921-01 -->
<phrase diff="add" at="F">For any
<termref def="impdef">implementation-defined</termref>
output method,
it is
<termref def="impdef">implementation-defined</termref>
whether this <phrase diff="add" at="G">sequence</phrase> normalization
process takes place.</phrase>
</p>

<!-- Added in response to qt-2004Feb0262-01 -->
<p diff="add" at="E">Where the process of converting the input sequence
to a normalized

<!-- Use "sequence" instead of "instance of data model" - qt-2004Feb1204-01 -->
<phrase diff="del" at="G">form</phrase>
<phrase diff="add" at="G">sequence</phrase>

indicates that a value MUST be cast to
<code>xs:string</code>, that operation is as
defined in <xspecref spec="FO" ref="casting-to-string"/> of
<bibref ref="FANDO"/>.

<phrase diff="add" at="G">The steps in computing the normalized sequence
are:</phrase>
</p>

<!-- Reworded items in list to make clear that normalization is not
     destructive in response to qt-2004Feb0266-01 -->
<olist diff="add" at="D">
<item>
<p diff="add" at="G">
If the sequence that is input to serialization is
empty, create a sequence <emph>S<sub>1</sub></emph> that consists of a
zero-length string.  Otherwise, copy each item in the sequence that is
input to serialization to create the new sequence <emph>S<sub>1</sub></emph>.
</p>
<p diff="del" at="G">Replace an empty sequence with a zero-length
string.</p>
</item>

<item>
<p diff="add" at="G">
For each item in <emph>S<sub>1</sub></emph>, if the item is atomic, obtain the 
lexical representation of the item by casting it to an <code>xs:string</code>
and copy the string representation to the new sequence; otherwise, copy the
item, which will be a node, to the new sequence.
The new sequence is <emph>S<sub>2</sub></emph>.
</p>
<p diff="del" at="G">If the instance of the data model contains any atomic values,
<!-- Removed in response to qt-2004Feb1037-01 -->
<phrase diff="del" at="E">or
sequences that contain atomic values,</phrase>
convert the atomic values
to strings: obtain the lexical representation of each value by
casting it to an <code>xs:string</code> and replace the value
with its string
representation.  It is a <termref def="serial-err">serialization error</termref> if the value
cannot be cast to <code>xs:string</code>.</p>
</item>

<item>
<p diff="add" at="G">
For each subsequence of adjacent strings in <emph>S<sub>2</sub></emph>,
copy a single string to the new sequence equal to the values of the
strings in the subsequence concatenated in order, each separated by a
single space.  Copy all other items to the new sequence.  The new
sequence is <emph>S<sub>3</sub></emph>.
</p>
<p diff="del" at="G">Replace all adjacent strings in the sequence with a single
string equal to the values of the strings concatenated, each
separated by a single space.</p>
</item>

<item>
<p diff="add" at="G">
For each item in <emph>S<sub>3</sub></emph>, if the item is a string,
create a text node in the new sequence whose string value is equal to
the string; otherwise, copy the item to the new sequence.  The new
sequence is <emph>S<sub>4</sub></emph>.
</p>
<p diff="del" at="G">Replace any string in the sequence with a text node whose
string value is equal to the string.</p>
</item>

<item>
<p diff="add" at="G">
For each item in <emph>S<sub>4</sub></emph>, if the item is a document node,
copy its children to the new sequence; otherwise, copy the item to the new 
sequence.  The new sequence is <emph>S<sub>5</sub></emph>.
</p>
<p diff="del" at="G">Replace any document node in the sequence with its
children.</p>
</item>

<item>
<p diff="add" at="G">
It is a <termref def="serial-err">serialization error</termref> if an item in <emph>S<sub>5</sub></emph> is an
attribute node or a namespace node. Otherwise, construct a new sequence,
<emph>S<sub>6</sub></emph>, that consists of a single document node and
copy all the items in the sequence, which are all nodes, as children of
that document node.
</p>
<p diff="del" at="G">It is a <termref def="serial-err">serialization error</termref> if an item in the sequence
is an attribute node or a namespace node.  Otherwise, create a
new document node and make all the items in the sequence, which
are all nodes, children of that document node.</p>
</item>
</olist>

<p diff="add" at="G"><emph>S<sub>6</sub></emph> is the normalized sequence.</p>

<p diff="add" at="C">The tree rooted at the document node that is
created by the final step of this <phrase diff="add" at="G">sequence</phrase>
normalization process is the
instance of the data model to which the rules of the appropriate
output method are applied.  If the <phrase diff="add" at="G">sequence</phrase>
normalization process results
in a <termref def="serial-err">serialization error</termref>, the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.</p>

<note><p diff="add" at="C"><phrase diff="chg" at="D">The
<phrase diff="add" at="G">sequence</phrase>
normalization process for a sequence <code>$seq</code> is equivalent
to constructing a document node using the</phrase>
XSLT instruction:</p>

<eg diff="add" at="C">&lt;xsl:result-document&gt;
  &lt;xsl:copy-of select="$seq"/&gt;
&lt;/xsl:result-document&gt;</eg>

<p diff="add" at="C">or the XQuery expression:</p>

<!-- Fixed formatting and syntax error in response to qt-2004Feb0050-01
     and qt-2004Feb1045-01 -->
<eg diff="add" at="C">document {
  for $s in $seq return
    if ($s instance of document-node())
    then $s/child::node()
    else $s
}</eg>

<p diff="del" at="D"><phrase diff="add" at="C">and then serializing
the document node as described in <specref ref="xml-output"/>,
<specref ref="xhtml-output"/>, <specref ref="html-output"/>,
<specref ref="text-output"/>, or in an
<termref def="impdef">implementation-defined</termref>
manner.</phrase></p>

<p diff="add" at="C">This process
<!-- Changed wording in response to qt-2004Feb0263-01 -->
<phrase diff="del" at="E">will fail</phrase>
<phrase diff="add" at="E">results in a <termref def="serial-err">serialization error</termref></phrase>
with certain sequences,
for example sequences containing parentless attribute and namespace

<!-- There are no longer atomic values that cannot be cast to string -->
nodes<phrase diff="del" at="G">, or atomic values of types that cannot
be cast to a string, such as <code>xs:QName</code></phrase>.
<phrase diff="del" at="E">
<!-- Deleted due to last call issue qt-2004Feb0978-01 -->
and <code>xs:NOTATION</code>
</phrase>
<phrase diff="del" at="D">Such a failure results in a
<termref def="serial-err">serialization error</termref>; the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the
error.</phrase></p>
</note>

</div1>

<div1 id="serparam">
<head>Serialization Parameters</head>

<p>There are a number of parameters that influence how serialization
is performed. Host languages MAY allow users to specify any or all of
these parameters, but they are not REQUIRED to be able to do so.</p>

<p>The following serialization parameters are defined:</p>

<ednote diff="del" at="C">
<edtext>Here and throughout the document, the distinction between "should"
and "must" will be revisited. When serialization was described in the XSLT
specification, use of "should" helped to clarify that the serialization process
was optional. Now that it's described here in a standalone specification,
many of those clauses should use "must".</edtext></ednote>

<!-- Table added in response to qt-2004Feb0261-01 and qt-2004Feb1042-01 -->
<!-- In response to qt-2004Feb0976-01 and qt-2004Feb0977-01, removed any
     mention of default parameter values. -->
<!-- Table alphabetized in response to qt-2004Feb1044-01 -->
<table diff="add" border="1" summary="Serialization parameters">
  <col width="180"/>
  <col/>
  <thead>
    <tr>
      <th align="left">Serialization parameter name</th>
      <th align="left">Permitted values for parameter</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <!-- Added parameter in response to qt-2004Feb0362-04 and
           qt-2004Feb0362-05
        -->
      <td><phrase diff="add" at="G"><code>byte-order-mark</code></phrase></td>
      <td><phrase diff="add" at="G">One of the enumerated values
          <code>yes</code> or <code>no</code>.  This parameter indicates
          whether the serialized sequence of octets is to be preceded by
          a Byte Order Mark.  (See Section 5.1 of
          <bibref ref="UNICODE-ENCODING"/>.)  The actual byte order used is
          <termref def="impdep">implementation-dependent</termref>.
          If the concept of a Byte Order Mark is
          not meaningful in connection with the value of the
          <code>encoding</code> parameter, the <code>byte-order-mark</code>
          parameter is ignored.</phrase></td>
    </tr>
    <tr>
      <td><code>cdata-section-elements</code></td>
      <!-- Changed names to expanded-QNames for qt-2004Feb0978-01 -->
      <td>A list of expanded-QNames, possibly empty.</td>
    </tr>
    <tr>
      <td><code>doctype-public</code></td>
      <td>A string of Unicode characters.  This parameter is optional.</td>
    </tr>
    <tr>
      <td><code>doctype-system</code></td>
      <td>A string of Unicode characters.  This parameter is optional.</td>
    </tr>
    <tr>
      <td><code>encoding</code></td>
      <td>A string of Unicode characters in the range #x21 to #x7E (that is,
          printable ASCII characters); the value SHOULD be a charset
          registered with the Internet Assigned Numbers Authority
          <bibref ref="IANA"/>, <bibref ref="RFC2278"/> or begin with the
          characters <code>x-</code> or <code>X-</code>.</td>
    </tr>
    <tr>
      <td><code>escape-uri-attributes</code></td>
      <td>One of the enumerated values <code>yes</code> or <code>no</code>.</td>
    </tr>
    <tr>
      <td><code>include-content-type</code></td>
      <td>One of the enumerated values <code>yes</code> or <code>no</code>.</td>
    </tr>
    <tr>
      <td><code>indent</code></td>
      <td>One of the enumerated values <code>yes</code> or <code>no</code>.</td>
    </tr>
    <tr>
      <td><code>media-type</code></td>
      <td>A string of Unicode characters specifying the media type (MIME
          content type)
          <!-- Changed reference in response to qt-2004Feb0362-09 -->
          <bibref ref="RFC2376" diff="del" at="G"/>
          <bibref ref="RFC2046" diff="add" at="G"/>;
          the charset parameter of
          the media type MUST NOT be specified explicitly in the value of
          <!-- In response to qt-2004Feb0362-23 [27], made this explicit -->
          the <code>media-type</code> parameter.
          <!-- Added following text in response to qt-2004Feb0362-09 -->
          <phrase diff="add" at="G">If the destination of the serialized output
          is annotated with a media type, this parameter MAY be used to
          provide such an annotation.  For example, it MAY be used to set
          the media type in an HTTP header.</phrase></td>
    </tr>
    <tr>
      <td><code>method</code></td>
      <td>An expanded-QName with a null namespace URI, and the local part of
          the name equal to one of <code>xml</code>, <code>xhtml</code>,
          <code>html</code> or <code>text</code>, or having a non-null
          namespace URI.  If the namespace URI is non-null, the parameter
          specifies an
          <termref def="impdef">implementation-defined</termref>
          output method.</td>
    </tr>
    <tr>
    <!-- Changed parameter name under qt-2004Jan0019-04, qt-2004Feb0362-10 -->
      <td><code>normalization-form</code></td>
      <td>One of the enumerated values <code>NFC</code>, <code>NFD</code>,
          <code>NFKC</code>, <code>NFKD</code>, <code>fully-normalized</code>,
          <code>none</code> or an
          <termref def="impdef">implementation-defined</termref> value.</td>
    </tr>
    <tr>
      <td><code>omit-xml-declaration</code></td>
      <td>One of the enumerated values <code>yes</code> or <code>no</code>.</td>
    </tr>
    <tr>
      <td><code>standalone</code></td>
      <td>One of the enumerated values <code>yes</code>, <code>no</code> or
          <code>none</code>.</td>
    </tr>
    <tr>
      <td><code>undeclare-namespaces</code></td>
      <td>One of the enumerated values <code>yes</code> or <code>no</code>.</td>
    </tr>
    <tr>
      <td><code>use-character-maps</code></td>
      <td>A list of pairs, possibly empty, with each pair consisting of
          a single Unicode character and a string of Unicode characters.</td>
    </tr>
    <tr>
      <td><code>version</code></td>
      <td>A string of Unicode characters.</td>
    </tr>
  </tbody>
</table>

<!-- List removed in response to qt-2004Feb0261-01 and qt-2004Feb1042-01 -->
<ulist diff="del" at="E">
<item><p><code>encoding</code> specifies the preferred character
encoding <phrase diff="del" at="C">that the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

SHOULD use</phrase>
<phrase diff="chg" at="C">for encoding</phrase> sequences of
characters as sequences of bytes; the value of the parameter SHOULD be
treated case-insensitively; the value MUST contain only characters in
the range #x21 to #x7E (i.e. printable ASCII characters); the value
SHOULD either be a <code>charset</code> registered with the Internet
Assigned Numbers Authority <bibref ref="IANA"/>,
<bibref ref="RFC2278"/> or start with <code>X-</code>
</p>
<p>If this parameter is not specified,
<phrase diff="add" at="C">and the output method does not specify
any additional requirements,</phrase> the encoding used is
<termref def="impdef">implementation-defined</termref>.</p>
</item>

<item><p><code>cdata-section-elements</code> specifies a list of the
names of elements whose text node children
<phrase diff="chg" at="C">are to be</phrase> output using
CDATA sections</p>
<p>If this parameter is not specified, no elements will be treated specially.
</p>
</item>

<item><p><code>doctype-system</code> specifies the system identifier
to be used in the document type declaration</p>
<p diff="del" at="C">If this parameter is not specified,
<phrase diff="del" at="B">no system identifier will </phrase>
<phrase diff="add" at="B">a system
identifier MUST NOT</phrase> be generated.
For XML and XHTML output methods,
<phrase diff="del" at="B">no public identifier will </phrase>
 <phrase diff="chg" at="B">a public identifier
MUST NOT</phrase> be generated either,
regardless of the setting of <code>doctype-public</code>.
</p>
</item>

<item><p><code>doctype-public</code> specifies the public identifier
to be used in the document type declaration</p>
<p diff="del" at="C">If this parameter is not specified,
<phrase diff="chg" at="B">a public
identifier MUST NOT </phrase>
<phrase diff="del" at="B">no public identifier will </phrase>
be generated.
</p>
</item>

<item><p><code>escape-uri-attributes</code> specifies whether the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="chg" at="C">is to</phrase>
escape URI-valued attributes in HTML and XHTML output
using the method RECOMMENDED in <bibref ref="RFC2396"/> (section
2.4.1). The value MUST be <code>yes</code> or <code>no</code>.</p>
<p>If this parameter is not specified, the value is
<termref def="impdef">implementation-defined</termref>.
</p>
</item>

<item><p><code>include-content-type</code> specifies whether the serialization
process <phrase diff="chg" at="C">is to</phrase> add a
<code>meta</code> element in HTML and XHTML
output. The value MUST be <code>yes</code> or <code>no</code>.
</p>
<p>If this parameter is not specified, the value is
<termref def="impdef">implementation-defined</termref>.
</p>
</item>

<item><p><code>indent</code> specifies whether the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MAY add
additional whitespace when outputting the instance of the data model;
the value MUST be <code>yes</code> or <code>no</code></p>
<p>If this parameter is not specified, the value is
<termref def="impdef">implementation-defined</termref>.
</p>
</item>

<item><p><code>media-type</code> specifies the media type (MIME
content type) <bibref ref="RFC2376" diff="add" at="C"/>
of the data that results from outputting the instance of the data model;
the <code>charset</code> parameter
<phrase diff="chg" at="C">of the media type MUST</phrase>
NOT be specified
explicitly
<phrase diff="del" at="C">; instead, when the top-level media type is
<code>text</code>, a <code>charset</code> parameter SHOULD be added
according to the character encoding actually used by the output
method</phrase></p>
<p>If this parameter is not specified, the media type is
<termref def="impdef">implementation-defined</termref>.
</p>
</item>

<item><p><code>normalize-unicode</code>
<phrase diff="del" at="C">indicates whether or not the
serialization process <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> convert</phrase>
<phrase diff="chg" at="C">requests conversion of</phrase>
the serialized output to Unicode
Normalization Form C as specified in
<bibref ref="UNICODE-NORMALIZATION"/>. The value MUST be <code>yes</code> or
<code>no</code>.</p>
<p>If this parameter is not specified, the value is
<termref def="impdef">implementation-defined</termref>.
</p>
</item>

<item><p><code>omit-xml-declaration</code> specifies whether the serialization
process <phrase diff="del" at="C"><phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase></phrase>
<phrase diff="chg" at="C">is to</phrase> output
an XML declaration.<phrase diff="del" at="C">, unless the declaration is
REQUIRED by the setting of either the <code>encoding</code> parameter or the
<code>standalone</code> parameter</phrase>  The value MUST be <code>yes</code>
or <code>no</code></p>
<p>If this parameter is not specified, the value is
<termref def="impdef">implementation-defined</termref>.
</p>
</item>

<item><p><code>standalone</code> specifies whether the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="del" at="C"><phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> output a
standalone document declaration and what its value
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be;</phrase>
<phrase diff="chg" at="C">is to emit a standalone document declaration and
the value of the declaration;</phrase>
the value <phrase diff="add" at="C">of the parameter</phrase> MUST be
<code>yes</code> or <code>no</code></p>
<p diff="del" at="C">If this parameter is not specified,
<phrase diff="add" at="B">a standalone
document declaration MUST NOT</phrase> be output.
</p>
</item>

<item><p><code>undeclare-namespaces</code> specifies whether
<!-- Extraneous comma removed in response to qt-2004Feb1043-01 and
     qt-2004Feb1196-01
  -->
namespaces<phrase diff="del" at="E">,</phrase>
<phrase diff="del" at="B">SHOULD</phrase><phrase diff="del" at="C">MUST</phrase>
<phrase diff="chg" at="C">are to</phrase>
be undeclared during serialization; the value MUST be 
<code>yes</code> or <code>no</code>.</p>
<p>If this parameter is not specified, the value is
<termref def="impdef">implementation-defined</termref>.
</p>
<p>This parameter only applies when the XML
serialization method is used and the version is greater than 1.0.</p></item>

<item><p><code>use-character-maps</code>
<!-- Changed in response to qt-2004Feb0978-01 -->
<phrase diff="del" at="E">provides</phrase>
<phrase diff="add" at="E">specifies</phrase>
a list of character/string
pairs that are used in serialization (see <specref ref="character-maps"/>).
</p>
<p>If this parameter is not specified, no character maps are used.
</p>
</item>

<item><p><code>version</code> specifies the version of the output
method</p>
<p>If this parameter is not specified, the value is
<termref def="impdef">implementation-defined</termref>.
</p>
</item>
</ulist>

<p>The <code>method</code> parameter identifies the overall output method
that MUST be used for serializing. The value of the <code>method</code>
parameter MUST be a valid QName. If the QName is in no namespace,
then it identifies a method specified in this document and MUST be one
of <code>xml</code>, <code>html</code>, <code>xhtml</code>, or
<code>text</code><phrase diff="add" at="B">; in this case, the output method
specified MUST be used for serializing.</phrase>
If the QName is in a namespace, then it identifies
<phrase diff="chg" at="C">an
<termref def="impdef">implementation-defined</termref></phrase>
output method; the behavior in this case is not specified by this
document.</p>

<!-- Added in response to qt-2004Feb1038-01 -->
<p diff="add" at="G">In those cases where they have no important
effect on the content of the serialized result, details of the
output methods defined by this specification are left unspecified
and are regarded as <termref def="impdep">implementation-dependent</termref>.
Whether a <termref def="serializer">serializer</termref> uses
apostrophes or quotation marks to delimit attribute values in the
XML output method is an example of such a detail.</p>

<p>The detailed semantics of each parameter will be described
separately for each output method for which it is applicable. If the
semantics of a parameter are not described for an output method, then
it is not applicable to that output method.</p>
</div1>

<!-- Split into its own section in response to qt-2004Feb0276-01 -->
<div1 id="serphases">
<head>Phases of Serialization</head>

<!-- Added in response to qt-2004Feb1199-01 -->
<p><phrase diff="add" at="E">Following the
<phrase diff="add" at="G">sequence</phrase>
normalization process described in
<specref ref="serdm"/>,</phrase>
serialization can be regarded as involving
<phrase diff="del" at="G">four</phrase>
<phrase diff="add" at="G">three</phrase>
phases of processing.
</p>

<!-- In response to qt-2004Feb0922-01, clarified that implementation-defined
     output methods don't have to implement these phases -->
<p><phrase diff="add" at="G">For an
<termref def="impdef">implementation-defined</termref> output method,
any of these phases MAY be skipped or MAY be performed in a different
order than is specified here.
</phrase>

<phrase diff="add" at="G">For the output methods defined in this
specification,</phrase>
these phases are carried out sequentially as follows:</p>

<olist>

<!-- Revised description of Markup Generation phase in response to
     qt-2004Feb0922-01-->
<item><p><emph>Markup generation</emph> produces the
<phrase diff="add" at="G">character</phrase> representation of
<phrase diff="del" at="G">
start and end tags for elements, and other constructs such as XML
declarations, processing instructions, and so on. This is influenced
by the parameters <code>method</code>, <code>doctype-system</code>,
<code>doctype-public</code>, <code>include-content-type</code>,
<code>indent</code>, <code>omit-xml-declaration</code>,
<code>standalone</code>,
<!-- Added in response to qt-Feb0934-01 -->
<phrase diff="add" at="E"><code>undeclare-namespaces</code></phrase>
and <code>version</code>.</phrase>
<phrase diff="add" at="G">those parts 
of the serialized result that describe the structure of the normalized 
sequence.  In the cases of the XML, HTML and XHTML
output methods, this phase produces the character representations of the 
following:</phrase></p>

<ulist>
<item><p diff="add" at="G">the document type declaration;
</p></item>

<item><p diff="add" at="G">start tags and end tags (except for
attribute values, whose representation is produced by the character
expansion phase);
</p></item>

<item><p diff="add" at="G">processing instructions; and</p></item>

<item><p diff="add" at="G">comments.</p></item>
</ulist>

<p diff="add" at="G">In the cases of the XML and XHTML output methods,
this phase also produces the following:

<ulist>
<item><p diff="add" at="G">the XML or text declaration; and</p></item>
<item><p diff="add" at="G">empty element tags (except for the attribute
values).</p></item>
</ulist>

In the case of the text output method, this phase has no effect.
</p>
</item>

<!-- In response to qt-2004Feb1040-01, merged descriptions of
     Character expansion and Unicode Normalization -->
<item><p><emph>Character expansion</emph> is concerned with the
representation of characters appearing in text and attribute nodes in
the normalized sequence. The
substitution processes that apply are listed below, in priority
order: a character that is handled by one process in this list will
be unaffected by processes appearing later in the list,

<phrase diff="add" at="G">except that a character affected by Unicode
normalization MAY be affected by creation of CDATA sections and by
character escaping:</phrase>
</p>

<ulist>
<item><p>URI escaping (in the case of URI-valued attributes in the
HTML and XHTML output methods), as determined by the
<code>escape-uri-attributes</code> parameter</p></item>

<item><p>Character mapping, as determined by the
<code>use-character-maps</code> parameter.

<phrase diff="add" at="G">Text nodes that are children of elements
specified by the <code>cdata-section-elements</code> parameter are not
affected by this step.</phrase>
</p></item>

<item><p>Unicode normalization, if requested by the
<!-- Changed parameter name under qt-2004Jan0019-04 and qt-2004Feb0362-10 -->
<code diff="del" at="E">normalize-unicode</code>
<code diff="add" at="E">normalization-form</code>
parameter.
<phrase diff="del" at="G">Unicode normalization is
applied to the character stream that results after all markup
generation and character expansion has taken place.</phrase>
</p>
<p diff="add" at="E">For the definitions of the various normalization
forms, see <bibref ref="CHARMOD"/>.</p>
<p diff="add" at="E">The meanings associated with the possible values of
the <code>normalization-form</code> parameter are as follows:
<ulist>
  <item>
    <p><code>NFC</code> specifies the serialized result will be
       in Unicode Normalization Form C.</p>
  </item>
  <item>
    <p><code>NFD</code> specifies the serialized result will be
       in Unicode Normalization Form D.</p>
  </item>
  <item>
    <p><code>NFKC</code> specifies the serialized result will be
       in Unicode Normalization Form KC.</p>
  </item>
  <item>
    <p><code>NFKD</code> specifies the serialized result will be
       in Unicode Normalization Form KD.</p>
  </item>
  <item>
    <p><code>fully-normalized</code> specifies the serialized result
       will be in fully normalized form.</p>
  </item>
  <item>
    <p><code>none</code> specifies that no Unicode normalization will
       be applied.</p>
  </item>
  <item>
    <p>An <termref def="impdef">implementation-defined</termref> value
       has an <termref def="impdef">implementation-defined</termref>
       effect.</p>
  </item>
</ulist></p>

<!-- Removed again in response to qt-2004Feb1040-01 -->
<p diff="del" at="G">
<!-- Note added in response to qt-2004Feb0362-19 -->
<note><p diff="add" at="F">Any characters produced under the effect
of the <code>use-character-maps</code> parameter are not subject to
Unicode normalization.  If the <code>normalization-form</code>
parameter has a value other than <code>none</code> and the
<code>use-character-maps</code> parameter is not empty, the whole
of the serialized document MAY NOT be in the normalization form
specified by the <code>normalization-form</code> parameter.</p></note>
</p>
</item>

<item><p>Creation of CDATA sections, as determined by the
<code>cdata-section-elements</code> parameter. Note that this is also
affected by the <code>encoding</code> parameter, in that characters
not present in the selected encoding cannot be represented in a CDATA
section.</p></item>

<item><p>Escaping according to XML or HTML rules
 of special characters
<phrase diff="add" at="G">and of characters that cannot be represented in the
selected encoding.</phrase>
For example, replacing <code>&lt;</code> with
<code>&amp;lt;</code>.</p></item>
</ulist>
</item>

<item><p><emph>Encoding</emph>, as controlled by the
<code>encoding</code> parameter,

<phrase diff="del" at="G">This</phrase>

converts the character stream
produced by the previous phases into a byte stream.</p>

<!-- Note added in response to qt-2004Feb0362-01 -->
<note><p diff="add" at="E">Serialization is only defined in terms of
encoding the result as a stream of bytes.  However, a 

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MAY provide an option that allows the encoding phase to be skipped, so
that the result of serialization is a stream of Unicode characters.
The effect of any such option is
<termref def="impdef">implementation-defined</termref>, and a

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

is not REQUIRED to support such an option.</p></note>
</item>
</olist>
</div1>

<div1 id="xml-output">
<head>XML Output Method</head>

<p>The <code>xml</code> output method outputs the

<!-- Use "sequence" instead of "instance of data model" - qt-2004Feb1204-01 -->
<phrase diff="del" at="G">instance of the data model</phrase>
<phrase diff="add" at="G">normalized sequence</phrase>

as an XML entity that
<phrase diff="chg" at="C">MUST</phrase> satisfy the rules for
either a well-formed XML document entity or a well-formed XML
external general parsed entity, or both.

<!-- Rewording in response to qt-2004Feb0265-01 -->
<phrase diff="add" at="G">A <termref def="serial-err">serialization error</termref> results if the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

</phrase>
<phrase diff="add" at="C">
<phrase diff="del" at="G">unless the 

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

</phrase>
is unable to
satisfy those rules,
<phrase diff="add" at="G">except for contents modified by</phrase>
<phrase diff="del" at="G">due to either <termref def="serial-err">serialization errors</termref> or the
requirements of</phrase>
the character expansion phase of serialization,
as described in <specref ref="serphases"/>,</phrase>
<phrase diff="add" at="G">which could result in the serialized output
being not well-formed but will not result in a <termref def="serial-err">serialization error</termref>.  If a
<termref def="serial-err">serialization error</termref> results, the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.</phrase>
<phrase diff="del" at="G">
<!-- Added in response to qt-2004Feb0053-01 and qt-2004Feb0932-01 -->
<phrase diff="add" at="E">If the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

is unable to satisfy those
requirements for any other reason, a <termref def="serial-err">serialization error</termref> results.  The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.</phrase>
<phrase diff="del" at="C"><phrase diff="add" at="B">Many of the requirements for the
serialized form of the instance of the data model with the <code>xml</code> output
method are described using the verb "should"; the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

might not be able to meet the requirements of the <code>xml</code>
output method due to:</phrase></phrase>
</phrase>
</p>

<ulist diff="del" at="C">
<item><p>serialization errors;</p></item>

<item><p>specification of character mapping, as determined by the
<code>use-character-maps</code> parameter, whose expansion results
in XML that is not well-formed; or</p></item>

<item><p>disabled output escaping, that results in XML that is not
well-formed.</p></item>
</ulist>

<!-- Removed in response to qt-2004Feb0053-01 and qt-2004Feb0924-01 -->
<p diff="del" at="E"><phrase diff="add" at="B">In all other circumstances,
the serialized form
MUST comply with the requirements described for the <code>xml</code>
output method.</phrase></p>

<p>If the document node of the

<!-- Use "sequence" instead of "instance of data model" - qt-2004Feb1204-01 -->
<phrase diff="del" at="G">instance of the data model</phrase>
<phrase diff="add" at="G">normalized sequence</phrase>


has a single element
node child and no text node children,
<phrase diff="chg" at="C">
<!-- Changed in response to qt-2004Feb0053-01 and qt-2004Feb0932-01 -->
<phrase diff="del" at="E">and</phrase>
<phrase diff="add" at="E">then</phrase>
the serialized output
is a well-formed XML document entity, and the serialized output
MUST conform</phrase> to the
<phrase diff="add" at="G">appropriate version of the</phrase>
XML Namespaces Recommendation <bibref ref="XMLNAMES"/>
<phrase diff="add" at="G">or <bibref ref="XMLNAMES11"/></phrase>.
If the

<!-- Use "sequence" instead of "instance of data model" - qt-2004Feb1204-01 -->
<phrase diff="del" at="G">instance of the data model</phrase>
<phrase diff="add" at="G">normalized sequence</phrase>

does not take this form, <phrase diff="add" at="C">
<phrase diff="del" at="E">and</phrase>
<phrase diff="add" at="E">then</phrase>
the serialized
output is a well-formed XML external general parsed entity,</phrase>
<!-- Removed in response to qt-2004Feb0053-01 and qt-2004Feb0932-01 -->
<phrase diff="del" at="E">then the serialized output
<phrase diff="chg" at="C">must</phrase> be an
entity</phrase>
which, when referenced within a trivial XML document wrapper
like this:</p>

<eg>
<![CDATA[
<?xml version="]]><emph>version</emph><![CDATA["?>
<!DOCTYPE doc [
<!ENTITY e SYSTEM "]]><emph>entity-URI</emph><![CDATA[">
]>
<doc>&e;</doc>]]></eg>

<p>where <code>entity-URI</code> is a URI for the entity,
<phrase diff="add" at="G">and the value of the <code>version</code>
pseudo-attribute is the value of the <code>version</code>
parameter</phrase>, produces a
document which <phrase diff="chg" at="C">MUST</phrase> itself be a
well-formed XML document conforming
to the
<phrase diff="add" at="G">corresponding version of the</phrase>
XML Namespaces Recommendation <bibref ref="XMLNAMES"/>
<phrase diff="add" at="G">or <bibref ref="XMLNAMES11"/></phrase>.</p>

<p>In addition, the output <phrase diff="chg" at="C">MUST</phrase>
be such that if a new tree was
constructed by parsing the XML document and converting it into an
instance of the data model
as specified in <phrase diff="del" at="C">this document</phrase>
<bibref diff="add" at="C" ref="DataModel"/>, then the new

<!-- Use "sequence" instead of "instance of data model" - qt-2004Feb1204-01 -->
<phrase diff="del" at="G">instance of the data model</phrase>
<phrase diff="add" at="G">sequence</phrase>

would be the same as the
<!-- Changed in response to qt-2004Feb0056-01 -->
<phrase diff="del" at="E">starting</phrase>

<!-- Use "sequence" instead of "instance of data model" - qt-2004Feb1204-01 -->
<phrase diff="del" at="G">instance of the data model</phrase>
<phrase diff="add" at="G">normalized sequence</phrase>

<phrase diff="add" at="E">that resulted from the
<phrase diff="add" at="G">sequence</phrase>
normalization process
described in <specref ref="serdm"/></phrase>, with the following possible
exceptions:</p>

<ulist>
<item>
<p>If the document was produced by adding a document wrapper, as
described above, then it will contain an extra <code>doc</code>
element as the document element.</p>
</item>
<item>
<p>The order of attribute and namespace nodes in the two trees MAY be
different.</p>
</item>
<item>
<p>
<!-- Changes due to qt-2004Feb0936-01 -->
<phrase diff="del" at="E">The base URIs of nodes in the two trees MAY be
different.</phrase>
<phrase diff="add" at="E">The following properties of corresponding nodes
in the two trees MAY be different:</phrase>
<ulist diff="add" at="E">
<item><p>the base-uri property of document nodes and element nodes;</p></item>
<item><p>the document-uri and unparsed-entities properties of document
nodes;</p></item>
<item><p>the type-name and typed-value properties of element and attribute
nodes;</p></item>
<item><p>the nilled property of element nodes;</p></item>
<item><p>the content property of text nodes, due to the effect of the
<code>indent</code> and <code>use-character-maps</code>
parameters.</p></item>
</ulist>
</p>
</item>
<item>
<p>The new tree MAY contain additional attributes and text nodes resulting from the
expansion of default and fixed values in its DTD or schema.</p>
</item>
<item>
<p>The type annotations of the nodes in the two trees MAY be
different. Type annotations in a result tree are discarded when the
tree is serialized. Any new type annotations obtained by parsing the
document will 
<phrase diff="del" at="C">be derived by processing</phrase>
<phrase diff="add" at="C">depend on whether </phrase>
the serialized XML document
<phrase diff="add" at="C">is assessed</phrase>
against a schema, and this MAY result in type annotations that are
<!-- Changed in response to qt-2004Feb1198-01 -->
<phrase diff="del" at="G">either more or less precise than</phrase>
<phrase diff="add" at="G">different from</phrase>
those in the original result tree.
</p>
<note diff="add" at="C">
<!-- Note changed in response to qt-2004Feb0058-01 -->
<p diff="add" at="E">In order to influence the type annotations in the
instance of the data model that would result from processing a serialized XML document,
the author of the XSLT stylesheet, XQuery expression or other process
might wish to create the instance of the data model that is input to the
serialization process so that it makes use of mechanisms provided by
<bibref ref="XMLSCHEMA"/>, such as <code>xsi:type</code> and
<code>xsi:schemaLocation</code> attributes.  The serialization process
will not automatically create such attributes in the serialized
document if those attributes were not part of the result tree that is
to be serialized.</p>
<!-- Further information added to note in response to qt-2004Feb0064-01 -->
<p diff="add" at="E">Similarly, it is possible that an element node in
the instance of the data model that is to be serialized has the <code>nilled</code>
property with the value <code>true</code>, but no <code>xsi:nil</code>
attribute.  The serialization process will not create such an attribute
in the serialized document simply to reflect the value of the property.
The value of the <code>nilled</code> property has no direct effect on
the serialized result.
</p>
<p diff="del" at="E">In order to permit such type annotations
to be available in a data model that results from processing a
serialized XML document, the process that creates the input instance
of the data model could create it so that the serialized form
uses mechanisms provided by <bibref ref="XMLSCHEMA"/>, such as the
<code>xsi:type</code> and <code>xsi:schemaLocation</code>
attributes.</p></note>
</item>
<item><p diff="add" at="C">Additional namespace nodes MAY be present
in the new tree if the serialization process
<!-- Changed in response to qt-2004Feb0059-01 -->
<phrase diff="add" at="E">did not undeclare</phrase>
<phrase diff="del" at="E">undeclared</phrase>
<!-- Added "one or more" in response to qt-2004Feb0933-01 -->
<phrase diff="add" at="E">one or more</phrase>
namespaces,
as described in <specref ref="xml-undeclare-NS"/>,
and the starting instance of the data model contained an element node
with a namespace node that declared some prefix, but a child element
of that node did not have any namespace node that declared the same prefix.</p>

<!-- Paragraph added in response to qt-2004Feb0059-01 -->
<p diff="add" at="E">
<!-- Paragraph removed now that editor understands the
     response to qt-2004Feb0059-01
  -->
<phrase diff="del" at="F">Additional namespace nodes MAY also be present
in the new tree if the serialization process had to add namespace
declarations for attribute or element content of type <code>xs:QName</code>.
</phrase>
<phrase diff="add" at="F">The original tree MAY contain namespace nodes
that are not present in the new tree, as the process of creating an instance
of the data model MAY ignore namespace declarations in some circumstances.
See <xspecref spec="DM" ref="const-infoset-element"/> and
<xspecref spec="DM" ref="const-psvi-element"/> of <bibref ref="DataModel"/>
for additional information.
</phrase>

<!-- Added ednote to ask question about qt-2004Feb0059-01 -->
<!-- Deleted ednote now that issue qt-2004Feb0059-01 is resolved -->
<ednote diff="del" at="F"><edtext>
We've talked about this a couple of times now, but I'm still concerned
about the preceding paragraph.  This text was added in response to issue
<loc href="http://www.w3.org/XML/Group/xsl-query-specs/last-call-comments/xquery-serialization/issues.html#qt-2004Feb0059-01">qt-Feb0059-01</loc>.
Today, XSLT describes a
<loc href="http://lists.w3.org/Archives/Member/member-query-specs/2004Jun/att-0001/Overview-diff.html#namespace-fixup">
 namespace fixup</loc> procedure that ensures namespaces exist
for values of type <code>xs:QName</code>.
Is the resolution for this issue requiring
the same thing for serialization?  Or something else?
</edtext></ednote>
</p>
</item>
<!-- Added item in response to qt-2004Feb0269-01 and qt-2004Feb0926-01 -->
<item><p diff="add" at="E">If the <code>indent</code> parameter has
the value <code>yes</code>,</p>
<ulist>
<item><p>additional text nodes consisting of
whitespace characters MAY be present in the new tree; and</p></item>
<item><p>text nodes in the original tree that contained only whitespace
characters MAY correspond to text nodes in the new tree that contain additional
whitespace characters that were not present in the original tree</p></item>
</ulist>
<p>See <specref ref="xml-indent"/> for more information on the
<code>indent</code> parameter.</p>
</item>
<item><p diff="add" at="C">Additional nodes MAY be present in the
new tree

<!-- Added clarification in response to qt-2004Feb0362-23 [29] -->
<phrase diff="add" at="G">due to the effect of character mapping in the
character expansion phase</phrase>,

and the values of attribute nodes and text nodes in the
new tree MAY be different from those in the original tree, due to

<!-- Added clarification in response to qt-2004Feb0362-23 [29] -->
<phrase diff="add" at="G">the effects of URI expansion, character mapping
and Unicode normalization in</phrase>

the character expansion phase of serialization.
</p>
<!-- Added note in response to qt-2004Feb0060-01, qt-2004Feb0268-01 and
     qt-2004Feb0927-01 -->
<note>
<p diff="add" at="E">The <code>use-character-maps</code> parameter can
cause arbitrary characters to be inserted into the serialized XML document
in an unescaped form, including characters that would be considered to be
part of XML markup.  Such characters could result in arbitrary new element
nodes, attribute nodes, and so on, in the new tree that results from
processing the serialized XML document.</p>
</note>
</item>
</ulist>

<!--
   Dropped Henry's editorial comments.
<ednote><edtext>This section seems to become increasingly unwieldy.  The
list of exceptions seems endless.  Should
we consider a completely different approach to the description of
serialization other than the "round-trip" definition?  A constructive
definition, perhaps?
</edtext></ednote>
-->

<p>A consequence of this rule is that certain

<!-- Removed in response to qt-2004Feb0272-01 -->
<phrase diff="del" at="G">whitespace</phrase>

characters
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output as character
references, to ensure that they survive
the round trip through serialization and parsing.
<!-- Added in response to qt-2004Feb0362-12 -->
<phrase diff="add" at="E">Specifically, CR, NEL and LINE
SEPARATOR characters in text nodes MUST be output respectively as
"<code>&amp;#xD;</code>", "<code>&amp;#x85;</code>", and
"<code>&amp;#x2028;</code>", or their equivalents; while CR, NL, TAB, NEL and
LINE SEPARATOR characters in attribute nodes MUST be output respectively
as "<code>&amp;#xD;</code>", "<code>&amp;#xA;</code>", "<code>&amp;#x9;</code>",
"<code>&amp;#x85;</code>", and "<code>&amp;#x2028;</code>", or their
equivalents.</phrase>


<!-- Added in response to qt-2004Feb0272-01 -->
<phrase diff="add" at="G">In addition, the non-whitespace control characters
#x1 through #x1F and #x7F through #x9F in text nodes and attribute nodes MUST be
output as character references.
</phrase>

<!-- Removed in response to qt-2004Feb0362-12 -->
<phrase diff="del" at="E">Specifically, CR
characters in text nodes <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be written as
<code>&amp;#xD;</code> or an equivalent; while CR, NL, and TAB
characters in attribute nodes <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output respectively as
<code>&amp;#xD;</code>, <code>&amp;#xA;</code>, and
<code>&amp;#x9;</code>, or their equivalents.</phrase>
</p>

<p>For example, an attribute with the value "x" followed by "y"
separated by a newline will result in the output
<code>"x&amp;#xA;y"</code> (or with any equivalent character
reference). The XML output cannot be "x" followed by a literal newline
followed by a "y" because after parsing, the attribute value would be
<code>"x y"</code> as a consequence of the XML attribute normalization
rules.</p>

<!-- Replace content of note in response to qt-2004Feb0362-12 -->
<note><p diff="add" at="E">XML 1.0 did not permit
an XML processor to normalize
NEL or LINE SEPARATOR characters to a LINE FEED character.  However, if
a document entity that specifies version 1.1 invokes an external general
parsed entity with no text declaration or a text declaration that specifies
version 1.0, the external parsed entity is processed according to the rules
of XML 1.1.  For this reason, NEL and LINE SEPARATOR characters in text and
attribute nodes MUST always be escaped using character references,

<!-- Removed in response to qt-2004Feb0272-01 -->
<phrase diff="del" at="G">or CDATA sections</phrase>

regardless of the value of the <code>version</code>
parameter.
</p>

<!-- Added in response to qt-2004Feb0272-01 -->
<p diff="add" at="G">
XML 1.0 permitted control characters in the range #x7F through #x9F
to appear as literal characters in an XML document, but XML 1.1
requires such characters, other than NEL,
to be escaped as character references.  An
external general parsed entity with no text declaration or a text
declaration that specifies a version pseudo-attribute with value
<code>1.0</code> that is invoked by an XML 1.1 document entity MUST
follow the rules of XML 1.1.  Therefore, the non-whitespace control
characters in the ranges #x1 through #x1F and #x7F through #x9F,
other than NEL, MUST
always be escaped, regardless of the value of the version parameter.
</p>

<p diff="del" at="E">To anticipate the proposed changes to
end-of-line handling in
XML 1.1, a <termref def="serializer">serializer</termref> MAY also output the characters x85 and x2028
as character references. This will not affect the way they are
interpreted by an XML 1.0 parser.</p></note>

<p>It is a <termref def="serial-err">serialization error</termref> to request the output of a document
type declaration, or of a <code>standalone</code> parameter, if the
instance of the data model contains text nodes or multiple element nodes as children
of the root node. The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="del" at="B">MAY signal the error, or MAY recover</phrase>
<phrase diff="add" at="B">MUST either signal the error, or recover</phrase>
by ignoring the request to output a document type declaration or
<code>standalone</code> parameter.</p>

<p>The result of serialization using the XML output method is not
guaranteed to be well-formed XML if character maps have been specified
(see <specref ref="character-maps"/>).

<!-- Removed reference to user-defined functions in response to
     qt-2004Feb0362-21
  -->
<phrase diff="del" at="G">or if nodes in the instance of the
data model contain characters that are invalid in XML (introduced, perhaps, by
calling a user-written extension function: this is an error, but the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

is not REQUIRED to signal it).</phrase>
</p>

<!-- Added new subdivision in response to qt-2004Feb1200-01 -->
<div2>
<head>The Influence of Serialization Parameters upon the XML Output Method</head>

<div3>
<head>XML Output Method: the <code>version</code> Parameter</head>

<p>The <code>version</code> parameter specifies the version of XML

<!-- Added ref. to Namespaces in response to qt-2004Feb0062-01 -->
<phrase diff="add" at="G">and the version of Namespaces in XML</phrase>

to
be used for outputting the instance of the data model. If the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

does not support this version of XML,
<!-- Changed in response to qt-2004Feb0061-01.  It's now a serialization
     error for the version parameter to specify a value that is not
     supported by the processor.
  -->
<phrase diff="del" at="F"> it <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> use a version of XML that it
does support.</phrase>
<phrase diff="add" at="F">it MUST signal a <termref def="serial-err">serialization error</termref>.</phrase>

The version output in the XML declaration (if an XML
declaration is output) <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> correspond to the version of XML that
the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

used for outputting the instance of the data model. The value of the
<code>version</code> parameter <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> match the
<xnt spec="XML" ref="NT-VersionNum"/>
production of the XML Recommendation <bibref ref="XML"/>.</p>

<!-- Added details in response to qt-2004Feb0062-01 -->
<p diff="add" at="G">If the serialized result would contain an
<xnt spec="Names" ref="NT-NCName"/> that contains a character that is not
permitted by the version of Namespaces in XML specified by the
<code>version</code> parameter, a <termref def="serial-err">serialization error</termref> results.
The <termref def="serializer">serializer</termref> MUST signal the error.</p>

<p diff="add" at="G">If the serialized result would contain a character
that is not permitted by the version of XML specified by the
<code>version</code> parameter, a <termref def="serial-err">serialization error</termref> results.  The
<termref def="serializer">serializer</termref> MUST signal the error.</p>

<example><p diff="add" at="G">For example, if the <code>version</code>
parameter has the value <code>1.0</code>, and the instance of the data
model contains a non-whitespace control character in the range #x1 to
#x1F, a <termref def="serial-err">serialization error</termref> results.
If the <code>version</code> parameter has the value <code>1.1</code>
and a comment node in the instance of the data model contains a
non-whitespace control character in the range #x1 to #x1F or a
control character other than NEL in the range #x7F to #x9F, a
<termref def="serial-err">serialization error</termref> results.</p></example>
</div3>
<div3>
<head>XML Output Method: the <code>encoding</code> Parameter</head>

<p>The <code>encoding</code> parameter specifies the
<phrase diff="del" at="F">preferred</phrase>
encoding to use for outputting the instance of the data model.

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">Processors</phrase>
<phrase diff="add" at="G"><termref def="serializer">Serializers</termref>
</phrase>

are REQUIRED to support values of <code>UTF-8</code> and
<code>UTF-16</code>. A <termref def="serial-err">serialization error</termref> occurs if an output
encoding other than <code>UTF-8</code> or <code>UTF-16</code> is
requested and the <termref def="serializer">serializer</termref>
does not support that encoding. The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="add" at="B">MUST either</phrase>
<phrase diff="del" at="B">MAY</phrase> signal the error, or
<phrase diff="del" at="B">MAY</phrase> recover by using
<code>UTF-8</code> or <code>UTF-16</code> instead. The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST NOT use an encoding whose name does not match the
<xnt spec="XML" ref="NT-EncName"/>
production of the XML Recommendation <bibref ref="XML"/>.
<!-- Removed in response to qt-2004Feb0977-01 -->
<phrase diff="del" at="E">If no
<code>encoding</code> parameter is specified, then the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> use either
<code>UTF-8</code> or <code>UTF-16</code>.</phrase></p>

<p>When outputting a newline character in the instance of the data model, the
<termref def="serializer">serializer</termref> is free to represent it using any character sequence
that will be normalized to a newline character by an XML parser,
unless a specific mapping for the newline character is
provided in a character map: see <specref ref="character-maps"/>.</p>

<p>When outputting any other character that is defined in the
selected encoding, the character <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output
using the correct representation of that character in the selected encoding.</p>

<p>It is possible that the instance of the data model will contain a character that
cannot be represented in the encoding that the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

is using for
output. In this case, if the character occurs in a context where XML
recognizes character references (that is, in the value of an attribute
node or text node), then the character <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output as a character
reference. A <termref def="serial-err">serialization error</termref> occurs if such a character appears in
a context where character references are not allowed (for example if
the character occurs in the name of an element). The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
signal the error.
</p>

<!-- Added more examples in response to qt-2004Feb1201-01 -->
<example>
<p diff="add" at="G">For example,
if a text node contains the character LATIN SMALL LETTER E WITH ACUTE (#xE9),
and the value of the <code>encoding</code> parameter is
<code>US-ASCII</code>, the character MUST be serialized as a character
reference.  If a comment node contained the same character, a
<termref def="serial-err">serialization error</termref> would result.
</p>
</example>

</div3>
<div3 id="xml-indent">
<head>XML Output Method: the <code>indent</code> Parameter</head>

<p>If the <code>indent</code> parameter has the value
<code>yes</code>, then the <code>xml</code> output method MAY output
whitespace in addition to the whitespace in the instance of the data model (possibly
based on whitespace stripped from either the source document or the
stylesheet, <phrase diff="add" at="C">in the case of XSLT, or
guided by other means that might depend on the host language,
in the case of an instance of the data model created using some other process</phrase>)
in order to indent the result nicely; if the
<code>indent</code> parameter has the value <code>no</code>, it
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
NOT output any additional whitespace.
<phrase diff="chg" at="B">If the <code>xml</code>
output method does output additional whitespace,</phrase> it
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> use an
algorithm to output additional whitespace that satisfies the
following constraints:</p>

<ulist>
<item><p>Whitespace characters MUST NOT be added adjacent to a text
node that contains non-whitespace characters.</p></item>

<item><p>Whitespace MAY only be added adjacent to an element node,
that is, immediately before a start tag or immediately after an end
tag.</p></item>

<item><p>The new whitespace characters MAY replace existing whitespace
characters in the same position, for example a tab MAY be inserted as
a replacement for existing spaces. However, existing whitespace MUST
NOT be removed without such a replacement.</p></item>

<item><p>Whitespace characters MUST NOT be inserted in a part of the
result document that is controlled by an
<!-- Changed wording in response to qt-2004Feb0978-01 -->
<phrase diff="del" at="E"><code>xml:space="preserve"</code> attribute.</phrase>
<phrase diff="add" at="E"><code>xml:space</code> attribute with value
<code>preserve</code>.  (See <bibref ref="XML"/> for more information
about the <code>xml:space</code> attribute.)</phrase>
</p></item>

<!-- Added in response to qt-2004Feb0930-01 -->
<item><p diff="add" at="G">Whitespace characters SHOULD NOT be added in
places where the characters would be significant &mdash; for example, in the
content of an element whose content model is known to be mixed.
</p></item>
</ulist>

<note><p>The effect of these rules is to ensure that whitespace
<phrase diff="del" at="B">MAY only be</phrase>
<phrase diff="add" at="B">is only</phrase>
added in places where (a) XSLT's <code>&lt;xsl:strip-space&gt;</code>
declaration could cause it to be removed, and
(b) it does not affect the string value of any element node with
simple content. It is usually not safe to indent document types that include elements
with mixed content.</p>
</note>

</div3>
<div3>
<head>XML Output Method: the <code>cdata-section-elements</code> Parameter</head>

<p>The <code>cdata-section-elements</code> parameter contains a list
of expanded-QNames. If the expanded-QName of the parent of a text node
is a member of the list, then the text node
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output as a
CDATA section<phrase diff="add" at="B">, except in those circumstances
described below</phrase>.</p>

<p>If the text node contains the sequence of characters
<code>]]&gt;</code>, then the currently open CDATA section
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be
closed following the <code>]]</code> and a new CDATA section opened
before the <code>&gt;</code>.</p>

<p>If the text node contains characters that are not
representable in the character encoding being used to output the
instance of the data model, then the currently open CDATA section
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be closed
before such characters, the characters
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output using
character references or entity references, and a new CDATA
section
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be opened for any further
characters in the text node.</p>

<p>CDATA sections <phrase diff="del" at="C">SHOULD</phrase>
<phrase diff="add" at="C">MUST</phrase> NOT be used except where they
have been explicitly requested by the user, either by using the
<code>cdata-section-elements</code> parameter, or by using some other
<termref def="impdef">implementation-defined</termref> mechanism.</p>

<note>
<p>This is phrased to permit an implementor to provide an option that
attempts to preserve CDATA sections present in the source
document.</p>
</note>

</div3>
<div3>
<head>XML Output Method: the <code>omit-xml-declaration</code>
<!-- Added in response to qt-2004Feb0278-01 and qt-2004Feb1039-01 -->
<phrase diff="add" at="E">and <code>standalone</code></phrase>
Parameters</head>

<p>The <code>xml</code> output method
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> output an XML declaration
<!-- Used positive wording in response to qt-2004Feb0270-01 -->
<phrase diff="del" at="E">unless</phrase>
<phrase diff="add" at="E">if</phrase>
the <code>omit-xml-declaration</code> parameter has the value
<phrase diff="del" at="E"><code>yes</code></phrase>
<phrase diff="add" at="E"><code>no</code></phrase>. The XML declaration
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> include both version
information and an encoding declaration. If the
<code>standalone</code> parameter
<!-- No default parameter values in response to qt-2004Feb0976-01 -->
<phrase diff="chg">has the value <code>yes</code> or the value
<code>no</code></phrase>,
the XML declaration
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> include a
standalone document declaration with the same value as
the value of the <code>standalone</code> parameter.

<phrase diff="del" at="G">Otherwise, it</phrase>
<phrase diff="add" at="G">If the <code>standalone</code> parameter has
the value <code>none</code>, the XML declaration</phrase>

<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
NOT include a standalone document declaration; this ensures
that it is both an XML declaration (allowed at the beginning of a
document entity) and a text declaration (allowed at the beginning of
an external general parsed entity).</p>

<!-- Deleted paragraph in response to qt-2004Feb0362-15, qt-2004Feb0928-01
     and qt-2004Feb0996-01 -->
<p diff="del" at="E">The <code>omit-xml-declaration</code> parameter
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be ignored
if the <code>standalone</code> parameter
<!-- No default parameter values in response to qt-2004Feb0976-01 -->
<phrase diff="chg">has the value <code>yes</code> or the value
<code>no</code></phrase>,
or if the
<code>encoding</code> parameter specifies a value other than UTF-8 or
UTF-16.</p>

<!-- Added paragraph in response to qt-2004Feb0362-15, qt-2004Feb0928-01
     and qt-2004Feb0996-01 -->
<p diff="add" at="E">A <termref def="serial-err">serialization error</termref> results if the
<code>omit-xml-declaration</code> parameter has the value
<code>yes</code>, and
<ulist>
<item><p>the <code>standalone</code> parameter has a value other than
<code>none</code>; or
</p></item>
<item><p>the <code>version</code> parameter has a value other than
<code>1.0</code> and the <code>doctype-system</code>
parameter is specified.</p></item>
</ulist>
The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.
</p>

</div3>
<div3>
<head>XML Output Method: the <code>doctype-system</code>
and <code>doctype-public</code> Parameters</head>

<p>If the <code>doctype-system</code> parameter is specified, the
<code>xml</code> output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> output a document type
declaration immediately before the first element. The name following
<code>&lt;!DOCTYPE</code> <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be the name of the first element,
<phrase diff="add" at="B">if any</phrase>. If
the <code>doctype-public</code> parameter is also specified, then the
<code>xml</code> output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> output <code>PUBLIC</code>
followed by the public identifier and then the system identifier;
otherwise, it <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> output <code>SYSTEM</code>
followed by the system
identifier. The internal subset <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be empty. The
<code>doctype-public</code> parameter <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be ignored unless the
<code>doctype-system</code> parameter is specified.</p>

</div3>
<div3 id="xml-undeclare-NS">
<head>XML Output Method: the <code>undeclare-namespaces</code> Parameter</head>

<p>The Data Model allows an element

<!-- Clarified in response to qt-2004Feb0052-01 -->
<phrase diff="del" at="G">to have fewer in-scope namespaces
than its parent.</phrase>
<phrase diff="add" at="G">node that binds a non-empty prefix to have
a child element node that does not bind that same prefix.</phrase>

In

<phrase diff="del" at="G">XML 1.1,</phrase>
<phrase diff="add" at="G">Namespaces in XML 1.1
(<bibref ref="XMLNAMES11"/>),</phrase>

this can be represented

<!-- Removed in response to qt-2004Feb1203-01 -->
<phrase diff="del" at="E">most</phrase>
accurately by undeclaring
namespaces.  If the <code>undeclare-namespaces</code> parameter has the value
<code>yes</code> and
the output method is XML and the <code>version</code> is greater than
<phrase diff="del" at="B">1.1</phrase>
<phrase diff="add" at="B">1.0</phrase>,

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">serialization</phrase>
<phrase diff="add" at="G">the <termref def="serializer">serializer</termref></phrase>

<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> undeclare namespaces.</p>

<!-- Fixed to account for xml prefix in response to qt-2004Feb1206-01 -->
<example>
<p>Consider an element <code>x:foo</code> with
<phrase diff="del" at="G">three</phrase>
<phrase diff="add" at="G">four</phrase>
in-scope namespaces
<phrase diff="add" at="G">that associate prefixes with URIs as follows:</phrase>
</p>

<ulist>
<item><p diff="add" at="G"><code>x</code> is associated with
<code>http://example.org/x</code></p></item>
<item><p diff="add" at="G"><code>y</code> is associated with
<code>http://example.org/y</code></p></item>
<item><p diff="add" at="G"><code>z</code> is associated with
<code>http://example.org/z</code></p></item>
<item><p diff="add" at="G"><code>xml</code> is associated with
<code>http://www.w3.org/XML/1998/namespace</code></p></item>
</ulist>

<eg diff="del" at="G">&lt;x:foo xmlns:x="http://example.org/x"
       xmlns:y="http://example.org/y"
       xmlns:z="http://example.org/z"&gt;</eg>

<p>Suppose that it has a child element <code>x:bar</code> with
<phrase diff="del" at="G">two</phrase>
<phrase diff="add" at="G">three</phrase>
in-scope namespaces:</p>

<ulist>
<item><p diff="add" at="G"><code>x</code> is associated with
<code>http://example.org/x</code></p></item>
<item><p diff="add" at="G"><code>y</code> is associated with
<code>http://example.org/y</code></p></item>
<item><p diff="add" at="G"><code>xml</code> is associated with
<code>http://www.w3.org/XML/1998/namespace</code></p></item>
</ulist>

<eg diff="del" at="G">&lt;x:bar xmlns:x="http://example.org/x"
       xmlns:y="http://example.org/y"&gt;...</eg>

<p>If namespace undeclaration is in effect, it will be serialized this way:</p>

<eg>&lt;x:foo xmlns:x="http://example.org/x"
       xmlns:y="http://example.org/y"
       xmlns:z="http://example.org/z"&gt;
      &lt;x:bar xmlns:z=""&gt;...&lt;/x:bar&gt;
&lt;/x:foo&gt;</eg>
</example>

<p>In

<phrase diff="del" at="G">XML 1.0,</phrase>
<phrase diff="add" at="G">Namespaces in XML (<bibref ref="XMLNAMES"/>),</phrase>

namespace undeclaration is not possible.
<phrase diff="add" at="C">If the output method is <code>xml</code>,
<phrase diff="add" at="F">the value of the <code>undeclare-namespaces</code>
parameter is <code>yes</code>,</phrase>
and the value of the <code>version</code> parameter is 1.0,
<!-- Conflicts in parameters now serialization error qt-2004Feb0976-01 -->
<phrase diff="add" at="E">a <termref def="serial-err">serialization error</termref> results; the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.</phrase>
<phrase diff="del" at="E">namespace
undeclaration is not performed, and the <code>undeclare-namespace</code>
parameter is ignored.</phrase></phrase>
</p>

</div3>
<div3>
<head>XML Output Method: the <code>normalization-form</code> Parameter</head>

<p>The
<!-- Changed parameter under qt-2004Jan0019-04 and qt-2004Feb0362-10 -->
<code diff="del" at="E">normalize-unicode</code>
<code diff="add" at="E">normalization-form</code>
parameter is applicable for the
<code>xml</code> output method.
<phrase diff="add" at="E">The values <code>NFC</code>
and <code>none</code> MUST be supported by
the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>.
</phrase>

A <termref def="serial-err">serialization error</termref> results if the value of the
<code>normalization-form</code> parameter specifies a normalization form
that is not supported by the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref></phrase>;

the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.</phrase></p>

<p diff="add" at="E">It is a <termref def="serial-err">serialization error</termref> if the value of the
parameter is <code>fully-normalized</code> and any relevant construct
of the result begins with a combining character.  The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.  See Section 2.13 of <bibref ref="XML11"/> for the
definition of the relevant constructs of XML.</p>
</div3>
<div3>
<head>XML Output Method: Other Parameters</head>

<p>The <code>media-type</code> parameter is applicable for the
<code>xml</code> output method.
<!-- Added in response to qt-2004Feb0362-22 -->
<phrase diff="add" at="G">See <specref ref="serparam"/> for more
information.</phrase></p>

<p>The <code>use-character-maps</code> parameter is applicable for the
<code>xml</code> output method.
<!-- Added in response to qt-2004Feb0362-22 -->
<phrase at="G" diff="add">See <specref ref="character-maps"/> for
more information.</phrase></p>

<!-- Added parameter in response to qt-2004Feb0362-04 and qt-2004Feb0362-05 -->
<p diff="add" at="G">The <code>byte-order-mark</code> parameter is
applicable for the <code>xml</code> output method.  See
<specref ref="serparam"/> for more information.</p>
</div3>
</div2>
</div1>

<div1 id="xhtml-output">
<head>XHTML Output Method</head>

<p>The <code>xhtml</code> output method serializes the instance of the
data model as
XML, using the HTML compatibility guidelines defined in the XHTML
specification.</p>

<p>It is entirely the responsibility of the
<phrase diff="chg" at="C">person or process that creates the instance of
the data model</phrase>
to ensure that the instance of the data model
conforms to the <bibref ref="XHTML10"/> or
<bibref ref="XHTML11"/> specification. It is not an error if the
instance of the data model is invalid XHTML. Equally, it is entirely under the
control of the
<phrase diff="chg" at="C">person or process that creates the instance
of the data model</phrase>
whether the output conforms to XHTML
Strict, XHTML Transitional, XHTML Frameset, or XHTML Basic.</p>

<p>The serialization of the instance of the data model follows the same rules as for
the <code>xml</code> output method, with the exceptions noted below.
These differences are based on the HTML compatibility guidelines
published in Appendix C of <bibref ref="XHTML10"/>, which are designed
to ensure that as far as possible, XHTML is rendered correctly on user
agents designed originally to handle HTML.</p>

<ulist>
<item>
<p>Given an empty instance of an <phrase>XHTML</phrase> element whose
content model is not EMPTY (for example, an empty title or paragraph)
the <termref def="serializer">serializer</termref> <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> NOT use the minimized form.
That is, it <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
output <code>&lt;p&gt;&lt;/p&gt;</code> and not
<code>&lt;p&#xa0;/&gt;</code>. </p>
</item>

<item>
<p>Given an XHTML element whose content model is EMPTY, the <termref def="serializer">serializer</termref>
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> use the minimized tag syntax,
for example
<code>&lt;br&#xa0;/&gt;</code>, as the alternative syntax
<code>&lt;br&gt;&lt;/br&gt;</code> allowed by XML gives uncertain
results in many existing user agents. The <termref def="serializer">serializer</termref>
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> include a
space before the trailing <code>/&gt;</code>, e.g.
<code>&lt;br&#xa0;/&gt;</code>, <code>&lt;hr&#xa0;/&gt;</code> and
<code>&lt;img&#xa0;src="karen.jpg"&#xa0;alt="Karen"&#xa0;/&gt;</code>.</p>
</item>

<!-- Removed in response to qt-2004Feb0980-01 -->
<item diff="del" at="E">
<p>The <termref def="serializer">serializer</termref> SHOULD avoid outputting line breaks and multiple
 whitespace characters within attribute values. These are handled
 inconsistently by user agents.</p>
</item>

<item>
<p>The <termref def="serializer">serializer</termref> <phrase diff="del" at="B">SHOULD avoid use of</phrase>
<phrase diff="add" at="B">MUST NOT use</phrase> the entity reference
<code>&amp;apos;</code> which, although legal in XML and therefore in
XHTML, is not defined in HTML and is not recognized by all HTML user
agents.</p>
</item>

<item><p>The <termref def="serializer">serializer</termref> SHOULD output namespace declarations
in a way that is consistent with the requirements of the XHTML DTD if this is
possible. The DTD requires the declaration
<code>xmlns="http://www.w3.org/1999/xhtml"</code>
to appear on the <code>html</code> element, and only on the <code>html</code> element.
The <termref def="serializer">serializer</termref> MUST output namespace declarations that are consistent with
the namespace nodes present in the result tree, but it SHOULD avoid outputting
redundant namespace declarations on elements where the DTD would make them invalid.</p>

<note><p diff="add" at="C">Where the process used to construct
the input instance of the data model does not provide complete control over the prefix
used for an element name in the instance of the data model or control of whether the element is
in the default namespace (for instance, the XSLT namespace fixup process),
implementors are encouraged to provide means or endeavor to preserve the
obvious intent of a user to place the <code>html</code> element in
<!-- Deleted extraneous "in" in response to qt-2004Feb0978-01 -->
<phrase diff="del" at="E">in</phrase>
the default namespace, wherever possible.  For example, implementors
of XSLT processors are encouraged to place the <code>html</code>
element that results from a literal result element like the following in
the default namespace:</p>
<eg diff="add" at="C">&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt; ... &lt;/html&gt;</eg>
<p diff="del" at="C">Although the specification of the namespace
fixup process provides no guarantees about the namespace prefixes that
are allocated,
implementors are encouraged to ensure that where possible,
writing the literal result element
<code>&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt; ... &lt;/html&gt;</code>
places the resulting <code>html</code> element in the default namespace.</p></note>
</item>

<item>
<p>If <phrase>the instance of the data model includes a <code>head</code> element in
the XHTML namespace</phrase>,
<!-- Used positive wording in response to qt-2004Feb0270-01 -->
<phrase diff="del" at="E">then unless</phrase>
<phrase diff="add" at="E">and</phrase>
the <code>include-content-type</code> parameter has the value
<phrase diff="del" at="E"><code>"no"</code></phrase>
<phrase diff="add" at="E"><code>yes</code></phrase>,
the <code>xhtml</code> output method
<phrase diff="chg" at="C">MUST</phrase>
add a <code>meta</code> element immediately after the start-tag of the
<code>head</code> element specifying the character encoding actually
used.</p>

<example>
<p>For example,</p>
<eg><![CDATA[<head>
<meta http-equiv="Content-Type" content="text/html; charset=EUC-JP"/>
...]]></eg>
</example>

<p>The content type SHOULD be set to the value given for the
<code>media-type</code> parameter; the default value for XHTML is
<code>text/html</code>. The value <code>application/xhtml+xml</code>,
registered in <bibref ref="RFC3236"/>, MAY also be used.</p>

<p diff="add" at="C">If the instance of the data model includes a <code>head</code>
element that has a <code>meta</code> element child, the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

SHOULD replace any <code>content</code> attribute of the <code>meta</code>
element, or add such an attribute, with the value as described above,
rather than output a new <code>meta</code> element. </p>
</item>

<item>
<p>
<!-- Used positive wording in response to qt-2004Feb0270-01 -->
<phrase diff="del" at="E">Unless</phrase>
<phrase diff="add" at="E">If</phrase>
the <code>escape-uri-attributes</code> parameter
has the value
<phrase diff="del" at="E"><code>no</code></phrase>
<phrase diff="add" at="E"><code>yes</code></phrase>,
the <code>xhtml</code> output
method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> escape non-ASCII characters in URI
attribute values using the method

<!-- Changed reference in response to qt-2004Feb0362-07 -->
<phrase diff="add" at="G">defined by Section 5.4 of
<bibref ref="XLINK"/>, except that relative URIs MUST NOT be absolutized.
</phrase>
<phrase diff="del" at="G">
RECOMMENDED in Section 2.4.1 of <bibref ref="RFC2396"/>.
</phrase>
</p>
</item>
</ulist>

<note>
<p>This escaping is deliberately confined to non-ASCII characters,
because escaping of ASCII characters is not always appropriate, for
example when URIs or URI fragments are interpreted locally by the HTML
user agent. Even in the case of non-ASCII characters, escaping can
sometimes cause problems. More precise control of URI escaping is
therefore available by setting <code>escape-uri-attributes</code> to
<code>no</code>, and controlling the escaping of URIs by means of the
<function>fn:escape-uri</function> function defined in <bibref ref="FANDO"/>.</p>
</note>

<note><p>As with the XML output method, the XHTML
output method outputs an XML declaration unless it is suppressed using
the <code>omit-xml-declaration</code> parameter. Appendix C.1 of 
<bibref diff="chg" at="C" ref="XHTML10"/>
provides advice on the consequences of including,
or omitting, the XML declaration.</p></note>

<!-- Added note in response to qt-2004Feb0980-01 -->
<note><p diff="add" at="E">Appendix C of <bibref ref="XHTML10"/> describes
a number of compatibility guidelines for users of XHTML who wish to
render their XHTML documents with HTML user agents.  In some cases, such
as the guideline on the form empty elements SHOULD take, only the
serialization process itself has the ability to follow the guideline.  In
such cases, those guidelines are reflected in the requirements on the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

described above.</p>
<p diff="add" at="E">In all other cases, the guidelines can be
adhered to by the instance of the data model that is input to the serialization
process.  The guideline on the use of whitespace characters in attribute
values is one such example.  It is the responsibility of the person or
process that creates the instance of the data model that is input to the
serialization process to ensure it is created in a way that is consistent
with the guidelines.  No <termref def="serial-err">serialization error</termref> results if the input instance
of the data model does not adhere to the guidelines.</p></note>
</div1>

<div1 id="html-output">
<head>HTML Output Method</head>

<p>The <code>html</code> output method outputs the instance of the data model as
HTML.</p>

<example>
<p>For example,</p>
<eg><![CDATA[<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:output method="html"/>

<xsl:template match="/">
<html>
<xsl:apply-templates/>
</html>
</xsl:template>

...

</xsl:stylesheet>]]></eg>
</example>

<p>The <code>version</code> parameter indicates the version of the
HTML
<!-- Added in response to qt-2004Feb0976-01 and qt-2004Feb0977-01 -->
<phrase diff="add" at="E">Recommendation <bibref ref="HTML"/> to which
the serialized result is to conform</phrase>.
<!-- Deleted in response to qt-2004Feb0976-01 and qt-2004Feb0977-01 -->
<phrase diff="del" at="E">
The default value is <code>4.0</code>, which specifies that the
result <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="del" at="C"><phrase diff="add" at="B">MUST</phrase></phrase>
<phrase diff="add" at="C">SHOULD</phrase>
be output as HTML conforming to the HTML 4.0
Recommendation <bibref ref="HTML"/>.</phrase>
<!-- Changed in response to qt-2004Feb0061-01.  It's now a serialization
     error for the version parameter to specify a value that is not
     supported by the processor.
  -->
<phrase diff="add" at="F">If the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

does not support the version of
HTML specified by the <code>version</code> parameter, it MUST signal a
<termref def="serial-err">serialization error</termref>.</phrase>
</p>

<!-- Added new subdivision in response to qt-2004Feb1200-01 -->
<div2>
<head>The Influence of Serialization Parameters upon the HTML Output Method</head>

<div3>
<head>HTML Output Method: Markup for Elements</head>

<p>The <code>html</code> output method
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> NOT output an element
differently from the <code>xml</code> output method unless the
expanded-QName of the element has a null namespace URI; an element
whose expanded-QName has a non-null namespace URI
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output as
XML. If the expanded-QName of the element has a null namespace URI,
but the local part of the expanded-QName is not recognized as the name
of an HTML element, the element <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output in the same way as a
non-empty, inline element such as <code>span</code>. In particular:</p>

<olist>
<item><p>If the result tree contains namespace nodes for namespaces other than the
XML namespace, the HTML output method <phrase diff="del" at="B">will</phrase>
<phrase diff="add" at="B">MUST</phrase> represent these namespaces using
attributes named <code>xmlns</code> or <code>xmlns:</code><emph>prefix</emph>
in the same way as the XML output method would represent them when the
version parameter is set to 1.0.</p>
</item>

<item><p>If the result tree contains elements or attributes whose names have a
non-null namespace URI, the HTML output method
<phrase diff="del" at="B">will</phrase>
<phrase diff="add" at="B">MUST</phrase> generate
namespace-prefixed QNames for these nodes in the same way as the XML output
method would do when the version parameter is set to 1.0.</p></item>

<item><p>Where special rules are defined later in this section for
serializing specific HTML elements and attributes, these rules
<phrase diff="del" at="B">are never</phrase>
<phrase diff="add" at="B">MUST NOT be</phrase> 
applied to an element or attribute whose name has a non-null
namespace URI. However, the generic rules for the HTML output method
that apply to all elements and attributes, for example the rules for
escaping special characters in the text and the rules for indentation,
MUST be used also for namespaced elements and attributes.</p>
</item>

<item><p>When serializing an element whose name is not defined in the
HTML specification, but that is in the null namespace, the HTML output
method
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
apply the same rules (for example, indentation rules) as
when serializing a <code>span</code> element. The descendants of such
an element <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be serialized as if they were descendants of a
<code>span</code> element.</p>
</item>

<item><p>When serializing an element whose name is in a non-null
namespace, the HTML output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> apply the same rules (for
example, indentation rules) as when serializing a <code>div</code>
element. The descendants of such an element
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be serialized as if
they were descendants of a <code>div</code> element.</p>
</item>
</olist>

<p>The <code>html</code> output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> NOT output an end-tag
for empty elements. For HTML 4.0, the empty elements are
<code>area</code>, <code>base</code>, <code>basefont</code>,
<code>br</code>, <code>col</code>, <code>frame</code>,
<code>hr</code>, <code>img</code>, <code>input</code>,
<code>isindex</code>, <code>link</code>, <code>meta</code> and
<code>param</code>. For example, an element written as
<code>&lt;br/&gt;</code> or <code>&lt;br&gt;&lt;/br&gt;</code> in an
XSLT stylesheet <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output as <code>&lt;br&gt;</code>.</p>

<p>The <code>html</code> output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> recognize the names of
HTML elements regardless of case. For example, elements named
<code>br</code>, <code>BR</code> or <code>Br</code> <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> all be
recognized as the HTML <code>br</code> element and output without an
end-tag.</p>

<p>The <code>html</code> output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> NOT perform escaping for
the content of the <code>script</code> and <code>style</code>
elements.</p>

<example>
<p>For example, <phrase diff="add" at="C">a <code>script</code> element
created by an XQuery direct element constructor or an XSLT</phrase>
literal result element, such as:</p>
<eg><![CDATA[<script>if (a &lt; b) foo()</script>]]></eg>
<p>or</p>
<eg><![CDATA[<script><![CDATA[if (a < b) foo()]]]]><![CDATA[></script>]]></eg>
<p><phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output as</p>
<eg><![CDATA[<script>if (a < b) foo()</script>]]></eg>
</example>

<example>
<p>A common requirement is to output a <code>script</code> element
as shown in the example below:</p>

<eg><![CDATA[<script type="text/javascript">
      document.write ("<em>This won't work</em>")
</script>]]></eg>

<p>This is illegal HTML, for the reasons explained in section B.3.2 of
the HTML 4.01 specification. Nevertheless, it is possible to output
this fragment, using either of the following constructs:</p>

<p>Firstly, by use of <phrase diff="add" at="C">a <code>script</code> element
created by an XQuery direct element constructor or an
XSLT</phrase> literal result element:</p>

<eg><![CDATA[<script type="text/javascript">
      document.write ("<em>This won't work</em>")
</script>]]></eg>

<p>Secondly, by constructing the markup from ordinary text characters:</p>

<eg><![CDATA[<script type="text/javascript">
      document.write ("&lt;em&gt;This won't work&lt;/em&gt;")
</script>]]></eg>

<p>As the HTML specification points out, the correct way to write this
is to use the escape conventions for the specific scripting language.
For JavaScript, it can be written as:</p>

<eg><![CDATA[<script type="text/javascript">
      document.write ("&lt;em&gt;This will work&lt;\/em&gt;")
</script>]]></eg>

<p>The HTML 4.01 specification also shows examples of how to write
this in various other scripting languages. The escaping MUST be done
manually; it will not be done by the <termref def="serializer">serializer</termref>.</p>
</example>

</div3>
<div3>
<head>HTML Output Method: Writing Attributes</head>

<p>The <code>html</code> output method
<phrase diff="chg" at="C">MUST</phrase> NOT escape
"<code>&lt;</code>" characters occurring in attribute values.</p>

<p>If the <code>indent</code> parameter has the value
<code>yes</code>, then the <code>html</code> output method MAY add or
remove whitespace as it outputs the instance of the data model, so long as it does
not change how an HTML user agent would render the output.</p>

<!-- Used positive wording in response to qt-2004Feb0270-01 -->
<p><phrase diff="del" at="E">Unless</phrase>
<phrase diff="add" at="E">If</phrase>
the <code>escape-uri-attributes</code> parameter
<!-- Deleted in response to qt-2004Feb0976-01 -->
<phrase diff="del" at="E">is specified and</phrase>
has the value
<phrase diff="del" at="E"><code>no</code></phrase>
<phrase diff="add" at="E"><code>yes</code></phrase>,
the <code>html</code> output method
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>

escape non-ASCII characters in URI attribute values using the
method

<!-- Changed reference in response to qt-2004Feb0362-07 -->
<phrase diff="add" at="G">defined by Section 5.4 of
<bibref ref="XLINK"/>, except that relative URIs MUST NOT be absolutized.
</phrase>
<phrase diff="del" at="G">
 RECOMMENDED in <bibref ref="RFC2396"/> (section 2.4.1).
</phrase>
</p>

<note>
<p>This escaping is deliberately confined to non-ASCII characters,
because escaping of ASCII characters is not always appropriate, for
example when URIs or URI fragments are interpreted locally by the HTML
user agent. Even in the case of non-ASCII characters, escaping can
sometimes cause problems. More precise control of URI escaping is
therefore available by setting <code>escape-uri-attributes</code> to
<code>no</code>, and controlling the escaping of URIs by means of the
<function>fn:escape-uri</function> function defined in <bibref
ref="FANDO"/>.</p>
</note>

<p>The <code>html</code> output method
<phrase diff="chg" at="C">MUST</phrase> output boolean
attributes (that is, attributes with only a single allowed value that
is equal to the name of the attribute) in minimized form.</p>

<example>
<p>For example, a start-tag <phrase diff="chg" at="C">created
using the following XQuery direct element constructor or XSLT
literal result element</phrase></p>
<eg>&lt;OPTION selected="selected"&gt;</eg>
<p><phrase diff="chg" at="C">MUST</phrase> be output as</p>
<eg>&lt;OPTION selected&gt;</eg>
</example>

<p>The <code>html</code> output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> NOT escape a
<code>&amp;</code> character occurring in an attribute value
immediately followed by a <code>{</code> character (see <loc
href="http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.7.1.1">Section
B.7.1</loc> of the HTML 4.0 Recommendation).</p>

<example>
<p>For example, a start-tag <phrase diff="chg" at="C">created
using the following XQuery direct element constructor or XSLT
literal result element</phrase></p>
<eg>&lt;BODY bgcolor='&amp;amp;{{randomrbg}};'&gt;</eg>
<p><phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output as</p>
<eg>&lt;BODY bgcolor='&amp;{randomrbg};'&gt;</eg>
</example>

</div3>
<div3><head>HTML Output Method: Indentation</head>

<p>If the <code>indent</code> parameter has the value
<code>yes</code>, then the <code>html</code> output method MAY add or
remove whitespace as it outputs the result tree, so long as it does
not change the way that a conforming HTML user agent would render the output.
<!-- Deleted in response to qt-2004Feb0976-01 and qt-2004Feb0977-01 -->
<phrase diff="del" at="E">The
default value is <code>yes</code>.</phrase></p>

<note><p>This rule can be satisfied by observing the
following constraints:</p>

<p>Whitespace MUST only be added before or after an element,
or adjacent to an existing whitespace character.</p>
<p>Whitespace MUST NOT be added or removed adjacent to an inline element.
The inline elements are those included in the <code>%inline</code>
category <phrase diff="chg" at="C">of any of the HTML 4.01
DTDs,</phrase>
<phrase diff="add" at="C">as well as the <code>INS</code> and
<code>DEL</code> elements if they are used as inline elements
(i.e., if they do not contain element children).</phrase></p>
<p>Whitespace MUST NOT be added or removed inside a formatted element,
the formatted elements being <code>pre</code>, <code>script</code>,
<code>style</code>, and <code>textarea</code>.</p>

<p>Note that the HTML definition of whitespace is different from the XML definition:
see section 9.1 of the HTML 4.01 specification.</p>
</note>

</div3>
<div3><head>HTML Output Method: Writing Character Data</head>

<p>The <code>html</code> output method MAY output a character using a
character entity reference in preference to using a numeric character
reference, if an entity is defined for the character in the version of
HTML that the output method is using. Entity references and character
references SHOULD be used only where the character is not present in
the selected encoding, or where the visual representation of the
character is unclear (as with <code>&amp;nbsp;</code>, for
example).</p>

<p>When outputting a sequence of whitespace characters in the
instance of the data model, within an element where whitespace is treated normally
<phrase>(but not in elements such as <code>pre</code> and
<code>textarea</code>)</phrase>, the <code>html</code> output method
<phrase diff="del" at="B">is free to</phrase>
<phrase diff="add" at="B">MAY</phrase>
represent it using any sequence of whitespace that will be treated
<!-- Changed in response to qt-2004Feb0362-16 -->
<phrase diff="del" at="E">as whitespace</phrase>
<phrase diff="add" at="E">in the same way</phrase>
by an HTML user agent.
See section 3.5 of <bibref ref="xhtml-mod"/> for some additional information
on handling of whitespace by an HTML user agent.
</p>

<p>Certain characters, specifically the control characters #x7F-#x9F,
are legal in XML but not in HTML. It is a
<termref def="serial-err">serialization error</termref> to use the HTML
output method when such characters appear in the instance of the data model. The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MAY signal the error, but is not REQUIRED to do so. If it
does not signal the error, it MAY copy the offending characters into
the serialized output, creating invalid HTML.</p>

<p>The <code>html</code> output method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> terminate processing
instructions with <code>&gt;</code> rather than
<code>?&gt;</code>.</p>

</div3>
<div3>
<head>HTML Output Method: Encoding</head>

<p>The <code>encoding</code> parameter specifies the

<!-- In response to qt-2004Feb0362-03:  require UTF-8, UTF-16.  Unsupported
     encoding results in serialization error
  -->
<phrase diff="del" at="F">preferred</phrase>
encoding to be used.
<phrase diff="add" at="F">

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">Processors</phrase>
<phrase diff="add" at="G"><termref def="serializer">Serializers</termref>
</phrase>

are
REQUIRED to support values of <code>UTF-8</code> and
<code>UTF-16</code>. A <termref def="serial-err">serialization error</termref> occurs if an output
encoding other than <code>UTF-8</code> or <code>UTF-16</code> is
requested and the <termref def="serializer">serializer</termref>
does not support that encoding. The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.
</phrase>
</p>

<p>If there is a <code>HEAD</code> element,
<!-- Used positive wording in response to qt-2004Feb0270-01 -->
<phrase diff="del" at="E">then unless</phrase>
<phrase diff="add" at="E">and</phrase>
the <code>include-content-type</code> parameter
<!-- Removed in response to qt-2004Feb0976-01 -->
<phrase diff="del" at="C">is specified and</phrase>
has the value
<phrase diff="del" at="E"><code>no</code></phrase>
<phrase diff="add" at="E"><code>yes</code></phrase>,
the <code>html</code> output method
<phrase diff="chg" at="C">MUST</phrase> add a <code>META</code> element
immediately after the start-tag
of the <code>HEAD</code> element specifying the character encoding
actually used.</p>

<example>
<p>For example,</p>
<eg>&lt;HEAD>
&lt;META http-equiv="Content-Type" content="text/html; charset=EUC-JP">
...</eg>
</example>

<p>The content type <phrase diff="chg" at="C">MUST</phrase>
be set to the value given for the
<!-- Deleted in response to qt-2004Feb0976-01 and qt-2004Feb0977-01 -->
<code>media-type</code> parameter<phrase diff="del" at="E">; the default value is
<code>text/html</code></phrase>.</p>

<p diff="add" at="C">If the instance of the data model includes a <code>head</code>
element that has a <code>meta</code> element child, the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

SHOULD replace any <code>content</code> attribute of the <code>meta</code>
element, or add such an attribute, with the value as described above,
rather than output a new <code>meta</code> element. </p>

<p>It is possible that the instance of the data model will contain a character that
cannot be represented in the encoding that the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

is using for
output. In this case, if the character occurs in a context where HTML
recognizes character references, then the character <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be output
as a character entity reference or decimal numeric character
reference; otherwise (for example, in a <code>script</code> or
<code>style</code> element or in a comment), the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
signal a <termref def="serial-err">serialization error</termref>.
</p>
</div3>

<div3>
<head>HTML Output Method: Document Type Declaration</head>

<p>If the <code>doctype-public</code> or <code>doctype-system</code>
parameters are specified, then the <code>html</code> output method
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
output a document type declaration immediately before the first
element. The name following <code>&lt;!DOCTYPE</code>
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> be
<code>HTML</code> or <code>html</code>. If the
<code>doctype-public</code> parameter is specified, then the output
method <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> output <code>PUBLIC</code>
followed by the specified
public identifier; if the <code>doctype-system</code> parameter is
also specified, it <phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> also output the specified
system identifier
following the public identifier. If the <code>doctype-system</code>
parameter is specified but the <code>doctype-public</code> parameter
is not specified, then the output method
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase> output
<code>SYSTEM</code> followed by the specified system identifier.</p>

</div3>

<!-- Promoted to its own section under qt-2004Jan0019-04 and qt-2004Feb0362-10 -->
<div3>
<head>HTML Output Method: Unicode Normalization</head>

<p>The
<!-- Changed parameter under qt-2004Jan0019-04 and qt-2004Feb0362-10 -->
<code diff="del" at="E">normalize-unicode</code>
<code diff="add" at="E">normalization-form</code>
parameter is applicable for the
<code>html</code> output method.
<phrase diff="add" at="E">The values <code>NFC</code> and
<code>none</code> MUST be supported by the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>.
</phrase>
A <termref def="serial-err">serialization error</termref> results if the value of the <code>normalization-form</code>
parameter specifies a normalization form that is not supported by the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>;
</phrase>

the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.</phrase></p>

</div3>

<div3>
<head>HTML Output Method: Other Parameters</head>

<p>The <code>media-type</code> parameter is applicable for the
<code>html</code> output method.
<!-- Added in response to qt-2004Feb0362-22 -->
<phrase diff="add" at="G">See <specref ref="serparam"/> for more
information.</phrase></p>

<p diff="del" at="G">The <code>use-character-maps</code> parameter is applicable for the
<code>xml</code> output method.</p>

<p>The <code>use-character-maps</code> parameter is applicable for the
<code>html</code> output method.
<!-- Added in response to qt-2004Feb0362-22 -->
<phrase diff="add" at="G">See <specref ref="character-maps"/> for more
information.</phrase></p>

<!-- Added parameter in response to qt-2004Feb0362-04 and qt-2004Feb0362-05 -->
<p diff="add" at="G">The <code>byte-order-mark</code> parameter is
applicable for the <code>html</code> output method.  See
<specref ref="serparam"/> for more information.</p>
</div3>
</div2>
</div1>

<div1 id="text-output">
<head>Text Output Method</head>

<p>The <code>text</code> output method outputs the instance of the data model by
outputting the string-value of every text node in the instance of the data model in
document order without any escaping.</p>

<p>A newline character in the instance of the data model MAY be output using any
character sequence that is conventionally used to represent a line
ending in the chosen system environment.</p>

<p>The <code>media-type</code> parameter is applicable for the
<code>text</code> output method.
<!-- Added in response to qt-2004Feb0362-22 -->
<phrase diff="add" at="G">See <specref ref="serparam"/> for more
information.</phrase></p>

<p>The <code>encoding</code> parameter identifies the encoding that
the <code>text</code> output method
<phrase diff="chg" at="C">MUST</phrase> use to convert sequences of
characters to sequences of bytes.

<!-- In response to qt-2004Feb0362-03, require support for UTF-8, UTF-16 -->
<phrase diff="add" at="F">

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">Processors</phrase>
<phrase diff="add" at="G"><termref def="serializer">Serializers</termref>
</phrase>

are
REQUIRED to support values of <code>UTF-8</code> and
<code>UTF-16</code>.
</phrase>

<!-- Deleted in response to qt-2004Feb0976-01 and qt-2004Feb0977-01 -->
<phrase diff="del" at="E">The default is <termref def="impdef">implementation-defined</termref>.</phrase>
<phrase diff="add" at="C">A <termref def="serial-err">serialization error</termref>
occurs if the <termref def="serializer">serializer</termref>
does not support the encoding specified
by the <code>encoding</code> parameter.</phrase>
<!-- In response to qt-2004Feb0362-03, require support for UTF-8, UTF-16,
     and error if encoding is not supported.
  -->
<phrase diff="add" at="F">The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the error.</phrase>
If the instance of the data model contains a
character that cannot be represented in the encoding that the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

is using for output, the <termref def="serializer">serializer</termref>
<phrase diff="del" at="B">SHOULD</phrase>
<phrase diff="add" at="B">MUST</phrase>
signal a <termref def="serial-err">serialization error</termref>.</p>

<!-- Deleted in response to qt-2004Feb0976-01 and qt-2004Feb0977-01 -->
<p diff="del" at="E">The default encoding for the <code>text</code> output
method is <termref def="impdef">implementation-defined</termref>.</p>

<p>The
<!-- Changed parameter under qt-2004Jan0019-04 and qt-2004Feb0362-10 -->
<code diff="del" at="E">normalize-unicode</code>
<code diff="add" at="E">normalization-form</code>
parameter is applicable for the
<code>text</code> output method.
<phrase diff="add" at="E">The values <code>NFC</code>
and <code>none</code> MUST be supported by the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>.
</phrase>

A <termref def="serial-err">serialization error</termref> results if the value of the
<code>normalization-form</code> parameter specifies a normalization form
that is not supported by the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>;
</phrase>

the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

MUST signal the
error.</phrase></p>

<p diff="del" at="G">The <code>use-character-maps</code> parameter is applicable for the
<code>xml</code> output method.</p>

<p>The <code>use-character-maps</code> parameter is applicable for the
<code diff="chg" at="C">text</code> output method.
<!-- Added in response to qt-2004Feb0362-22 -->
<phrase at="G" diff="add">See <specref ref="character-maps"/> for more
information.</phrase></p>

<!-- Added parameter in response to qt-2004Feb0362-04 and qt-2004Feb0362-05 -->
<p diff="add" at="G">The <code>byte-order-mark</code> parameter is
applicable for the <code>text</code> output method.  See
<specref ref="serparam"/> for more information.</p>
</div1>

<div1 id="character-maps">
<head>Character Maps</head>

<p>The <code>use-character-maps</code> parameter is a list of characters
and corresponding string substitutions.</p>

<p>Character maps allow a specific character appearing in a text or
attribute node in the instance of the data model to be replaced with a specified
string of characters during serialization. The string that is
substituted is output "as is," and the <termref def="serializer">serializer</termref> performs no checks
that the resulting document is well-formed. This mechanism can
therefore be used to introduce arbitrary markup in the serialized
output.

<!-- Added cross-reference in response to qt-2004Feb0362-18 -->
<phrase diff="add" at="G">See <xspecref spec="XT" ref="character-maps"/>
of <bibref ref="XSLT2"/> for examples of using character mapping in
XSLT.</phrase></p>

<p>Character mapping is applied to the characters that actually appear
in a text or attribute node in the instance of the data model, before any other
serialization operations such as escaping or Unicode normalization are
applied. If a character is mapped, then it is not subjected to XML or
HTML escaping, nor to Unicode normalization. The string that is
substituted for a character is not validated or processed in any way
by the <termref def="serializer">serializer</termref>, except for translation into the target encoding. In
particular, it is not subjected to XML or HTML escaping, it is not
subjected to Unicode normalization, and it is not subjected to further
character mapping. If the string cannot be represented using the
target encoding, the <termref def="serializer">serializer</termref> takes the same action as it would if
the offending characters appeared directly in the instance of the data model.</p>

<p>Character mapping is not applied to characters in text nodes whose
parent elements are listed in the <code>cdata-section-elements</code>
parameter,

<phrase diff="add" at="G">nor to characters for which output escaping has
been disabled (disabling output escaping is an <bibref ref="XSLT2"/>
feature),</phrase>

nor to characters in attribute
values that are subject to the URI escaping defined for the HTML and
XHTML output methods, unless URI escaping has been disabled using the
<code>escape-uri-attributes</code> parameter in the output
definition.</p>

<p>On serialization, occurrences in text nodes and attribute values of a
character specified in the <code>use-character-maps</code> parameter
are replaced by the corresponding string from the <code>use-character-maps</code>
parameter.</p>

<note><p>Using a character map can result in non-well-formed documents
if the string contains XML-significant
characters. For example, it is possible to create documents containing
unmatched start and end tags, references to entities that are not
declared, or attributes that contain tags or unescaped quotation
marks.</p></note>

<p diff="del" at="G">Character mapping is applied to the characters that actually appear
in a text or attribute node in the instance of the data model, before any other
serialization operations such as escaping or Unicode normalization are
applied.</p>

<p diff="del" at="G">Character mapping is not applied to characters for which output
escaping has been disabled (disabling output escaping is an <bibref
ref="XSLT2"/> feature),
nor to characters in text nodes whose parent
elements are listed in the <code>cdata-section-elements</code> parameter,
nor to characters in attribute values that are
subject to the URI escaping defined for the HTML and XHTML output
methods, unless URI escaping has been disabled using the
<code>escape-uri-attributes</code> parameter.</p>

<p>If a character is mapped, then it is not subjected to XML or HTML escaping.</p>

<p>A <termref def="serial-err">serialization error</termref> occurs if character mapping causes the output
of a string containing a character that cannot be represented in the
encoding that the

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

is using for output. The

<!-- Changed term in response to qt-2004Feb0935-01 -->
<phrase diff="del" at="G">processor</phrase>
<phrase diff="add" at="G"><termref def="serializer">serializer</termref>
</phrase>

<phrase diff="chg" at="B">MUST</phrase><phrase diff="del" at="B">SHOULD</phrase>
signal the error.</p>

</div1>

<!-- Added conformance section in response to qt-2004Feb0271-01 -->
<div1 id="conformance">
<head>Conformance</head>

<p at="G" diff="add">Serialization is intended primarily as a component
that can be used by other specifications.  Therefore, this document
relies on specifications that use it to specify conformance criteria
for Serialization in their respective environments.
Specifications that set conformance criteria for their use of
Serialization MUST NOT change the semantic definitions of 
Serialization as given in this specification, except by
subsetting and/or compatible extensions.</p>
</div1>
</body>

<back>

<div1>
<head>References</head>
<div2>
<head>Normative References</head>
<blist>

<bibl id="CHARMOD" key="Character Model for the World Wide Web 1.0">
  World Wide Web Consortium,
  <emph>Character Model for the World Wide Web 1.0</emph>. Last Call Working Draft.
  See <loc href="http://www.w3.org/TR/2002/WD-charmod-20020430/">http://www.w3.org/TR/2002/WD-charmod-20020430/</loc>.
</bibl>

<bibl id="DataModel" key="Data Model">
  World Wide Web Consortium,
  <emph>XQuery 1.0 and XPath 2.0 Data Model</emph>.
  See <loc href="http://www.w3.org/TR/xpath-datamodel/">http://www.w3.org/TR/xpath-datamodel/</loc>.
</bibl>

<bibl id="FANDO" key="Functions and Operators">
  World Wide Web Consortium,
  <emph>XQuery 1.0 and XPath 2.0 Functions and Operators.</emph>
  W3C Working Draft. See <loc href="http://www.w3.org/TR/xpath-functions/"/>.
</bibl>

<bibl id="HTML" key="HTML">World Wide Web Consortium. <emph>HTML 4.01
specification</emph>. W3C Recommendation.
See <loc href="http://www.w3.org/TR/html4/"/>.</bibl>

<bibl id="IANA" key="IANA">Internet Assigned Numbers
Authority. <emph>Character Sets</emph>.
See <loc href="http://www.iana.org/assignments/character-sets"/>.</bibl>

<bibl id="RFC2046" key="RFC2046">N. Freed, N. Borenstein.  <emph>Multipurpose
Internet Mail Extensions (MIME) Part Two: Media Types</emph>. IETF RFC
2046.  See <loc href="http://www.ietf.org/rfc/rfc2046.txt"/>.</bibl>

<bibl id="RFC2119" key="RFC2119">S. Bradner.  <emph>Key words for use in RFCs
to Indicate Requirement Levels</emph>.  IETF RFC 2119.
See <loc href="http://www.ietf.org/rfc/rfc2119.txt"/>.</bibl>

<bibl id="RFC2278" key="RFC2278">N. Freed, J. Postel.  <emph>IANA
Charset Registration Procedures</emph>.  IETF RFC 2278.
See <loc href="http://www.ietf.org/rfc/rfc2278.txt"/>.</bibl>

<bibl id="RFC2376" key="RFC2376">E. Whitehead, M. Murata.  <emph>XML
Media Types</emph>. IETF RFC 2376.
See <loc href="http://www.ietf.org/rfc/rfc2376.txt"/>.</bibl>

<bibl id="RFC2396" key="RFC2396">T. Berners-Lee, R. Fielding, and
L. Masinter.  <emph>Uniform Resource Identifiers (URI): Generic
Syntax</emph>. IETF RFC 2396.
See <loc href="http://www.ietf.org/rfc/rfc2396.txt"/>.</bibl>

<bibl id="RFC3023" key="RFC3023">M. Murata, S. St.Laurent, D. Kohn.  <emph>XML
Media Types</emph>. IETF RFC 3023.
See <loc href="http://www.ietf.org/rfc/rfc3023.txt"/>.</bibl>

<bibl id="RFC3236" key="RFC3236">M. Baker, P. Stark.
<emph>The 'application/xhtml+xml' Media Type</emph>.  IETF RFC 3236.
See <loc href="http://www.ietf.org/rfc/rfc3236.txt"/>.</bibl>

<bibl id="UNICODE-ENCODING" key="Unicode Encoding">Unicode Consortium.
<emph>Unicode Character Encoding Model</emph>. Unicode Standard Annex #17.
See <loc href="http://www.unicode.org/unicode/reports/tr17/"/>.
</bibl>

<bibl id="UNICODE-NORMALIZATION" key="Unicode Normalization">Unicode Consortium.
<emph>Unicode Normalization Forms</emph>. Unicode Standard Annex #15.
See <loc href="http://www.unicode.org/unicode/reports/tr15/"/>.
</bibl>

<bibl id="XHTML10" key="XHTML 1.0">World Wide Web Consortium. <emph>XHTML
1.0: The Extensible HyperText Markup Language (Second Edition).</emph>
W3C Recommendation. See <loc href="http://www.w3.org/TR/xhtml1/"/>.</bibl>

<bibl id="XHTML11" key="XHTML 1.1">World Wide Web Consortium. <emph>XHTML
1.1: Module-Based XHTML.</emph> W3C Recommendation.
See <loc href="http://www.w3.org/TR/xhtml11/"/>.
</bibl>

<bibl id="XML" key="XML10">World Wide Web Consortium. <emph>Extensible
Markup Language (XML) 1.0 (Second Edition)</emph>. W3C Recommendation.
See <loc href="http://www.w3.org/TR/2000/REC-xml-20001006"/>.</bibl>

<bibl id="XML11" key="XML11">World Wide Web Consortium. <emph>Extensible
Markup Language (XML) 1.1</emph>. W3C Recommendation.
See <loc href="http://www.w3.org/TR/2004/REC-xml11-20040204/"/>.</bibl>

<bibl id="XMLNAMES" key="XML Names">World Wide Web
Consortium. <emph>Namespaces in XML.</emph> W3C Recommendation. See
<loc href="http://www.w3.org/TR/REC-xml-names/"/>.
</bibl>

<bibl id="XMLNAMES11" key="XML Names 1.1">World Wide Web
Consortium. <emph>Namespaces in XML 1.1.</emph> W3C Recommendation. See
<loc href="http://www.w3.org/TR/xml-names11/"/>.
</bibl>

<bibl id="XLINK" key="XLink">World Wide Web Consortium. <emph>XML
Linking Language (XLink).</emph>  W3C Recommendation.  See
<loc href="http://www.w3.org/TR/2001/REC-xlink-20010627/"/>.</bibl>

<bibl id="XMLSCHEMA" key="XML Schema">
  World Wide Web Consortium.
  <emph>XML Schema Part 1: Structures</emph> and <emph>XML Schema Part 2: Data Types</emph>. W3C Recommendation.  See <loc href="http://www.w3.org/TR/xmlschema-1/"/> and <loc href="http://www.w3.org/TR/xmlschema-2/"/>.
</bibl>

<bibl id="XPath2" key="XPath 2.0">
  World Wide Web Consortium,
  <emph>XML Path Language (XPath) 2.0.</emph>
  See <loc href="http://www.w3.org/TR/xpath20/">http://www.w3.org/TR/xpath20/</loc>.
</bibl>

<bibl id="XQuery" key="XQuery 1.0">
  World Wide Web Consortium,
  <emph>XQuery 1.0: An XML Query Language</emph>.
  See <loc href="http://www.w3.org/TR/xquery/">http://www.w3.org/TR/xquery/</loc>.
</bibl>

<bibl id="XSLT2" key="XSLT 2.0">
  World Wide Web Consortium,
  <emph>XSL Transformations Language (XSLT) Version 2.0.</emph>
  See <loc href="http://www.w3.org/TR/xslt20/">http://www.w3.org/TR/xslt20/</loc>.
</bibl>

</blist>
</div2>
<div2 diff="add" at="E">
<head>Non-normative References</head>
<blist>

<!-- Added in response to qt-2004Feb0362-16 -->
<bibl id="xhtml-mod" key="XHTML Modularization">
  World Wide Web Consortium,
  <emph>Modularization of XHTML<sup>&tm;</sup></emph>.
  See <loc href="http://www.w3.org/TR/xhtml-modularization/">http://www.w3.org/TR/xhtml-modularization/</loc>.
</bibl>

</blist>
</div2>
</div1>

</back>
</spec>
