<?xml version='1.0'?>
<?xml:stylesheet type='text/xsl' href='../xmlschema.msxsl'?>
<!DOCTYPE spec PUBLIC "-//W3C//DTD Specification::19990205//EN" "../xmlspec-19990429.dtd" [   
 <!ENTITY cellback '#d0d9fa'>   
 <!ENTITY cellfront '#bedce6'>
 <!ENTITY xmlspec "http://www.w3.org/TR/REC-xml">   
 <!ENTITY xmlnsspec "http://www.w3.org/TR/REC-xml-names/">   
 <!ENTITY xslspec "http://www.w3.org/TR/WD-xsl/">   
 <!ENTITY xsdl "http://www.w3.org/TR/xmlschema-1/">   
 <!ENTITY rdfschspec "http://www.w3.org/TR/PR-rdf-schema">
 <!ENTITY SchemaWG "<loc href='http://www.w3.org/XML/Activity#schema-wg'>W3C XML Schema Working Group</loc>">
    
 <!ENTITY nbsp "&#160;">   
    
 <!ENTITY year "1999">   
 <!ENTITY mm "09">   
 <!ENTITY MM "September">   
 <!ENTITY dd "24">   
 <!ENTITY MMDD "&mm;&dd;">   
 <!ENTITY iso.doc.date "&year;&MMDD;">


 <!ENTITY WD-XSP2 "datatypes">
   
 <!ENTITY thisversion "http://www.w3.org/&year;/&mm;/&dd;-xmlschema/datatypes/">
 <!ENTITY order "&#163;">   
 <!ENTITY le "&#8804;"> <!-- less than or equal to, U+2264 ISOtech -->   
 <!ENTITY ne "&#8800;"> <!-- not equal to, U+2260 ISOtech -->   
 <!ENTITY infin "&#8734;"> <!-- infinity, U+221E ISOtech -->   
 <!ENTITY mdash "--"> <!-- &#x2014, but nsgmls doesn't grok hex -->   
    
]>
<spec>   
<header>   
<title>XML Schema Part 2: Datatypes</title>   
<version></version>   
    <w3c-designation>&WD-XSP2;-&iso.doc.date;</w3c-designation>
    <w3c-doctype>W3C Working Draft</w3c-doctype>
<pubdate><day>&dd;</day><month>&MM;</month><year>&year;</year></pubdate>   
<publoc>   
	<loc href='&thisversion;'>   
	&thisversion;</loc>   
	(in <loc href='&thisversion;datatypes.xml'>XML</loc> and   
<loc href='&thisversion;datatypes.html'>HTML</loc>,
	with accompanying <loc href='&thisversion;datatypes.xsd'>schema</loc> and <loc href='&thisversion;datatypes.dtd'>DTD</loc>)    
</publoc>   
<prevlocs>
	<loc href='http://www.w3.org/1999/05/06-xmlschema-2/'>   
	http://www.w3.org/1999/05/06-xmlschema-2/</loc>   
</prevlocs>   
<latestloc>   
   
	<loc href='http://www.w3.org/TR/xmlschema-2/'>http://www.w3.org/TR/xmlschema-2/</loc>   
   
</latestloc>   
<authlist>   
<author>   
<name>Paul V. Biron</name>   
<affiliation>Kaiser Permanente, for Health Level Seven</affiliation>   
<email href='mailto:Paul.V.Biron@kp.org'>Paul.V.Biron@kp.org</email>   
</author>   
<author>   
<name>Ashok Malhotra</name>   
<affiliation>IBM</affiliation>   
<email href='mailto:petsa@us.ibm.com'>petsa@us.ibm.com</email>   
</author>   
</authlist>   
<status>   
<p>   
This is a W3C Working Draft for review by members of the W3C and other interested   
parties in the general public.    
</p>   
<p>   
It has been reviewed by the XML Schema Working Group and the Working Group has agreed   
to its publication. Note that not all sections of the draft represent the current   
consensus of the WG. Different sections of the specification may well command different   
levels of consensus in the WG. Public comments on this draft will be instrumental in   
the WG's deliberations.   
</p>   
<p>   
Please review and send comments to <loc href='mailto:www-xml-schema-comments@w3.org'>   
www-xml-schema-comments@w3.org</loc>   
(<loc href='http://lists.w3.org/Archives/Public/www-xml-schema-comments/'>archive</loc>).   
</p>   
<p>   
The facilities described herein are in a preliminary state of design. The Working Group   
anticipates substantial changes, both in the mechanisms described herein, and in   
additional functions yet to be described. The present version should not be implemented   
except as a check on the design and to allow experimentation with alternative designs.   
<emph>The Schema WG will not allow early implementation to constrain its ability to make   
changes to this specification prior to final release.</emph>   
</p>   
<p>   
A list of current W3C working drafts can be found at   
<loc href='http://www.w3.org/TR'>http://www.w3.org/TR</loc>. They may be updated,   
replaced, or obsoleted by other documents at any time. It is inappropriate to use   
W3C Working Drafts as reference material or to cite them as other than "work in progress".   
</p>   
<ednote>   
	<edtext>Several "note types" are used throughout this draft:   
	<glist>   
		<gitem><label>issue [Issue (issue-name): ]</label>   
			<def>   
			<p>something on which the editors are seeking comment.</p>   
			</def>   
		</gitem>   
		<gitem><label>editorial note [Ed. Note: ]</label>   
			<def>   
				<p>something the editors wish to call to the attention of the   
				reader. To be removed prior to the recommendation becoming final.</p>   
			</def>   
		</gitem>   
		<gitem><label>note [Note: ]</label>   
			<def>   
				<p>something the editors wish to call to the attention of the reader.   
				To remain in the final recommendation.</p>   
			</def>   
		</gitem>   
	</glist>   
	</edtext>   
</ednote>   
</status>   
<abstract>   
<p>   
This document specifies a language for defining datatypes to be used   
in XML Schemas and possibly elsewhere.   
</p>   
</abstract>   
<langusage>   
<language id='EN'>English</language>   
</langusage>   
<revisiondesc>   
<slist>   
<!--   
     commenting these out means only that they won't show up in the stylesheet   
	 generated "Revisions from previous draft" appendix   
  --> 
<sitem>   
19990521: PVB: corrected definition of length and maxlengths facet for strings to   
be in terms of <emph>characters</emph> not <emph>bytes</emph>   
</sitem>   
<sitem>   
19990521: PVB: removed issue "other-date-representations".  We don't   
want other separators, left mention of aggregate reps for dates as   
an ednote.   
</sitem>   
<sitem>   
19990521: PVB: fixed "holidays" example, "-0101" ==> "==0101"   
(where == in the correction should be two hyphens, but that would   
not allow us to comment out this sitem)   
</sitem>   
<sitem>   
19990521: PVB: fixed "common date" example, lexicalRepresenation ==> lexicalRepresentation   
</sitem>   
<sitem>   
19990521: PVB: added note that -YY-MM-DD style dates are deprecated   
</sitem>   
<sitem>   
19990521: PVB: added termdef element around definition of subtype   
</sitem>   
<sitem>   
19990521: PVB: removed "whose basetype is a built-in" from definition of   
"user-generated" datatype   
</sitem>   
<sitem>   
19990521: PVB: clarified that the length facet for binary datatype is   
length in bytes   
</sitem>   
<sitem>   
19990521: PVB: fixed weird spacing problems introduced by ArborText   
</sitem>   
<sitem>   
19990521: PVB: fixed references to non-terminals in productions   
</sitem>   
<sitem>   
19990524: AM: changed "boolean" to have a single lexical representation.   
</sitem>   
<sitem>   
19990524: AM: added issue: "should we add a facet to restrict a binary    
datatype to a user-defined format such as audio, image, etc."    
</sitem>   
<sitem>   
19990524: AM: corrected reference to SQL standard.    
</sitem>   
<sitem>   
19990524: AM: corrected definition of length and maximum length   
facets to be a positive integer.    
</sitem>   
<sitem>   
19990524: AM: corrected default format for integer, decimal and real.    
</sitem>   
<sitem>   
19990524: AM: rewrote issue definition-overiding.    
</sitem>   
<sitem>   
19990524: AM: edited Conformance section to add example of lexical   
errors and fix reference to above issue.    
</sitem>   
<sitem>   
19990601: PVB: changed date formats in examples of Section 1 to be conformant   
with the date datatype   
</sitem>   
<sitem>   
19990601: PVB: added a "for compatibility" terminology entry   
</sitem>   
<sitem>   
19990601: PVB: added a Name datatype and redefined the XML 1.0 attribute types   
in terms of it   
</sitem>   
<sitem>   
19990601: PVB: remove "for attributes only" restriction on XML 1.0  attribute types.   
Added a "for compatibility" clause for attribute values.   
</sitem>   
<sitem>   
19990601: PVB: added language datatype   
</sitem>   
<sitem>   
19990602: PVB: added uuid datatype   
</sitem>   
<sitem>   
19990602: PVB: added NCName datatype   
</sitem>   
<sitem>   
19990604: AM: changed date and time formats to allow only ISO 8601   
extended format. Impacted sections on the date, time datatypes,   
section 4, Appendix C.    
</sitem>   
<sitem>   
19990604: AM: added ednote to string datatype saying we need to harmonize   
with I18N character model.    
</sitem>   
<sitem>   
19990604: PVB: added "Revisions from previous draft" appendix   
</sitem>   
<sitem>   
19990604: PVB: moved "built-in generated" datatype definitions into the   
schema for datatype definitions (instead of it being in its own appendix).   
</sitem>   
<sitem>   
19990604: PVB: updated the schema for datatype definitions to point to   
the correct (per xmlschema-1) DTD and schema   
</sitem>   
<sitem>   
19990623: AM: added paragraph to conformance section which begins   
to be more precise about how conforming processors should behave   
</sitem>   
<sitem>   
19990623: AM: removed confusing statement from conformance section   
which said that " checking for lexical conformance is all that is   
expected of an XML processor."   
</sitem>   
<sitem>   
19990623: PVB: removed section on "Characterizing Operations" and   
all references to it (or its content) in the rest of the draft.   
</sitem>   
<sitem>   
19990623: PVB: removed uuid datatype   
</sitem>   
<sitem>   
19990623: PVB: made NMTOKEN a primitive datatype and Name a   
subtype of NMTOKEN.   
</sitem>   
<sitem>   
19990623: PVB: corrected the basetypes of following XML-related   
generated datatypes: IDREFS (from ID to IDREF), ENTITY (from ID to Name),   
ENTITIES (from ID to ENTITY), NMTOKENS (from Name to NMTOKEN).   
</sitem>   
<sitem>   
19990623: PVB: changed name of section "User-Generated Datatypes" to   
the more correct "Defining Generated Datatypes".  Also added some   
explanatory text to the beginning of that section which explains   
that the abstract syntax there is used both for defining built-in   
and user-generated datatypes.   
</sitem>   
<sitem>   
19990623: PVB: added explanations of abstract and concrete   
syntax (mostly borrowed from the structural draft) to section   
"Defining Generated Datatypes".   
</sitem>   
<sitem>   
19990623: PVB: separated references into those that are normative   
and those that are non-normative   
</sitem>   
<sitem>   
19990623: PVB: added a pointer to the draft revision of ISO 8601   
in its bib entry   
</sitem>   
<sitem>   
19990623: PVB: added "no-consensus" issues to all sections   
except "Type System" and "Built-in datatypes" stating that no WG   
consensus has been reached on the section (the exclusions above   
are because those sections were granted consensus status at the   
Ann Arbor f2f)   
</sitem>   
<sitem>   
19990623: PVB: cleaned up productions for numeric literals   
</sitem>   
<sitem>   
19990624: PVB: excluded subsections 1.1 and 1.2 from the "no-consensus"   
issue for section 1   
</sitem>   
<sitem>   
19990630: PVB: removed number datatype, made real into a built-in   
primitive, changed the basetype of decimal to real and the basetype   
of integer to decimal.  Also added NaN, INF and -INF to the lexical   
space of all numeric types.   
</sitem>   
<sitem>   
19990630: PVB: added 2 new subtypes of integer: non-positive-integer   
and non-negative-integer, each of which has 1 subtype: negative-integer   
and positive-integer, respectively.  Added generated datatype definitions   
for these to the schema for datatypes.   
</sitem>   
<sitem>   
19990630: PVB: fixed typos in definition of IDREF and IDREFS   
(was "the lexical space of ID is .." now "the lexical space of IDREF is ...")   
</sitem>   
<sitem>   
19990630: PVB: added issue(non-negative-integer-literals)   
</sitem>   
<sitem>   
19990630: PVB: added links to known subtypes in all datatype   
descriptions   
</sitem>   
<sitem>   
19990630: PVB: changed "no-consensus" issues to "no-consensus"   
ednotes   
</sitem>   
<sitem>   
19990630: PVB: changed "no-consensus" ednote for section 1 to   
exclude subsection 1.3, as voted on during the telcon today   
</sitem>   
<sitem>   
19990630: PVB: corrected several internal cross-references: from termref's   
to specref's   
</sitem>   
<sitem>   
19990630: PVB: added all previous drafts (internal as well as public WDs)   
to the "Previous Versions" section.  In future public WDs only those   
"previous versions" which were public WDs will display   
</sitem>   
<sitem>   
19990630: PVB: changed "collection" to "set" in definition of "value space"   
(thought this had been changed long ago, sorry)   
</sitem>   
<sitem>   
19990708: PVB: removed section 1.5 "Organization", per WG vote on telcon   
</sitem>   
<sitem>   
19990708: PVB: removed "no-consensus" ednote from section 1   
</sitem>   
<sitem>   
19990709: PVB: added (stub) subsections on "Precision", "Scale" and "Encoding" to   
section 2.4.2 "Constraining Facets".  All facets mentioned in all datatype   
definitions in section 3 should be listed in 2.4.2. (this is not intended to address   
the standing issue <xspecref href="http://www.w3.org/XML/Group/xmlschema-current/issues.html#constraining-facet-definitions">   
constraining-facet-definitions</xspecref>, but was needed for the next revision item)   
</sitem>   
<sitem>   
19990709: PVB: added "Datatypes and Facets" appendix which consists of   
several tables which attempt to show which facets apply to which datatypes   
</sitem>   
<sitem>   
19990713: PVB: fixed bug in schema for datatypes regarding modelGroup vs.   
elementType Refs in unordered modelGroup
<!--, as per   
<loc href='http://lists.w3.org/Archives/Public/www-xml-schema-comments/1999AprJun/0088.html'>   
http://lists.w3.org/Archives/Public/www-xml-schema-comments/1999AprJun/0088.html</loc>   
-->
</sitem>   
<sitem>   
19990726: AM: Changed example of user-generated datatype from   
heightInInches to i4.   
</sitem>   
<sitem>   
19990726: AM: Rewrote "Exact and Approximate".   
</sitem>   
<sitem>   
19990812: PVB: Removed all mention of picture constraints as lexical-representations   
for strings   
</sitem>   
<sitem>   
19990819: AM: Amended Ed. Note on a URL for the datatypes namespace   
referring to Dan Connolly's note "make up your own".   
</sitem>   
<sitem>   
19990819: AM: Removed issue on NULLS -- 2 occurrences.   
</sitem>   
<sitem>   
19990819: AM: Changed Ed. Note on "Better Ref Mechs" associated with   
IDREFS to "issue".   
</sitem>   
<sitem>   
19990819: AM: Removed issue on measurement units as WG decided to   
defer to version 2.   
</sitem>
<sitem>
19990919: HT: modified abstract syntax to better reflect intent?
</sitem>
<sitem>
19990923: HT: modified schema for schemas to conform to the concrete
syntax in the latest Structures draft
</sitem>
<sitem>
19990923: PVB: added minAbsoluteValue and maxAbsoluteValue facets to
real, their intent is to allow generation of subtypes of real whose
value spaces correspond to common floating-point representations.
Added examples to section 4 to show how to generate IEEE 32-bit, etc.
</sitem>
<sitem>
19990923: PVB: replaced dateTime, date, time and timePeriod with all
new date/time related types: timeInstant, timeDuration, recurringInstant,
date and time.  Additionally, limited the lexical representations of each
of the new types to a single form (w/ the exception of still allowing both
left truncation and reduced [i.e., right truncated] representations).
Changed all examples which used date/time to use the new lexical representations
</sitem>
<sitem>
19990923: PVB: modified the abstract syntax, schema for datatypes and DTD
for datatypes to bring them in line with above changes.
</sitem>
<sitem>
19990924: HST: link housekeeping before publication</sitem>
</slist>   
</revisiondesc>   
</header>   
<body>   
<div1 id='Intro'>   
<head>Introduction</head>   
<div2 id='purpose'>   
<head>Purpose</head>   
<p>   
The <bibref ref='XML'/> specification defines limited    
facilities for applying datatypes to document content in that documents   
may contain or refer to DTDs that assign types to elements and attributes.   
However, document authors, including authors of traditional   
<emph>documents</emph> and those transporting <emph>data</emph> in XML,   
often require a high degree of type checking to ensure robustness in   
document understanding and data interchange.   
</p>   
<p>   
The table below offers two typical examples of XML instances   
in which datatypes are implicit: the instance on the left   
represents a billing invoice, the instance on the   
right a memo or perhaps an email message in XML.   
</p>   
<table border='1' bgcolor='&cellback;'>
<thead>   
<tr>   
<th align='center'>Data oriented</th>   
<th align='center'>Document oriented</th>   
</tr>   
</thead>   
<tbody>   
<tr>   
<td><eg><![CDATA[<invoice>   
   <orderDate>19990121</orderDate>   
   <shipDate>19990125</shipDate>   
   <billingAddress>   
      <name>Ashok Malhotra</name>   
      <street>123 IBM Ave.</street>   
      <city>Hawthorne</city>   
      <state>NY</state>   
      <zip>10532-0000</zip>   
   </billingAddress>   
   <voice>555-1234</voice>   
   <fax>555-4321</fax>   
</invoice>]]></eg></td>   
<td><eg><![CDATA[<memo importance="high"   
      date="19990323">   
   <from>Paul V. Biron</from>   
   <to>Ashok Malhotra</to>   
   <subject>Latest draft</subject>   
   <body>   
      We need to discuss the latest   
      draft <emph>immediately</emph>.   
      Either email me at <email>   
      mailto:paul.v.biron@kp.org</email>   
      or call <phone>555-9876</phone>   
   </body>   
</memo>]]></eg></td>   
</tr>   
</tbody>   
</table>   
<p>   
The invoice contains several dates and telephone numbers, the postal    
abbreviation for a state   
(which comes from an enumerated list of sanctioned values), and a ZIP code   
(which takes a definable regular form).  The memo contains many   
of the same types of information: a date, telephone number, email address   
and an "importance" value (which undoubtedly comes from an enumerated   
list, such as "low", "medium" or "high").  Applications which process   
invoices and memos need to raise exceptions if something that was   
supposed to be a date or telephone number did not conform to the rules   
for valid dates or telephone numbers.   
</p>   
<p>   
In both cases, validity constraints exist on the content of the   
instances that are not expressible in XML DTDs.  The limited datatyping   
facilities in XML have prevented validating XML processors from supplying   
the rigorous type checking required in these situations.  The result   
has been that individual applications writers have had to implement type   
checking in an ad hoc manner.  This specification addresses   
the need of both document authors and applications writers for a robust,   
extensible datatype system for XML which could be incorporated into   
XML processors.  As discussed below, these datatypes could be used in other   
XML-related standards as well.   
</p>   
</div2>   
<div2 id='requirements'>   
<head>Requirements</head>   
<p>   
The <bibref ref='schema-requirements'/> document spells out   
concrete requirements to be fulfilled by this specification,   
which state that the XML Schema Language must:   
</p>   
<olist>   
<item>   
<p>   
provide for primitive data typing, including byte, date,   
integer, sequence, SQL &amp; Java primitive data types, etc.;   
</p>   
</item>   
<item>   
<p>   
define a type system that is adequate for import/export   
from database systems (e.g., relational, object, OLAP);   
</p>   
</item>   
<item>   
<p>   
distinguish requirements relating to lexical data representation   
vs. those governing an underlying information set;   
</p>   
</item>   
<item>   
<p>   
allow creation of user-defined datatypes, such as   
datatypes that are derived from existing datatypes and which   
may constrain certain of its properties (e.g., range,   
precision, length, format).   
</p>   
</item>   
</olist>   
</div2>   
<div2 id='scope'>   
<head>Scope</head>   
<p>   
This portion of the XML Schema Language discusses datatypes that can be   
used in an XML Schema.  These datatypes can be specified for element content   
that would be specified as <xspecref href='&xmlspec;#dt-chardata'>#PCDATA</xspecref>   
and attribute values of <xspecref href='&xmlspec;#sec-attribute-types'>various   
types </xspecref> in a DTD.  It is the intention of this specification that   
it be usable outside of the context of XML Schemas for a wide range of other   
XML-related activities such as <bibref ref='XSL'/> and <bibref ref='RDFSchema'/>.   
</p>   
<p>   
For the most part, this specification discusses what are sometimes referred   
to as <emph>scalar datatypes</emph> in that they constrain the lexical representation   
of a single literal.  In some cases, as for example in <specref ref='IDREFS'/>, <specref ref='ENTITIES'/> and <specref ref='NMTOKENS'/>, the value may consist of a   
list or set of literals separated by spaces. This is an example of what is   
called an <emph>aggregate datatype</emph>. Future versions of this specification   
will contain a more general mechanism for aggregate (collection) datatypes   
such as sets, bags and records.   
</p>   
</div2>   
<div2 id='terminology'>   
<head>Terminology</head>   
<p>   
The terminology used to describe XML Schema Datatypes is defined in the body of   
this specification. The terms defined in the following list are used in building   
those definitions and in describing the actions of a datatype processor:   
</p>   
<glist>   
<gitem id='compatibility'>   
<label>For compatibility</label>   
<def>   
<p>   
A feature of this specification included solely to ensure that   
schemas which use this feature remain compatible with   
<bibref ref='XML'/>   
</p>   
</def>   
</gitem>   
</glist>   
<!--   
<ednote>   
<edtext>   
if necessary, insert a terminology list (e.g., may,   
must, datatype valid, etc.)   
</edtext>   
</ednote>   
-->   
</div2>   
</div1>   
<div1 id='typesystem'>   
<head>Type System</head>   
<p>   
This section describes the conceptual framework behind the type system   
defined in this specification.  The framework has been influenced by the   
<bibref ref='ISO11404'/> standard on language-independent datatypes   
as well as the datatypes for <bibref ref='SQL'/> and for programming languages   
such as Java.   
</p>   
<p>   
The datatypes discussed in this specification are computer representations   
of well known abstract concepts such as <emph>integer</emph> and <emph>date</emph>.   
It is not the place of this specification to define these concepts;   
many other publications provide excellent definitions.   
</p>   
<p>   
Two concepts are essential for an understanding of datatypes as they   
are discussed here: a <emph>value space</emph> is an abstract collection of   
permitted values for a datatype. Each datatype also has a space of valid   
lexical representations or literals, each of which denotes a single value.   
A single value in the value space may   
be denoted by several distinct valid literals.   
</p>   
<div2 id='datatype'>   
<head>Datatype</head>   
<p>   
<termdef id='dt-datatype' term='datatype'>In this specification,   
a <term>datatype</term> is defined as a 3-tuple, consisting of   
a) a set of distinct values, called its <termref def='dt-valuespace'/>,   
b) a set of lexical representations, called its <termref def='dt-lexical-space'/>,   
and c) a set of facets that characterize properties of the value space,   
the lexical space or of individual values or lexical items.   
</termdef>   
</p>   
</div2>   
<div2 id='value-space'>   
<head>Value space</head>   
<p>   
A <emph>value space</emph> is a set of permitted values   
for a datatype.  Value spaces have certain properties.  For example,   
they always have the property of <termref def='dt-cardinality'/>   
and some definition of <emph>equality</emph>   
and may have the concept of <termref def='dt-order'/> by which individual   
values within the value space can be compared to one another.   
</p>   
<p>   
<termdef id='dt-valuespace' term='value space'>A <term>value   
space</term> is the set of permitted values for a given   
datatype.</termdef> The value space of a given datatype can be defined in one of   
the following ways:   
<ulist>   
<item>   
<p>enumerated outright (extensional definition)</p>   
</item>   
<item>   
<p>   
defined axiomatically from fundamental notions (intensional definition)   
</p>   
</item>   
<item>   
<p>   
defined as the subset of values from an already defined   
value space with a given set of properties   
</p>   
</item>   
<item>   
<p>   
defined as a combination of values from some already defined   
value space(s) by a specific construction procedure   
</p>   
</item>   
</ulist>   
</p>   
<p>   
In addition to the value space, each datatype has a space of   
valid lexical representations or literals.  A single value in the value   
space may be denoted by several valid literals.  For example, "100" and "1.0E2"   
are two different representations for the same value.  Depending on the   
situation, either or both of these representations might be acceptable.   
The type system defined in this specification provides a mechanism for   
schema designers to control both the set of values and the set of   
acceptable lexical representations of those values for a datatype.   
</p>   
</div2>   
<div2 id='lexical-space'>   
<head>Lexical Space</head>   
<p>   
In addition to its <emph>value space</emph>, each datatype also has a lexical   
representation space.  <termdef term='lexical space' id='dt-lexical-space'>The   
<term>lexical space</term> consists of a set of valid literals for a datatype.   
Each value in the datatype's value space is denoted by one or more literals in its   
lexical space.</termdef>  Each <specref ref='built-in-primitive-datatypes'/>   
definition includes a detailed description of the default lexical space.   
</p>   
</div2>   
<!--   
     remove this entire section (and all references to it) for now...   
	 we will reintroduce it later if we get EVERYTHING else done and   
	 there is still time   
	    
<div2 id="characterizing-operations">   
<head>Characterizing operations</head>   
<p>   
Many different datatypes may share the same value space.  As   
a result, a datatype is only partially defined by its value space.   
<termdef id="dt-characterizing-operations" term="characterizing operations">The   
<term>characterizing operations</term> for a datatype are those operations (such as   
"add" or "append") on or resulting in values of the datatype which distinguish   
this datatype from other datatypes having value spaces which are identical   
except possibly for substitution of symbols.</termdef>   
</p>   
<p>   
Characterizing operations  can be useful in choosing the appropriate   
datatype for particular purposes, such as mapping to or from common programming   
languages or database environments.   
</p>   
<ednote>   
<edtext>   
Currently, no characterizing operations are defined on the   
built-in datatypes provided by this specification; additionally, there is   
no means to specify characterizing operations on user-generated datatypes.   
This will be addressed in a future draft.   
</edtext>   
</ednote>   
<p>   
This discussion of characterizing operations in the definition   
of datatype is for pedagogical purposes only and does <emph>not</emph>   
imply that conforming processors must implement those operations, nor   
does it imply that <emph>expressions</emph> (containing operators) which   
evaluate to values of a given datatype will be accepted by conforming processors.   
</p>   
<div3 id="equal">   
<head>Equal</head>   
<p>   
Every value space supports the notion of equality, with	the following   
rules:   
</p>   
<ulist>   
<item>   
<p>   
for any two instances of values from the value space (a,b), either a   
is equal to b, denoted a = b, or a is not equal to b, denoted a &#8800; b;   
</p>   
</item>   
<item>   
<p>   
there is no pair of instances (a, b) of values from the value space such   
that both a = b and a &#8800; b;   
</p>   
</item>   
<item>   
<p>   
for every value a from the value space, a = a;   
</p>   
</item>   
<item>   
<p>   
for any two instances (a, b) of values from the value space, a = b if and only if b = a;   
</p>   
</item>   
<item>   
<p>   
for any three instances (a, b, c) of values from the value space, if a = b and b = c,   
then a = c.   
</p>   
</item>   
</ulist>   
<p>   
On every datatype, the operation Equal is defined in terms of the equality property of   
the value space: for any values a, b drawn from the value space, Equal(a,b) is true if   
a = b, and false otherwise.   
</p>   
</div3>   
</div2>   
  -->   
<div2 id='facets'>   
<head>Facets</head>   
<p>   
<termdef id='dt-facet' term='facet'>A <term>facet</term> is a   
single defining aspect of a concept or an object.  Generally speaking,   
each facet characterizes a concept or object along independent aspects   
or dimensions.</termdef>   
</p>   
<p>   
The facets of a datatype serve to distinguish those aspects of   
one datatype which <emph>differ</emph> from other datatypes.   
Rather than being defined solely in terms of a prose description,   
the datatypes in this specification are defined in terms of   
the <emph>synthesis</emph> of facet values which together   
determine the value space and properties of the datatype.   
</p>   
<p>   
Facets are of two types: <emph>fundamental</emph> facets that define   
the datatype and <emph>non-fundamental</emph> or <emph>constraining   
</emph> facets that constrain the permitted values of a datatype.   
</p>   
<div3 id='fundamental-facets'>   
<head>Fundamental facets</head>   
<!--   
					<issue id="facet-or-property">   
						<p>   
							Are the <bibref ref="ISO11404"/> notions of <specref   
							ref="order"/>, <specref ref="bound"/>, etc. really   
							facets as we've been talking about them, or are they   
							<emph>properties</emph> which are logically derived   
							from concrete values given for specific facets?   
							(e.g., if a value is given for the <term>maxInclusive</term>   
							facet then the datatype has the property of being   
							<term>bounded</term>).   
						</p>   
					</issue>   
  -->   
<p>   
Datatypes are characterized by properties of their value spaces.   
These optional properties are discussed in this section.  Each of   
these properties gives rise to a facet that serves to characterize   
the datatype.   
</p>   
<div4 id='order'>   
<head>Order</head>   
<p>   
<termdef id='dt-order' term='order'>A value space, and hence a datatype,   
is said to be <term>ordered</term> if there exists an <term>order relation</term>   
defined for that value space.</termdef>  Order relations have the following   
rules:   
</p>   
<ulist>   
<item>   
<p>   
for every pair (a, b) from the value space, either a &le; b or b &le;   
a, or a = b;   
</p>   
</item>   
<item>   
<p>   
for every triple (a, b, c) from the value space, if a &le; b and   
b &le; c, then a &le; c.   
</p>   
</item>   
</ulist>   
<!--   
     remove all references to the section on characterizing-operations)   
	 for now...   
	 we will reintroduce it later if we get EVERYTHING else done and   
	 there is still time   
	    
<p>   
If a value space is ordered, then the datatype will have a corresponding   
<specref ref="characterizing-operations"/>, called InOrder(a, b), defined   
by:   
</p>   
<ulist>   
<item>   
<p>   
for every (a, b) from the value space, InOrder(a, b) is true if a &le;   
b, and false otherwise.   
</p>   
</item>   
</ulist>   
  -->   
<p>   
There may exist several possible order relations for a given value space.   
Additionally, there may exist multiple datatypes with the same value space.   
In such cases, each datatype will define a different order relation on the   
value space.   
</p>   
<ednote>   
<edtext>   
Currently, no order relations are defined on the built-in   
datatypes provided by this specification; additionally, there is no means   
to specify an order relation on user-generated datatypes.  This will be addressed   
in a future draft.   
</edtext>   
</ednote>   
</div4>   
<div4 id='bounds'>   
<head>Bounds</head>   
<p>   
Some ordered value spaces, and hence some datatypes, are said to be bounded. <termdef id='bounded-above' term='bounded above'>A value space is <term>bounded above</term>   
if there exists a unique value <emph>U</emph> in the value space such that,   
for all values <emph>v</emph> in the value space, <emph>v</emph> &le; <emph>U</emph>.   
The value <emph>U</emph> is said to be an <term>upper bound</term> of the   
value space.</termdef> <termdef id='bounded-below' term='bounded below'>A   
value space is <term>bounded below</term> if there exists a unique value <emph>L</emph>   
in the space such that, for all values <emph>v</emph> in the value space, <emph>L</emph> &le; <emph>v</emph>.   
 The value <emph>L</emph> is then said to be a  <term>lower bound</term> of   
the value space.</termdef>   
</p>   
<p>   
<termdef id='dt-bounded' term='Bounded'>A datatype is <term>bounded</term> if its value   
space has both an upper and a lower bound.</termdef>   
</p>   
</div4>   
<div4 id='cardinality'>   
<head>Cardinality</head>   
<p>   
<termdef id='dt-cardinality' term='cardinality'>Every value space has associated   
with it the concept of <term>cardinality</term>.  Some value spaces are finite,   
some are countably infinite while still others are uncountably infinite. A   
datatype is said to have the cardinality of its value space</termdef>.  It   
is sometimes useful to categorize value spaces (and hence, datatypes) as   
to their cardinality.  There are three significant cases:   
</p>   
<ulist>   
<item>   
<p>   
value spaces that are finite   
</p>   
</item>   
<item>   
<p>   
value spaces that are countably infinite and exact (see <specref ref='exact-approx'/>)   
</p>   
</item>   
<item>   
<p>   
value spaces that are countably infinite and approximate (see <specref ref='exact-approx'/>)   
</p>   
</item>   
</ulist>   
<p>   
Every conceptually finite value space is necessarily exact. No computational   
datatype is uncountably infinite.   
</p>   
<ednote>   
<edtext>   
Currently, cardinality is not specified for the built-in   
datatypes provided by this specification; additionally, there is no means   
to specify a cardinality on user-generated datatypes.  This will be addressed   
in a future draft.   
</edtext>   
</ednote>   
</div4>   
<div4 id='exact-approx'>   
<head>Exact and Approximate</head>   
<p>   
The computational representation of a datatype may limit the degree   
to which values of the datatype can be distinguished.  If every   
value in the value space of the conceptual datatype is distinguishable   
in the computational representation from every other value in the value space,   
then the datatype is said to be exact.   
</p>   
<p>   
Certain mathematical datatypes with very large or infinite value   
spaces have representations which are said to be approximate in that   
multiple values in the conceptual value space map to single values   
in the value space of the representation.   
In this specification, all approximate datatypes have computational   
models which specify, via parametric values, a degree of approximation,   
that is, they require a certain minimum set of values of the mathematical   
datatype to be distinguishable in the computational datatype.   
Further, each value in the conceptual value space must be capable   
of being represented in the representational value space within a certain   
distance i.e. the difference between the conceptual value and the   
representational value must not exceed some agreed upon value.   
</p>    
<ednote>   
<edtext>   
Currently, exactness is not specified for the built-in datatypes   
provided by this specification; additionally, there is no means to specify   
exactness for user-generated datatypes.  This will be addressed in a future   
draft.   
</edtext>   
</ednote>   
</div4>   
<div4 id='numeric'>   
<head>Numeric</head>   
<p>   
A datatype is said to be numeric if its values are conceptually   
quantities (in some mathematical number system).  A datatype   
whose values do not have this property is said to be non-numeric.   
</p>   
<!--   
<p>   
The significance of the numeric facet is that the representations   
of the values depend on some radix, but can be algorithmically   
transformed from one radix to another.   
</p>   
  -->   
</div4>   
<!--					   
					<div4 id="radix">   
						<head>Radix</head>   
						<p>   
							A datatype which has the <specref ref="numeric"/> property   
							must specify a value for the radix facet.   
						</p>   
					</div4>   
  -->   
</div3>   
<div3 id='non-fundamental'>   
<head>Constraining or Non-fundamental facets</head>   
<p>   
Constraining facets are optional properties that can be applied   
to a datatype to (further) constrain its value space.  Constraining   
the value space consequently constrains the allowed lexical representations.   
Adding constraining facets to a  <specref ref='basetype'/> is used in   
<specref ref='defining-generated-datatypes'/>.   
</p>   
   
<!--   
					<issue id="instance-overriding">   
						<p>   
							should it be possible to specify a value for a non-fundamental   
							facet on an element or attribute of a given datatype in an   
							instance document (on an element-instance/attribute-instance   
							basis)?  If so, what syntax should be used?   
							This needs to be coordinated with   
							the structural schema editorial team.   
						</p>   
					</issue>   
					<issue id="definition-overriding">   
						<p>   
							should it be possible to specify a value for a non-fundamental   
							facet in an element or attribute definition in a   
							schema (see Section 3.4.4 of <bibref ref="structural-schemas"/>?   
							If so, what syntax should be used?   
							This needs to be coordinated with   
							the structural schema editorial team.   
						</p>   
					</issue>   
  -->   
<div4 id='length'>   
<head>length</head>   
<p>   
<termdef id='dt-length' term='length'> For the <specref ref='string'/>   
datatype, <term>length</term> specifies the number of allowable   
characters in the string. For the <specref ref='binary'/> datatype it specifies the   
length in bits. The value of the length facet must be a   
positive integer.</termdef>   
</p>   
<ednote>   
<edtext>   
We need to ultimately reconcile the notion of string length with the resolution   
of the i18n issues around character, indexing, etc.  I18N recommends   
that length and maxLength be a "character count" and do not indicate   
storage requirements.   
</edtext>   
</ednote>   
</div4>   
<div4 id='maxlength'>   
<head>maximum length</head>   
<p>   
<termdef id='dt-maxlength' term='maxlength'>The <term>maxlength</term>   
facet indicates the maximum length, in characters, of a <specref ref='string'/>   
datatype for which the <specref ref='length'/> facet is not   
specified.  For the <specref ref='binary'/> datatype it specifies the   
maximum length in bits if the  <specref ref='length'/> facet is not   
specified. The value of the maximum length facet must be a positive integer.</termdef>   
</p>   
</div4>   
<!--   
<div4 id="sign">   
<head>sign</head>   
<p>   
<termdef id="dt-sign" term="sign"> The <term>sign</term>   
facet determines the upper or lower bound of the value space   
for a datatype with the <specref ref="numeric"/> property.</termdef>   
</p>   
<p>   
If the sign facet is unspecified, numeric quantities are assumed   
to be positive.  If it is specified, positive and well as negative   
numbers can be specified.   
</p>    
</div4>   
-->   
<div4 id='lexical-representation'>   
<head>lexical representation</head>   
<p>   
The datatypes defined in this specification are defined   
in terms of abstract value spaces and their properties as opposed   
to how values are lexically represented in XML instances.  However,   
the lexical representation of values is of prime importance in many   
applications.  Because of this importance, each <specref ref='built-in-primitive-datatypes'/> definition includes a detailed   
description of its default <specref ref='lexical-space'/>.   
<termdef id='dt-lexical-representation' term='lexical representation'>   
The <term>lexical representation</term> facet can be used to constrain   
the allowable representations, or literals, for values of a datatype.   
The meaning of the lexical representation facet depends on the   
datatype to which it is applied.</termdef>   
</p>   
<p>   
For example, for <specref ref='string'/>, values for the   
lexical representation facet are <specref ref='regexs'/><!--, while for   
<specref ref='dateTime'/>, values are derived from <bibref ref='ISO8601'/>-->.
</p>   
</div4>   
<div4 id='enumeration'>   
<head>enumeration</head>   
<p>   
<termdef id='dt-enumeration' term='enumeration'>Presence of an   
<term>enumeration</term> facet constrains the value space of the   
datatype to the specified list.</termdef>  The enumeration   
facet can be applied to any datatype.  No order or any other   
relationship is implied between the elements of the enumeration list.   
</p>   
</div4>
<div4 id="minAbsoluteValue">
<head>minAbsoluteValue</head>
<p>
<termdef id="dt-minAbsoluteValue" term="minAbsoluteValue">The
<term>minAbsoluteValue</term> facet specifies the minimum absolute value
of the value space for generated datatypes whose basetype is <specref ref="real"/>.
</termdef>
This facet (together with <specref ref="maxAbsoluteValue"/>) can be used to
generate subtypes of <specref ref="real"/> which correspond to common floating
point representations.
</p>
</div4>
<div4 id="maxAbsoluteValue">
<head>maxAbsoluteValue</head>
<p>
<termdef id="dt-maxAbsoluteValue" term="maxAbsoluteValue">The
<term>maxAbsoluteValue</term> facet specifies the maximum absolute value
of the value space for generated datatypes whose basetype is <specref ref="real"/>.
</termdef>
This facet (together with <specref ref="minAbsoluteValue"/>) can be used to
generate subtypes of <specref ref="real"/> which correspond to common floating
point representations.
</p>
</div4>
<div4 id='maxInclusive'>   
<head>maxInclusive</head>   
<p>   
<termdef id='dt-maxInclusive' term='maxInclusive'>The   
<term>maxInclusive</term> facet determines the upper bound   
of the value space for a datatype with the <specref ref='order'/>   
property.  The maximum value specified with this facet is   
<emph>inclusive</emph> in the sense that the value specified   
for the facet is itself included in the value space   
for the datatype.</termdef>   
</p>   
</div4>   
<div4 id='maxExclusive'>   
<head>maxExclusive</head>   
<p>   
<termdef id='dt-maxExclusive' term='maxExclusive'>The   
<term>maxExclusive</term> facet determines the upper bound   
of the value space for a datatype with the <specref ref='order'/>   
property.  The maximum value specified with this facet is   
<emph>exclusive</emph> in the sense that the value specified   
for the facet is itself excluded from the value   
space for the datatype.</termdef>   
</p>   
<!--   
<p>   
In a datatype definition, the value specified for this facet   
must be a literal value of type number.   
</p>   
-->   
</div4>   
<div4 id='minInclusive'>   
<head>minInclusive</head>   
<p>   
<termdef id='dt-minInclusive' term='minInclusive'>The   
<term>minInclusive</term> facet determines the lower bound   
of the value space for a datatype with the <specref ref='order'/>   
property.  The minimum value specified with this facet is   
<emph>inclusive</emph> in the sense that the value specified for   
the facet is itself included in the value space for the datatype.</termdef>   
</p>   
</div4>   
<div4 id='minExclusive'>   
<head>minExclusive</head>   
<p>   
<termdef id='dt-minExclusive' term='minExclusive'>The   
<term>minExclusive</term> facet determines the lower bound   
of the value space for a datatype with the <specref ref='order'/>   
property.  The minimum value specified with this facet is   
<emph>exclusive</emph> in the sense that the value specified   
for the facet is itself excluded from the value space for the datatype.</termdef>   
</p>   
</div4>   
<div4 id='precision'>   
<head>precision</head>   
<p>   
<termdef id='dt-precision' term='precision'> The <term>precision</term> facet, which only   
applies to the <specref ref='decimal'/> datatype, refers to the total   
number of decimal digits in the number. Its value must be a positive integer.   
</termdef>   
</p>   
</div4>   
<div4 id='scale'>   
<head>scale</head>   
<p>   
<termdef id='dt-scale' term='scale'> The <term>scale</term> facet, which only   
applies to the <specref ref='decimal'/> datatype, refers to the total   
number of decimal digits to the right of the decimal point.  Its value must be a   
positive number less than or equal to the precision.   
</termdef>   
</p>   
</div4>   
<div4 id='encoding'>   
<head>encoding</head>   
<p>   
<termdef id='dt-encoding' term='encoding'>   
</termdef>   
</p>   
<ednote>   
<edtext>   
need to fill out definition of this facet, which applies (currently) only to   
<specref ref='binary'/>   
</edtext>   
</ednote>   
</div4>   
<div4 id='period'>   
<head>period</head>   
<p>   
<termdef id='dt-period' term='period'>   
</termdef>
</p>   
<ednote>   
<edtext>   
need to fill out definition of this facet, which applies (currently) only to   
<specref ref='recurringInstant'/>   
</edtext>   
</ednote>   
</div4>   
</div3>   
</div2>   
<div2 id='datatype-dichotomies'>   
<head>Datatype dichotomies</head>   
<p>   
It is useful to categorize the datatypes defined in this specification   
along various dimensions, forming a set of characterization dichotomies.   
</p>   
<div3 id='atomic-vs-aggregate'>   
<head>Atomic vs. aggregate datatypes</head>   
<p>   
The first distinction to be made is that between <term>atomic</term>   
and <term>aggregate</term> datatypes.   
</p>   
<ulist>   
<item>   
<p>   
<termdef id='dt-atomic' term='atomic'><term>Atomic</term> datatypes   
are those having values which are intrinsically indivisible.</termdef>   
</p>   
</item>   
<item>   
<p>   
<termdef id='dt-aggregate' term='aggregate'><term>Aggregate</term>   
datatypes are those having values which can be decomposed into two   
or more component values.</termdef>   
</p>   
</item>   
</ulist>   
<p>   
For example, a date that is represented as a single character   
string could be the value of an atomic <emph>date</emph> datatype; while   
another date represented as separate "month", "day" and "year"   
elements would be the value of an aggregate <emph>date</emph> datatype.   
Not surprisingly, the distinction is analogous to that between   
an XML element whose content model is #PCDATA and one with element content.   
</p>   
<p>   
As discussed above, this specification focuses mainly on atomic datatypes.   
 Later versions will address aggregate datatypes in more detail. Note that   
the legacy XML attribute types <specref ref='IDREFS'/>, <specref ref='ENTITIES'/>   
and <specref ref='NMTOKENS'/> can be thought of as aggregate (list)   
types.   
</p>   
<p>   
A datatype which is atomic in this specification need not   
be an "atomic" datatype in any programming language used to implement   
this specification.   
</p>   
</div3>   
<div3 id='primitive-vs-generated'>   
<head>Primitive vs. generated datatypes</head>   
<ulist>   
<item>   
<p>   
<termdef id='dt-primitive' term='primitive'><term>Primitive</term>   
datatypes are those that are not defined in terms of other datatypes;   
they exist <emph>ab initio</emph>.</termdef>   
</p>   
</item>   
<item>   
<p>   
<termdef id='dt-generated' term='generated'><term>Generated</term>   
datatypes are those that are defined in terms of other datatypes.</termdef>   
</p>   
</item>   
</ulist>   
<p>   
For example, a <specref ref='real'/> is a well defined mathematical   
concept that cannot be defined in terms of other datatypes while   
a <specref ref='date'/> is a special case of the more general datatype
<specref ref='recurringInstant'/>.   
</p>   
<p>   
The datatypes defined by this specification fall into both   
the primitive and the generated categories.  It is felt that a judiciously   
chosen set of primitive datatypes will serve the widest possible audience   
by providing a set of convenient datatypes that can be used as is, as well   
as providing a rich enough base from which the variety of datatypes needed   
by schema designers can be generated.   
</p>   
<p>   
A datatype which is primitive in this specification need not   
be a "primitive" datatype in any programming language used to implement   
this specification.   
</p>   
<div4 id='basetype'>   
<head>Base type</head>   
<p>   
<termdef id='dt-basetype' term='base type'>Every generated   
datatype is defined in terms of an existing datatype, referred to as   
the <term>base type</term>.  Base types may be either primitive or generated.</termdef>   
</p>   
<p>   
<termdef id='dt-subtype' term='subtype'>In the example above, <specref ref='date'/>   
is referred to as a <term>subtype</term> of the base type <specref ref='recurringInstant'/>.   
The value space of a subtype is a subset of the value space of the base type.</termdef>   
</p>   
</div4>   
</div3>   
<div3 id='built-in-vs-user-generated'>   
<head>Built-in vs. user-generated datatypes</head>   
<ulist>   
<item>   
<p>   
<termdef id='dt-built-in' term='built-in'><term>Built-in</term>   
datatypes are those which are entirely defined in this specification,   
and may be either primitive or generated;</termdef>   
</p>   
</item>   
<item>   
<p>   
<termdef id='dt-user-generated' term='user-generated'><term>User-generated</term>   
datatypes are those generated datatypes that are defined by individual schema   
designers by giving values to constraining facets.</termdef>   
</p>   
</item>   
</ulist>   
<p>   
Conceptually there is no difference between the built-in generated   
datatypes included in this specification and the user-generated   
datatypes which will be created by individual schema designers.   
The built-in generated datatypes are those which are believed to   
be so common that if they were not defined in this specification   
many schema designers would end up "reinventing" them.  Furthermore,   
including these generated datatypes in this specification   
serves to demonstrate the mechanics and utility of the datatype   
generation facilities of this specification.   
</p>   
<p>   
A datatype which is built-in in this specification need not   
be a "built-in" datatype in any programming language used to implement   
this specification.   
</p>   
</div3>   
</div2>   
</div1>   
<div1 id='built-in-datatypes'>   
<head>Built-in datatypes</head>   
   
<div2 id='namespaces'>   
<head>Namespace considerations</head>   
<p>   
The built-in datatypes defined by this specification are designed so   
that systems other than the XML Schema Definition Language may access them.   
To facilitate such usage, the built-in datatypes in this specification come   
from the XML Datatype Language namespace, the specific namespace defined by   
this specification.  This applies to both built-in primitive and  built-in   
generated datatypes.   
</p>   
<ednote>   
<edtext>   
The exact URLs for the namespace(s) defined by this W3C specification   
are still an open issue. This issue has been raised with the XML Coordination   
Group (issue 1999-0201-07 Standardizing W3C namespace  URIs) for general   
coordination and resolution.  On August 11, Dan Connolly recommended   
we make up our own URL for datatypes.  See http://lists.w3.org/Archives/Member/w3c-xml-schema-ig/1999Aug/0060.html.    
</edtext>   
</ednote>   
<p>   
Each user-generated datatype is also associated with a unique namespace.   
However, user-generated datatypes do not come from the XML Datatype Language   
namespace; rather, they come from the namespace of the schema in which they   
are defined.  Note that associating a namespace with a user-generated datatype   
is not a general purpose extensibility mechanism and does not apply to primitive   
datatypes. Suppose a schema author wanted to introduce a new set of primitive   
datatypes, say a core set of mathematical datatypes not based on the Number   
datatype defined as a built-in primitive by this specification.  Such a schema   
author might try to define those datatypes, associate a unique namespace with   
them and expect schema processors to understand them. Unfortunately, such   
a scenario would not work.  Each such datatype would need specialized validation   
code and there are still many unresolved issues regarding standard mechanisms   
for sharing such code.   
</p>   
<p>   
As described in more detail in <specref ref='defining-generated-datatypes'/>,   
each user-generated datatype must be defined in terms of a base type included   
in this specification, by assigning facets which serve to constrain the value   
set of the user-generated datatype to a subset of the base type.  Such a mechanism   
works because all schema processors are required to be able to validate datatypes   
defined by subsetting the value space of a datatype included in this specification.   
</p>   
</div2>   
<div2 id='built-in-primitive-datatypes'>   
<head>Primitive datatypes</head>   
<p>   
The primitive datatypes are described below.  For each primitive   
datatype we discuss the fundamental facets, if any, and the constraining   
facets, if any.   
</p>   
<div3 id='NMTOKEN'>   
<head>NMTOKEN</head>   
<p>   
<termdef id='dt-NMTOKEN' term='NMTOKEN'>The <term>NMTOKEN</term> datatype represents the   
<xnt href='&xmlspec;#NT-TokenizedType'>NMTOKEN attribute type</xnt> from   
<bibref ref='XML'/>.   
The value space of <term>NMTOKEN</term> is the set of all tokens that match the   
<xnt href='&xmlspec;#NT-Nmtoken'>Nmtoken</xnt> production in <bibref ref='XML'/>.   
The lexical space of <term>NMTOKEN</term> is the set of all strings that match   
the <xnt href='&xmlspec;#NT-Nmtoken'>Nmtoken</xnt> production in <bibref ref='XML'/>   
</termdef>.   
</p>   
<p>   
<!--   
<term>NMTOKEN</term> has no fundamental or constraining facets.   
  -->   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.   
</p>   
<p>   
NMTOKEN has the following subtypes:   
</p>   
<ulist>   
<item><p><specref ref='Name'/></p></item>   
</ulist>   
</div3>   
<div3 id='NMTOKENS'>   
<head>NMTOKENS</head>   
<p>   
<termdef id='dt-NMTOKENS' term='NMTOKENS'>The <term>NMTOKENS</term> datatype represents the   
<xnt href='&xmlspec;#NT-TokenizedType'>NMTOKENS attribute type</xnt> from   
<bibref ref='XML'/>.  It consists of a whitespace-separated list of NMTOKENs.   
The value space of <term>NMTOKENS</term> is the set of all tokens that match the   
<xnt href='&xmlspec;#NT-Nmtokens'>Nmtokens</xnt> production in <bibref ref='XML'/>.   
The lexical space of <term>NMTOKENS</term> is the set of all strings that match   
the <xnt href='&xmlspec;#NT-Nmtokens'>Nmtokens</xnt> production in <bibref ref='XML'/>.</termdef>   
</p>   
<p>   
<term>NMTOKENS</term> has no fundamental or constraining facets.   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.   
</p>   
</div3>   
<div3 id='string'>   
<head>string</head>   
<p>   
<termdef id='dt-string' term='string'>The <term>string</term>   
datatype represents character strings in XML.  The value space   
of the string datatype is the set of finite sequences of UCS characters   
(<bibref ref='ISO10646'/> and <bibref ref='Unicode'/>).  A UCS   
character (or just character, for short) is an atomic unit of communication;   
it is not further specified except to note that every UCS character   
has a corresponding UCS code point, which is an integer.</termdef>   
</p>   
<ednote>   
<edtext>   
We need to harmonize this definition with the I18N character model.   
</edtext>   
</ednote>   
<div4 id='string-lexical-representation'>   
<head>Lexical Representation</head>   
<p>   
The <term>string</term> datatype has an optional constraining   
facet called <specref ref='lexical-representation'/>. The value of this   
facet is a <term>regular expression</term>.   
Regular expression constraints are discussed in Appendix <specref ref='regexs'/>.   
If this facet is not present, there is no restriction on the   
lexical representation.   
</p>   
</div4>   
<div4 id='string-length'>   
<head>Length</head>   
<p>   
The <term>string</term> datatype has an optional constraining   
facet called <specref ref='length'/>.  If length is specified we have   
a fixed length character string, where <term>length</term> is   
measured in the number of characters in the string.   
If length is not specified we have a variable length character string.   
The value of the length facet must be a   
positive integer.   
</p>   
</div4>   
<div4 id='string-maxlength'>   
<head>Maximum Length</head>   
<p>   
The <term>string</term> datatype has an optional constraining   
facet called <specref ref='maxlength'/>.  If   
maxlength is specified for a variable length string it represents an   
upper bound of the length of the string. The value of the maxlength facet must be a   
positive integer.   
Both <specref ref='length'/>   
and <specref ref='maxlength'/> cannot be specified   
for the same datatype.  The absolute maximum length of variable length   
character strings depends on the XML parser implementation.   
</p>   
</div4>   
<div4 id='string-maxmin'>   
<head>Maximum and Minimum Values</head>   
<p>   
The <term>string</term> datatype also has the following constraining facets:   
</p>   
<ulist>   
<item><p>maxInclusive</p></item>   
<item><p>maxExclusive</p></item>   
<item><p>minInclusive</p></item>   
<item><p>minExclusive</p></item>   
</ulist>   
<p>   
Clearly, the effect of these constraining facets depends on   
the collating sequence used to define the <specref ref='order'/>   
property for strings.   
</p>   
<ednote>   
<edtext>   
The issue of collating sequences for strings is complex.  It will be discussed   
in detail in a subsequent version of this specification.   
</edtext>   
</ednote>   
</div4>   
</div3>   
<div3 id='boolean'>   
<head>boolean</head>   
<p>   
<termdef id='dt-boolean' term='boolean'>The  <term>boolean</term> datatype   
has the value space required to support the mathematical concept of binary-valued   
logic: {true, false}.</termdef>   
</p>   
<div4 id='boolean-lexical-representation'>   
<head>Lexical Representation</head>   
<p>   
An instance of a datatype that is defined as <emph>boolean</emph>   
can have the following legal lexical values {true, false}.    
The lexical representation is fixed and cannot be changed.  The   
lexical representation facet is not supported.   
</p>   
</div4>   
</div3>   
<div3 id='real'>   
<head>real</head>   
<p>
<termdef id="dt-real" term="real">The <term>real</term> datatype
represents the standard mathematical concept of the real numbers.</termdef>
</p>
<p>
<term>real</term> has the following constraining facets:
</p>
<ulist>
<item><p><specref ref="minAbsoluteValue"/></p></item>
<item><p><specref ref="maxAbsoluteValue"/></p></item>
<item><p><specref ref="maxInclusive"/></p></item>
<item><p><specref ref="maxExclusive"/></p></item>
<item><p><specref ref="minInclusive"/></p></item>
<item><p><specref ref="minExclusive"/></p></item>
</ulist>
<p>
<term>real</term> has the following subtype:
</p>
<ulist>
<item><p><specref ref="decimal"/></p></item>
</ulist>
<div4 id="real-lexical-representation">
<head>Lexical representation</head>
<p>
<term>real</term> values have a single standard lexical representation consisting of
a mantissa followed, optionally, by the character "E" followed by an
exponent.  The exponent must be an integer.  The
mantissa must be a decimal number. The
representations for exponent and mantissa must follow the default
lexical rules for integer and decimal numbers discussed above.
If the "E" and the following exponent are omitted, an exponent
value of 0 is assumed.     
For example: -1E4, 1267.43233E12, 12.78E-2, 12.
</p>
</div4>
</div3>
<div3 id="timeInstant">
<head>timeInstant</head>
<p>
<termdef id="dt-timeInstant" term="timeInstant">The <term>timeInstant</term>
datatype represents a combination of date and time values representing a single
instant of time, encoded as a single string.</termdef>
A single lexical representation, which is a subset of the lexical
representations allowed by <bibref ref="ISO8601"/>, is allowed for <term>timeInstant</term>.
</p>
<issue id="non-gregorian-dates">
<p>
As an internationalization
issue, do we want support for non-gregorian dates?  This issue also
applies to <specref ref="timeDuration"/>, <specref ref="date"/> and <specref
ref="time"/>.
</p>
</issue>
<div4 id="timeInstant-lexical-repr">
<head>Lexical Representation</head>
<p>
The lexical representation for <term>timeInstant</term> is the <bibref ref="ISO8601"/> 
representation CCYYMMDDThhmmss.sss where "CC" represents the century, "YY"
the year, "MM" the month and "DD" the day.  The letter "T" is the
date/time separator and "hh", "mm", "ss.sss" represent hour, minute
and second respectively.  Note that this representation allows for fractional
seconds.
</p>
<ednote>
<edtext>
We need a more complete description of the lexical space, which,
for instance, makes it clear that seconds can be represented to
any precision desired, not just thousandths of a second.  This note
also applies to the lexical representations of <specref ref="timeDuration"/>,
<specref ref="recurringInstant"/> and <specref ref="time"/>.
</edtext>
</ednote>
<p>
This representation can be immediately followed by a "Z" to indicate
Coordinated Universal Time.  To indicate the time zone, i.e. the difference 
between the local time and Coordinated Universal Time, the difference
immediately follows the time and consists of a sign, + or -, followed
by hhmm.</p>
<p>
For example, to indicate 1:20 pm on May the 31st, 1999 for Eastern Standard Time
which is 5 hours behind Coordinated Universal Time, one
would write: 19990531T132000-0500.
</p>
</div4>
<!--
<div4 id="timeInstant-truncated-repr">
<head>Truncated Representations</head>
<p>Left truncated forms of the above representation are used as the lexical
representation for the <specref ref="recurringInstant"/> datatype and
its subtype <specref ref="time"/>.
Right truncated forms of this representation are used as the lexical
representation for the datatype <specref ref="date"/>.</p>  
</div4>
  -->
</div3>
<div3 id="timeDuration">
<head>timeDuration</head>
<p>
<termdef id="dt-timeDuration" term="timeDuration">The <term>timeDuration</term>
datatype represents a combination of year, month, day and time values
representing a single duration of time, encoded as a single string.</termdef>
A single lexical representation, which is a subset of the lexical
representations allowed by <bibref ref="ISO8601"/>, is allowed for <term>timeDuration</term>.
</p>
<div4 id="timeDuration-lexical-repr">
<head>Lexical Representation</head>
<p>
The lexical representation for <term>timeDuration</term> is the <bibref ref="ISO8601"/> 
representation CCYYMMDDThhmmss.sss, preceded by an optional sign (+ or -),
where "CC" represents the number of centuries, "YY" the number of years,
"MM" the number of months and "DD" the number of days.  The letter "T" is the
date/time separator and "hh", "mm", "ss.sss" represent number of hours, minutes
and seconds respectively.  Note that this representation allows for fractional
seconds.
</p>
<p>
For example, to indicate a duration of 1 year, 2 months, 3 days, 10
hours, and 30 minutes, one would write: 00010203T103000. 
</p>
<p>
Time periods, i.e. specific durations of time, can be represented by supplying
two items of information: a start instant and a duration or a start instant and
an end instant or an end instant and a duration.
</p>
</div4>
</div3>
<div3 id="recurringInstant">
<head>recurringInstant</head>
<p>
<termdef id="dt-recurringInstant" term="recurringInstant"> The <term>recurringInstant</term>
datatype represents an instant of time that recurs with a specific
<specref ref="timeDuration"/></termdef>.  Note that we do not attempt to support 
general recurring instants of time, just those that are needed to support
the generated <specref ref="date"/> and <specref ref="time"/> datatypes and those
that arise from truncated and reduced lexical representations of
<specref ref="timeInstant"/>.
</p>
<p>
<term>recurringInstant</term> has a single constraining facet.
</p>
<ulist>
<item><p><specref ref="period"/></p></item>
</ulist>
<p>
which can be used to constrain the frequency of recurrence.  Values of
the period facet must be of type <specref ref="timeDuration"/>.
</p>
<p>
<term>recurringInstant</term> has the following subtypes:
</p>
<ulist>
<item><p><specref ref="date"/></p></item>
<item><p><specref ref="time"/></p></item>
</ulist>
<div4 id="recurringInstant-lexical-repr">
<head>Lexical Representation</head>
<p>
The lexical representation for <term>recurringInstant</term> is the left truncated
<bibref ref="ISO8601"/> representation for <specref ref="timeInstant"/>.
For example, if the century "CC" is omitted from the timeInstant
representation it means a timeInstant that recurs every hundred years.
<!--
Another way of interpreting such a representation is to assume it refers
to a single instant in the current century.
  -->
Similarly, if "CCYY" is omitted it designates a time instant that recurs
every year.
<!--
Alternatively, it designates a time instant for the current year.
  -->
</p>
<p>
Every two character "unit" of the representation that is omitted is indicated by
a single hyphen "-".  For example, to indicate 1:20 pm on May the 31st
every year, one would write: --0531T132000-0500.
</p> 
</div4>
</div3>
<!--
<div3 id="recurringDuration">
<head>recurringDuration</head>
<p>
<termdef id="dt-recurringDuration" term="recurringDuration"> The
<term>recurringDuration</term> datatype represents an duration of time
that recurs with at a specific <specref ref="timeDuration"/>.  Note that we
do not attempt to support general recurring durations of time, just those
that we need to support the <specref ref="date"/> and <specref ref="time"/>
datatypes and those that arise from truncated lexical representations of
<specref ref="timeDuration"/>.
</p>
<div4 id="recurringDuration-lexical-repr">
<head>Lexical Representation</head>
<p>
The lexical representation for recurringDuration is the left truncated <bibref ref="ISO8601"/> 
representation for <specref ref="timeDuration"/>.
For example, if the century "CC" is omitted from the timeInstant
representation it means a timeDuration that recurs every hundred years.
Another way of interpreting such a representation is to assume it refers
to a single duration in the current century.  Similarly, if "CCYY" is omitted it designates a
time duration that recurs every year.  Alternatively, it designates 
a time instant for the current year.  
</p>
</div4>
</div3>
  --> 
<div3 id='binary'>   
<head>binary</head>   
<p>   
<termdef id='dt-term' term='binary'>The <term>binary</term>   
datatype represents strings (blobs) of binary data.</termdef>  It   
has three fundamental facets.  The optional <specref ref='length'/>   
facet specifies the length of the data in bits.  This defines a   
datatype with a fixed length. If the length is not specified, a   
datatype with variable length is specified.  In this case, the optional <specref ref='maxlength'/>   
facet specifies the maximum length of the data in bits.  If the maximum length is not   
specified the default is unlimited length.  The optional "encoding"   
facet specifies the encoding which may be "hex" for hexadecimal digits   
or "base64" for MIME style Base64 data.   
</p>   
<issue id='application-specific-binary-formats'>   
<p>   
Should we add a facet to allow a binary datatype to be restricted    
to an application-specific format such as video, audio, image?   
</p>   
</issue>   
<issue id='binary-mime-type'>   
<p>   
Should we add facet(s) for mime-type/subtype?   
</p>   
</issue>   
<issue id='binary-value-space'>   
<p>   
Is this really a datatype?  What is the value space of this datatype: the   
set of encoded strings or the set of binary streams after decoding?   
</p>   
</issue>   
</div3>   
<div3 id='uri'>   
<head>uri</head>   
<p>   
<termdef id='dt-uri' term='uri'>The <term>uri</term> datatype   
represents a Universal Resource Identifier (URI) Reference as defined   
in <bibref ref='RFC2396'/></termdef>. It has no fundamental or   
constraining facets.   
</p>   
<issue id='uri-scheme-facet'>   
<p>   
Should we have a facet to allow a limitation to a specific scheme?  It   
might be useful to be able to say that something was not only a URI, but that   
it was a "mailto" and not a "http://...".   
</p>   
</issue>   
</div3>   
<div3 id='language'>   
<head>language</head>   
<p>   
<termdef id='dt-language' term='language'>The <term>language</term> datatype   
represents natural language identifiers as defined by <bibref ref='RFC1766'/>.   
The value space of <term>language</term> is the set of all tokens that match the   
<xnt href='&xmlspec;#NT-LanguageID'>LanguageID</xnt> production in <bibref ref='XML'/>.   
The lexical space of <term>language</term> is the set of all strings that match   
the <xnt href='&xmlspec;#NT-LanguageID'>LanguageID</xnt> production in   
<bibref ref='XML'/>.</termdef>   
</p>   
</div3>   
<!--   
     remove uuid datatype for now...   
	 we might reintroduce it later if we get EVERYTHING else done and   
	 there is still time   
   
<div3 id="uuid">   
<head>uuid</head>   
<p>   
<termdef id="dt-uuid" term="uuid">The <term>uuid</term> datatype   
represents Universally Unique IDentifiers as defined in <bibref ref="uuids"/>.   
The value space of the uuid datatype is the set of 128-bit sequences, groupped   
into 16 octects, where some bits of octet 8 (called the variant field)   
determine finer structure (see <bibref ref="uuids"/> for a complete description).   
The lexical space of the uuid datatype is the set of all 36 character   
strings where the 9th, 14th, 19th and 24th characters are "-" and all other   
characters are hexidecimal digits.</termdef>   
</p>   
<p>   
The <term>uuid</term> datatype has no fundamental or constraining facets.   
</p>   
</div3>   
  -->   
</div2>   
<div2 id='built-in-generated'>   
<head>Generated datatypes</head>   
<p>   
This section gives conceptual definitions for all built-in generated   
datatypes defined by this specification, including a description of   
the facets which apply to each datatype.  The <termref def='key-abstractSyntax'/>   
used to define generated datatypes (whether built-in or user-generated) is   
given in section <specref ref='defining-generated-datatypes'/> and the complete   
definitions of the built-in generated datatypes (written in the   
<termref def='key-concreteSyntax'/> based on that abstract syntax    
given in Appendix <specref ref='schema'/>) are provided in Appendix   
<specref ref='schema'/>.   
</p>   
<div3 id='Name'>   
<head>Name</head>   
<p>   
<termdef id='dt-Name' term='Name'>The <term>Name</term>   
datatype represents XML Names.  The value space of this datatype is   
the set of all tokens which match the <xspecref href='&xmlspec;#NT-Name'>   
Name</xspecref> production of <bibref ref='XML'/>.  The lexical space of   
this datatype is the set of all strings which match the   
<xnt href='&xmlspec;#NT-Name'>Name</xnt> production of   
<bibref ref='XML'/>.    The basetype of <term>Name</term>   
is <specref ref='NMTOKEN'/>.</termdef>   
</p>   
<p>   
Name has the following subtypes:   
</p>   
<ulist>   
<item><p><specref ref='NCName'/></p></item>   
<item><p><specref ref='ID'/></p></item>   
<item><p><specref ref='ENTITY'/></p></item>   
<item><p><specref ref='NOTATION'/></p></item>   
</ulist>   
</div3>   
<div3 id='NCName'>   
<head>NCName</head>   
<p>   
<termdef id='dt-NCName' term='NCName'>The <term>NCName</term>   
datatype represents XML "non-colonized" Names.  The value space of this datatype is   
the set of all tokens which match the <xspecref href='&xmlnsspec;#NT-NCName'>   
NCName</xspecref> production of <bibref ref='XMLNS'/>.  The lexical space of   
this datatype is the set of all strings which match the   
<xnt href='&xmlnsspec;#NT-NCName'>NCName</xnt> production of   
<bibref ref='XMLNS'/>.  The basetype of <term>NCName</term>   
is <specref ref='Name'/>. </termdef>   
</p>   
</div3>   
<div3 id='ID'>   
<head>ID</head>   
<p>   
<termdef id='dt-ID' term='ID'>The <term>ID</term> datatype represents the   
<xnt href='&xmlspec;#NT-TokenizedType'>ID attribute type</xnt> from   
<bibref ref='XML'/>.   
The value space of <term>ID</term> is the set of all tokens that match the   
<xnt href='&xmlspec;#NT-Name'>Name</xnt> production in <bibref ref='XML'/>   
and have been used in an XML document.  The lexical space of <term>ID</term>   
is the set of all strings that match the <xnt href='&xmlspec;#NT-Name'>   
Name</xnt> production in <bibref ref='XML'/>.  The basetype of <term>ID</term>   
is <specref ref='Name'/>.</termdef>   
</p>   
<p>   
<term>ID</term> has no fundamental or constraining facets.   
   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.    
</p>   
<constraintnote type='svc' id='id'>   
<head>ID Unique</head>
<p>   
An <term>ID</term> must not appear   
more than once in an XML document as a value of this type; i.e.,   
ID values must uniquely identify the elements which bear them.   
</p>   
</constraintnote>   
<p>   
ID has the following subtypes:   
</p>   
<ulist>   
<item><p><specref ref='IDREF'/></p></item>   
</ulist>   
<issue id='better-reference-mechanisms'>   
<p>   
There are several situations in which we need better reference mechanisms than those   
provided by ID and IDREF/IDREFS.  For example, it would be desirable to   
extend IDs and IDREFs to be typed and scoped to better represent primary key/foreign   
key relationships in a database. XSL has recently introduced the concept of   
xsl:key and xsl:keyref whereby a single property of an element can be used   
as a key.  We need such a mechanism for XML as a whole and it would be nice   
if this were extended to support multi-part keys.   
</p></issue>   
</div3>   
<div3 id='IDREF'>   
<head>IDREF</head>   
<p>   
<termdef id='dt-IDREF' term='IDREF'>The <term>IDREF</term> datatype represents the   
<xnt href='&xmlspec;#NT-TokenizedType'>IDREF attribute type</xnt> from   
<bibref ref='XML'/>.   
The value space of <term>IDREF</term> is the set of all tokens that match the   
<xnt href='&xmlspec;#NT-Name'>Name</xnt> production in <bibref ref='XML'/>   
and have been used in an XML document as the value of an element or attribute   
of type <term>ID</term>.  The lexical space of <term>IDREF</term>   
is the set of all strings that match the <xnt href='&xmlspec;#NT-Name'>   
Name</xnt> production in <bibref ref='XML'/>.  The basetype of <term>IDREF</term>   
is <specref ref='ID'/>.</termdef>   
</p>   
<p>   
<term>IDREF</term> has no fundamental or constraining facets.   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.   
</p>   
</div3>   
<div3 id='IDREFS'>   
<head>IDREFS</head>   
<p>   
<termdef id='dt-IDREFS' term='IDREFS'>The <term>IDREFS</term> datatype represents the   
<xnt href='&xmlspec;#NT-TokenizedType'>IDREFS attribute type</xnt> from   
<bibref ref='XML'/>.  It consists of a null-separated list of IDREFs.   
The value space of <term>IDREFS</term> is the set of all tokens that match the   
<xnt href='&xmlspec;#NT-Names'>Names</xnt> production in <bibref ref='XML'/>   
and have been used in an XML document as the value of an element or attribute   
of type <term>ID</term>.  The lexical space of <term>IDREFS</term>   
is the set of all strings that match the <xnt href='&xmlspec;#NT-Names'>   
Names</xnt> production in <bibref ref='XML'/>.     
</termdef></p>   
<p>   
<term>IDREFS</term> has no fundamental or constraining facets.   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.   
</p>   
</div3>   
<div3 id='ENTITY'>   
<head>ENTITY</head>   
<p>   
<termdef id='dt-ENTITY' term='ENTITY'>The <term>ENTITY</term> datatype represents the   
<xnt href='&xmlspec;#NT-TokenizedType'>ENTITY attribute type</xnt> from   
<bibref ref='XML'/>.   
The value space of <term>ENTITY</term> is the set of all tokens that match the   
<xnt href='&xmlspec;#NT-Name'>Name</xnt> production in <bibref ref='XML'/>   
and have been declared as an <xspecref href='&xsdl;#declare-entity'>Unparsed   
Entity</xspecref> in a schema.  The lexical space of <term>ENTITY</term>   
is the set of all strings that match the <xnt href='&xmlspec;#NT-Name'>   
Name</xnt> production in <bibref ref='XML'/>.  The basetype of <term>ENTITY</term>   
is <specref ref='Name'/>.</termdef>   
</p>   
<p>   
<term>ENTITY</term> has no fundamental or constraining facets.   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.   
</p>   
</div3>   
<div3 id='ENTITIES'>   
<head>ENTITIES</head>   
<p>   
<termdef id='dt-ENTITIES' term='ENTITIES'>The <term>ENTITIES</term> datatype   
represents the <xnt href='&xmlspec;#NT-TokenizedType'>ENTITIES attribute type</xnt> from   
<bibref ref='XML'/>. It consists of a null-separated list of ENTITYs.   
The value space of <term>ENTITIES</term> is the set of all   
tokens that match the <xnt href='&xmlspec;#NT-Name'>Name</xnt> production in   
<bibref ref='XML'/> and have been declared as an   
<xspecref href='&xsdl;#declare-entity'>Unparsed Entity</xspecref> in a schema.   
The lexical space of <term>ENTITIES</term> is the set of all strings that match   
the <xnt href='&xmlspec;#NT-Name'> Name</xnt> production in <bibref ref='XML'/>.</termdef>   
</p>   
<p>   
<term>ENTITIES</term> has no fundamental or constraining facets.   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.   
</p>   
</div3>   
   
<div3 id='NOTATION'>   
<head>NOTATION</head>   
<p>   
<termdef id='dt-NOTATION' term='NOTATION'>The <term>NOTATION</term> datatype represents the   
<xnt href='&xmlspec;#NT-NotationType'>NOTATION attribute type</xnt> from   
<bibref ref='XML'/>.   
The value space of <term>NOTATION</term> is the set of all   
<xspecref href='&xsdl;#declare-notation'>notations declared</xspecref> in   
a schema.   
The lexical space of <term>NOTATION</term>   
is the set of all strings that match the <xnt href='&xmlspec;#NT-Name'>   
Name</xnt> production in <bibref ref='XML'/>.  The basetype of <term>NOTATION</term>   
is <specref ref='Name'/>.</termdef>   
</p>   
<p>   
For compatibility (see <specref ref='terminology'/>) this datatype should be used only on attributes.   
</p>   
<div4 id='NOTATION-enumeration'>   
<head>enumeration</head>   
<p>   
This required facet is used to specify the list of notations.   
</p>   
<ednote>   
<name>PVB</name>   
<date>19990601</date>   
<edtext>   
this definition is NOT correct   
</edtext>   
</ednote>   
</div4>   
</div3>   
<div3 id='decimal'>   
<head>decimal</head>   
<p>   
<termdef id='dt-decimal' term='decimal'>The <term>decimal</term>   
datatype restricts allowable values to real numbers with an exact fractional   
part.</termdef>  The basetype of decimal is <specref ref='real'/>.   
</p>   
<p>   
Decimal has the following required fundamental facets:   
</p>   
<ulist>   
<item><p>precision: the total number of decimal digits in the number.</p></item>   
<item><p>scale: the number of decimal digits to the right of the decimal point.  Must be   
	less than or equal to precision.</p></item>   
</ulist>   
<p>   
Decimal has the following constraining facets:   
</p>   
<ulist>   
<item><p>maxInclusive</p></item>   
<item><p>maxExclusive</p></item>   
<item><p>minInclusive</p></item>   
<item><p>minExclusive</p></item>   
</ulist>   
<p>   
decimal has the following subtypes:   
</p>   
<ulist>   
<item><p><specref ref='integer'/></p></item>   
</ulist>   
<div4 id='decimal-lexical-representation'>   
<head>Lexical representation</head>   
<p>   
Decimal values have a single standard lexical representation.   
This consists of a string of digits separated by a period   
as a decimal indicator, in accordance with the scale and precision facets,   
 with an optional leading sign to indicate a negative number. If the sign is   
omitted, "+" is assumed.  Leading and trailing zeroes are optional.    
 For example: -1.23,   
12678967.543233, 100000.00.    
</p>   
</div4>   
</div3>   
<div3 id='integer'>   
<head>integer</head>   
<p>   
<termdef id='dt-integer' term='integer'>The <term>integer</term>   
datatype is the standard mathematical concept of the integer numbers.   
The basetype of integer is <specref ref='decimal'/>.   
The value space of the integer datatype is the infinite set   
{-&infin;,...,-2,-1,0,1,2,...,&infin;}</termdef> although computer implementations   
restrict this to a finite set.   
</p>   
<p>   
Integer has the following constraining facets:   
</p>   
<ulist>   
<item><p>maxInclusive</p></item>   
<item><p>maxExclusive</p></item>   
<item><p>minInclusive</p></item>   
<item><p>minExclusive</p></item>   
</ulist>   
<p>   
integer has the following subtypes:   
</p>   
<ulist>   
<item><p><specref ref='non-negative-integer'/></p></item>   
<item><p><specref ref='non-positive-integer'/></p></item>   
</ulist>   
<div4 id='integer-lexical-representation'>   
<head>Lexical representation</head>   
<p>   
Integer values have a single, standard lexical representation.   
This consists of a string of digits with an optional leading sign.   
If the sign is omitted, "+" is assumed.  For example: -1, 0,   
12678967543233, +100000.   
</p>   
<!-- <p>   
This facet must be specified if other lexical representations are desired   
such as the European format that allows periods after every three   
digits or surrounding parentheses to indicate a negative integer.   
</p> -->   
</div4>   
</div3>   
<div3 id='non-negative-integer'>   
<head>non-negative-integer</head>   
<p>   
<termdef id='dt-non-negative-integer' term='non-negative-integer'>The   
<term>non-negative-integer</term>   
datatype is the standard mathematical concept of the non-negative integers.   
The value space of the non-negative-integer datatype is the infinite set   
{0,1,2,...,&infin;}</termdef> although computer implementations   
restrict this to a finite set. The basetype of non-negative-integer is <specref ref='integer'/>.   
</p>   
<p>   
non-negative-integer has the following constraining facets:   
</p>   
<ulist>   
<item><p>maxInclusive</p></item>   
<item><p>maxExclusive</p></item>   
<item><p>minInclusive</p></item>   
<item><p>minExclusive</p></item>   
</ulist>   
<p>   
non-negative-integer has the following subtypes:   
</p>   
<ulist>   
<item><p><specref ref='positive-integer'/></p></item>   
</ulist>   
<div4 id='non-negative-integer-lexical-representation'>   
<head>Lexical representation</head>   
<p>   
Non-negative-integer values have a single, standard lexical representation.   
This consists of a string of digits with an optional leading "+" sign.   
If the sign is omitted, "+" is assumed.  For example: 1, 0,   
12678967543233, +100000.    
</p>   
</div4>   
</div3>   
<div3 id='positive-integer'>   
<head>positive-integer</head>   
<p>   
<termdef id='dt-positive-integer' term='positive-integer'>The   
<term>positive-integer</term>   
datatype is the standard mathematical concept of the positive integers.   
The value space of the positive-integer datatype is the infinite set   
{1,2,...,&infin;}</termdef> although computer implementations   
restrict this to a finite set. The basetype of positive-integer is <specref ref='non-negative-integer'/>.   
</p>   
<p>   
positive-integer has the following constraining facets:   
</p>   
<ulist>   
<item><p>maxInclusive</p></item>   
<item><p>maxExclusive</p></item>   
<item><p>minInclusive</p></item>   
<item><p>minExclusive</p></item>   
</ulist>   
<div4 id='positive-integer-lexical-representation'>   
<head>Lexical representation</head>   
<p>   
positive-integer values have a single, standard lexical representation.   
This consists of a string of digits with an optional leading "+" sign.   
For example: 1, 12678967543233, +100000.    
</p>   
</div4>   
</div3>   
<div3 id='non-positive-integer'>   
<head>non-positive-integer</head>   
<p>   
<termdef id='dt-non-positive-integer' term='non-positive-integer'>The   
<term>non-positive-integer</term>   
datatype is the standard mathematical concept of the non-positive integers.   
The value space of the non-positive-integer datatype is the infinite set   
{-&infin;,...,-2,-1,0}</termdef> although computer implementations   
restrict this to a finite set. The basetype of non-positive-integer is <specref ref='integer'/>.   
</p>   
<p>   
non-positive-integer has the following constraining facets:   
</p>   
<ulist>   
<item><p>maxInclusive</p></item>   
<item><p>maxExclusive</p></item>   
<item><p>minInclusive</p></item>   
<item><p>minExclusive</p></item>   
</ulist>   
<p>   
non-positive-integer has the following subtypes:   
</p>   
<ulist>   
<item><p><specref ref='negative-integer'/></p></item>   
</ulist>   
<div4 id='non-positive-integer-lexical-representation'>   
<head>Lexical representation</head>   
<p>   
Non-positive-integer values have a single, standard lexical representation.   
This consists of a string of digits with a leading "-" sign.   
For example: -1, 0, -12678967543233, -100000.    
</p>   
</div4>   
</div3>   
<div3 id='negative-integer'>   
<head>negative-integer</head>   
<p>   
<termdef id='dt-negative-integer' term='negative-integer'>The   
<term>negative-integer</term>   
datatype is the standard mathematical concept of the negative integers.   
The value space of the negative-integer datatype is the infinite set   
{-&infin;,...,-2,-1}</termdef> although computer implementations   
restrict this to a finite set. The basetype of negative-integer   
is <specref ref='non-positive-integer'/>.   
</p>   
<p>   
negative-integer has the following constraining facets:   
</p>   
<ulist>   
<item><p>maxInclusive</p></item>   
<item><p>maxExclusive</p></item>   
<item><p>minInclusive</p></item>   
<item><p>minExclusive</p></item>   
</ulist>   
<div4 id='negative-integer-lexical-representation'>   
<head>Lexical representation</head>   
<p>   
negative-integer values have a single, standard lexical representation.   
This consists of a string of digits with a leading "-" sign.   
For example: -1, -12678967543233, -100000.    
</p>   
</div4>   
</div3>
<div3 id="date">
<head>date</head>
<p>
<termdef id="dt-date" term="date">The <term>date</term> datatype
represents a <specref ref="timeDuration"/> that starts at midnight
of a specified day and lasts for 24 hours.</termdef> The basetype of <term>date</term> is
<specref ref="recurringInstant"/>. <term>date</term> is generated from
<specref ref="recurringInstant"/> by setting the value of the
<term>period</term> facet equal to 24 hours.
</p>
<div4 id="date-lexical-repr">
<head>Lexical Representation</head>
<p>
The lexical representation for <term>date</term> is the reduced (right truncated)
lexical representation for <specref ref="recurringInstant"/>: CCYYMMDD.
For example, to indicate May the 31st, 1999, one would write: 19990531.
</p>
<p>
Left truncated
representations can be used to represent recurring dates.  If the CC is
omitted it signifies a date that occurs every century.  If the YY is
omitted it signifies a date every year and so on. Every two character "unit" of the
representation that is omitted is indicated by a single hyphen "-".
For example, ---05 signifies the fifth day of every month.
</p>
</div4>
</div3>
<div3 id="time">
<head>time</head>
<p>
<termdef id="dt-time" term="time">The <term>time</term> datatype
represents a recurring instant of time that recurs every day.
The basetype of time is <specref ref="recurringInstant"/></termdef>.
The <term>time</term> datatype can be considered to be a shorthand to designate a
specific truncated representation for <specref ref="recurringInstant"/>.
<term>time</term> is generated from <specref ref="recurringInstant"/> by
setting the value of the <emph>period</emph> facet equal to 24 hours.
</p>
<div4 id="time-lexical-repr">
<head>Lexical Representation</head>
<p>
The lexical representation for <term>time</term> is the left truncated lexical
representation for <specref ref="timeInstant"/>: hhmmss.sss.
For example, to indicate 1:20 pm for Eastern Daylight Time
which is 5 hours behind Coordinated Universal Time, one
would write: 132000-0500.
</p>
</div4>
</div3>
</div2>   
</div1>   
<div1 id='defining-generated-datatypes'>   
<head>Defining Generated Datatypes</head>   
<p>   
A generated datatype can be defined from a primitive datatype (or another   
generated datatype)   
by adding optional constraining facets.  For example, it may be useful   
to define a datatype called <emph>i4</emph> (signed 4-byte integer) from the built-in   
datatype <emph>integer</emph> by supplying <emph>maxInclusive</emph>   
and <emph>minInclusive</emph> facets.  In this case, <emph>i4</emph>   
is the name of the new user-generated datatype, <emph>integer</emph> is its   
base type and <emph>maxInclusive</emph> and <emph>minInclusive</emph> are the   
constraining facets.   
</p>   
<note role='example'>   
<eg><![CDATA[<datatype name="i4">   
   <basetype name="integer"/>   
   <minInclusive>   
      -2147483648   
   </minInclusive>   
   <maxInclusive>   
      2147483647   
   </maxInclusive>   
</datatype>]]></eg>   
</note>   
<p>   
This section defines the abstract syntax used for defining generated datatypes.     
This abstract syntax is used for defining both <specref ref='built-in-generated'/>   
and user-generated datatypes; the only difference between the built-in and user-generated   
datatypes being that the datatype definitions for built-in generated datatypes are   
included in the <specref ref='schema'/> while the datatype definitions for   
user-generated datatypes appear in schemas written by users.   
</p>   
<p>   
<termdef id='key-abstractSyntax' term='abstract syntax'> An <term>abstract   
syntax</term> provides a formal specification of the information   
provided for each generated datatype definition. </termdef>   
The abstract syntax is presented using a simplified BNF. Defined terms are to   
the left. Their components are to the right, with a small amount of   
meta-syntax: ()s for grouping, | to separate alternatives, ? for optionality, *   
and + for iteration.   
</p>   
<p>   
<termdef id='key-concreteSyntax' term='concrete syntax'>The <term>concrete   
syntax</term> for generated datatype definitions is the exact   
element and attribute names used in definitions.</termdef>   
The concrete syntax is a key feature of its proposed design.   
The concrete syntax is the form in which the schema   
language is used by datatype designers. Though its elements   
and attributes are often different from the terms of the abstract   
syntax BNF, the features and expressive power of the two are congruent.   
</p>   
<p>   
We include a preliminary concrete syntax in this draft, via examples, as well as   
in <specref ref='schema'/> (defined using the schema language of   
<bibref ref='structural-schemas'/>) and <specref ref='dtd-for-datatypeDefs'/>.   
The emphasis in this version has been to stay quite close to the abstract syntax.   
</p>   
<ednote>   
<edtext>   
The abstract syntax proposed here (and hence, the concrete syntax) are   
preliminary, as they allow datatype definitions which are logically inconsistent   
(e.g., they allow numeric facets on non-numeric datatypes). This will be corrected   
in future drafts, as the XML Schema language comes to allow the specification   
of tighter constraints.   
</edtext>   
</ednote>   
<ednote>   
<edtext>   
This section needs  more explanatory text describing the productions and   
their relationship to the conceptual framework described in sections   
<specref ref='typesystem'/> and <specref ref='built-in-datatypes'/>.   
</edtext>   
</ednote>   
<scrap>   
<head>Datatype definitions</head>   
<prod id='nt-datatypedef'>   
<lhs>datatypeDefn</lhs>   
<rhs>   
	<xnt href='&xmlnsspec;#NT-NCName'>NCName</xnt>   
	<nt def='nt-basetype'>basetype</nt>   
	<nt def='nt-facets'>facets</nt>   
</rhs>   
<constraint def='uniquename'/>   
</prod>   
<prod id='nt-basetype'>   
<lhs>basetype</lhs>   
<rhs>   
	<nt def='nt-datatypename'>datatypename</nt>   
</rhs>   
</prod>   
<prod id='nt-facets'>   
<lhs>facets</lhs>   
<rhs>   
	<nt def='nt-ordered'>ordered</nt>?    
	<nt def='nt-unordered'>unordered</nt>?
</rhs>   
<constraint def='c-basetype'/>   
</prod>   
</scrap>   
<p>   
The following is the definition for a possible built-in   
generated datatype "currency".  This datatype definition   
would appear in the schema which defines datatypes for XML Schemas   
and shows that a generated datatype can have the same   
value space as its basetype, which might mean that   
it is just an "alias" or "renaming" of basetype.  In this case,   
the specification would probably also define some   
"semantics" for currency which went beyond those of decimal.   
</p>   
<note role='example'>   
<eg><![CDATA[<datatype name="currency">   
   <basetype name="decimal"/>   
</datatype>]]></eg>   
</note>   
<constraintnote id='uniquename' type='cos'>   
<head>Unique datatype definitions</head>   
<p>   
The name of the datatype being defined must be unique among the datatypes   
defined in the containing schema.   
</p>   
</constraintnote>   
<constraintnote id='c-basetype' type='cos'>   
<head>Appropriate facets</head>   
<p>   
If the value space of the basetype is ordered, then only ordered   
facets may appear in a datatype definition.   
</p>   
</constraintnote>   
<scrap>   
<head>Datatype names</head>   
<prod id='nt-datatypename'>   
<lhs>datatypename</lhs>   
<rhs>   
	<nt def='nt-builtinname'>builtinname</nt> |   
	<nt def='nt-usergenname'>usergenname</nt>   
</rhs>   
</prod>   
<prod id='nt-builtinname'>   
<lhs>builtinname</lhs>   
<rhs>Name | NCName |</rhs>   
<rhs>ID | IDREF |IDREFS |</rhs>   
<rhs>NMTOKEN | NMTOKENS |</rhs>   
<rhs>ENTITY | ENTITIES |</rhs>   
<rhs>string | uri |</rhs>
<rhs>timeInstant | timeDuration | recurringInstant |</rhs>
<rhs>binary |</rhs>   
<rhs>real | decimal |integer |</rhs>   
<rhs>non-negative-integer | positive-integer |</rhs>   
<rhs>non-positive-integer | negative-integer |</rhs>   
<rhs>date | time | language</rhs>   
<!--   
					<constraint def="dtns"/>   
  -->   
</prod>   
<prod id='nt-usergenname'>   
<lhs>usergenname</lhs>   
<rhs>   
	<xnt href='&xmlnsspec;#NT-NCName'>NCName</xnt>   
	<xnt href='&xsdl;#nt-schemaRef'>schemaRef</xnt>   
<!--							   
							|   
						<xnt href="&xmlnsspec;#NT-QName">   
							QName</xnt>    
  -->   
</rhs>   
<constraint def='dtname'/>   
<!--   
					<constraint def="dtqname"/>   
  -->   
</prod>   
</scrap>   
<note>   
<p>   
The <nt def='nt-datatypename'>datatypename</nt> production   
above is not to be confused with that labeled   
<xnt href='&xsdl;#nt-datatypeName'>datatypeName</xnt> in
<bibref ref='structural-schemas'/>.   
</p>   
</note>   
<!--   
			<constraintnote id="dtns" type="dt">   
				<head>datatype namespace prefix</head>   
				<p>   
					The <bibref ref="XMLNS"/> prefix "dt" is used throughout the   
					productions and examples of this specification as the prefix   
					associated with the namespace defined by this specification.   
					A schema instance may define whatever prefix it desires as   
					being associated with the datatype namespace, in which case   
					that prefix should be used in <nt def="builtinname"/>.   
				</p>   
			</constraintnote>   
  -->   
<constraintnote id='dtname' type='cos'>   
<head>Datatype name</head>   
<p>   
The name specified must be the name of a datatype defined in   
the schema in which the user-generated datatype is defined.   
</p>   
</constraintnote>   
<!--   
			<constraintnote id="dtqname" type="dt">   
				<head>External datatype name</head>   
				<p>   
					The namespace qualified name specified must be the name of a datatype
					defined in the schema associated with the namespace prefix   
					used in the qualified name.   
				</p>   
			</constraintnote>   
  -->   
<!--   
			<issue id="QNames-in-attribute-values">   
				<p>   
					This syntax uses namespace qualified names in attribute values,   
					which differs from the approach taken in XSDL.  Should it be   
					modified to be consistent with that approach?
				</p>   
			</issue>   
  -->   
<scrap>   
<head>Facets</head>   
<prod id='nt-ordered'>   
<lhs>ordered</lhs>   
<rhs>   
	<nt def='nt-bounds'>bounds</nt>?   
	<nt def='nt-numeric'>numeric</nt>?
</rhs>   
</prod>   
<prod id='nt-unordered'>   
<lhs>unordered</lhs>   
<rhs>   
	<nt def='nt-lexicalRep'>lexicalRepresentation</nt>?
	<nt def='nt-enumeration'>enumeration</nt>?
	<nt def='nt-length'>length</nt>?
	<nt def='nt-maxLength'>maxLength</nt>?
	<nt def='nt-encoding'>encoding</nt>?
</rhs>   
</prod>   
</scrap>   
<scrap>   
<head>Ordered facets</head>   
<prod id='nt-bounds'>   
<lhs>bounds</lhs>   
<rhs>    (<nt def='nt-minincl'>minInclusive</nt> | <nt def='nt-minExcl'>minExclusive</nt>)?
         (<nt def='nt-maxincl'>maxInclusive</nt> | <nt def='nt-maxExcl'>maxExclusive</nt>)?
</rhs>   
</prod>   
<prod id='nt-maxincl'>   
<lhs>maxInclusive</lhs>   
<rhs>   
	<nt def='nt-literal-value'>literalValue</nt>   
</rhs>   
<constraint def='literal-type'/>   
</prod>   
<prod id='nt-minincl'>   
<lhs>minInclusive</lhs>   
<rhs>   
	<nt def='nt-literal-value'>literalValue</nt>   
</rhs>   
<constraint def='literal-type'/>   
</prod>   
<prod id='nt-minExcl'>
<lhs>minExclusive</lhs>
<rhs>
	<nt def='nt-literal-value'>literalValue</nt>
</rhs>
<constraint def='literal-type'/>
</prod>
<prod id='nt-maxExcl'>
<lhs>maxExclusive</lhs>   
<rhs>   
	<nt def='nt-literal-value'>literalValue</nt>   
</rhs>   
<constraint def='literal-type'/>   
</prod>   
</scrap>   
<constraintnote id='literal-type' type='cos'>   
<head>Literal type</head>   
<p>   
The literal value given must be of the same type as the basetype
given in the datatype definition in which this facet
appears.
</p>   
</constraintnote>   
<scrap>   
<head>Numeric facets</head>   
<prod id='nt-numeric'>   
<lhs>numeric</lhs>   
<rhs>   
	(<nt def='nt-minAbsoluteValue'>minAbsoluteValue</nt>
	<nt def='nt-maxAbsoluteValue'>maxAbsoluteValue</nt>)?
</rhs>
<rhs>
	<nt def='nt-precision'>precision</nt>?   
	<nt def='nt-scale'>scale</nt>?
</rhs>   
</prod>
<prod id='nt-minAbsoluteValue'>   
<lhs>minAbsoluteValue</lhs>   
<rhs>   
	<nt def='nt-real-literal'>realLiteral</nt>   
</rhs>   
<constraint def='minMaxAbsoluteValue'/>
</prod>
<prod id='nt-maxAbsoluteValue'>   
<lhs>maxAbsoluteValue</lhs>   
<rhs>   
	<nt def='nt-real-literal'>realLiteral</nt>   
</rhs>   
<constraint def='minMaxAbsoluteValue'/>
</prod>   
<prod id='nt-precision'>   
<lhs>precision</lhs>   
<rhs>   
	<nt def='nt-int-literal'>integerLiteral</nt>   
</rhs>   
</prod>   
<prod id='nt-scale'>   
<lhs>scale</lhs>   
<rhs>   
	<nt def='nt-int-literal'>integerLiteral</nt>   
</rhs>   
</prod>   
</scrap>   
<constraintnote id="minMaxAbsoluteValue" type='cos'>
<head>minMaxAbsoluteValue</head>
<p>
In a generated subtype of <specref ref='real'/>, if a value is specified
for the <term>minAbsoluteValue</term> facet a value must also be specified
for the <term>maxAbsoluteValue</term> facet.
</p>
</constraintnote>
<p>   
The following is the definition of a user-generated datatype   
which could be used to represent monetary amounts, such as in a financial   
management application which generally does not have figures above $1M
and only allows whole cents. This definition would appear in a schema
authored by an "end-user" and shows how to define a datatype by specifying
facet values which constrain the range of the basetype in a manner
specific to the basetype (different from specifying max/min values as before).
</p>   
<note role="example">
<eg><![CDATA[<datatype name="ieee32">
   <basetype name="real"/>
   <minAbsoluteValue>
      1.40239846e-45
   </minAbsoluteValue>
   <maxAbsoluteValue>
      3.40282347e38
   </maxAbsoluteValue>
</datatype>]]></eg>
<p>
The above subtype of <term>real</term> represents an IEEE 32-bit floating point number.
While the explanation is beyond the scope of this specification, the above
minimum and maximum absolute values correspond to values which are representable
with the 32-bit floating point format, which has 1 bit for sign, 8 bits of
exponent and 23 bits of mantissa.
</p>
</note>
<note role="example">
<eg><![CDATA[<datatype name="ieee64">
   <basetype name="real"/>
   <minAbsoluteValue>
      4.94065645841246544e-324
   </minAbsoluteValue>
   <maxAbsoluteValue>
      1.79769313486231570e308
   </maxAbsoluteValue>
</datatype>]]></eg>
<p>
The above subtype of <term>real</term> represents an IEEE 64-bit floating point number.
While the explanation is beyond the scope of this specification, the above
minimum and maximum absolute values correspond to values which are representable
with the IEEE 64-bit floating point format, which has 1 bit for sign, 11 bits of
exponent and 52 bits of mantissa.
</p>
</note>
<note role="example">
<eg><![CDATA[<datatype name="ibmhex32">
   <basetype name="real"/>
   <minAbsoluteValue>
      5.2e-85
   </minAbsoluteValue>
   <maxAbsoluteValue>
      7.2e75
   </maxAbsoluteValue>
</datatype>]]></eg>
<p>
The above subtype of <term>real</term> represents an IBM 32-bit hexadecimal floating point number.
While the explanation is beyond the scope of this specification, the above
minimum and maximum absolute values correspond to values which are representable
with the IBM 32-bit hexadecimal floating point format, which has 1 bit for sign, 7 bits of
exponent and 24 bits of mantissa.
</p>
</note>

<note role='example'>   
<p>   
This type could just as well have been defined with the   
potential built-in generated type "currency" (defined above) as its   
basetype.   
</p>   
<eg><![CDATA[<datatype name="amount">   
   <basetype name="decimal"/>   
   <precision>   
      8   
   </precision>   
   <scale>   
      2   
   </scale>   
</datatype>]]></eg>   
</note>   
<scrap>   
<head>Unordered facets</head>   
<prod id='nt-length'>   
<lhs>length</lhs>   
<rhs>   
	<nt def='nt-int-literal'>integerLiteral</nt>   
</rhs>   
</prod>   
<prod id='nt-maxLength'>   
<lhs>maxLength</lhs>   
<rhs>   
	<nt def='nt-int-literal'>integerLiteral</nt>   
</rhs>   
</prod>   
<prod id='nt-enumeration'>   
<lhs>enumeration</lhs>   
<rhs>   
	<nt def='nt-literal'>literal</nt>+   
</rhs>   
</prod>   
<prod id='nt-lexicalRep'>   
<lhs>lexicalRepresentation</lhs>   
<rhs>   
	<nt def='nt-lexical'>lexical</nt>+   
</rhs>   
</prod>   
<prod id='nt-lexical'>   
<lhs>lexical</lhs>   
<rhs>   
	lexicalSpec
</rhs>   
<constraint def='lex-spec'/>   
</prod>
<prod id='nt-encoding'>
<lhs>encoding</lhs>
<rhs>
	'hex' | 'base64'
</rhs>
</prod>
</scrap>   
<constraintnote id='lex-spec' type='cos'>   
<head>Lexical specification</head>   
<p>   
The lexical specification must be of the "correct" kind, i.e.,   
a <term>real</term> lexical specification for datatypes generated from
<specref ref='real'/>.   
</p>   
</constraintnote> 
<p>   
The following example is a datatype definition for a user-generated   
datatype which limits the possible literal values of dates to the four   
US holidays enumerated. This datatype definition would appear in a schema   
authored by an "end-user" and shows how to define a datatype by enumerating   
the values in its value space.  The enumerated values must be   
type-valid literals for the basetype.   
</p>   
<note role='example'>   
<eg><![CDATA[<datatype name="holidays">   
   <basetype name="date"/>   
   <enumeration>   
      <literal>   
        --0101    <!-- New Year's day -->   
      </literal>   
      <literal>   
        --0704    <!-- 4th of July -->   
      </literal>   
      <literal>   
        --1125    <!-- Thanksgiving -->   
      </literal>   
      <literal>   
        --1225    <!-- Christmas -->   
      </literal>   
   </enumeration>   
</datatype>]]></eg>   
</note>   
<scrap>   
<head>Literals</head>   
<prod id='nt-literal'>   
<lhs>literal</lhs>   
<rhs>   
	<nt def='nt-literal-value'>literalValue</nt>   
</rhs>   
</prod>   
<prod id='nt-literal-value'>   
<lhs>literalValue</lhs>   
<rhs>   
	<nt def='nt-string-literal'>stringLiteral</nt> |   
	<nt def='nt-numeric-literal'>numericLiteral</nt> |   
	<nt def='nt-dateTime-literal'>dateTimeLiteral</nt> |
	<nt def='nt-uri-literal'>uriLiteral</nt> |   
	<nt def='nt-language-literal'>languageLiteral</nt><!-- |   
	<nt def="nt-uuid-literal">uuidLiteral</nt>-->   
</rhs>   
</prod>   
<prod id='nt-string-literal'>   
<lhs>stringLiteral</lhs>   
<rhs>   
	(see <specref ref='string'/>)   
</rhs>   
</prod>   
<prod id='nt-uri-literal'>   
<lhs>uriLiteral</lhs>   
<rhs>   
	(see <specref ref='uri'/>)   
</rhs>   
</prod>   
<prod id='nt-language-literal'>   
<lhs>languageLiteral</lhs>   
<rhs>   
	(see <specref ref='language'/>)   
</rhs>   
</prod>   
<!--   
<prod id="nt-uuid-literal">   
<lhs>uuidLiteral</lhs>   
<rhs>   
	<nt def="nt-hex">hex * 8</nt>   
	'-'   
	<nt def="nt-hex">hex * 4</nt>   
	'-'   
	<nt def="nt-hex">hex * 4</nt>   
	'-'   
	<nt def="nt-hex">hex * 4</nt>   
	'-'   
	<nt def="nt-hex">hex * 12</nt>   
</rhs>   
</prod>   
<prod id="nt-hex">   
<lhs>hex</lhs>   
<rhs>   
	<nt def="nt-digit">digit</nt> |   
</rhs>   
<rhs>   
	'a' | 'b' | 'c' | 'd' | 'e' | 'f' |   
</rhs>   
<rhs>   
	'A' | 'B' | 'C' | 'D' | 'E' | 'F' |   
</rhs>   
</prod>   
  -->   
</scrap>
<scrap>
<head>Numeric Literals</head>
<prod id='nt-numeric-literal'>   
<lhs>numericLiteral</lhs>   
<rhs>   
	<nt def='nt-real-literal'>realLiteral</nt> |   
	<nt def='nt-decimal-literal'>decimalLiteral</nt> |   
	<nt def='nt-int-literal'>integerLiteral</nt>   
</rhs>   
</prod>   
<prod id='nt-real-literal'>   
<lhs>realLiteral</lhs>   
<rhs>   
	(<nt def='nt-mantissa'>mantissa</nt>   
	<nt def='nt-exponent'>exponent</nt>?) |   
	NaN | INF | -INF   
</rhs>   
</prod>   
<prod id='nt-mantissa'>   
<lhs>mantissa</lhs>   
<rhs>   
	<nt def='nt-decimal-literal'>decimalLiteral</nt>   
</rhs>   
</prod>   
<prod id='nt-exponent'>   
<lhs>exponent</lhs>   
<rhs>   
	('E' | 'e')   
	<nt def='nt-int-literal'>integerLiteral</nt>   
</rhs>   
</prod>   
<prod id='nt-decimal-literal'>   
<lhs>decimalLiteral</lhs>   
<rhs>   
	(<nt def='nt-int-literal'>integerLiteral</nt>   
	('.' <nt def='nt-digit'>digit</nt>*)?) |   
	NaN | INF | -INF   
</rhs>   
</prod>   
<prod id='nt-int-literal'>   
<lhs>integerLiteral</lhs>   
<rhs>   
	(('+' | '-')?   
	<nt def='nt-digit'>digit</nt>+) |   
	NaN | INF | -INF   
</rhs>   
</prod>   
<!--   
<prod id="nt-sign">   
<lhs>sign</lhs>   
<rhs>'+' | '-'</rhs>   
</prod>   
  -->   
<!--   
<prod id="nt-digits">   
<lhs>digits</lhs>   
<rhs>   
	<nt def="nt-digit">digit</nt>?   
</rhs>   
</prod>   
  -->   
<prod id='nt-digit'>   
<lhs>digit</lhs>   
<rhs>   
'0' | '1' | '2' | '3' | '4' |   
</rhs>   
<rhs>   
'5' | '6' | '7' | '8' | '9'   
</rhs>   
</prod>   
</scrap>
<scrap>   
<head>Date and Time Literals</head>   
<prod id='nt-dateTime-literal'>   
<lhs>dateTimeLiteral</lhs>   
<rhs>   
	<nt def='timeInstantLiteral'>timeInstantLiteral</nt> |
	<nt def='timeDurationLiteral'>timeDurationLiteral</nt> |
	<nt def='recurringInstantLiteral'>recurringInstantLiteral</nt> |
	<nt def='dateLiteral'>dateLiteral</nt> |
	<nt def='timeLiteral'>timeLiteral</nt>
</rhs>
</prod>
<prod id='timeInstantLiteral'>
<lhs>timeInstantLiteral</lhs>
<rhs>
	<nt def='dateLiteral'>dateLiteral</nt>
	'T'
	<nt def='timeLiteral'>timeLiteral</nt>
</rhs>
</prod>
<prod id='timeDurationLiteral'>
<lhs>timeDurationLiteral</lhs>
<rhs>
	<nt def='dateLiteral'>dateLiteral</nt>
	'T'
	<nt def='timeLiteral'>timeLiteral</nt>
</rhs>
</prod>
<prod id='recurringInstantLiteral'>
<lhs>recurringInstantLiteral</lhs>
<rhs>
	<nt def='dateLiteral'>dateLiteral</nt>
	'T'
	<nt def='timeLiteral'>timeLiteral</nt>
</rhs>
</prod>
<prod id='dateLiteral'>
<lhs>dateLiteral</lhs>
<rhs>
	CCYYMMDD
</rhs>
</prod>
<prod id='timeLiteral'>
<lhs>timeLiteral</lhs>
<rhs>
	hhmmss.sss
	<nt def='timeZoneOffset'>timeZoneOffset</nt>?
</rhs>
</prod>
<prod id='timeZoneOffset'>
<lhs>timeZoneOffset</lhs>
<rhs>
	'Z' | (('+' | '-') hhmmss ('.' sss)?)
</rhs>
</prod>
</scrap>
<issue id='definition-overriding'>   
<p>   
In some cases it may be desirable to specify datatype constraints in   
instance documents rather than in a schema.  Should this be allowed?   
   
If the document does not have a schema then, clearly, the only   
possibility of adding datatype constraints is in the document instance.   
Even if the document has a schema the document instance may want to   
further restrict the content.  For example, the schema may specify a value   
to be a string but the instance may want to impose a particular   
regex constraint on it.   
   
If we decide to allow datatype specification or specialization in
instance documents, what syntax should be used?
This needs to be coordinated with   
the structural schema editorial team.   
</p>   
</issue>   
<issue id='non-positive-integer-literal'>   
<p>   
Do we need productions for the literals of non-negative-integer,   
positive-integer, non-positive-integer and negative-integer?   
</p>   
</issue>   
</div1>   
<div1 id='conformance'>   
<head>Conformance</head>   
<ednote>   
<edtext>   
This section (both its abstract content and its concrete wording)   
has not yet garnered consensus among WG members.   
</edtext>   
</ednote>   
<p>   
The XML specification <bibref ref='XML'/> defines two levels of conformance.   
Well-formed documents conform to valid XML syntax but may or may not obey   
the constraints defined by a DTD. Valid  XML documents conform to the structure   
laid down in a DTD.  Thus, if a DTD defines an attribute as an ID, instances   
of XML documents conforming to the DTD can only be valid if the values of   
such attributes are valid XML names and are unique in the document. By   
introducing   
additional datatypes to XML, this specification extends the notion of validity   
in the sense that values defined to have a certain datatype in the schema   
must conform to the lexical representations allowed for that datatype.   
Values that do not conform to the datatype defined for them in the schema   
raise a conformance error; an example is the appearance of a
letter in a value defined as "integer".  Similarly, for a value
defined as string with length facet equal to 5, a value of "ABC" would   
raise an error -- length too short -- as would a value of "abcdefgh" -- length   
too long.   
</p>   
<p>   
Since the datatypes discussed in this document can be used independently   
of XML Schema it is desirable that datatype conformance be specified   
as an independent, optional piece that other processors can use as   
they see fit.  To this end, we define the datatypes processor as a   
separate abstract interface.  Processors can call this interface with   
the value to be validated and the datatype, along with all its facets,   
that it should be validated against.  The processor will return a   
boolean value which will be true or false depending on whether the   
value is valid for the datatype or not.  If the value is valid it may   
also return a canonical representation for the value.  If the value is   
invalid the processor will return error information including the   
facets that caused the value to be declared invalid.   
</p>   
<p>   
User-generated datatypes are defined by giving values to certain, optional   
facets.  For example,  an integer within a certain range could be defined by   
giving values to maxInclusive and minInclusive facets.  A switch on the   
datatypes processor could be used to turn validation off for these facets.   
This could be used by a processor that used the datatypes processor to eliminate   
validation of user-generated datatypes.
</p>   
<p>   
If a particular processor, for reasons of speed or size, decides not
to validate datatypes, it can use a default stub interface.  This
always returns <emph>true</emph>.   
</p>   
<p>   
It also needs to be said that there are no expressions on   
datatypes; neither are there operations on datatypes.   
</p>   
<p>   
If we decide to allow datatype specification or specialization in instance   
documents (see issue "definition-overriding" above) then   
validating XML processors should be able to validate the format   
of values in XML documents in these cases as well by using the   
datatypes processor.   
</p>   
</div1>   
   
</body><back>   
<div1 id='schema'>   
<head>Schema for Datatype Definitions (normative)</head>   
<ednote>   
<edtext>   
This section (both its abstract content and its concrete wording)   
has not yet garnered consensus among WG members.   
</edtext>   
</ednote>   
<eg><![CDATA[<?xml version='1.0'?>
<!-- $Id: datatypes.xml,v 1.1 1999/09/24 18:20:06 hugo Exp $ -->
<!DOCTYPE schema PUBLIC "-//W3C//DTD XMLSCHEMA 19990923//EN" "../structures/structures.dtd" >
<schema xmlns='http://www.w3.org/1999/09/23-xmlschema/' targetNS='http://www.w3.org/1999/09/23-xmlschema/datatypes/' version='0.4'>

  <element name='datatype'>
     <archetype order='all'>
        <element ref='basetype'/>
        <element archRef='maxBound' minOccurs='0'/>
        <element archRef='minBound' minOccurs='0'/>
        <element ref='minAbsoluteValue' minOccurs='0'/>
        <element ref='maxAbsoluteValue' minOccurs='0'/>
        <element ref='maxInclusive' minOccurs='0'/>
        <element ref='minInclusive' minOccurs='0'/>
        <element ref='precision' minOccurs='0'/>
        <element ref='scale' minOccurs='0'/>
        <element ref='length' minOccurs='0'/>
        <element ref='maxLength' minOccurs='0'/>
        <element ref='enumeration' minOccurs='0'/>
        <element ref='lexicalRepresentation' minOccurs='0'/>
        <element ref='encoding' minOccurs='0'/>
        <attribute name='name' type='NMTOKEN' minOccurs='1'/>
        <attribute name='export' type='boolean' default='true'/>
     </archetype>
  </element>

  <element name='basetype'>
     <archetype content='empty'>
        <attribute name='name' type='NMTOKEN' minOccurs='1'/>
        <attribute name='schemaAbbrev' type='NMTOKEN'/>
        <attribute name='schemaName' type='uri'/>
     </archetype>
  </element>

  <!-- these are here to bridge between the content model above
       and the elements below -->
  <archetype name='minBound'/>
  <archetype name='maxBound'/>

  <!-- these can only be applied when the base type is 'real'
       and must be used in concert with one another -->
  <element name='minAbsoluteValue' type='real'/>
  <element name='maxAbsoluteValue' type='real'/>
  
  <!-- the true datatype of the four following depends on the basetype -->
  <element name='maxExclusive' type='string'>
     <archetype>
       <refines name='maxBound'/>
     </archetype>
  </element>
  <element name='maxInclusive' type='string'>
     <archetype>
       <refines name='maxBound'/>
     </archetype>
  </element>
  <element name='minExclusive' type='string'>
     <archetype>
       <refines name='minBound'/>
     </archetype>
  </element>
  <element name='minInclusive' type='string'>
     <archetype>
       <refines name='minBound'/>
     </archetype>
  </element>

  <element name='precision' type='integer'/>
  <element name='scale' type='integer'/>

  <element name='length' type='integer'/>
  <element name='maxLength' type='integer'/>

  <!-- the following datatype is used to limit the
       possible values for the encoding facet on
	   the binary datatype -->
  <datatype name='encodings'>
     <basetype name='NMTOKEN'/>
     <enumeration>
        <literal>hex</literal>
        <literal>base64</literal>
     </enumeration>
  </datatype>
  <element name='encoding' type='encodings'/>

  <element name='period' type='timeDuration'/>

  <element name='enumeration'>
    <archetype>
       <element ref='literal' minOccurs='1' maxOccurs='*'/>
    </archetype>
  </element>
  <!-- the true datatype of the following depends on the basetype -->
  <element name='literal' type='string'/>

  <element name='lexicalRepresentation'>
     <archetype>
        <element ref='lexical' minOccurs='1' maxOccurs='*'/>
     </archetype>
  </element>
    <!-- the true datatype of the following depends on the basetype -->
  <element name='lexical' type='string'/>

<!-- built-in generated datatypes -->
<!-- only has a few for now, eventually needs to have all of them -->

  <datatype name='integer'>
    <basetype name='decimal'/>
    <scale>0</scale>
  </datatype>
	
  <datatype name='non-negative-integer'>
    <basetype name='integer'/>
    <minInclusive>0</minInclusive>
  </datatype>

  <datatype name='positive-integer'>
    <basetype name='non-negative-integer'/>
    <minInclusive>1</minInclusive>
  </datatype>

  <datatype name='non-positive-integer'>
    <basetype name='integer'/>
    <maxInclusive>0</maxInclusive>
  </datatype>

  <datatype name='negative-integer'>
    <basetype name='non-positive-integer'/>
    <maxInclusive>-1</maxInclusive>
  </datatype>

  <datatype name='date'>
    <basetype name='recurringInstant'/>
    <period>000000T2400</period>
  </datatype>

  <datatype name='time'>
    <basetype name='recurringInstant'/>
    <period>000000T2400</period>
  </datatype>
</schema>
]]></eg>   
</div1>   
<div1 id='dtd-for-datatypeDefs'>   
<head>DTD for Datatype Definitions (normative)</head>   
<ednote>   
<edtext>   
This section (both its abstract content and its concrete wording)   
has not yet garnered consensus among WG members.   
</edtext>   
</ednote>   
<eg><![CDATA[<!-- Note that the expansion of 'facets' below is less
     restrictive than that imposed by the XML Schema schema for
     datatypes:  There should in fact be no more than one of each of
     minInclusive, minExclusive, maxInclusive, maxExclusive,
     precision, scale, lexicalRepresentation, enumeration,
     length, maxLength within datatype -->
<!ENTITY % minBound '(minInclusive | minExclusive)'>
<!ENTITY % maxBound '(maxInclusive | maxExclusive)'>
<!ENTITY % bounds '%minBound; | %maxBound;'>
<!ENTITY % numeric '(minAbsoluteValue, maxAbsoluteValue)? | precision | scale'>
<!ENTITY % ordered '%bounds; | %numeric;'>   
<!ENTITY % unordered
   'lexicalRepresentation | enumeration | length | maxLength | encoding'>   
<!ENTITY % facets '%ordered; | %unordered;'>
<!ELEMENT datatype (basetype, (%facets;)*)>   
<!ATTLIST datatype   
    name NMTOKEN #REQUIRED   
    export (true|false) 'true'>   
   
<!ELEMENT basetype EMPTY>   
<!ATTLIST basetype   
    name NMTOKEN #REQUIRED   
    schemaAbbrev NMTOKEN #IMPLIED   
    schemaName CDATA #IMPLIED>   

<!ELEMENT minAbsoluteValue (#PCDATA)>
<!ELEMENT maxAbsoluteValue (#PCDATA)>

<!ELEMENT maxExclusive (#PCDATA)>   
<!ELEMENT minExclusive (#PCDATA)>   
<!ELEMENT maxInclusive (#PCDATA)>   
<!ELEMENT minInclusive (#PCDATA)>   
   
<!ELEMENT precision (#PCDATA)>   
<!ELEMENT scale (#PCDATA)>   
   
<!ELEMENT length (#PCDATA)>   
<!ELEMENT maxLength (#PCDATA)>   
<!ELEMENT enumeration (literal)+>   
<!ELEMENT literal (#PCDATA)>   
<!ELEMENT lexicalRepresentation (lexical)+>   
<!ELEMENT lexical (#PCDATA)>
<!ELEMENT encoding (#PCDATA)>
]]></eg>   
</div1>   
<div1>   
<head>Datatypes and Facets</head>   
<ednote>   
<edtext>   
This section (both its abstract content and its concrete wording)   
has not yet garnered consensus among WG members.   
</edtext>   
</ednote>   
<p>   
The following table shows the values of the fundamental facets   
for each built-in datatype.   
</p>   
<ednote>   
<name>PVB</name>   
<date>1999-07-09</date>   
<edtext>   
Some entries in this table might   
conflict with what it says elsewhere in this draft, as creating   
this table pointed out to me some problems with the way some   
of the fundamental facets are defined (not to mention any   
transcription errors on my part in creating the table).   
<p>   
We obviously need more introductory text here explaining this   
table to the reader   
</p>   
</edtext>   
</ednote>   
<table border='1' bgcolor='&cellfront;'>
<tbody>
<tr>   
<th>&nbsp;</th>   
<th>Datatype</th>   
<th><specref ref='order'/></th>   
<th><specref ref='bounds'/></th>   
<th><specref ref='cardinality'/></th>   
<th><specref ref='exact-approx'/></th>   
<th><specref ref='numeric'/></th>   
</tr>   
<tr>   
<td rowspan='10'>Primitive</td>   
<td><specref ref='NMTOKEN'/></td>   
<td>no</td>   
<td>none</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='string'/></td>   
<td>yes</td>   
<td>none</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='boolean'/></td>   
<td>no</td>   
<td>none</td>   
<td>finite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='real'/></td>   
<td>yes</td>   
<td>none</td>   
<td>uncountably infinite</td>   
<td>approximate</td>   
<td>yes</td>   
</tr>   
<!--
<tr>   
<td><specref ref='dateTime'/></td>   
<td>yes</td>   
<td>no</td>   
<td>uncountably infinite</td>   
<td>approximate</td>   
<td>no</td>   
</tr>   
  -->
<tr>
<td><specref ref='timeInstant'/></td>
<td>yes</td>
<td>no</td>
<td>uncountably infinite</td>
<td>approximate</td>
<td>no</td>
</tr>
<tr>
<td><specref ref='timeDuration'/></td>
<td>yes</td>
<td>no</td>
<td>uncountably infinite</td>
<td>approximate</td>
<td>no</td>
</tr>
<tr>
<td><specref ref='recurringInstant'/></td>
<td>yes</td>
<td>no</td>
<td>uncountably infinite</td>
<td>approximate</td>
<td>no</td>
</tr>
<tr>
<td><specref ref='binary'/></td>   
<td>no</td>   
<td>no</td>   
<td>?</td>   
<td>?</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='uri'/></td>   
<td>no</td>   
<td>no</td>   
<td>uncountably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='language'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr><td colspan='7'/></tr>   
<tr>   
<td rowspan='17'>Generated</td>   
<td><specref ref='Name'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='NCName'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='ID'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='IDREF'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='IDREFS'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='ENTITY'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='ENTITIES'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='NMTOKENS'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='NOTATION'/></td>   
<td>no</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='decimal'/></td>   
<td>yes</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>yes</td>   
</tr>   
<tr>   
<td><specref ref='integer'/></td>   
<td>yes</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>yes</td>   
</tr>   
<tr>   
<td><specref ref='non-negative-integer'/></td>   
<td>yes</td>   
<td>yes</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>yes</td>   
</tr>   
<tr>   
<td><specref ref='positive-integer'/></td>   
<td>yes</td>   
<td>yes</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>yes</td>   
</tr>   
<tr>   
<td><specref ref='non-positive-integer'/></td>   
<td>yes</td>   
<td>yes</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>yes</td>   
</tr>   
<tr>   
<td><specref ref='negative-integer'/></td>   
<td>yes</td>   
<td>yes</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>yes</td>   
</tr>   
<tr>   
<td><specref ref='date'/></td>   
<td>yes</td>   
<td>no</td>   
<td>countably infinite</td>   
<td>exact</td>   
<td>no</td>   
</tr>   
<tr>   
<td><specref ref='time'/></td>   
<td>yes</td>   
<td>no</td>   
<td>uncountably infinite</td>   
<td>approximate</td>   
<td>no</td>   
</tr>   
</tbody>
</table>   
<p>   
The following table shows the constraining facets which apply   
to each built-in datatype.   
</p>   
<ednote>
<!--
<name>PVB</name>   
<date>1999-07-09</date>   
  -->
<edtext>   
Some entries in this table might   
conflict with what it says elsewhere in this draft, as creating   
this table pointed out to me some problems with the way some   
of the constraining facets and datatypes are defined (not to   
mention any transcription errors on my part in creating the table).   
<p>   
We obviously need more introductory text here explaining this   
table to the reader (especially since this <emph>one table</emph>   
is broken into three pieces so that it will print nicely)   
</p>   
</edtext>   
</ednote>   
<table border='1' bgcolor='&cellfront;'>   
<thead>
<tr>   
<th>&nbsp;</th>   
<th>Datatype</th>   
<th><specref ref='length'/></th>   
<th><specref ref='maxlength'/></th>   
<th><specref ref='lexical-representation'/></th>   
<th><specref ref='enumeration'/></th>   
</tr>   
</thead>
<tbody>
<tr>   
<td rowspan='10'>Primitive</td>   
<td><specref ref='NMTOKEN'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='string'/></td>   
<td>X</td>   
<td>X</td>   
<td>X</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='boolean'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
</tr>   
<tr>   
<td><specref ref='real'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='timeInstant'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='timeDuration'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='recurringInstant'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='binary'/></td>   
<td>X</td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
</tr>   
<tr>   
<td><specref ref='uri'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='language'/></td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr><td colspan='6'/></tr>   
<tr>   
<td rowspan='17'>Generated</td>   
<td><specref ref='Name'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='NCName'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='ID'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='IDREF'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='IDREFS'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='ENTITY'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='ENTITIES'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='NMTOKENS'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='NOTATION'/></td>   
<td>?</td>   
<td>?</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='decimal'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='integer'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='non-negative-integer'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='positive-integer'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='non-positive-integer'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='negative-integer'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='date'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
<tr>   
<td><specref ref='time'/></td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>&nbsp;</td>   
<td>X</td>   
</tr>   
</tbody>
</table>   
<p>   
Constraining facets table, continued.
</p>   
<table border='1' bgcolor='&cellfront;'>   
<thead>
<tr>   
<th>&nbsp;</th>   
<th>Datatype</th>   
<th><specref ref='maxInclusive'/></th>   
<th><sp