//*****************************************************************************
/*
** FILE:   xe_Tokenizer.java
**
** (c) 1997, 1998 Steve Withall.
**
** HISTORY:
**    06Oct97  stevew  Created.
**    25Oct97  stevew  Introduced hashtable-driven parsing in AutoParseNextToken().
**    06Apr98  stevew  Added xe_ParseListener.
**    17Apr98  stevew  Removed auto-parse functionality - and moved it to new
**                      sub-class xe_ParseManagerAuto.
**    06May98  stevew  Split from xe_ParseManager.
*/
package xe;

import xm.xm_ParseException;

import xg.xg_NamespaceDecl;
import xg.xg_Node;

import xa.xa_CharChecker;

import eh.eh_Debug;

import java.util.Enumeration;
import java.io.IOException;
import java.io.Reader;

//*****************************************************************************
/** Perform low-level parsing of an XML source and keep the state of the
 *  current parse.
 *
 *  <p>The source is supplied via a Reader, which must be set by calling
 *  SetSource() prior to calling the range of methods which perform detailed
 *  parsing of various sorts. Most of the parsing methods pass back their
 *  results in the form of xe_Tokens.</p>
 */
public class xe_Tokenizer
{
    // Control flags.
    /** If true, we validate the resulting parse tree (node by node while parsing);
     *  if false, we don't. */
    boolean           ValidateFlag = false;

    /** If true, we verify the resulting parse tree (node by node while parsing);
     *  if false, we don't. */
    boolean           VerifyFlag   = true;

    /** Parsing attempts to verify the internal consistency of each node after
     *  it has been parsed. This is only possible in special derived classes;
     *  standard xg nodes can do no more than check well-formedness and validate
     *  them. xg nodes therefore never return a verification error. If true,
     *  parsing stops if a node has a verification error (and can result in an
     *  otherwise well-formed or valid XML document failing to parse, because
     *  this sort of checking goes beyond XML's realm of control); if false, a
     *  warning is issued and parsing continues. */
    boolean           StopIfVerifyErrorFlag = true;

    /** If true, the rules of XML syntax are mercilessly applied; if false, a
     *  degree of latitude is permitted. */
    boolean           StrictXmlSyntaxFlag = false;

    // Data attributes.
    /** Place from which to read the XML source. There is no initialiser, so it
     *  is null until a source has been supplied. */
    Reader            SourceReader;

    /** The last character read from the source, held as an int. It starts at -1.
     *  NOTE(review): presumably this is the raw value returned by the reader
     *  (with -1 meaning nothing read / end of source) - confirm against
     *  readNextChar(). */
    int               CurrentCharAsInt = -1;

    /** The last character read from the source, held as a char. This is the
     *  value the parsing methods inspect and append to their buffers. */
    char              CurrentChar;

    /** The number of characters parsed when the last minor milestone was reached.
     *  This is used as a convenient way to track the start of current item which
     *  is being parsed. The token-level parse methods reset it from
     *  TotalCharCount as they start. */
    int               LastSignificantOffset = 1;

    // Statistics.
    /** Count of lines read (starts at 1) */
    int               LineCount      = 1;

    /** Count of characters in the current line */
    int               ColumnCount    = 0;

    /** Count of characters read */
    int               TotalCharCount = 0;

    // Constants.
    /** Typical name length; used as the initial capacity of the StringBuffers
     *  in which names, numbers and whitespace are accumulated. */
    final static int  DEFAULT_NAME_LENGTH = 16;

    //*****************************************************************************
    /** Default constructor. Its body is empty, so every field simply keeps the
     *  initial value given in its declaration (or the Java default where no
     *  initialiser is given).
     */
    public xe_Tokenizer()
    {
    }

    //*****************************************************************************
    /** <p>Parse the next piece of element content and hand it back as a token.
     *  Content may be any mixture of the following:</p>
     *
     *  <p>Reference:               <b>&amp;Name;</b> or <b>&amp;#nn;</b> or <b>&amp;#xnn;</b><br>
     *     CDATA section:           <b>&lt;![CDATA[...]]&gt;</b><br>
     *     Comment:                 <b>&lt;!--...--&gt;</b><br>
     *     Processing instruction:  <b>&lt;?...?&gt;</b><br>
     *     Element:                 <b>&lt;Name&gt;...&lt;/Name&gt;</b><br>
     *     PCData:                  <b>Text</b></p>
     *
     *  <p>The decision is made on the current character alone: '&amp;' starts a
     *  reference, '&lt;' starts some form of markup, and anything else
     *  (whitespace included) starts a run of PCData. If the source is exhausted
     *  an END_OF_SOURCE token is returned.</p>
     *
     *  @return     Token representing the piece of content parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    public xe_Token parseContentToken() throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parseContentToken:");
        LastSignificantOffset = TotalCharCount;

        xe_Token  ContentToken;

        if (atEnd())
            ContentToken = new xe_Token(xe_TokenType.END_OF_SOURCE);
        else
        {
            // Remember the character that decides the kind of content, seed the
            // value buffer with it, and move past it before delegating.
            char          LeadChar    = CurrentChar;
            StringBuffer  ValueBuffer = new StringBuffer(DEFAULT_NAME_LENGTH);
            ValueBuffer.append(LeadChar);
            readNextChar();

            if (LeadChar == '&')
                // Start of an entity or character reference.
                ContentToken = parseEntityRefToken(ValueBuffer);
            else if (LeadChar == '<')
                // Start of a tag, comment, CDATA section, PI or declaration.
                ContentToken = parseEntityName(ValueBuffer);
            else
                // Anything else - whitespace included - is text (PCData).
                ContentToken = parsePCDataToken(ValueBuffer);
        }

        eh_Debug.add(8, "xe_Tokenizer.parseContentToken: Return: (" + ContentToken + ")");
        return ContentToken;
    }

    //*****************************************************************************
    /** Parse the next token and insist that it has a particular type. If the
     *  token parsed has any other type a parse exception is raised, quoting the
     *  token's string value when it has one and its type name otherwise.
     *
     *  @param  InputExpectedTokenType      The type of token we expect
     *  @param  InputConsumeWhitespaceFlag  Passed straight on to parseNextToken():
     *                                       if true, whitespace preceding the
     *                                       token is consumed and kept separately
     *  @return                             The token parsed, which has the expected type
     *  @exception  xm_ParseException  XML wellformedness error, or the token
     *                                  was not of the expected type
     *  @exception  IOException        Error reading from source reader
     */
    public xe_Token parseNextTokenExpected(int      InputExpectedTokenType,
                                           boolean  InputConsumeWhitespaceFlag)
                                                throws xm_ParseException, IOException
    {
//        eh_Debug.add(8, "xe_Tokenizer.parseNextTokenExpected:");
        xe_Token  ParsedToken = parseNextToken(InputConsumeWhitespaceFlag);

        // The happy path: the token is what the caller asked for.
        if (ParsedToken.getType() == InputExpectedTokenType)
            return ParsedToken;

        // Describe what we found: by value if there is one, else by type name.
        if (ParsedToken.getStringValue() != null)
        {
            throwParseException("Expected " + xe_TokenType.toString(InputExpectedTokenType)
                                   + ", but found '" + ParsedToken.getStringValue() + "'");
        }
        else
        {
            throwParseException("Expected " + xe_TokenType.toString(InputExpectedTokenType)
                                   + ", but found " + xe_TokenType.toString(ParsedToken.getType()));
        }
        return ParsedToken;
    }

    //*****************************************************************************
    /** Parse the next token of markup.
     *
     *  <p>The kind of token is chosen from the current character:</p>
     *  <ul>
     *    <li>end of source gives END_OF_SOURCE;</li>
     *    <li>'/' gives EMPTY_TAG_END when followed by '&gt;', else SLASH_CHAR;</li>
     *    <li>'&lt;' is handed to parseEntityName();</li>
     *    <li>'?' gives PI_END when followed by '&gt;', else QUESTION_MARK_CHAR;</li>
     *    <li>a name start character gives a NAME token;</li>
     *    <li>a number character gives a NUMBER token;</li>
     *    <li>any other character is looked up in xe_TokenType as a
     *        single-character token, giving UNKNOWN (with the character as its
     *        value) if it is not registered.</li>
     *  </ul>
     *
     *  @param  InputConsumeWhitespaceFlag  If true, whitespace preceding the token
     *                                       is consumed and attached to the token
     *                                       as its preceding whitespace; if false,
     *                                       no whitespace is skipped
     *  @return                             Token representing the value parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    public xe_Token parseNextToken(boolean  InputConsumeWhitespaceFlag)
                                       throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parseNextToken:");
        LastSignificantOffset = TotalCharCount;

        // Get preceding whitespace if required.
        String        PrecedingWhitespace = parseWhitespace(InputConsumeWhitespaceFlag);

        xe_Token      NextToken           = null;
        StringBuffer  TokenValueBuffer    = new StringBuffer(DEFAULT_NAME_LENGTH);

        if (atEnd())
            // No more characters in source.
            NextToken = new xe_Token(xe_TokenType.END_OF_SOURCE);
        else
        {
            TokenValueBuffer.append(CurrentChar);
            switch (CurrentChar)
            {
                case '/':
                    // Either the end of an empty tag ('/>') or a lone slash.
                    readNextChar();
                    if (CurrentChar == '>')
                    {
                        NextToken = new xe_Token(xe_TokenType.EMPTY_TAG_END);
                        readNextChar();
                    }
                    else
                        NextToken = new xe_Token(xe_TokenType.SLASH_CHAR);
                    break;

                case '<':
                    readNextChar();
                    NextToken = parseEntityName(TokenValueBuffer);
                    break;

                case '?':
                    // Either the end of a processing instruction ('?>') or a lone '?'.
                    readNextChar();
                    if (CurrentChar == '>')
                    {
                        NextToken = new xe_Token(xe_TokenType.PI_END);
                        readNextChar();
                    }
                    else
                        NextToken = new xe_Token(xe_TokenType.QUESTION_MARK_CHAR);
                    break;

                default:
                    // Test for a name *start* character, not merely a name
                    // character: parseName() rejects anything else, and testing
                    // the wider class would send digits (which XML allows inside
                    // a name but not at its start) into parseName() to fail,
                    // leaving the number branch below unreachable.
                    if (xa_CharChecker.haveNameStartChar(CurrentChar) )
                        // We have a word, scan it and return the value
                        NextToken = parseNameToken();
                    else if (xa_CharChecker.haveNumberChar(CurrentChar) )
                        // We have a number - parse it and consume it.
                        NextToken = parseNumberToken();
                    else
                    {
                        // Take care of any other valid single character.
                        xe_TokenTypeDefn  CurrentKeywordDefn
                                          = xe_TokenType.getDefn(TokenValueBuffer.toString() );

                        if (CurrentKeywordDefn == null)
                        {
                            // Have encountered an unexpected character.
                            NextToken = new xe_Token(xe_TokenType.UNKNOWN, TokenValueBuffer.toString() );
                            eh_Debug.add(2, "xe_Tokenizer.parseNextToken: Unexpected character ("
                                                  + CurrentChar + ")");
                        }
                        else
                            NextToken = new xe_Token(CurrentKeywordDefn.getType() );
                        readNextChar();
                    }
            }
        }

        // Store the preceding whitespace as part of the token.
        if (NextToken != null && PrecedingWhitespace != null)
            NextToken.setPrecedingWhitespace(PrecedingWhitespace);

        eh_Debug.add(8, "xe_Tokenizer.parseNextToken: Return (" + NextToken + ")");
        return NextToken;
    }

    //*****************************************************************************
    /** Parse the opening of a piece of markup, assuming that its leading '&lt;'
     *  has already been read (and is in the InputTokenValueBuffer).
     *
     *  <p>The character after the '&lt;' decides what is parsed:</p>
     *  <ul>
     *    <li>'!' - a comment, CDATA section or declaration; handled by
     *        parseBangEntityName();</li>
     *    <li>'/' - an end tag: the tag name is parsed and returned as an
     *        END_TAG_START token whose value is the name;</li>
     *    <li>'?' - a processing instruction: the target name is parsed and
     *        returned as XML_DECL_START, NAMESPACE_DECL_START or PI_START,
     *        with a value of "&lt;?" followed by the target name;</li>
     *    <li>a name start character - an element start tag: the name is parsed
     *        and returned as an ELEMENT_START token whose value is the name.</li>
     *  </ul>
     *  <p>Any other character is a wellformedness error.</p>
     *
     *  @param       InputTokenValueBuffer  The characters already parsed in this
     *                token (which ought to be '&lt;'!)
     *  @return      The start token just parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    private xe_Token parseEntityName(StringBuffer  InputTokenValueBuffer)
                                               throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parseEntityName: Start");
        xe_Token  NameToken = null;

        InputTokenValueBuffer.append(CurrentChar);
        switch (CurrentChar)
        {
            case '!':
                // This the beginning of a comment ('<!--'), a <![CDATA section
                // or a <!ATTLIST, <!DOCTYPE, etc. declaration.
                readNextChar();
                NameToken = parseBangEntityName(InputTokenValueBuffer);
                break;

            case '/':
                // This an end tag ('</'). Parse the tag name.
                readNextChar();
                NameToken = parseNameToken();
                NameToken.setType(xe_TokenType.END_TAG_START);
                break;

            case '?':
                // This a processing instruction ('<? ... ?>') or XML decl ('<?xml ... ?>").
                readNextChar();
                String  PIName       = parseName();
                String  PITokenValue = InputTokenValueBuffer.toString() + PIName;
                //TBD We could get the keyword type from the registry in xe_TokenType,
                //TBD so that special PI types can be detected automatically.
                // The three cases are mutually exclusive and must be chained:
                // without the 'else' an XML declaration falls through to the
                // final branch and is reported as an ordinary PI.
                if (PIName.equals(xe_TokenType.XML_STRING) )
                    NameToken = new xe_Token(xe_TokenType.XML_DECL_START, PITokenValue);
                else if (PIName.equals(xe_TokenType.XML_STRING + ":" + xg_NamespaceDecl.NAMESPACE_STRING) )
                    NameToken = new xe_Token(xe_TokenType.NAMESPACE_DECL_START, PITokenValue);
                else
                    NameToken = new xe_Token(xe_TokenType.PI_START, PITokenValue);
                break;

            default:
                if (xa_CharChecker.haveNameStartChar(CurrentChar) )
                {
                    // Parse the next characters as a name: the name of the element.
                    NameToken = parseNameToken();
                    NameToken.setType(xe_TokenType.ELEMENT_START);
                }
                else
                    throwParseException("Character '" + CurrentChar
                                           + "' not allowed at the start of an entity name");
        }

        eh_Debug.add(7, "xe_Tokenizer.parseEntityName: Return (" + NameToken + ")");
        return NameToken;
    }

    //*****************************************************************************
    /** <p>Parse the opening of a piece of markup which begins '&lt;!' (assuming
     *  these two characters have already been read).</p>
     *
     *  <p>The token returned is one of the following:</p>
     *
     *  <p><b>&lt;!--</b>         xe_TokenType.COMMENT_START</p>
     *  <p><b>&lt;![CDATA[</b>    xe_TokenType.CDATA_START</p>
     *  <p><b>&lt;!name</b>       whatever type xe_TokenType.convertToType() gives
     *                            for the text "&lt;!name" (eg. &lt;!ATTLIST,
     *                            &lt;!DOCTYPE, &lt;!ELEMENT, &lt;!ENTITY or
     *                            &lt;!NOTATION)</p>
     *
     *  <p>Anything else - a lone '-', a '[' not followed by 'CDATA[', a name
     *  which convertToType() reports as UNKNOWN, or a character which cannot
     *  start a name - is reported as a parse exception.</p>
     *
     *  @param      InputTokenValueBuffer  The characters already parsed in this
     *               token (which ought to be '&lt;!'!)
     *  @return     The token just parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    private xe_Token parseBangEntityName(StringBuffer  InputTokenValueBuffer)
                                            throws xm_ParseException, IOException
    {
//        eh_Debug.add(8, "xe_Tokenizer.parseBangEntityName: Start");
        xe_Token  Result = null;

        if (CurrentChar == '-')
        {
            // Possibly a comment ('<!--'): a second '-' must follow at once.
            InputTokenValueBuffer.append(CurrentChar);
            readNextChar();
            InputTokenValueBuffer.append(CurrentChar);
            if (CurrentChar != '-')
                throwParseException("Expected '-' at start of comment, after '<!-'");
            else
                Result = new xe_Token(xe_TokenType.COMMENT_START, "<!--");
            readNextChar();
        }
        else if (CurrentChar == '[')
        {
            // Possibly a CDATA section: need the name 'CDATA' and then another '['.
            readNextChar();
            String   SectionName  = parseName();
            boolean  IsCDataStart = SectionName.equals("CDATA") && (CurrentChar == '[');
            if (!IsCDataStart)
                throwParseException("Expected 'CDATA[', after '<![', but found "
                                       + SectionName + CurrentChar);
            else
            {
                Result = new xe_Token(xe_TokenType.CDATA_START, "<![CDATA[");
                readNextChar();
            }
        }
        else if (!xa_CharChecker.haveNameStartChar(CurrentChar) )
            throwParseException("Unexpected non-name character (" + CurrentChar + ")");
        else
        {
            // A declaration keyword: append its name to the '<!' already in the
            // buffer and look the combined text up as a token type.
            parseName(InputTokenValueBuffer);
            String  DeclText = InputTokenValueBuffer.toString();
            int     DeclType = xe_TokenType.convertToType(DeclText);
            if (DeclType == xe_TokenType.UNKNOWN)
                throwParseException("Unacceptable '<!' name (" + DeclText + ")");
            Result = new xe_Token(DeclType, DeclText);
        }

//        eh_Debug.add(7, "xe_Tokenizer.parseBangEntityName: Return (" + Result + ")");
        return Result;
    }

    //*****************************************************************************
    /** Parse the next token as a reference, assuming its starting '&amp;' has
     *  already been parsed (and is already in the InputTokenValueBuffer).
     *  We handle references of the form: &amp;Name; or &amp;#nn; or &amp;#xnn;
     *
     *  <p>The token's value is the name or the digits only - without the
     *  leading '&amp;', '#' or 'x' and without the closing ';', which is
     *  checked for and consumed here.</p>
     *
     *  @param      InputTokenValueBuffer  A buffer which is assumed to contain
     *                                      the initial '&amp;' already
     *  @return     An ENTITY_REFERENCE token (&amp;Name;), a CHAR_REFERENCE_DEC
     *               token (&amp;#nn;) or a CHAR_REFERENCE_HEX token (&amp;#xnn;)
     *  @exception  xm_ParseException  XML wellformedness error: an illegal
     *                                  character after the '&amp;', a character
     *                                  reference with no digits, or a missing ';'
     *  @exception  IOException        Error reading from source reader
     */
    private xe_Token parseEntityRefToken(StringBuffer  InputTokenValueBuffer)
                                            throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parseEntityRefToken:");
        xe_Token  NextToken = null;

        if (atEnd())
            // NOTE(review): the ';' check below still runs in this case, so a
            // source ending straight after '&' is expected to finish with the
            // exception rather than this token - confirm what readNextChar()
            // leaves in CurrentChar at end of source.
            NextToken = new xe_Token(xe_TokenType.END_OF_SOURCE);
        else
        {
            InputTokenValueBuffer.append(CurrentChar);

            if (CurrentChar == '#')
            {
                // Character reference: '&#x' introduces hexadecimal digits,
                // a plain '&#' introduces decimal digits.
                readNextChar();
                boolean  HexFlag = (CurrentChar == 'x');
                String   Digits;
                if (HexFlag)
                {
                    readNextChar();
                    Digits = parseHexNumber();
                }
                else
                    Digits = parseNumber();

                // Both number parsers return null when they find no digits at
                // all; a reference with nothing in it is not wellformed.
                if (Digits == null)
                    throwParseException("Character reference must contain at least one digit");

                if (HexFlag)
                    NextToken = new xe_Token(xe_TokenType.CHAR_REFERENCE_HEX, Digits);
                else
                    NextToken = new xe_Token(xe_TokenType.CHAR_REFERENCE_DEC, Digits);
            }
            else if (xa_CharChecker.haveNameStartChar(CurrentChar) )
            {
                // Treat this as a name.
                NextToken = new xe_Token(xe_TokenType.ENTITY_REFERENCE, parseName() );
            }
            else
                throwParseException("Illegal character ('" + CurrentChar
                                      + "') inside entity reference");
        }

        // Every form of reference is closed by a ';', which we consume.
        if (CurrentChar != ';')
            throwParseException("Reference must end in ';', but '"
                                   + CurrentChar + "' found instead");
        readNextChar();
        return NextToken;
    }

    //*****************************************************************************
    /** Parse the next token as PCData (plain text content within an element).
     *  Characters are gathered until the source runs out or a character is met
     *  for which xa_CharChecker.havePCDataChar() is false; that character is
     *  left unread as the current character. The token's value is everything
     *  gathered, including whatever the buffer held on entry.
     *
     *  @param      InputTokenValueBuffer  A buffer which is assumed to contain
     *                                      the first character of PCData already
     *  @return     PCData token
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    private xe_Token parsePCDataToken(StringBuffer  InputTokenValueBuffer)
                                                throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parsePCDataToken:");

        for (;;)
        {
            // Stop at end of source, or at the first non-PCData character.
            if (atEnd() || !xa_CharChecker.havePCDataChar(CurrentChar) )
                break;

            InputTokenValueBuffer.append(CurrentChar);
            readNextChar();
        }

        String  TextValue = InputTokenValueBuffer.toString();
        return new xe_Token(xe_TokenType.PCDATA, TextValue);
    }

    //*****************************************************************************
    /** Parse the next token as a name and check that it is exactly the name
     *  expected. Use this only where the next token has to be a name: an
     *  exception is thrown if it is not a name, and another if it is a name
     *  other than the one expected.
     *
     *  @param      InputExpectedValue          The value the token is expected to have
     *  @param      InputConsumeWhitespaceFlag  Passed on to parseNameToken(): if
     *                                           true, whitespace before the name
     *                                           is consumed and kept separately
     *  @return     NAME token whose value equals InputExpectedValue
     *  @exception  xm_ParseException  XML wellformedness error, or the name
     *                                  did not have the expected value
     *  @exception  IOException        Error reading from source reader
     */
    public xe_Token parseNameTokenExpected(String   InputExpectedValue,
                                           boolean  InputConsumeWhitespaceFlag)
                                                throws xm_ParseException, IOException
    {
        xe_Token  ParsedToken = parseNameToken(InputConsumeWhitespaceFlag);

        boolean  IsName = (ParsedToken.getType() == xe_TokenType.NAME);
        if (!IsName)
        {
            throwParseException("Expected name, but found '"
                                   + ParsedToken.getStringValue() + "' instead");
        }

        int  Comparison = ParsedToken.getStringValue().compareTo(InputExpectedValue);
        if (Comparison != 0)
        {
            throwParseException("Expected '" + InputExpectedValue + "', but found '"
                                   + ParsedToken.getStringValue() + "' instead");
        }

        return ParsedToken;
    }

    //*****************************************************************************
    /** Parse the next token as a name, optionally skipping whitespace first.
     *  Use this only where the next token has to be a name; parseName() throws
     *  an exception if it is not.
     *
     *  @param      InputConsumeWhitespaceFlag  If true, whitespace before the name
     *                                           is consumed and stored on the token
     *                                           as its preceding whitespace; if
     *                                           false, nothing is skipped, so the
     *                                           current character must itself
     *                                           start the name
     *  @return     NAME token
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    protected xe_Token parseNameToken(boolean  InputConsumeWhitespaceFlag)
                                          throws xm_ParseException, IOException
    {
        // Whitespace first (null if none was wanted or found), then the name.
        String    LeadingWhitespace = parseWhitespace(InputConsumeWhitespaceFlag);
        String    NameText          = parseName();

        xe_Token  Result = new xe_Token(xe_TokenType.NAME, NameText);
        Result.setPrecedingWhitespace(LeadingWhitespace);
        return Result;
    }

    //*****************************************************************************
    /** Parse a name starting at the current character and wrap it in a NAME
     *  token. Call this only once a valid name start character has been seen,
     *  or where the next token has to be a name; parseName() throws otherwise.
     *
     *  @return     NAME token whose value is the name parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    protected xe_Token parseNameToken() throws xm_ParseException, IOException
    {
        String  NameText = parseName();
        return new xe_Token(xe_TokenType.NAME, NameText);
    }

    //*****************************************************************************
    /** Parse a name starting at the current character and return it as a
     *  String. The work is done by parseName(StringBuffer), using a fresh
     *  buffer.
     *
     *  @return     The name parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    public String parseName() throws xm_ParseException, IOException
    {
        StringBuffer  Collected = new StringBuffer(DEFAULT_NAME_LENGTH);
        parseName(Collected);

        String  NameText = Collected.toString();
        return NameText;
    }

    //*****************************************************************************
    /** Parse a name and append it to the buffer supplied. The current character
     *  must be acceptable as the start of a name (an exception is thrown if it
     *  is not); the characters after it are taken for as long as they are
     *  acceptable within a name and the source has not run out. The buffer does
     *  not have to be empty beforehand: the name is added to whatever it holds.
     *
     *  Currently no namespace support is provided.
     *
     *  @param      InputNameStringBuffer  Buffer to which the name parsed is added
     *  @exception  xm_ParseException      XML wellformedness error
     *  @exception  IOException            Error reading from source reader
     */
    protected void parseName(StringBuffer  InputNameStringBuffer)
                                              throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parseName:");
        LastSignificantOffset = TotalCharCount - 1;

        // Refuse to go on unless the first character can start a name.
        boolean  StartsProperly = xa_CharChecker.haveNameStartChar(CurrentChar);
        if (!StartsProperly)
            throwParseException("Expect name, but it starts with an unacceptable character ("
                                   + CurrentChar + ")");

        // The first character is always taken; each later one is taken only
        // while the source lasts and the character is valid within a name.
        do
        {
            InputNameStringBuffer.append(CurrentChar);
            readNextChar();
        }
        while (!atEnd() && xa_CharChecker.haveNameChar(CurrentChar) );
    }

    //*****************************************************************************
    /** Parse the next token as a decimal number and wrap it in a NUMBER token.
     *  It is meant to be called once the first digit has been seen as the
     *  current character; if there were no digit, parseNumber() would return
     *  null and the token would carry a null value.
     *
     *  @return     NUMBER token whose value is the digits parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    private xe_Token parseNumberToken() throws xm_ParseException, IOException
    {
        String  Digits = parseNumber();
        return new xe_Token(xe_TokenType.NUMBER, Digits);
    }

    //*****************************************************************************
    /** Parse a decimal integer number: the run of characters, starting at the
     *  current one, for which xa_CharChecker.haveNumberChar() is true.
     *
     *  @return     The digits as a string, or null if the current character
     *               did not begin a number (or the source had run out)
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    protected String parseNumber() throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parseNumber:");
        StringBuffer  Digits = new StringBuffer(DEFAULT_NAME_LENGTH);

        // Gather digits; the first non-digit is left as the current character.
        for ( ; !atEnd() && xa_CharChecker.haveNumberChar(CurrentChar); readNextChar() )
            Digits.append(CurrentChar);

        //TBD Should we throw an exception if its length is 0 (because a non-existent
        //TBD thing is hardly a number)?
        return (Digits.length() == 0) ? null : Digits.toString();
    }

    //*****************************************************************************
    /** Parse the next token as a hex number. Will always get one as only called
     *  if have at least one digit.
     *
     *  @return     HEX_NUMBER token
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
/*    private xe_Token parseHexNumberToken() throws xm_ParseException
    {
//        eh_Debug.add(8, "xe_Tokenizer.parseHexNumberToken:");
        return new xe_Token(xe_TokenType.HEX_NUMBER, parseHexNumber() );
    }
*/
    //*****************************************************************************
    /** Parse a hexadecimal integer number: the run of characters, starting at
     *  the current one, for which xa_CharChecker.haveHexNumberChar() is true
     *  (0-9, a-f and A-F).
     *
     *  @return  The hex digits as a string, or null if the current character
     *            did not begin a hex number (or the source had run out)
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    private String parseHexNumber() throws xm_ParseException, IOException
    {
        eh_Debug.add(8, "xe_Tokenizer.parseHexNumber:");
        StringBuffer  HexDigits = new StringBuffer(DEFAULT_NAME_LENGTH);

        // Gather hex digits; the first other character is left as the current one.
        for ( ; !atEnd() && xa_CharChecker.haveHexNumberChar(CurrentChar); readNextChar() )
            HexDigits.append(CurrentChar);

        //TBD Should we throw an exception if its length is 0 (because a non-existent
        //TBD thing is hardly a number)?
        return (HexDigits.length() == 0) ? null : HexDigits.toString();
    }

    //*****************************************************************************
    /** Consume whitespace characters up to the first non-whitespace character
     *  (or the end of the source) and return what was consumed.
     *
     *  @param      InputWantWhitespaceFlag  If true, we parse whitespace; if false
     *                                        we do nothing and return null. The
     *                                        latter spares callers from having to
     *                                        test the flag themselves.
     *  @return     The whitespace which was read, or null if none was read
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    public String parseWhitespace(boolean  InputWantWhitespaceFlag)
                                       throws xm_ParseException, IOException
    {
        // Nothing to do if whitespace isn't wanted ...
        if (!InputWantWhitespaceFlag)
            return null;

        // ... or if there isn't any (checked up front purely for efficiency,
        // to avoid creating a buffer in the commonest case).
        if (!xa_CharChecker.isWhitespace(CurrentChar) )
            return null;

        StringBuffer  Collected = new StringBuffer(DEFAULT_NAME_LENGTH);
        while (!atEnd() && xa_CharChecker.isWhitespace(CurrentChar) )
        {
            Collected.append(CurrentChar);
            readNextChar();
        }

        // An empty result is reported as null rather than as an empty string.
        return (Collected.length() > 0) ? Collected.toString() : null;
    }

    //*****************************************************************************
    /** Parse the next token as a quoted literal. We expect an opening quote
     *  (single or double), some text, and then a closing quote of the same
     *  kind. The opening quote is checked and consumed here; the rest is left
     *  to parseLiteralToken(char).
     *
     *  @param      InputConsumeWhitespaceFlag  If true, whitespace before the
     *                                           opening quote is consumed and
     *                                           stored on the token as its
     *                                           preceding whitespace; if false,
     *                                           nothing is skipped, so the current
     *                                           character must itself be the quote
     *  @return     Token representing the value parsed
     *  @exception  xm_ParseException  XML wellformedness error (including a
     *                                  missing opening quote)
     *  @exception  IOException        Error reading from source reader
     */
    public xe_Token parseLiteralToken(boolean  InputConsumeWhitespaceFlag)
                                                  throws xm_ParseException, IOException
    {
//        eh_Debug.add(8, "xe_Tokenizer.parseLiteralToken:");
        LastSignificantOffset = TotalCharCount;

        String   LeadingWhitespace = parseWhitespace(InputConsumeWhitespaceFlag);

        // Whatever follows the whitespace has to be one of the two quote characters.
        char     OpeningQuote = CurrentChar;
        boolean  IsQuote      = (OpeningQuote == '\'') || (OpeningQuote == '"');
        if (!IsQuote)
        {
            throwParseException("Expected ' or \" to begin quote, but found '"
                                    + OpeningQuote + "' instead");
        }
        readNextChar();

        // Parse up to the matching close quote, then attach the whitespace.
        xe_Token  Result = parseLiteralToken(OpeningQuote);
        Result.setPrecedingWhitespace(LeadingWhitespace);
        return Result;
    }

    //*****************************************************************************
    /** Parse the next token as a literal, assuming that its opening quote (either
     *  single or double) has already been parsed. We expect some text and then a
     *  closing quote (the same as the opening one). The quotes themselves are not
     *  stored as part of the literal itself.
     *
     *  @param      InputStartQuoteChar  The opening quote character
     *  @return                          Token representing the literal value parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    public xe_Token parseLiteralToken(char  InputStartQuoteChar)
                                        throws xm_ParseException, IOException
    {
//        eh_Debug.add(8, "xe_Tokenizer.parseLiteralToken: starting with "
//                              + InputStartQuoteChar);
        // Read everything up to (and consume) the closing quote, which must be the
        // same character as the opening one.
        String  LiteralValue = parseUntil(InputStartQuoteChar);

        // parseUntil() reports "nothing read" as null; an empty literal is
        // represented by an empty string. The "" literal is used directly, since
        // Strings are immutable and wrapping it in new String() only creates a
        // needless extra object.
        if (LiteralValue == null)
            LiteralValue = "";

        return new xe_Token(xe_TokenType.LITERAL, LiteralValue);
    }

    //*****************************************************************************
    /** Parse until we encounter the InputEndString (or the end of the source). The
     *  InputEndString is omitted from the end of the resulting value.
     *
     *  @param  InputEndString  The string to look for
     *  @return                 String representing the value parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    public String parseUntil(String  InputEndString) throws xm_ParseException, IOException
    {
//        eh_Debug.add(8, "xe_Tokenizer.parseUntil: '" + InputEndString + "'");
        int           EndLength     = InputEndString.length();
        StringBuffer  Accumulated   = new StringBuffer(DEFAULT_NAME_LENGTH);
        boolean       EndSeenFlag   = false;

        // Consume one character at a time, stopping as soon as the text gathered
        // so far finishes with the end string, or when the source runs out (which
        // is not treated as an error here).
        while (!EndSeenFlag && !atEnd())
        {
            Accumulated.append(CurrentChar);
            readNextChar();

            // Only worth comparing once at least as many characters as the end
            // string contains have been gathered.
            int  TailStart = Accumulated.length() - EndLength;
            if (TailStart >= 0)
            {
                // Count how many leading characters of the end string match the
                // tail of the gathered text; a full count means we have found it.
                int  MatchedCount = 0;
                while (    MatchedCount < EndLength
                        && Accumulated.charAt(TailStart + MatchedCount)
                              == InputEndString.charAt(MatchedCount) )
                    MatchedCount++;

                EndSeenFlag = (MatchedCount == EndLength);
            }
        }

        // The end string itself is not part of the result, so chop it off.
        if (EndSeenFlag)
            Accumulated.setLength(Accumulated.length() - EndLength);

        // An empty result is reported as null rather than as an empty string.
        String  ResultString = (Accumulated.length() > 0) ? Accumulated.toString()
                                                          : null;
//        eh_Debug.add(7, "xe_Tokenizer.parseUntil: Return '" + ResultString + "'");
        return ResultString;
    }

    //*****************************************************************************
    /** Parse until we encounter the InputEndChar (or the end of the source). The
     *  InputEndChar is omitted from the end of the resulting value.
     *
     *  @param  InputEndChar  The character to look for
     *  @return               String representing the value parsed
     *  @exception  xm_ParseException  XML wellformedness error
     *  @exception  IOException        Error reading from source reader
     */
    public String parseUntil(char  InputEndChar) throws xm_ParseException, IOException
    {
//        eh_Debug.add(8, "xe_Tokenizer.parseUntil: '" + InputEndChar + "'");
        StringBuffer  Accumulated = new StringBuffer(DEFAULT_NAME_LENGTH);

        // Consume characters until the end character has been read. Unlike the
        // String variant, running out of source first is reported as an error.
        while (true)
        {
            if (atEnd())
                throwParseException("Unexpected end of source");

            char  ConsumedChar = CurrentChar;
            readNextChar();

            // The end character is consumed but kept out of the result.
            if (ConsumedChar == InputEndChar)
                break;

            Accumulated.append(ConsumedChar);
        }

        // An empty result is reported as null rather than as an empty string.
        String  ResultString = (Accumulated.length() > 0) ? Accumulated.toString()
                                                          : null;
//        eh_Debug.add(7, "xe_Tokenizer.parseUntil: Return '" + ResultString + "'");
        return ResultString;
    }

    //*****************************************************************************
    // Error-reporting methods.
    //*****************************************************************************
    /** Throw a parse exception, using the InputErrorMessage.
     *
     *  @param      InputErrorMessage   Message text
     *  @param      InputStartPosition  Start of area in source to highlight
     *  @param      InputEndPosition    End of area in source to highlight
     *  @exception  xm_ParseException   An exception whose throwing has been
     *                                   explicitly requested
     */
    public void throwParseException(String  InputErrorMessage,
                                    int     InputStartPosition,
                                    int     InputEndPosition) throws xm_ParseException
    {
        // The message is passed through untouched; the current line and column
        // counts are supplied alongside the caller's highlight range.
        xm_ParseException  RequestedException
                               = new xm_ParseException(InputErrorMessage,
                                                       LineCount,
                                                       ColumnCount,
                                                       InputStartPosition,
                                                       InputEndPosition);
        throw RequestedException;
    }

    //*****************************************************************************
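    /** Throw a parse exception, using the InputErrorMessage with a description of
     *  the current source location appended to it. The area of source highlighted
     *  starts at the last significant offset.
     *
     *  @param      InputErrorMessage   Message text
     *  @exception  xm_ParseException   An exception whose throwing has been
     *                                   explicitly requested
     */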
    public void throwParseException(String  InputErrorMessage) throws xm_ParseException
    {
        // The highlighted area normally ends one short of the total character
        // count. If that would coincide with its start (only one character is in
        // error), push the end out by one so the character can be seen.
        int  LastReadOffset = TotalCharCount - 1;
        int  HighlightEnd   = (LastReadOffset == LastSignificantOffset)
                                  ? LastReadOffset + 1
                                  : LastReadOffset;

        // Tell the reader where in the source we are, as part of the message.
        String  FullMessage = InputErrorMessage + formatSourceLocationString(0);

        throw new xm_ParseException(FullMessage,
                                    LineCount,
                                    ColumnCount,
                                    LastSignificantOffset,
                                    HighlightEnd);
    }

    //*****************************************************************************
    /** Report a warning message.
     *
     *  @param  InputMessage  The text of the main message to report
     */
    public void reportWarning(String  InputMessage)
    {
        // The warning relates to where we are right now: no characters back.
        int  NoColumnOffset = 0;
        reportWarning(InputMessage, NoColumnOffset);
    }

    //*****************************************************************************
    /** Report a warning message.
     *
     *  @param  InputMessage       The text of the main message to report
     *  @param  InputColumnOffset  The number of characters back from where we are
     *                              now at which the event occurred
     */
    public void reportWarning(String  InputMessage, int  InputColumnOffset)
    {
        // Warnings are not thrown; they are written to the debug log (level 2)
        // with the source location appended.
        String  LocationSuffix = formatSourceLocationString(InputColumnOffset);
        String  WarningText    = "xe warning: " + InputMessage + LocationSuffix;
        eh_Debug.add(2, WarningText);
    }

    //*****************************************************************************
    /** Format a string which describes the current location in the source (which
     *  is expected to be used when reporting errors and warnings).
     *
     *  @param   InputColumnOffset  The number of characters back from where we are
     *                               now at which the event occurred
     *  @return                     Source location string
     */
    private String formatSourceLocationString(int  InputColumnOffset)
    {
        int  LineNumber   = getLineCount();
        int  ColumnNumber = ColumnCount - InputColumnOffset;

        // A column that is not positive is reported as the end of the line before
        // the current one.
        if (ColumnNumber <= 0)
            return " (at end of line " + (LineNumber - 1) + ")";

        return " (Line " + LineNumber + ", column " + ColumnNumber + ")";
    }

    //*****************************************************************************
    /** Set the reader from which the XML source is to be read.
     *
     *  @param      InputSourceReader  The reader from which to read source to parse
     *  @exception  IOException        Error reading first character from source
     *                                  reader
     */
    public void setSource(Reader  InputSourceReader) throws IOException
    {
//        eh_Debug.add(7, "xe_Tokenizer.setSource:");
        this.SourceReader = InputSourceReader;

        // Start the line, column and character counts afresh for the new source.
        resetStatistics();

        // Prime the current character with the first one in the source, so the
        // parsing methods always have a character to look at.
        readNextChar();
    }

    //*****************************************************************************
    /** Specify whether we validate the source.
     *
     *  @param InputValidateFlag  True means we validate
     */
    public void setValidateFlag(boolean  InputValidateFlag)
    {
        // Plain setter: no other state is touched.
        this.ValidateFlag = InputValidateFlag;
    }

    //*****************************************************************************
    /** Specify whether we verify the source.
     *
     *  @param InputVerifyFlag  True means we verify
     */
    public void setVerifyFlag(boolean  InputVerifyFlag)
    {
        // Plain setter: no other state is touched.
        this.VerifyFlag = InputVerifyFlag;
    }

    //*****************************************************************************
    /** Specify whether parsing stops when a node has a verification error.
     *
     *  @param InputStopIfVerifyErrorFlag  True means parsing stops on a verification error
     */
    public void setStopIfVerifyErrorFlag(boolean  InputStopIfVerifyErrorFlag)
    {
        // Plain setter: no other state is touched.
        this.StopIfVerifyErrorFlag = InputStopIfVerifyErrorFlag;
    }

    //*****************************************************************************
    /** Specify whether strict XML syntax is to be rigidly applied.
     *
     *  @param InputStrictXmlSyntaxFlag  True means apply strict XML syntax
     */
    public void setStrictXmlSyntaxFlag(boolean  InputStrictXmlSyntaxFlag)
    {
        // Plain setter: no other state is touched.
        this.StrictXmlSyntaxFlag = InputStrictXmlSyntaxFlag;
    }

    //*****************************************************************************
    /** Reset the counts of lines, etc., ready for another parse.
     */
    public void resetStatistics()
    {
        // Nothing has been read yet: no characters, and no columns on the line.
        TotalCharCount = 0;
        ColumnCount    = 0;

        // Line numbering is 1-based.
        LineCount      = 1;
    }

    //*****************************************************************************
    /** Find out whether we validate the source.
     *
     *  @return  True means we validate
     */
    public boolean getValidateFlag()
    {
        // Plain accessor for the validation control flag.
        return this.ValidateFlag;
    }

    //*****************************************************************************
    /** Find out whether we verify the source.
     *
     *  @return  True means we verify
     */
    public boolean getVerifyFlag()
    {
        // Plain accessor for the verification control flag.
        return this.VerifyFlag;
    }

    //*****************************************************************************
    /** Find out whether parsing stops when a node has a verification error.
     *
     *  @return  True means parsing stops on a verification error
     */
    public boolean getStopIfVerifyErrorFlag()
    {
        // Plain accessor for the stop-on-verification-error control flag.
        return this.StopIfVerifyErrorFlag;
    }

    //*****************************************************************************
    /** Find out whether strict XML syntax is to be rigidly applied.
     *
     *  @return  True means we apply strict XML syntax
     */
    public boolean getStrictXmlSyntaxFlag()
    {
        // Plain accessor for the strict-XML-syntax control flag.
        return this.StrictXmlSyntaxFlag;
    }

    //*****************************************************************************
    /** Return the number of lines parsed so far.
     *
     *  @return  The number of lines parsed up to this point
     */
    public int getLineCount()
    {
        // The count starts at 1 after a reset and goes up by one for each
        // newline character read.
        return this.LineCount;
    }

    //*****************************************************************************
    /** Return the number of characters parsed so far.
     *
     *  @return  The number of characters parsed up to this point
     */
    public int getTotalCharCount()
    {
        // The count goes up by one on every read from the source.
        return this.TotalCharCount;
    }

    //*****************************************************************************
    /** Determine if we have reached the end of the input source.
     *
     *  @return  True if at end of source
     */
    protected boolean atEnd()
    {
        // The int form of the last read is compared with -1 to detect the end of
        // the source; the char form cannot hold that value.
        int  EndOfSourceMarker = -1;
        return (CurrentCharAsInt == EndOfSourceMarker);
    }

    //*****************************************************************************
    /** Read the next character from the source. Input is not buffered.
     *
     *  @exception  IOException  Error reading from source reader
     */
    protected void readNextChar() throws IOException
    {
        // Keep the value read in both forms: the int is what atEnd() tests, and
        // the char is what the parsing methods examine.
        int  NextCharAsInt = SourceReader.read();
        CurrentCharAsInt = NextCharAsInt;
        CurrentChar      = (char)NextCharAsInt;

        // Maintain the position statistics: every read counts towards the total,
        // and a newline starts a fresh line at column zero.
        TotalCharCount++;
        if (CurrentChar != '\n')
            ColumnCount++;
        else
        {
            LineCount++;
            ColumnCount = 0;
        }
//        eh_Debug.add(9, "xe_Tokenizer.readNextChar: '" + (char)CurrentChar + "'");
    }

    //*****************************************************************************
    /** Get the last character read from the source.
     *
     *  @returns  The character which was last read from the source reader
     */
//    private char getCurrentChar()
//    {
//        return CurrentChar;
//    }
}

//*****************************************************************************
