Index: src/Common/NetTopologySuite/Utilities/RToolsUtil/StreamTokenizer.cs =================================================================== diff -u -r8f6ae890fed8e8eae3a32f9c0498a10f82e0ddf9 -r5fc71a385897af92ccb092f2f969b5709afab85a --- src/Common/NetTopologySuite/Utilities/RToolsUtil/StreamTokenizer.cs (.../StreamTokenizer.cs) (revision 8f6ae890fed8e8eae3a32f9c0498a10f82e0ddf9) +++ src/Common/NetTopologySuite/Utilities/RToolsUtil/StreamTokenizer.cs (.../StreamTokenizer.cs) (revision 5fc71a385897af92ccb092f2f969b5709afab85a) @@ -36,148 +36,229 @@ namespace RTools_NTS.Util { - // --------------------------------------------------------------------- - #region Exceptions - // --------------------------------------------------------------------- + // --------------------------------------------------------------------- - /// - /// Exception class for unterminated tokens. - /// - public class StreamTokenizerUntermException : Exception - { - /// - /// Construct with a particular message. - /// - /// The message to store in this object. - public StreamTokenizerUntermException(string msg) : base(msg) {} - } + #region Exceptions - /// - /// Exception class for unterminated quotes. - /// - public class StreamTokenizerUntermQuoteException : StreamTokenizerUntermException - { - /// - /// Construct with a particular message. - /// - /// The message to store in this object. - public StreamTokenizerUntermQuoteException(string msg) : base(msg) {} - } + // --------------------------------------------------------------------- - /// - /// Exception class for unterminated block comments. - /// - public class StreamTokenizerUntermCommentException : StreamTokenizerUntermException - { - /// - /// Construct with a particular message. - /// - /// The message to store in this object. - public StreamTokenizerUntermCommentException(string msg) : base(msg) {} - } + /// + /// Exception class for unterminated tokens. + /// + public class StreamTokenizerUntermException : Exception + { + /// + /// Construct with a particular message. + /// + /// The message to store in this object. + public StreamTokenizerUntermException(string msg) : base(msg) {} + } - #endregion + /// + /// Exception class for unterminated quotes. + /// + public class StreamTokenizerUntermQuoteException : StreamTokenizerUntermException + { + /// + /// Construct with a particular message. + /// + /// The message to store in this object. + public StreamTokenizerUntermQuoteException(string msg) : base(msg) {} + } - // --------------------------------------------------------------------- - #region Enumerations - // --------------------------------------------------------------------- + /// + /// Exception class for unterminated block comments. + /// + public class StreamTokenizerUntermCommentException : StreamTokenizerUntermException + { + /// + /// Construct with a particular message. + /// + /// The message to store in this object. + public StreamTokenizerUntermCommentException(string msg) : base(msg) {} + } - /// - /// Bitwise enumeration for character types. - /// - [Flags] - public enum CharTypeBits : byte - { - /// word characters (usually alpha, digits, and domain specific) - Word = 1, - /// # or something for line comments - Comment = 2, - /// whitespace - Whitespace = 4, - /// ' or " type - Quote = 8, - /// usually 0 to 9 - Digit = 16, - /// usually 0 to 9, a-f and A-F - HexDigit = 32, - /// eof char - Eof = 64 - } + #endregion - #endregion + // --------------------------------------------------------------------- - /// - /// This contains the settings that control the behavior of the tokenizer. - /// This is separated from the StreamTokenizer so that common settings - /// are easy to package and keep together. - /// - [Serializable] - public class StreamTokenizerSettings - { - // --------------------------------------------------------------------- - #region Properties - // --------------------------------------------------------------------- + #region Enumerations - private byte[] charTypes; - /// - /// This is the character type table. Each byte is bitwise encoded - /// with the character attributes, such as whether that character is - /// word or whitespace. - /// - public byte[] CharTypes { get { return(charTypes); } } + // --------------------------------------------------------------------- - bool grabWhitespace; - /// - /// Whether or not to return whitespace tokens. If not, they're ignored. - /// - public bool GrabWhitespace { get { return(grabWhitespace); } set { grabWhitespace = value; } } + /// + /// Bitwise enumeration for character types. + /// + [Flags] + public enum CharTypeBits : byte + { + /// word characters (usually alpha, digits, and domain specific) + Word = 1, - bool grabEol; - /// - /// Whether or not to return EolTokens on end of line. Eol tokens will not - /// break up other tokens which can be multi-line. For example block comments - /// and quotes will not be broken by Eol tokens. Therefore the number of - /// Eol tokens does not give you the line count of a stream. - /// - public bool GrabEol { get { return(grabEol); } set { grabEol = value; } } + /// # or something for line comments + Comment = 2, - bool slashSlashComments; - /// - /// Whether or not to look for // comments - /// - public bool SlashSlashComments { get { return(slashSlashComments); } set { slashSlashComments = value; } } + /// whitespace + Whitespace = 4, - bool slashStarComments; - /// - /// Whether or not to look for /* */ block comments. - /// - public bool SlashStarComments { get { return(slashStarComments); } set { slashStarComments = value; } } + /// ' or " type + Quote = 8, - bool grabComments; - /// - /// Whether or not to return comments. - /// - public bool GrabComments { get { return(grabComments); } set { grabComments = value; } } + /// usually 0 to 9 + Digit = 16, - bool doUntermCheck; - /// - /// Whether or not to check for unterminated quotes and block comments. - /// If true, and one is encoutered, an exception is thrown of the appropriate type. - /// - public bool DoUntermCheck { get { return(doUntermCheck); } set { doUntermCheck = value; } } + /// usually 0 to 9, a-f and A-F + HexDigit = 32, - bool parseNumbers; - /// - /// Whether or not digits are specified as Digit type in the - /// character table. - /// This setting is based on the character types table, so this - /// setting interacts with character type table manipulation. - /// This setting may become incorrect if you modify the character - /// types table directly. - /// - public bool ParseNumbers - { - get { return(parseNumbers); } + /// eof char + Eof = 64 + } + + #endregion + + /// + /// This contains the settings that control the behavior of the tokenizer. + /// This is separated from the StreamTokenizer so that common settings + /// are easy to package and keep together. + /// + [Serializable] + public class StreamTokenizerSettings + { + // --------------------------------------------------------------------- + + #region Properties + + // --------------------------------------------------------------------- + + /// + /// This is the character type table. Each byte is bitwise encoded + /// with the character attributes, such as whether that character is + /// word or whitespace. + /// + public byte[] CharTypes { get; private set; } + + private bool grabWhitespace; + + /// + /// Whether or not to return whitespace tokens. If not, they're ignored. + /// + public bool GrabWhitespace + { + get + { + return (grabWhitespace); + } + set + { + grabWhitespace = value; + } + } + + private bool grabEol; + + /// + /// Whether or not to return EolTokens on end of line. Eol tokens will not + /// break up other tokens which can be multi-line. For example block comments + /// and quotes will not be broken by Eol tokens. Therefore the number of + /// Eol tokens does not give you the line count of a stream. + /// + public bool GrabEol + { + get + { + return (grabEol); + } + set + { + grabEol = value; + } + } + + private bool slashSlashComments; + + /// + /// Whether or not to look for // comments + /// + public bool SlashSlashComments + { + get + { + return (slashSlashComments); + } + set + { + slashSlashComments = value; + } + } + + private bool slashStarComments; + + /// + /// Whether or not to look for /* */ block comments. + /// + public bool SlashStarComments + { + get + { + return (slashStarComments); + } + set + { + slashStarComments = value; + } + } + + private bool grabComments; + + /// + /// Whether or not to return comments. + /// + public bool GrabComments + { + get + { + return (grabComments); + } + set + { + grabComments = value; + } + } + + private bool doUntermCheck; + + /// + /// Whether or not to check for unterminated quotes and block comments. + /// If true, and one is encoutered, an exception is thrown of the appropriate type. + /// + public bool DoUntermCheck + { + get + { + return (doUntermCheck); + } + set + { + doUntermCheck = value; + } + } + + private bool parseNumbers; + + /// + /// Whether or not digits are specified as Digit type in the + /// character table. + /// This setting is based on the character types table, so this + /// setting interacts with character type table manipulation. + /// This setting may become incorrect if you modify the character + /// types table directly. + /// + public bool ParseNumbers + { + get + { + return (parseNumbers); + } /* dropped for speed, this means this property isn't accurate if * character types table is modified directly. * { @@ -192,38 +273,40 @@ return(true); } */ - set - { - if (value) - { - for (int i = '0'; i <= '9'; i++) - charTypes[i] |= (byte)CharTypeBits.Digit; - } - else - { - byte digit = (byte)CharTypeBits.Digit; + set + { + if (value) + { + for (int i = '0'; i <= '9'; i++) + { + CharTypes[i] |= (byte) CharTypeBits.Digit; + } + } + else + { + byte digit = (byte) CharTypeBits.Digit; - for (int i = '0'; i <= '9'; i++) - { - charTypes[i] &= (byte)(~digit); // not digit - } - } - parseNumbers = value; - } - } + for (int i = '0'; i <= '9'; i++) + { + CharTypes[i] &= (byte) (~digit); // not digit + } + } + parseNumbers = value; + } + } - bool parseHexNumbers; + private bool parseHexNumbers; - /// - /// Whether or not to parse Hex (0xABCD...) numbers. - /// This setting is based on the character types table, so this - /// setting interacts with character type table manipulation. - /// - public bool ParseHexNumbers - { - get - { - return(parseHexNumbers); + /// + /// Whether or not to parse Hex (0xABCD...) numbers. + /// This setting is based on the character types table, so this + /// setting interacts with character type table manipulation. + /// + public bool ParseHexNumbers + { + get + { + return (parseHexNumbers); // for (int i = 'A'; i <= 'F'; i++) // { // if (!IsCharType((char)i, CharTypeBits.Digit)) @@ -241,1357 +324,1494 @@ // if (!IsCharType('x', CharTypeBits.Digit)) return(false); // // return(true); - } - set - { - parseHexNumbers = value; - if (parseHexNumbers) - { - for (int i = '0'; i <= '9'; i++) - charTypes[i] |= (byte)CharTypeBits.HexDigit; - for (int i = 'A'; i <= 'F'; i++) - charTypes[i] |= (byte)CharTypeBits.HexDigit; - for (int i = 'a'; i <= 'f'; i++) - charTypes[i] |= (byte)CharTypeBits.HexDigit; - charTypes[(int)'x'] |= (byte)CharTypeBits.HexDigit; - } - else - { - byte digit = (byte)CharTypeBits.HexDigit; + } + set + { + parseHexNumbers = value; + if (parseHexNumbers) + { + for (int i = '0'; i <= '9'; i++) + { + CharTypes[i] |= (byte) CharTypeBits.HexDigit; + } + for (int i = 'A'; i <= 'F'; i++) + { + CharTypes[i] |= (byte) CharTypeBits.HexDigit; + } + for (int i = 'a'; i <= 'f'; i++) + { + CharTypes[i] |= (byte) CharTypeBits.HexDigit; + } + CharTypes[(int) 'x'] |= (byte) CharTypeBits.HexDigit; + } + else + { + byte digit = (byte) CharTypeBits.HexDigit; - for (int i = 'A'; i <= 'F'; i++) - { - charTypes[i] &= (byte)(~digit); // not digit - } - for (int i = 'a'; i <= 'f'; i++) - { - charTypes[i] &= (byte)(~digit); // not digit - } - charTypes[(int)'x'] &= (byte)(~digit); - } - } - } + for (int i = 'A'; i <= 'F'; i++) + { + CharTypes[i] &= (byte) (~digit); // not digit + } + for (int i = 'a'; i <= 'f'; i++) + { + CharTypes[i] &= (byte) (~digit); // not digit + } + CharTypes[(int) 'x'] &= (byte) (~digit); + } + } + } - #endregion + #endregion - // --------------------------------------------------------------------- - #region Constructors/Destructor - // --------------------------------------------------------------------- + // --------------------------------------------------------------------- - /// - /// Default constructor. - /// - public StreamTokenizerSettings() - { - charTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry - SetDefaults(); - } + #region Constructors/Destructor - /// - /// Copy constructor. - /// - public StreamTokenizerSettings(StreamTokenizerSettings other) - { - Copy(other); - } + // --------------------------------------------------------------------- - /// - /// Sets this object to be the same as the specified object. - /// Note that some settings which are entirely embodied by the character - /// type table. - /// - public void Copy(StreamTokenizerSettings other) - { - charTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry - Array.Copy(other.charTypes, 0, charTypes, 0, charTypes.Length); - - grabWhitespace = other.grabWhitespace; - grabEol = other.grabEol; - slashSlashComments = other.slashSlashComments; - slashStarComments = other.slashStarComments; - grabComments = other.grabComments; - doUntermCheck = other.doUntermCheck; + /// + /// Default constructor. + /// + public StreamTokenizerSettings() + { + CharTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry + SetDefaults(); + } - parseHexNumbers = other.parseHexNumbers; - } + /// + /// Copy constructor. + /// + public StreamTokenizerSettings(StreamTokenizerSettings other) + { + Copy(other); + } - #endregion + /// + /// Sets this object to be the same as the specified object. + /// Note that some settings which are entirely embodied by the character + /// type table. + /// + public void Copy(StreamTokenizerSettings other) + { + CharTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry + Array.Copy(other.CharTypes, 0, CharTypes, 0, CharTypes.Length); - // --------------------------------------------------------------------- - #region main Setup - // --------------------------------------------------------------------- + grabWhitespace = other.grabWhitespace; + grabEol = other.grabEol; + slashSlashComments = other.slashSlashComments; + slashStarComments = other.slashStarComments; + grabComments = other.grabComments; + doUntermCheck = other.doUntermCheck; - /// - /// Setup default parse behavior. - /// This resets to same behavior as on construction. - /// - /// bool - true for success. - public bool SetDefaults() - { - slashStarComments = false; - grabComments = false; - slashSlashComments = false; - grabWhitespace = false; - doUntermCheck = true; - grabEol = false; + parseHexNumbers = other.parseHexNumbers; + } - // setup table - ResetCharTypeTable(); - ParseNumbers = true; - ParseHexNumbers = true; - WordChars('A', 'Z'); - WordChars('a', 'z'); - WhitespaceChars(0, ' '); - QuoteChar('\''); - QuoteChar('"'); - WordChars('0', '9'); + #endregion - return(true); - } + // --------------------------------------------------------------------- - /// - /// Apply settings which are commonly used for code parsing - /// C-endCapStyle code, including C++, C#, and Java. - /// - /// - public bool SetupForCodeParse() - { - GrabWhitespace = true; - GrabComments = true; - SlashSlashComments = true; - DoUntermCheck = true; - SlashStarComments = true; - WordChar('_'); - ParseNumbers = true; - ParseHexNumbers = true; - return(true); - } + #region main Setup - #endregion + // --------------------------------------------------------------------- - // --------------------------------------------------------------------- - #region Character Table Setup - // --------------------------------------------------------------------- + /// + /// Setup default parse behavior. + /// This resets to same behavior as on construction. + /// + /// bool - true for success. + public bool SetDefaults() + { + slashStarComments = false; + grabComments = false; + slashSlashComments = false; + grabWhitespace = false; + doUntermCheck = true; + grabEol = false; - /// - /// Clear the character type settings. This leaves them unset, - /// as opposed to the default. Use SetDefaults() for default - /// settings. - /// - public void ResetCharTypeTable() - { - Array.Clear(charTypes, 0, charTypes.Length); - charTypes[StreamTokenizer.NChars] = (byte)CharTypeBits.Eof; // last entry for Eof - } + // setup table + ResetCharTypeTable(); + ParseNumbers = true; + ParseHexNumbers = true; + WordChars('A', 'Z'); + WordChars('a', 'z'); + WhitespaceChars(0, ' '); + QuoteChar('\''); + QuoteChar('"'); + WordChars('0', '9'); - /// - /// Specify that a particular character is a word character. - /// Character table type manipulation method. - /// This adds the type to the char(s), rather - /// than overwriting other types. - /// - /// The character. - public void WordChar(int c) - { - charTypes[c] |= (byte)CharTypeBits.Word; - } + return (true); + } - /// - /// Specify that a range of characters are word characters. - /// Character table type manipulation method. - /// This adds the type to the char(s), rather - /// than overwriting other types. - /// - /// First character. - /// Last character. - public void WordChars(int startChar, int endChar) - { - for (int i = startChar; i <= endChar; i++) - { - charTypes[i] |= (byte)CharTypeBits.Word; - } - } + /// + /// Apply settings which are commonly used for code parsing + /// C-endCapStyle code, including C++, C#, and Java. + /// + /// + public bool SetupForCodeParse() + { + GrabWhitespace = true; + GrabComments = true; + SlashSlashComments = true; + DoUntermCheck = true; + SlashStarComments = true; + WordChar('_'); + ParseNumbers = true; + ParseHexNumbers = true; + return (true); + } - /// - /// Specify that a string of characters are word characters. - /// Character table type manipulation method. - /// This adds the type to the char(s), rather - /// than overwriting other types. - /// - /// - public void WordChars(string s) - { - for (int i = 0; i < s.Length; i++) - charTypes[s[i]] |= (byte)CharTypeBits.Word; - } + #endregion - /// - /// Specify that a character is a whitespace character. - /// Character table type manipulation method. - /// This type is exclusive with other types. - /// - /// The character. - public void WhitespaceChar(int c) - { - charTypes[c] = (byte)CharTypeBits.Whitespace; - } + // --------------------------------------------------------------------- - /// - /// Specify that a range of characters are whitespace characters. - /// Character table type manipulation method. - /// This adds the characteristic to the char(s), rather - /// than overwriting other characteristics. - /// - /// First character. - /// Last character. - public void WhitespaceChars(int startChar, int endChar) - { - for (int i = startChar; i <= endChar; i++) - charTypes[i] = (byte)CharTypeBits.Whitespace; - } + #region Character Table Setup - /// - /// Remove other type settings from a range of characters. - /// Character table type manipulation method. - /// - /// - /// - public void OrdinaryChars(int startChar, int endChar) - { - for (int i = startChar; i <= endChar; i++) - charTypes[i] = 0; - } + // --------------------------------------------------------------------- - /// - /// Remove other type settings from a character. - /// Character table type manipulation method. - /// - /// - public void OrdinaryChar(int c) - { - charTypes[c] = 0; - } + /// + /// Clear the character type settings. This leaves them unset, + /// as opposed to the default. Use SetDefaults() for default + /// settings. + /// + public void ResetCharTypeTable() + { + Array.Clear(CharTypes, 0, CharTypes.Length); + CharTypes[StreamTokenizer.NChars] = (byte) CharTypeBits.Eof; // last entry for Eof + } - /// - /// Specify that a particular character is a comment-starting character. - /// Character table type manipulation method. - /// - /// - public void CommentChar(int c) - { - charTypes[c] = (byte)CharTypeBits.Comment; - } + /// + /// Specify that a particular character is a word character. + /// Character table type manipulation method. + /// This adds the type to the char(s), rather + /// than overwriting other types. + /// + /// The character. + public void WordChar(int c) + { + CharTypes[c] |= (byte) CharTypeBits.Word; + } - /// - /// Specify that a particular character is a quote character. - /// Character table type manipulation method. - /// - /// - public void QuoteChar(int c) - { - charTypes[c] = (byte)CharTypeBits.Quote; - } + /// + /// Specify that a range of characters are word characters. + /// Character table type manipulation method. + /// This adds the type to the char(s), rather + /// than overwriting other types. + /// + /// First character. + /// Last character. + public void WordChars(int startChar, int endChar) + { + for (int i = startChar; i <= endChar; i++) + { + CharTypes[i] |= (byte) CharTypeBits.Word; + } + } - #endregion + /// + /// Specify that a string of characters are word characters. + /// Character table type manipulation method. + /// This adds the type to the char(s), rather + /// than overwriting other types. + /// + /// + public void WordChars(string s) + { + for (int i = 0; i < s.Length; i++) + { + CharTypes[s[i]] |= (byte) CharTypeBits.Word; + } + } - // --------------------------------------------------------------------- - #region Utility Methods - // --------------------------------------------------------------------- + /// + /// Specify that a character is a whitespace character. + /// Character table type manipulation method. + /// This type is exclusive with other types. + /// + /// The character. + public void WhitespaceChar(int c) + { + CharTypes[c] = (byte) CharTypeBits.Whitespace; + } - /// - /// Return a string representation of a character type setting. - /// Since the type setting is bitwise encoded, a character - /// can have more than one type. - /// - /// The character type byte. - /// The string representation of the type flags. - public string CharTypeToString(byte ctype) - { - StringBuilder str = new StringBuilder(); + /// + /// Specify that a range of characters are whitespace characters. + /// Character table type manipulation method. + /// This adds the characteristic to the char(s), rather + /// than overwriting other characteristics. + /// + /// First character. + /// Last character. + public void WhitespaceChars(int startChar, int endChar) + { + for (int i = startChar; i <= endChar; i++) + { + CharTypes[i] = (byte) CharTypeBits.Whitespace; + } + } - if (IsCharType(ctype, CharTypeBits.Quote)) str.Append('q'); - if (IsCharType(ctype, CharTypeBits.Comment)) str.Append('m'); - if (IsCharType(ctype, CharTypeBits.Whitespace)) str.Append('w'); - if (IsCharType(ctype, CharTypeBits.Digit)) str.Append('d'); - if (IsCharType(ctype, CharTypeBits.Word)) str.Append('a'); - if (IsCharType(ctype, CharTypeBits.Eof)) str.Append('e'); - if (str.Length == 0) - { - str.Append('c'); - } - return(str.ToString()); - } + /// + /// Remove other type settings from a range of characters. + /// Character table type manipulation method. + /// + /// + /// + public void OrdinaryChars(int startChar, int endChar) + { + for (int i = startChar; i <= endChar; i++) + { + CharTypes[i] = 0; + } + } - /// - /// Check whether the specified char type byte has a - /// particular type flag set. - /// - /// The char type byte. - /// The CharTypeBits entry to compare to. - /// bool - true or false - public bool IsCharType(byte ctype, CharTypeBits type) - { - return((ctype & (byte)type) != 0); - } + /// + /// Remove other type settings from a character. + /// Character table type manipulation method. + /// + /// + public void OrdinaryChar(int c) + { + CharTypes[c] = 0; + } - /// - /// Check whether the specified char has a - /// particular type flag set. - /// - /// The character. - /// The CharTypeBits entry to compare to. - /// bool - true or false - public bool IsCharType(char c, CharTypeBits type) - { - return((charTypes[c] & (byte)type) != 0); - } + /// + /// Specify that a particular character is a comment-starting character. + /// Character table type manipulation method. + /// + /// + public void CommentChar(int c) + { + CharTypes[c] = (byte) CharTypeBits.Comment; + } - /// - /// Check whether the specified char has a - /// particular type flag set. - /// - /// The character. - /// The CharTypeBits entry to compare to. - /// bool - true or false - public bool IsCharType(int c, CharTypeBits type) - { - return((charTypes[c] & (byte)type) != 0); - } + /// + /// Specify that a particular character is a quote character. + /// Character table type manipulation method. + /// + /// + public void QuoteChar(int c) + { + CharTypes[c] = (byte) CharTypeBits.Quote; + } - #endregion + #endregion - // --------------------------------------------------------------------- - #region Standard Methods - // --------------------------------------------------------------------- + // --------------------------------------------------------------------- - /// - /// Display the state of this object. - /// - public void Display() - { - Display(String.Empty); - } + #region Utility Methods - /// - /// Display the state of this object, with a per-line prefix. - /// - /// The pre-line prefix. - public void Display(string prefix) - { - } - #endregion - } + // --------------------------------------------------------------------- - /// - /// A StreamTokenizer similar to Java's. This breaks an input stream - /// (coming from a TextReader) into Tokens based on various settings. The settings - /// are stored in the TokenizerSettings property, which is a - /// StreamTokenizerSettings instance. - /// - /// - /// - /// This is configurable in that you can modify TokenizerSettings.CharTypes[] array - /// to specify which characters are which type, along with other settings - /// such as whether to look for comments or not. - /// - /// - /// WARNING: This is not internationalized. This treats all characters beyond - /// the 7-bit ASCII range (decimal 127) as Word characters. - /// - /// - /// There are two main ways to use this: 1) Parse the entire stream at - /// once and get an ArrayList of Tokens (see the Tokenize* methods), - /// and 2) call NextToken() successively. - /// This reads from a TextReader, which you can set directly, and this - /// also provides some convenient methods to parse files and strings. - /// This returns an Eof token if the end of the input is reached. - /// - /// - /// Here's an example of the NextToken() endCapStyle of use: - /// - /// StreamTokenizer tokenizer = new StreamTokenizer(); - /// tokenizer.GrabWhitespace = true; - /// tokenizer.Verbosity = VerbosityLevel.Debug; // just for debugging - /// tokenizer.TextReader = File.OpenText(fileName); - /// Token token; - /// while (tokenizer.NextToken(out token)) log.Info("Token = '{0}'", token); - /// - /// - /// - /// Here's an example of the Tokenize... endCapStyle of use: - /// - /// StreamTokenizer tokenizer = new StreamTokenizer("some string"); - /// ArrayList tokens = new ArrayList(); - /// if (!tokenizer.Tokenize(tokens)) - /// { - /// // error handling - /// } - /// foreach (Token t in tokens) Console.WriteLine("t = {0}", t); - /// - /// - /// - /// Comment delimiters are hardcoded (// and /*), not affected by char type table. - /// - /// - /// This sets line numbers in the tokens it produces. These numbers are normally - /// the line on which the token starts. - /// There is one known caveat, and that is that when GrabWhitespace setting - /// is true, and a whitespace token contains a newline, that token's line number - /// will be set to the following line rather than the line on which the token - /// started. - /// - /// - public class StreamTokenizer - { - // ---------------------------------------------------------------- - #region Constants - // ---------------------------------------------------------------- + /// + /// Return a string representation of a character type setting. + /// Since the type setting is bitwise encoded, a character + /// can have more than one type. + /// + /// The character type byte. + /// The string representation of the type flags. + public string CharTypeToString(byte ctype) + { + StringBuilder str = new StringBuilder(); - /// - /// This is the number of characters in the character table. - /// - public static readonly int NChars = 128; - private static readonly int Eof = NChars; - #endregion + if (IsCharType(ctype, CharTypeBits.Quote)) + { + str.Append('q'); + } + if (IsCharType(ctype, CharTypeBits.Comment)) + { + str.Append('m'); + } + if (IsCharType(ctype, CharTypeBits.Whitespace)) + { + str.Append('w'); + } + if (IsCharType(ctype, CharTypeBits.Digit)) + { + str.Append('d'); + } + if (IsCharType(ctype, CharTypeBits.Word)) + { + str.Append('a'); + } + if (IsCharType(ctype, CharTypeBits.Eof)) + { + str.Append('e'); + } + if (str.Length == 0) + { + str.Append('c'); + } + return (str.ToString()); + } - // ---------------------------------------------------------------- - #region Private Fields - // ---------------------------------------------------------------- + /// + /// Check whether the specified char type byte has a + /// particular type flag set. + /// + /// The char type byte. + /// The CharTypeBits entry to compare to. + /// bool - true or false + public bool IsCharType(byte ctype, CharTypeBits type) + { + return ((ctype & (byte) type) != 0); + } - // A class for verbosity/message handling - private Logger log; + /// + /// Check whether the specified char has a + /// particular type flag set. + /// + /// The character. + /// The CharTypeBits entry to compare to. + /// bool - true or false + public bool IsCharType(char c, CharTypeBits type) + { + return ((CharTypes[c] & (byte) type) != 0); + } - // The TextReader we're reading from - private TextReader textReader; + /// + /// Check whether the specified char has a + /// particular type flag set. + /// + /// The character. + /// The CharTypeBits entry to compare to. + /// bool - true or false + public bool IsCharType(int c, CharTypeBits type) + { + return ((CharTypes[c] & (byte) type) != 0); + } - // buffered wrap of reader - //private BufferedTextReader bufferedReader; // was slower + #endregion - // keep track of current line number during parse - private int lineNumber; + // --------------------------------------------------------------------- - // used to back up in the stream - private CharBuffer backString; + #region Standard Methods - // used to collect characters of the current (next to be - // emitted) token - private CharBuffer nextTokenSb; + // --------------------------------------------------------------------- - // for speed, construct these once and re-use - private CharBuffer tmpSb; - private CharBuffer expSb; + /// + /// Display the state of this object. + /// + public void Display() + { + Display(String.Empty); + } - #endregion + /// + /// Display the state of this object, with a per-line prefix. + /// + /// The pre-line prefix. + public void Display(string prefix) {} - // ---------------------------------------------------------------------- - #region Properties - // ---------------------------------------------------------------------- + #endregion + } - /// - /// This is the TextReader that this object will read from. - /// Set this to set the input reader for the parse. - /// - public TextReader TextReader - { - get { return(textReader); } - set { textReader = value; } - } + /// + /// A StreamTokenizer similar to Java's. This breaks an input stream + /// (coming from a TextReader) into Tokens based on various settings. The settings + /// are stored in the TokenizerSettings property, which is a + /// StreamTokenizerSettings instance. + /// + /// + /// + /// This is configurable in that you can modify TokenizerSettings.CharTypes[] array + /// to specify which characters are which type, along with other settings + /// such as whether to look for comments or not. + /// + /// + /// WARNING: This is not internationalized. This treats all characters beyond + /// the 7-bit ASCII range (decimal 127) as Word characters. + /// + /// + /// There are two main ways to use this: 1) Parse the entire stream at + /// once and get an ArrayList of Tokens (see the Tokenize* methods), + /// and 2) call NextToken() successively. + /// This reads from a TextReader, which you can set directly, and this + /// also provides some convenient methods to parse files and strings. + /// This returns an Eof token if the end of the input is reached. + /// + /// + /// Here's an example of the NextToken() endCapStyle of use: + /// + /// StreamTokenizer tokenizer = new StreamTokenizer(); + /// tokenizer.GrabWhitespace = true; + /// tokenizer.Verbosity = VerbosityLevel.Debug; // just for debugging + /// tokenizer.TextReader = File.OpenText(fileName); + /// Token token; + /// while (tokenizer.NextToken(out token)) log.Info("Token = '{0}'", token); + /// + /// + /// + /// Here's an example of the Tokenize... endCapStyle of use: + /// + /// StreamTokenizer tokenizer = new StreamTokenizer("some string"); + /// ArrayList tokens = new ArrayList(); + /// if (!tokenizer.Tokenize(tokens)) + /// { + /// // error handling + /// } + /// foreach (Token t in tokens) Console.WriteLine("t = {0}", t); + /// + /// + /// + /// Comment delimiters are hardcoded (// and /*), not affected by char type table. + /// + /// + /// This sets line numbers in the tokens it produces. These numbers are normally + /// the line on which the token starts. + /// There is one known caveat, and that is that when GrabWhitespace setting + /// is true, and a whitespace token contains a newline, that token's line number + /// will be set to the following line rather than the line on which the token + /// started. + /// + /// + public class StreamTokenizer + { + // ---------------------------------------------------------------- - private StreamTokenizerSettings settings; - /// - /// The settings which govern the behavior of the tokenization. - /// - public StreamTokenizerSettings Settings { get { return(settings); } } + #region Constants - /// - /// The verbosity level for this object's Logger. - /// - public VerbosityLevel Verbosity - { - get { return(log.Verbosity); } - set { log.Verbosity = value; } - } + // ---------------------------------------------------------------- - #endregion + /// + /// This is the number of characters in the character table. + /// + public static readonly int NChars = 128; - // --------------------------------------------------------------------- - #region Constructors/Destructor - // --------------------------------------------------------------------- + private static readonly int Eof = NChars; - /// - /// Default constructor. - /// - public StreamTokenizer() - { - Initialize(); - } + #endregion - /// - /// Construct and set this object's TextReader to the one specified. - /// - /// The TextReader to read from. - public StreamTokenizer(TextReader sr) - { - Initialize(); - textReader = sr; - } + // ---------------------------------------------------------------- - /// - /// Construct and set a string to tokenize. - /// - /// The string to tokenize. - public StreamTokenizer(string str) - { - Initialize(); - textReader = new StringReader(str); - } + #region Private Fields - /// - /// Utility function, things common to constructors. - /// - void Initialize() - { - log = new Logger("StreamTokenizer"); - log.Verbosity = VerbosityLevel.Warn; - backString = new CharBuffer(32); - nextTokenSb = new CharBuffer(1024); + // ---------------------------------------------------------------- - InitializeStream(); - settings = new StreamTokenizerSettings(); - settings.SetDefaults(); + // A class for verbosity/message handling + private Logger log; - expSb = new CharBuffer(); - tmpSb = new CharBuffer(); - } + // The TextReader we're reading from - /// - /// Clear the stream settings. - /// - void InitializeStream() - { - lineNumber = 1; // base 1 line numbers - textReader = null; - } + // buffered wrap of reader + //private BufferedTextReader bufferedReader; // was slower - #endregion + // keep track of current line number during parse + private int lineNumber; - // --------------------------------------------------------------------- - #region Standard Methods - // --------------------------------------------------------------------- + // used to back up in the stream + private CharBuffer backString; - /// - /// Display the state of this object. - /// - public void Display() - { - Display(String.Empty); - } + // used to collect characters of the current (next to be + // emitted) token + private CharBuffer nextTokenSb; - /// - /// Display the state of this object, with a per-line prefix. - /// - /// The pre-line prefix. - public void Display(string prefix) - { - log.WriteLine(prefix + "StreamTokenizer display:"); - log.WriteLine(prefix + " textReader: {0}", (textReader == null ? "null" : "non-null")); - log.WriteLine(prefix + " backString: {0}", backString); + // for speed, construct these once and re-use + private CharBuffer tmpSb; + private CharBuffer expSb; - if (settings != null) settings.Display(prefix + " "); - } + #endregion - #endregion + // ---------------------------------------------------------------------- - // --------------------------------------------------------------------- - #region NextToken (the state machine) - // --------------------------------------------------------------------- + #region Properties - /// - /// The states of the state machine. - /// - private enum NextTokenState - { - Start, - Whitespace, - Word, - Quote, - EndQuote, - MaybeNumber, // could be number or word - MaybeComment, // after first slash, might be comment or not - MaybeHex, // after 0, may be hex - HexGot0x, // after 0x, may be hex - HexNumber, - LineComment, - BlockComment, - EndBlockComment, - Char, - Eol, - Eof, - Invalid - } + // ---------------------------------------------------------------------- - /// - /// Pick the next state given just a single character. This is used - /// at the start of a new token. - /// - /// The type of the character. - /// The character. - /// The state. - private NextTokenState PickNextState(byte ctype, int c) - { - return(PickNextState(ctype, c, NextTokenState.Start)); - } + /// + /// This is the TextReader that this object will read from. + /// Set this to set the input reader for the parse. + /// + public TextReader TextReader { get; set; } - /// - /// Pick the next state given just a single character. This is used - /// at the start of a new token. - /// - /// The type of the character. - /// The character. - /// Exclude this state from the possible next state. - /// The state. - private NextTokenState PickNextState(byte ctype, int c, NextTokenState excludeState) - { - if (c == '/') - { - return(NextTokenState.MaybeComment); // overrides all other cats - } - else if ((excludeState != NextTokenState.MaybeHex) - && settings.ParseHexNumbers && (c == '0')) - { - return(NextTokenState.MaybeHex); - } - else if ((excludeState != NextTokenState.MaybeNumber) && settings.ParseNumbers - && (settings.IsCharType(ctype, CharTypeBits.Digit) || (c == '-') || (c == '.'))) - { - return(NextTokenState.MaybeNumber); - } - else if (settings.IsCharType(ctype, CharTypeBits.Word)) return(NextTokenState.Word); - else if (settings.GrabEol && (c == 10)) return(NextTokenState.Eol); - else if (settings.IsCharType(ctype, CharTypeBits.Whitespace)) return(NextTokenState.Whitespace); - else if (settings.IsCharType(ctype, CharTypeBits.Comment)) return(NextTokenState.LineComment); - else if (settings.IsCharType(ctype, CharTypeBits.Quote)) return(NextTokenState.Quote); - else if ((c == Eof) || (settings.IsCharType(ctype, CharTypeBits.Eof))) return(NextTokenState.Eof); - return(NextTokenState.Char); - } + /// + /// The settings which govern the behavior of the tokenization. + /// + public StreamTokenizerSettings Settings { get; private set; } - /// - /// Read the next character from the stream, or from backString - /// if we backed up. - /// - /// The next character. - private int GetNextChar() - { - int c; + /// + /// The verbosity level for this object's Logger. + /// + public VerbosityLevel Verbosity + { + get + { + return (log.Verbosity); + } + set + { + log.Verbosity = value; + } + } - // consume from backString if possible - if (backString.Length > 0) - { - c = backString[0]; - backString.Remove(0, 1); - #if DEBUG + #endregion + + // --------------------------------------------------------------------- + + #region Constructors/Destructor + + // --------------------------------------------------------------------- + + /// + /// Default constructor. + /// + public StreamTokenizer() + { + Initialize(); + } + + /// + /// Construct and set this object's TextReader to the one specified. + /// + /// The TextReader to read from. + public StreamTokenizer(TextReader sr) + { + Initialize(); + TextReader = sr; + } + + /// + /// Construct and set a string to tokenize. + /// + /// The string to tokenize. + public StreamTokenizer(string str) + { + Initialize(); + TextReader = new StringReader(str); + } + + /// + /// Utility function, things common to constructors. + /// + private void Initialize() + { + log = new Logger("StreamTokenizer"); + log.Verbosity = VerbosityLevel.Warn; + backString = new CharBuffer(32); + nextTokenSb = new CharBuffer(1024); + + InitializeStream(); + Settings = new StreamTokenizerSettings(); + Settings.SetDefaults(); + + expSb = new CharBuffer(); + tmpSb = new CharBuffer(); + } + + /// + /// Clear the stream settings. + /// + private void InitializeStream() + { + lineNumber = 1; // base 1 line numbers + TextReader = null; + } + + #endregion + + // --------------------------------------------------------------------- + + #region Standard Methods + + // --------------------------------------------------------------------- + + /// + /// Display the state of this object. + /// + public void Display() + { + Display(String.Empty); + } + + /// + /// Display the state of this object, with a per-line prefix. + /// + /// The pre-line prefix. + public void Display(string prefix) + { + log.WriteLine(prefix + "StreamTokenizer display:"); + log.WriteLine(prefix + " textReader: {0}", (TextReader == null ? "null" : "non-null")); + log.WriteLine(prefix + " backString: {0}", backString); + + if (Settings != null) + { + Settings.Display(prefix + " "); + } + } + + #endregion + + // --------------------------------------------------------------------- + + #region NextToken (the state machine) + + // --------------------------------------------------------------------- + + /// + /// The states of the state machine. + /// + private enum NextTokenState + { + Start, + Whitespace, + Word, + Quote, + EndQuote, + MaybeNumber, // could be number or word + MaybeComment, // after first slash, might be comment or not + MaybeHex, // after 0, may be hex + HexGot0x, // after 0x, may be hex + HexNumber, + LineComment, + BlockComment, + EndBlockComment, + Char, + Eol, + Eof, + Invalid + } + + /// + /// Pick the next state given just a single character. This is used + /// at the start of a new token. + /// + /// The type of the character. + /// The character. + /// The state. + private NextTokenState PickNextState(byte ctype, int c) + { + return (PickNextState(ctype, c, NextTokenState.Start)); + } + + /// + /// Pick the next state given just a single character. This is used + /// at the start of a new token. + /// + /// The type of the character. + /// The character. + /// Exclude this state from the possible next state. + /// The state. + private NextTokenState PickNextState(byte ctype, int c, NextTokenState excludeState) + { + if (c == '/') + { + return (NextTokenState.MaybeComment); // overrides all other cats + } + else if ((excludeState != NextTokenState.MaybeHex) + && Settings.ParseHexNumbers && (c == '0')) + { + return (NextTokenState.MaybeHex); + } + else if ((excludeState != NextTokenState.MaybeNumber) && Settings.ParseNumbers + && (Settings.IsCharType(ctype, CharTypeBits.Digit) || (c == '-') || (c == '.'))) + { + return (NextTokenState.MaybeNumber); + } + else if (Settings.IsCharType(ctype, CharTypeBits.Word)) + { + return (NextTokenState.Word); + } + else if (Settings.GrabEol && (c == 10)) + { + return (NextTokenState.Eol); + } + else if (Settings.IsCharType(ctype, CharTypeBits.Whitespace)) + { + return (NextTokenState.Whitespace); + } + else if (Settings.IsCharType(ctype, CharTypeBits.Comment)) + { + return (NextTokenState.LineComment); + } + else if (Settings.IsCharType(ctype, CharTypeBits.Quote)) + { + return (NextTokenState.Quote); + } + else if ((c == Eof) || (Settings.IsCharType(ctype, CharTypeBits.Eof))) + { + return (NextTokenState.Eof); + } + return (NextTokenState.Char); + } + + /// + /// Read the next character from the stream, or from backString + /// if we backed up. + /// + /// The next character. + private int GetNextChar() + { + int c; + + // consume from backString if possible + if (backString.Length > 0) + { + c = backString[0]; + backString.Remove(0, 1); +#if DEBUG log.Debug("Backup char '{0}'", (char)c); #endif - return(c); - } + return (c); + } - if (textReader == null) return(Eof); + if (TextReader == null) + { + return (Eof); + } - try - { - while((c = textReader.Read()) == 13) {} // skip LF (13) - } - catch(Exception) - { - return(Eof); - } + try + { + while ((c = TextReader.Read()) == 13) {} // skip LF (13) + } + catch (Exception) + { + return (Eof); + } - if (c == 10) - { - lineNumber++; - #if DEBUG + if (c == 10) + { + lineNumber++; +#if DEBUG log.Debug("Line number incremented to {0}", lineNumber); #endif - } - else if (c < 0) - { - c = Eof; - } + } + else if (c < 0) + { + c = Eof; + } - #if DEBUG +#if DEBUG log.Debug("Read char '{0}' ({1})", (char)c, c); #endif - return(c); - } + return (c); + } - /// - /// Get the next token. The last token will be an EofToken unless - /// there's an unterminated quote or unterminated block comment - /// and Settings.DoUntermCheck is true, in which case this throws - /// an exception of type StreamTokenizerUntermException or sub-class. - /// - /// The output token. - /// bool - true for success, false for failure. - public bool NextToken(out Token token) - { - token = null; - int thisChar = 0; // current character - byte ctype; // type of this character + /// + /// Get the next token. The last token will be an EofToken unless + /// there's an unterminated quote or unterminated block comment + /// and Settings.DoUntermCheck is true, in which case this throws + /// an exception of type StreamTokenizerUntermException or sub-class. + /// + /// The output token. + /// bool - true for success, false for failure. + public bool NextToken(out Token token) + { + token = null; + int thisChar = 0; // current character + byte ctype; // type of this character - NextTokenState state = NextTokenState.Start; - int prevChar = 0; // previous character - byte prevCtype = (byte)CharTypeBits.Eof; + NextTokenState state = NextTokenState.Start; + int prevChar = 0; // previous character + byte prevCtype = (byte) CharTypeBits.Eof; - // get previous char from nextTokenSb if there - // (nextTokenSb is a StringBuilder containing the characters - // of the next token to be emitted) - if (nextTokenSb.Length > 0) - { - prevChar = nextTokenSb[nextTokenSb.Length - 1]; - prevCtype = settings.CharTypes[prevChar]; - state = PickNextState(prevCtype, prevChar); - } + // get previous char from nextTokenSb if there + // (nextTokenSb is a StringBuilder containing the characters + // of the next token to be emitted) + if (nextTokenSb.Length > 0) + { + prevChar = nextTokenSb[nextTokenSb.Length - 1]; + prevCtype = Settings.CharTypes[prevChar]; + state = PickNextState(prevCtype, prevChar); + } - // extra state for number parse - int seenDot = 0; // how many .'s in the number - int seenE = 0; // how many e's or E's have we seen in the number - bool seenDigit = false; // seen any digits (numbers can start with -) + // extra state for number parse + int seenDot = 0; // how many .'s in the number + int seenE = 0; // how many e's or E's have we seen in the number + bool seenDigit = false; // seen any digits (numbers can start with -) - // lineNumber can change with each GetNextChar() - // tokenLineNumber is the line on which the token started - int tokenLineNumber = lineNumber; + // lineNumber can change with each GetNextChar() + // tokenLineNumber is the line on which the token started + int tokenLineNumber = lineNumber; - // State Machine: Produces a single token. - // Enter a state based on a single character. - // Generally, being in a state means we're currently collecting chars - // in that type of token. - // We do state machine until it builds a token (Eof is a token), then - // return that token. - thisChar = prevChar; // for first iteration, since prevChar is set to this - bool done = false; // optimization - while (!done) - { - prevChar = thisChar; - thisChar = GetNextChar(); - if (thisChar >= settings.CharTypes.Length) - { - // greater than 7-bit ascii, treat as word character - ctype = (byte)CharTypeBits.Word; - } - else ctype = settings.CharTypes[thisChar]; + // State Machine: Produces a single token. + // Enter a state based on a single character. + // Generally, being in a state means we're currently collecting chars + // in that type of token. + // We do state machine until it builds a token (Eof is a token), then + // return that token. + thisChar = prevChar; // for first iteration, since prevChar is set to this + bool done = false; // optimization + while (!done) + { + prevChar = thisChar; + thisChar = GetNextChar(); + if (thisChar >= Settings.CharTypes.Length) + { + // greater than 7-bit ascii, treat as word character + ctype = (byte) CharTypeBits.Word; + } + else + { + ctype = Settings.CharTypes[thisChar]; + } - #if DEBUG +#if DEBUG log.Debug("Before switch: state = {0}, thisChar = '{1}'", state, (char)thisChar); #endif - // see if we need to change states, or emit a token - switch(state) - { - case NextTokenState.Start: - // RESET - state = PickNextState(ctype, thisChar); - tokenLineNumber = lineNumber; - break; + // see if we need to change states, or emit a token + switch (state) + { + case NextTokenState.Start: + // RESET + state = PickNextState(ctype, thisChar); + tokenLineNumber = lineNumber; + break; - case NextTokenState.Char: - token = new CharToken((char)prevChar, tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - break; + case NextTokenState.Char: + token = new CharToken((char) prevChar, tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + break; - case NextTokenState.Word: - if ((!settings.IsCharType(ctype, CharTypeBits.Word)) - && (!settings.IsCharType(ctype, CharTypeBits.Digit))) - { - // end of word, emit - token = new WordToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - break; + case NextTokenState.Word: + if ((!Settings.IsCharType(ctype, CharTypeBits.Word)) + && (!Settings.IsCharType(ctype, CharTypeBits.Digit))) + { + // end of word, emit + token = new WordToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + break; - case NextTokenState.Whitespace: - if (!settings.IsCharType(ctype, CharTypeBits.Whitespace) - || (settings.GrabEol && (thisChar == 10))) - { - // end of whitespace, emit - if (settings.GrabWhitespace) - { - token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - else - { - // RESET - nextTokenSb.Length = 0; - tokenLineNumber = lineNumber; - state = PickNextState(ctype, thisChar); - } - } - break; + case NextTokenState.Whitespace: + if (!Settings.IsCharType(ctype, CharTypeBits.Whitespace) + || (Settings.GrabEol && (thisChar == 10))) + { + // end of whitespace, emit + if (Settings.GrabWhitespace) + { + token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + else + { + // RESET + nextTokenSb.Length = 0; + tokenLineNumber = lineNumber; + state = PickNextState(ctype, thisChar); + } + } + break; - case NextTokenState.EndQuote: - // we're now 1 char after end of quote - token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - break; + case NextTokenState.EndQuote: + // we're now 1 char after end of quote + token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + break; - case NextTokenState.Quote: - // looking for end quote matching char that started the quote - if (thisChar == nextTokenSb[0]) - { - // handle escaped backslashes: count the immediately prior backslashes - // - even (including 0) means it's not escaped - // - odd means it is escaped - int backSlashCount = 0; - for (int i = nextTokenSb.Length - 1; i >= 0; i--) - { - if (nextTokenSb[ i ] == '\\') backSlashCount++; - else break; - } + case NextTokenState.Quote: + // looking for end quote matching char that started the quote + if (thisChar == nextTokenSb[0]) + { + // handle escaped backslashes: count the immediately prior backslashes + // - even (including 0) means it's not escaped + // - odd means it is escaped + int backSlashCount = 0; + for (int i = nextTokenSb.Length - 1; i >= 0; i--) + { + if (nextTokenSb[i] == '\\') + { + backSlashCount++; + } + else + { + break; + } + } - if ((backSlashCount % 2) == 0) - { - state = NextTokenState.EndQuote; - } - } + if ((backSlashCount%2) == 0) + { + state = NextTokenState.EndQuote; + } + } - if ((state != NextTokenState.EndQuote) && (thisChar == Eof)) - { - if (settings.DoUntermCheck) - { - nextTokenSb.Length = 0; - throw new StreamTokenizerUntermQuoteException("Unterminated quote"); - } + if ((state != NextTokenState.EndQuote) && (thisChar == Eof)) + { + if (Settings.DoUntermCheck) + { + nextTokenSb.Length = 0; + throw new StreamTokenizerUntermQuoteException("Unterminated quote"); + } - token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - break; + token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + break; - case NextTokenState.MaybeComment: - if (thisChar == Eof) - { - token = new CharToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - else - { - // if we get the right char, we're in a comment - if (settings.SlashSlashComments && (thisChar == '/')) - state = NextTokenState.LineComment; - else if (settings.SlashStarComments && (thisChar == '*')) - state = NextTokenState.BlockComment; - else - { - token = new CharToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - } - break; + case NextTokenState.MaybeComment: + if (thisChar == Eof) + { + token = new CharToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + else + { + // if we get the right char, we're in a comment + if (Settings.SlashSlashComments && (thisChar == '/')) + { + state = NextTokenState.LineComment; + } + else if (Settings.SlashStarComments && (thisChar == '*')) + { + state = NextTokenState.BlockComment; + } + else + { + token = new CharToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + } + break; - case NextTokenState.LineComment: - if (thisChar == Eof) - { - if (settings.GrabComments) - { - token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - else - { - // RESET - nextTokenSb.Length = 0; - tokenLineNumber = lineNumber; - state = PickNextState(ctype, thisChar); - } - } - else - { - if (thisChar == '\n') - { - if (settings.GrabComments) - { - token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - else - { - // RESET - nextTokenSb.Length = 0; - tokenLineNumber = lineNumber; - state = PickNextState(ctype, thisChar); - } - } - } - break; + case NextTokenState.LineComment: + if (thisChar == Eof) + { + if (Settings.GrabComments) + { + token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + else + { + // RESET + nextTokenSb.Length = 0; + tokenLineNumber = lineNumber; + state = PickNextState(ctype, thisChar); + } + } + else + { + if (thisChar == '\n') + { + if (Settings.GrabComments) + { + token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + else + { + // RESET + nextTokenSb.Length = 0; + tokenLineNumber = lineNumber; + state = PickNextState(ctype, thisChar); + } + } + } + break; - case NextTokenState.BlockComment: - if (thisChar == Eof) - { - if (settings.DoUntermCheck) - { - nextTokenSb.Length = 0; - throw new StreamTokenizerUntermCommentException("Unterminated comment."); - } + case NextTokenState.BlockComment: + if (thisChar == Eof) + { + if (Settings.DoUntermCheck) + { + nextTokenSb.Length = 0; + throw new StreamTokenizerUntermCommentException("Unterminated comment."); + } - if (settings.GrabComments) - { - token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - else - { - // RESET - nextTokenSb.Length = 0; - tokenLineNumber = lineNumber; - state = PickNextState(ctype, thisChar); - } - } - else - { - if ((thisChar == '/') && (prevChar == '*')) - { - state = NextTokenState.EndBlockComment; - } - } - break; + if (Settings.GrabComments) + { + token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + else + { + // RESET + nextTokenSb.Length = 0; + tokenLineNumber = lineNumber; + state = PickNextState(ctype, thisChar); + } + } + else + { + if ((thisChar == '/') && (prevChar == '*')) + { + state = NextTokenState.EndBlockComment; + } + } + break; - // special case for 2-character token termination - case NextTokenState.EndBlockComment: - if (settings.GrabComments) - { - token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - else - { - // RESET - nextTokenSb.Length = 0; - tokenLineNumber = lineNumber; - state = PickNextState(ctype, thisChar); - } - break; + // special case for 2-character token termination + case NextTokenState.EndBlockComment: + if (Settings.GrabComments) + { + token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + else + { + // RESET + nextTokenSb.Length = 0; + tokenLineNumber = lineNumber; + state = PickNextState(ctype, thisChar); + } + break; - case NextTokenState.MaybeHex: - // previous char was 0 - if (thisChar != 'x') - { - // back up and try non-hex - // back up to the 0 - nextTokenSb.Append((char)thisChar); - backString.Append(nextTokenSb); - nextTokenSb.Length = 0; + case NextTokenState.MaybeHex: + // previous char was 0 + if (thisChar != 'x') + { + // back up and try non-hex + // back up to the 0 + nextTokenSb.Append((char) thisChar); + backString.Append(nextTokenSb); + nextTokenSb.Length = 0; - // reset state and don't choose MaybeNumber state. - // pull char from backString - thisChar = backString[0]; - backString.Remove(0, 1); - state = PickNextState(settings.CharTypes[thisChar], (int)thisChar, - NextTokenState.MaybeHex); - #if DEBUG + // reset state and don't choose MaybeNumber state. + // pull char from backString + thisChar = backString[0]; + backString.Remove(0, 1); + state = PickNextState(Settings.CharTypes[thisChar], (int) thisChar, + NextTokenState.MaybeHex); +#if DEBUG log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar, state); #endif - } - else state = NextTokenState.HexGot0x; - break; + } + else + { + state = NextTokenState.HexGot0x; + } + break; - case NextTokenState.HexGot0x: - if (!settings.IsCharType(ctype, CharTypeBits.HexDigit)) - { - // got 0x but now a non-hex char - // back up to the 0 - nextTokenSb.Append((char)thisChar); - backString.Append(nextTokenSb); - nextTokenSb.Length = 0; + case NextTokenState.HexGot0x: + if (!Settings.IsCharType(ctype, CharTypeBits.HexDigit)) + { + // got 0x but now a non-hex char + // back up to the 0 + nextTokenSb.Append((char) thisChar); + backString.Append(nextTokenSb); + nextTokenSb.Length = 0; - // reset state and don't choose MaybeNumber state. - // pull char from backString - thisChar = backString[0]; - backString.Remove(0, 1); - state = PickNextState(settings.CharTypes[thisChar], (int)thisChar, - NextTokenState.MaybeHex); - #if DEBUG + // reset state and don't choose MaybeNumber state. + // pull char from backString + thisChar = backString[0]; + backString.Remove(0, 1); + state = PickNextState(Settings.CharTypes[thisChar], (int) thisChar, + NextTokenState.MaybeHex); +#if DEBUG log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar, state); #endif - } - else state = NextTokenState.HexNumber; - break; + } + else + { + state = NextTokenState.HexNumber; + } + break; - case NextTokenState.HexNumber: - if (!settings.IsCharType(ctype, CharTypeBits.HexDigit)) - { - // emit the hex number we've collected - #if DEBUG + case NextTokenState.HexNumber: + if (!Settings.IsCharType(ctype, CharTypeBits.HexDigit)) + { + // emit the hex number we've collected +#if DEBUG log.Debug("Emit hex IntToken from string '{0}'", nextTokenSb); #endif - token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - } - break; + token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + } + break; - case NextTokenState.MaybeNumber: - // - // Determine whether or not to stop collecting characters for - // the number parse. We terminate when it's clear it's not - // a number or no longer a number. - // - bool term = false; + case NextTokenState.MaybeNumber: + // + // Determine whether or not to stop collecting characters for + // the number parse. We terminate when it's clear it's not + // a number or no longer a number. + // + bool term = false; - if (settings.IsCharType(ctype, CharTypeBits.Digit) - || settings.IsCharType(prevChar, CharTypeBits.Digit)) seenDigit = true; + if (Settings.IsCharType(ctype, CharTypeBits.Digit) + || Settings.IsCharType(prevChar, CharTypeBits.Digit)) + { + seenDigit = true; + } - // term conditions - if (thisChar == '.') - { - seenDot++; - if (seenDot > 1) term = true; // more than one dot, it aint a number - } - else if (((thisChar == 'e') || (thisChar == 'E'))) - { - seenE++; - if (!seenDigit) term = true; // e before any digits is bad - else if (seenE > 1) term = true; // more than 1 e is bad - else - { - term = true; // done regardless + // term conditions + if (thisChar == '.') + { + seenDot++; + if (seenDot > 1) + { + term = true; // more than one dot, it aint a number + } + } + else if (((thisChar == 'e') || (thisChar == 'E'))) + { + seenE++; + if (!seenDigit) + { + term = true; // e before any digits is bad + } + else if (seenE > 1) + { + term = true; // more than 1 e is bad + } + else + { + term = true; // done regardless - // scan the exponent, put its characters into - // nextTokenSb, if there are any - char c; - expSb.Clear(); - expSb.Append((char)thisChar); - if (GrabInt(expSb, true, out c)) - { - // we got a good exponent, tack it on - nextTokenSb.Append(expSb); - thisChar = c; // and continue after the exponent's characters - } - } - } - else if (thisChar == Eof) term = true; - // or a char that can't be in a number - else if ((!settings.IsCharType(ctype, CharTypeBits.Digit) - && (thisChar != 'e') && (thisChar != 'E') - && (thisChar != '-') && (thisChar != '.')) - || ((thisChar == '+') && (seenE == 0))) - { - // it's not a normal number character - term = true; - } - // or a dash not after e - else if ((thisChar == '-') && (!((prevChar == 'e') || (prevChar == 'E')))) term = true; + // scan the exponent, put its characters into + // nextTokenSb, if there are any + char c; + expSb.Clear(); + expSb.Append((char) thisChar); + if (GrabInt(expSb, true, out c)) + { + // we got a good exponent, tack it on + nextTokenSb.Append(expSb); + thisChar = c; // and continue after the exponent's characters + } + } + } + else if (thisChar == Eof) + { + term = true; + } + // or a char that can't be in a number + else if ((!Settings.IsCharType(ctype, CharTypeBits.Digit) + && (thisChar != 'e') && (thisChar != 'E') + && (thisChar != '-') && (thisChar != '.')) + || ((thisChar == '+') && (seenE == 0))) + { + // it's not a normal number character + term = true; + } + // or a dash not after e + else if ((thisChar == '-') && (!((prevChar == 'e') || (prevChar == 'E')))) + { + term = true; + } - if (term) - { - // we are terminating a number, or it wasn't a number - if (seenDigit) - { - if ((nextTokenSb.IndexOf('.') >= 0) - || (nextTokenSb.IndexOf('e') >= 0) - || (nextTokenSb.IndexOf('E') >= 0) - || (nextTokenSb.Length >= 19) // probably too large for Int64, use float - ) - { - token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber); - #if DEBUG + if (term) + { + // we are terminating a number, or it wasn't a number + if (seenDigit) + { + if ((nextTokenSb.IndexOf('.') >= 0) + || (nextTokenSb.IndexOf('e') >= 0) + || (nextTokenSb.IndexOf('E') >= 0) + || (nextTokenSb.Length >= 19) // probably too large for Int64, use float + ) + { + token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber); +#if DEBUG log.Debug("Emit FloatToken from string '{0}'", nextTokenSb); #endif - } - else - { - #if DEBUG + } + else + { +#if DEBUG log.Debug("Emit IntToken from string '{0}'", nextTokenSb); #endif - token = new IntToken(nextTokenSb.ToString(), tokenLineNumber); - } - done = true; - nextTokenSb.Length = 0; - } - else - { - // -whatever or -.whatever - // didn't see any digits, must have gotten here by a leading - - // and no digits after it - // back up to -, pick next state excluding numbers - nextTokenSb.Append((char)thisChar); - backString.Append(nextTokenSb); - nextTokenSb.Length = 0; + token = new IntToken(nextTokenSb.ToString(), tokenLineNumber); + } + done = true; + nextTokenSb.Length = 0; + } + else + { + // -whatever or -.whatever + // didn't see any digits, must have gotten here by a leading - + // and no digits after it + // back up to -, pick next state excluding numbers + nextTokenSb.Append((char) thisChar); + backString.Append(nextTokenSb); + nextTokenSb.Length = 0; - // restart on the - and don't choose MaybeNumber state - // pull char from backString - thisChar = backString[0]; - backString.Remove(0, 1); - state = PickNextState(settings.CharTypes[thisChar], (int)thisChar, - NextTokenState.MaybeNumber); - #if DEBUG + // restart on the - and don't choose MaybeNumber state + // pull char from backString + thisChar = backString[0]; + backString.Remove(0, 1); + state = PickNextState(Settings.CharTypes[thisChar], (int) thisChar, + NextTokenState.MaybeNumber); +#if DEBUG log.Debug("MaybeNumber: Next state on '{0}' is {1}", (char)thisChar, state); #endif - } - } - break; + } + } + break; - case NextTokenState.Eol: - // tokenLineNumber - 1 because the newline char is on the previous line - token = new EolToken(tokenLineNumber - 1); - done = true; - nextTokenSb.Length = 0; - break; + case NextTokenState.Eol: + // tokenLineNumber - 1 because the newline char is on the previous line + token = new EolToken(tokenLineNumber - 1); + done = true; + nextTokenSb.Length = 0; + break; - case NextTokenState.Eof: - token = new EofToken(tokenLineNumber); - done = true; - nextTokenSb.Length = 0; - return(false); + case NextTokenState.Eof: + token = new EofToken(tokenLineNumber); + done = true; + nextTokenSb.Length = 0; + return (false); - case NextTokenState.Invalid: - default: - // not a good sign, some unrepresented state? - log.Error("NextToken: Hit unrepresented state {0}", state); - return(false); - } + case NextTokenState.Invalid: + default: + // not a good sign, some unrepresented state? + log.Error("NextToken: Hit unrepresented state {0}", state); + return (false); + } - // use a StringBuilder to accumulate characters which are part of this token - if (thisChar != Eof) nextTokenSb.Append((char)thisChar); - #if DEBUG + // use a StringBuilder to accumulate characters which are part of this token + if (thisChar != Eof) + { + nextTokenSb.Append((char) thisChar); + } +#if DEBUG log.Debug("After switch: state = {0}, nextTokenSb = '{1}', backString = '{2}'", state, nextTokenSb, backString); #endif - } + } - #if DEBUG +#if DEBUG log.Debug("Got token {0}", token.ToDebugString()); #endif - return(true); - } + return (true); + } - /// - /// Starting from current stream location, scan forward - /// over an int. Determine whether it's an integer or not. If so, - /// push the integer characters to the specified CharBuffer. - /// If not, put them in backString (essentially leave the - /// stream as it was) and return false. - /// - /// If it was an int, the stream is left 1 character after the - /// end of the int, and that character is output in the thisChar parameter. - /// - /// The formats for integers are: 1, +1, and -1 - /// The + and - signs are included in the output buffer. - /// - /// The CharBuffer to append to. - /// Whether or not to consider + to be part - /// of an integer. - /// The last character read by this method. - /// true for parsed an int, false for not an int - private bool GrabInt(CharBuffer sb, bool allowPlus, out char thisChar) - { - tmpSb.Clear(); // use tmp CharBuffer + /// + /// Starting from current stream location, scan forward + /// over an int. Determine whether it's an integer or not. If so, + /// push the integer characters to the specified CharBuffer. + /// If not, put them in backString (essentially leave the + /// stream as it was) and return false. + /// + /// If it was an int, the stream is left 1 character after the + /// end of the int, and that character is output in the thisChar parameter. + /// + /// The formats for integers are: 1, +1, and -1 + /// The + and - signs are included in the output buffer. + /// + /// The CharBuffer to append to. + /// Whether or not to consider + to be part + /// of an integer. + /// The last character read by this method. + /// true for parsed an int, false for not an int + private bool GrabInt(CharBuffer sb, bool allowPlus, out char thisChar) + { + tmpSb.Clear(); // use tmp CharBuffer - // first character can be -, maybe can be + depending on arg - thisChar = (char)GetNextChar(); - if (thisChar == Eof) - { - return(false); - } - else if (thisChar == '+') - { - if (allowPlus) - { - tmpSb.Append(thisChar); - } - else - { - backString.Append(thisChar); - return(false); - } - } - else if (thisChar == '-') - { - tmpSb.Append(thisChar); - } - else if (settings.IsCharType(thisChar, CharTypeBits.Digit)) - { - // a digit, back this out so we can handle it in loop below - backString.Append(thisChar); - } - else - { - // not a number starter - backString.Append(thisChar); - return(false); - } + // first character can be -, maybe can be + depending on arg + thisChar = (char) GetNextChar(); + if (thisChar == Eof) + { + return (false); + } + else if (thisChar == '+') + { + if (allowPlus) + { + tmpSb.Append(thisChar); + } + else + { + backString.Append(thisChar); + return (false); + } + } + else if (thisChar == '-') + { + tmpSb.Append(thisChar); + } + else if (Settings.IsCharType(thisChar, CharTypeBits.Digit)) + { + // a digit, back this out so we can handle it in loop below + backString.Append(thisChar); + } + else + { + // not a number starter + backString.Append(thisChar); + return (false); + } - // rest of chars have to be digits - bool gotInt = false; - while(((thisChar = (char)GetNextChar()) != Eof) - && (settings.IsCharType(thisChar, CharTypeBits.Digit))) - { - gotInt = true; - tmpSb.Append(thisChar); - } + // rest of chars have to be digits + bool gotInt = false; + while (((thisChar = (char) GetNextChar()) != Eof) + && (Settings.IsCharType(thisChar, CharTypeBits.Digit))) + { + gotInt = true; + tmpSb.Append(thisChar); + } - if (gotInt) - { - sb.Append(tmpSb); - #if DEBUG + if (gotInt) + { + sb.Append(tmpSb); +#if DEBUG log.Debug("Grabbed int {0}, sb = {1}", tmpSb, sb); #endif - return(true); - } - else - { - // didn't get any chars after first - backString.Append(tmpSb); // put + or - back on - if (thisChar != Eof) backString.Append(thisChar); - return(false); - } - } + return (true); + } + else + { + // didn't get any chars after first + backString.Append(tmpSb); // put + or - back on + if (thisChar != Eof) + { + backString.Append(thisChar); + } + return (false); + } + } - #endregion + #endregion - // --------------------------------------------------------------------- - #region Tokenize wrapper methods - // --------------------------------------------------------------------- + // --------------------------------------------------------------------- - /// - /// Parse the rest of the stream and put all the tokens - /// in the input ArrayList. This resets the line number to 1. - /// - /// The ArrayList to append to. - /// bool - true for success - public bool Tokenize(ArrayList tokens) - { - Token token; - this.lineNumber = 1; + #region Tokenize wrapper methods - while (NextToken(out token)) - { - if (token == null) throw new NullReferenceException( - "StreamTokenizer: Tokenize: Got a null token from NextToken."); - tokens.Add(token); - } + // --------------------------------------------------------------------- - // add the last token returned (EOF) - tokens.Add(token); - return(true); - } + /// + /// Parse the rest of the stream and put all the tokens + /// in the input ArrayList. This resets the line number to 1. + /// + /// The ArrayList to append to. + /// bool - true for success + public bool Tokenize(ArrayList tokens) + { + Token token; + lineNumber = 1; - /// - /// Parse all tokens from the specified TextReader, put - /// them into the input ArrayList. - /// - /// The TextReader to read from. - /// The ArrayList to append to. - /// bool - true for success, false for failure. - public bool TokenizeReader(TextReader tr, ArrayList tokens) - { - textReader = tr; - return(Tokenize(tokens)); - } + while (NextToken(out token)) + { + if (token == null) + { + throw new NullReferenceException( + "StreamTokenizer: Tokenize: Got a null token from NextToken."); + } + tokens.Add(token); + } - /// - /// Parse all tokens from the specified file, put - /// them into the input ArrayList. - /// - /// The file to read. - /// The ArrayList to put tokens in. - /// bool - true for success, false for failure. - public bool TokenizeFile(string fileName, ArrayList tokens) - { - FileInfo fi = new FileInfo(fileName); - FileStream fr = null; - try - { - fr = fi.Open(FileMode.Open, FileAccess.Read, FileShare.None); - textReader = new StreamReader(fr); - } - catch (DirectoryNotFoundException) - { - } - try - { - if (!Tokenize(tokens)) - { - log.Error("Unable to parse tokens from file {0}", fileName); - textReader.Close(); - if (fr != null) fr.Close(); - return(false); - } - } - catch(StreamTokenizerUntermException e) - { - textReader.Close(); - if (fr != null) fr.Close(); - throw e; - } + // add the last token returned (EOF) + tokens.Add(token); + return (true); + } - if (textReader != null) textReader.Close(); - if (fr != null) fr.Close(); - return(true); - } + /// + /// Parse all tokens from the specified TextReader, put + /// them into the input ArrayList. + /// + /// The TextReader to read from. + /// The ArrayList to append to. + /// bool - true for success, false for failure. + public bool TokenizeReader(TextReader tr, ArrayList tokens) + { + TextReader = tr; + return (Tokenize(tokens)); + } - /// - /// Parse all tokens from the specified string, put - /// them into the input ArrayList. - /// - /// - /// The ArrayList to put tokens in. - /// bool - true for success, false for failure. - public bool TokenizeString(string str, ArrayList tokens) - { - textReader = new StringReader(str); - return(Tokenize(tokens)); - } + /// + /// Parse all tokens from the specified file, put + /// them into the input ArrayList. + /// + /// The file to read. + /// The ArrayList to put tokens in. + /// bool - true for success, false for failure. + public bool TokenizeFile(string fileName, ArrayList tokens) + { + FileInfo fi = new FileInfo(fileName); + FileStream fr = null; + try + { + fr = fi.Open(FileMode.Open, FileAccess.Read, FileShare.None); + TextReader = new StreamReader(fr); + } + catch (DirectoryNotFoundException) {} + try + { + if (!Tokenize(tokens)) + { + log.Error("Unable to parse tokens from file {0}", fileName); + TextReader.Close(); + if (fr != null) + { + fr.Close(); + } + return (false); + } + } + catch (StreamTokenizerUntermException e) + { + TextReader.Close(); + if (fr != null) + { + fr.Close(); + } + throw e; + } - /// - /// Parse all tokens from the specified Stream, put - /// them into the input ArrayList. - /// - /// - /// The ArrayList to put tokens in. - /// bool - true for success, false for failure. - public bool TokenizeStream(Stream s, ArrayList tokens) - { - textReader = new StreamReader(s); - return(Tokenize(tokens)); - } + if (TextReader != null) + { + TextReader.Close(); + } + if (fr != null) + { + fr.Close(); + } + return (true); + } - /// - /// Tokenize a file completely and return the tokens in a Token[]. - /// - /// The file to tokenize. - /// A Token[] with all tokens. - public Token[] TokenizeFile(string fileName) - { - ArrayList list = new ArrayList(); - if (!TokenizeFile(fileName, list)) - { - return(null); - } - else - { - if (list.Count > 0) - { - return((Token[])list.ToArray(typeof(Token))); - } - else return(null); - } - } - #endregion - } -} + /// + /// Parse all tokens from the specified string, put + /// them into the input ArrayList. + /// + /// + /// The ArrayList to put tokens in. + /// bool - true for success, false for failure. + public bool TokenizeString(string str, ArrayList tokens) + { + TextReader = new StringReader(str); + return (Tokenize(tokens)); + } + /// + /// Parse all tokens from the specified Stream, put + /// them into the input ArrayList. + /// + /// + /// The ArrayList to put tokens in. + /// bool - true for success, false for failure. + public bool TokenizeStream(Stream s, ArrayList tokens) + { + TextReader = new StreamReader(s); + return (Tokenize(tokens)); + } + /// + /// Tokenize a file completely and return the tokens in a Token[]. + /// + /// The file to tokenize. + /// A Token[] with all tokens. + public Token[] TokenizeFile(string fileName) + { + ArrayList list = new ArrayList(); + if (!TokenizeFile(fileName, list)) + { + return (null); + } + else + { + if (list.Count > 0) + { + return ((Token[]) list.ToArray(typeof(Token))); + } + else + { + return (null); + } + } + } + + #endregion + } +} \ No newline at end of file