Index: src/Common/NetTopologySuite/Utilities/RToolsUtil/StreamTokenizer.cs
===================================================================
diff -u -r8f6ae890fed8e8eae3a32f9c0498a10f82e0ddf9 -r5fc71a385897af92ccb092f2f969b5709afab85a
--- src/Common/NetTopologySuite/Utilities/RToolsUtil/StreamTokenizer.cs (.../StreamTokenizer.cs) (revision 8f6ae890fed8e8eae3a32f9c0498a10f82e0ddf9)
+++ src/Common/NetTopologySuite/Utilities/RToolsUtil/StreamTokenizer.cs (.../StreamTokenizer.cs) (revision 5fc71a385897af92ccb092f2f969b5709afab85a)
@@ -36,148 +36,229 @@
namespace RTools_NTS.Util
{
- // ---------------------------------------------------------------------
- #region Exceptions
- // ---------------------------------------------------------------------
+ // ---------------------------------------------------------------------
- ///
- /// Exception class for unterminated tokens.
- ///
- public class StreamTokenizerUntermException : Exception
- {
- ///
- /// Construct with a particular message.
- ///
- /// The message to store in this object.
- public StreamTokenizerUntermException(string msg) : base(msg) {}
- }
+ #region Exceptions
- ///
- /// Exception class for unterminated quotes.
- ///
- public class StreamTokenizerUntermQuoteException : StreamTokenizerUntermException
- {
- ///
- /// Construct with a particular message.
- ///
- /// The message to store in this object.
- public StreamTokenizerUntermQuoteException(string msg) : base(msg) {}
- }
+ // ---------------------------------------------------------------------
- ///
- /// Exception class for unterminated block comments.
- ///
- public class StreamTokenizerUntermCommentException : StreamTokenizerUntermException
- {
- ///
- /// Construct with a particular message.
- ///
- /// The message to store in this object.
- public StreamTokenizerUntermCommentException(string msg) : base(msg) {}
- }
+ ///
+ /// Exception class for unterminated tokens.
+ ///
+ public class StreamTokenizerUntermException : Exception
+ {
+ ///
+ /// Construct with a particular message.
+ ///
+ /// The message to store in this object.
+ public StreamTokenizerUntermException(string msg) : base(msg) {}
+ }
- #endregion
+ ///
+ /// Exception class for unterminated quotes.
+ ///
+ public class StreamTokenizerUntermQuoteException : StreamTokenizerUntermException
+ {
+ ///
+ /// Construct with a particular message.
+ ///
+ /// The message to store in this object.
+ public StreamTokenizerUntermQuoteException(string msg) : base(msg) {}
+ }
- // ---------------------------------------------------------------------
- #region Enumerations
- // ---------------------------------------------------------------------
+ ///
+ /// Exception class for unterminated block comments.
+ ///
+ public class StreamTokenizerUntermCommentException : StreamTokenizerUntermException
+ {
+ ///
+ /// Construct with a particular message.
+ ///
+ /// The message to store in this object.
+ public StreamTokenizerUntermCommentException(string msg) : base(msg) {}
+ }
- ///
- /// Bitwise enumeration for character types.
- ///
- [Flags]
- public enum CharTypeBits : byte
- {
- /// word characters (usually alpha, digits, and domain specific)
- Word = 1,
- /// # or something for line comments
- Comment = 2,
- /// whitespace
- Whitespace = 4,
- /// ' or " type
- Quote = 8,
- /// usually 0 to 9
- Digit = 16,
- /// usually 0 to 9, a-f and A-F
- HexDigit = 32,
- /// eof char
- Eof = 64
- }
+ #endregion
- #endregion
+ // ---------------------------------------------------------------------
- ///
- /// This contains the settings that control the behavior of the tokenizer.
- /// This is separated from the StreamTokenizer so that common settings
- /// are easy to package and keep together.
- ///
- [Serializable]
- public class StreamTokenizerSettings
- {
- // ---------------------------------------------------------------------
- #region Properties
- // ---------------------------------------------------------------------
+ #region Enumerations
- private byte[] charTypes;
- ///
- /// This is the character type table. Each byte is bitwise encoded
- /// with the character attributes, such as whether that character is
- /// word or whitespace.
- ///
- public byte[] CharTypes { get { return(charTypes); } }
+ // ---------------------------------------------------------------------
- bool grabWhitespace;
- ///
- /// Whether or not to return whitespace tokens. If not, they're ignored.
- ///
- public bool GrabWhitespace { get { return(grabWhitespace); } set { grabWhitespace = value; } }
+ ///
+ /// Bitwise enumeration for character types.
+ ///
+ [Flags]
+ public enum CharTypeBits : byte
+ {
+ /// word characters (usually alpha, digits, and domain specific)
+ Word = 1,
- bool grabEol;
- ///
- /// Whether or not to return EolTokens on end of line. Eol tokens will not
- /// break up other tokens which can be multi-line. For example block comments
- /// and quotes will not be broken by Eol tokens. Therefore the number of
- /// Eol tokens does not give you the line count of a stream.
- ///
- public bool GrabEol { get { return(grabEol); } set { grabEol = value; } }
+ /// # or something for line comments
+ Comment = 2,
- bool slashSlashComments;
- ///
- /// Whether or not to look for // comments
- ///
- public bool SlashSlashComments { get { return(slashSlashComments); } set { slashSlashComments = value; } }
+ /// whitespace
+ Whitespace = 4,
- bool slashStarComments;
- ///
- /// Whether or not to look for /* */ block comments.
- ///
- public bool SlashStarComments { get { return(slashStarComments); } set { slashStarComments = value; } }
+ /// ' or " type
+ Quote = 8,
- bool grabComments;
- ///
- /// Whether or not to return comments.
- ///
- public bool GrabComments { get { return(grabComments); } set { grabComments = value; } }
+ /// usually 0 to 9
+ Digit = 16,
- bool doUntermCheck;
- ///
- /// Whether or not to check for unterminated quotes and block comments.
- /// If true, and one is encoutered, an exception is thrown of the appropriate type.
- ///
- public bool DoUntermCheck { get { return(doUntermCheck); } set { doUntermCheck = value; } }
+ /// usually 0 to 9, a-f and A-F
+ HexDigit = 32,
- bool parseNumbers;
- ///
- /// Whether or not digits are specified as Digit type in the
- /// character table.
- /// This setting is based on the character types table, so this
- /// setting interacts with character type table manipulation.
- /// This setting may become incorrect if you modify the character
- /// types table directly.
- ///
- public bool ParseNumbers
- {
- get { return(parseNumbers); }
+ /// eof char
+ Eof = 64
+ }
+
+ #endregion
+
+ ///
+ /// This contains the settings that control the behavior of the tokenizer.
+ /// This is separated from the StreamTokenizer so that common settings
+ /// are easy to package and keep together.
+ ///
+ [Serializable]
+ public class StreamTokenizerSettings
+ {
+ // ---------------------------------------------------------------------
+
+ #region Properties
+
+ // ---------------------------------------------------------------------
+
+ ///
+ /// This is the character type table. Each byte is bitwise encoded
+ /// with the character attributes, such as whether that character is
+ /// word or whitespace.
+ ///
+ public byte[] CharTypes { get; private set; }
+
+ private bool grabWhitespace;
+
+ ///
+ /// Whether or not to return whitespace tokens. If not, they're ignored.
+ ///
+ public bool GrabWhitespace
+ {
+ get
+ {
+ return (grabWhitespace);
+ }
+ set
+ {
+ grabWhitespace = value;
+ }
+ }
+
+ private bool grabEol;
+
+ ///
+ /// Whether or not to return EolTokens on end of line. Eol tokens will not
+ /// break up other tokens which can be multi-line. For example block comments
+ /// and quotes will not be broken by Eol tokens. Therefore the number of
+ /// Eol tokens does not give you the line count of a stream.
+ ///
+ public bool GrabEol
+ {
+ get
+ {
+ return (grabEol);
+ }
+ set
+ {
+ grabEol = value;
+ }
+ }
+
+ private bool slashSlashComments;
+
+ ///
+ /// Whether or not to look for // comments
+ ///
+ public bool SlashSlashComments
+ {
+ get
+ {
+ return (slashSlashComments);
+ }
+ set
+ {
+ slashSlashComments = value;
+ }
+ }
+
+ private bool slashStarComments;
+
+ ///
+ /// Whether or not to look for /* */ block comments.
+ ///
+ public bool SlashStarComments
+ {
+ get
+ {
+ return (slashStarComments);
+ }
+ set
+ {
+ slashStarComments = value;
+ }
+ }
+
+ private bool grabComments;
+
+ ///
+ /// Whether or not to return comments.
+ ///
+ public bool GrabComments
+ {
+ get
+ {
+ return (grabComments);
+ }
+ set
+ {
+ grabComments = value;
+ }
+ }
+
+ private bool doUntermCheck;
+
+ ///
+ /// Whether or not to check for unterminated quotes and block comments.
+ /// If true, and one is encountered, an exception is thrown of the appropriate type.
+ ///
+ public bool DoUntermCheck
+ {
+ get
+ {
+ return (doUntermCheck);
+ }
+ set
+ {
+ doUntermCheck = value;
+ }
+ }
+
+ private bool parseNumbers;
+
+ ///
+ /// Whether or not digits are specified as Digit type in the
+ /// character table.
+ /// This setting is based on the character types table, so this
+ /// setting interacts with character type table manipulation.
+ /// This setting may become incorrect if you modify the character
+ /// types table directly.
+ ///
+ public bool ParseNumbers
+ {
+ get
+ {
+ return (parseNumbers);
+ }
/* dropped for speed, this means this property isn't accurate if
* character types table is modified directly.
* {
@@ -192,38 +273,40 @@
return(true);
}
*/
- set
- {
- if (value)
- {
- for (int i = '0'; i <= '9'; i++)
- charTypes[i] |= (byte)CharTypeBits.Digit;
- }
- else
- {
- byte digit = (byte)CharTypeBits.Digit;
+ set
+ {
+ if (value)
+ {
+ for (int i = '0'; i <= '9'; i++)
+ {
+ CharTypes[i] |= (byte) CharTypeBits.Digit;
+ }
+ }
+ else
+ {
+ byte digit = (byte) CharTypeBits.Digit;
- for (int i = '0'; i <= '9'; i++)
- {
- charTypes[i] &= (byte)(~digit); // not digit
- }
- }
- parseNumbers = value;
- }
- }
+ for (int i = '0'; i <= '9'; i++)
+ {
+ CharTypes[i] &= (byte) (~digit); // not digit
+ }
+ }
+ parseNumbers = value;
+ }
+ }
- bool parseHexNumbers;
+ private bool parseHexNumbers;
- ///
- /// Whether or not to parse Hex (0xABCD...) numbers.
- /// This setting is based on the character types table, so this
- /// setting interacts with character type table manipulation.
- ///
- public bool ParseHexNumbers
- {
- get
- {
- return(parseHexNumbers);
+ ///
+ /// Whether or not to parse Hex (0xABCD...) numbers.
+ /// This setting is based on the character types table, so this
+ /// setting interacts with character type table manipulation.
+ ///
+ public bool ParseHexNumbers
+ {
+ get
+ {
+ return (parseHexNumbers);
// for (int i = 'A'; i <= 'F'; i++)
// {
// if (!IsCharType((char)i, CharTypeBits.Digit))
@@ -241,1357 +324,1494 @@
// if (!IsCharType('x', CharTypeBits.Digit)) return(false);
//
// return(true);
- }
- set
- {
- parseHexNumbers = value;
- if (parseHexNumbers)
- {
- for (int i = '0'; i <= '9'; i++)
- charTypes[i] |= (byte)CharTypeBits.HexDigit;
- for (int i = 'A'; i <= 'F'; i++)
- charTypes[i] |= (byte)CharTypeBits.HexDigit;
- for (int i = 'a'; i <= 'f'; i++)
- charTypes[i] |= (byte)CharTypeBits.HexDigit;
- charTypes[(int)'x'] |= (byte)CharTypeBits.HexDigit;
- }
- else
- {
- byte digit = (byte)CharTypeBits.HexDigit;
+ }
+ set
+ {
+ parseHexNumbers = value;
+ if (parseHexNumbers)
+ {
+ for (int i = '0'; i <= '9'; i++)
+ {
+ CharTypes[i] |= (byte) CharTypeBits.HexDigit;
+ }
+ for (int i = 'A'; i <= 'F'; i++)
+ {
+ CharTypes[i] |= (byte) CharTypeBits.HexDigit;
+ }
+ for (int i = 'a'; i <= 'f'; i++)
+ {
+ CharTypes[i] |= (byte) CharTypeBits.HexDigit;
+ }
+ CharTypes[(int) 'x'] |= (byte) CharTypeBits.HexDigit;
+ }
+ else
+ {
+ byte digit = (byte) CharTypeBits.HexDigit;
- for (int i = 'A'; i <= 'F'; i++)
- {
- charTypes[i] &= (byte)(~digit); // not digit
- }
- for (int i = 'a'; i <= 'f'; i++)
- {
- charTypes[i] &= (byte)(~digit); // not digit
- }
- charTypes[(int)'x'] &= (byte)(~digit);
- }
- }
- }
+ for (int i = 'A'; i <= 'F'; i++)
+ {
+ CharTypes[i] &= (byte) (~digit); // not digit
+ }
+ for (int i = 'a'; i <= 'f'; i++)
+ {
+ CharTypes[i] &= (byte) (~digit); // not digit
+ }
+ CharTypes[(int) 'x'] &= (byte) (~digit);
+ }
+ }
+ }
- #endregion
+ #endregion
- // ---------------------------------------------------------------------
- #region Constructors/Destructor
- // ---------------------------------------------------------------------
+ // ---------------------------------------------------------------------
- ///
- /// Default constructor.
- ///
- public StreamTokenizerSettings()
- {
- charTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry
- SetDefaults();
- }
+ #region Constructors/Destructor
- ///
- /// Copy constructor.
- ///
- public StreamTokenizerSettings(StreamTokenizerSettings other)
- {
- Copy(other);
- }
+ // ---------------------------------------------------------------------
- ///
- /// Sets this object to be the same as the specified object.
- /// Note that some settings which are entirely embodied by the character
- /// type table.
- ///
- public void Copy(StreamTokenizerSettings other)
- {
- charTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry
- Array.Copy(other.charTypes, 0, charTypes, 0, charTypes.Length);
-
- grabWhitespace = other.grabWhitespace;
- grabEol = other.grabEol;
- slashSlashComments = other.slashSlashComments;
- slashStarComments = other.slashStarComments;
- grabComments = other.grabComments;
- doUntermCheck = other.doUntermCheck;
+ ///
+ /// Default constructor.
+ ///
+ public StreamTokenizerSettings()
+ {
+ CharTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry
+ SetDefaults();
+ }
- parseHexNumbers = other.parseHexNumbers;
- }
+ ///
+ /// Copy constructor.
+ ///
+ public StreamTokenizerSettings(StreamTokenizerSettings other)
+ {
+ Copy(other);
+ }
- #endregion
+ ///
+ /// Sets this object to be the same as the specified object.
+ /// Note that some settings are entirely embodied by the character
+ /// type table. NOTE(review): the parseNumbers flag is not copied here; confirm this is intended.
+ ///
+ public void Copy(StreamTokenizerSettings other)
+ {
+ CharTypes = new byte[StreamTokenizer.NChars + 1]; // plus an EOF entry
+ Array.Copy(other.CharTypes, 0, CharTypes, 0, CharTypes.Length);
- // ---------------------------------------------------------------------
- #region main Setup
- // ---------------------------------------------------------------------
+ grabWhitespace = other.grabWhitespace;
+ grabEol = other.grabEol;
+ slashSlashComments = other.slashSlashComments;
+ slashStarComments = other.slashStarComments;
+ grabComments = other.grabComments;
+ doUntermCheck = other.doUntermCheck;
- ///
- /// Setup default parse behavior.
- /// This resets to same behavior as on construction.
- ///
- /// bool - true for success.
- public bool SetDefaults()
- {
- slashStarComments = false;
- grabComments = false;
- slashSlashComments = false;
- grabWhitespace = false;
- doUntermCheck = true;
- grabEol = false;
+ parseHexNumbers = other.parseHexNumbers;
+ }
- // setup table
- ResetCharTypeTable();
- ParseNumbers = true;
- ParseHexNumbers = true;
- WordChars('A', 'Z');
- WordChars('a', 'z');
- WhitespaceChars(0, ' ');
- QuoteChar('\'');
- QuoteChar('"');
- WordChars('0', '9');
+ #endregion
- return(true);
- }
+ // ---------------------------------------------------------------------
- ///
- /// Apply settings which are commonly used for code parsing
- /// C-endCapStyle code, including C++, C#, and Java.
- ///
- ///
- public bool SetupForCodeParse()
- {
- GrabWhitespace = true;
- GrabComments = true;
- SlashSlashComments = true;
- DoUntermCheck = true;
- SlashStarComments = true;
- WordChar('_');
- ParseNumbers = true;
- ParseHexNumbers = true;
- return(true);
- }
+ #region main Setup
- #endregion
+ // ---------------------------------------------------------------------
- // ---------------------------------------------------------------------
- #region Character Table Setup
- // ---------------------------------------------------------------------
+ ///
+ /// Setup default parse behavior.
+ /// This resets to same behavior as on construction.
+ ///
+ /// bool - true for success.
+ public bool SetDefaults()
+ {
+ slashStarComments = false;
+ grabComments = false;
+ slashSlashComments = false;
+ grabWhitespace = false;
+ doUntermCheck = true;
+ grabEol = false;
- ///
- /// Clear the character type settings. This leaves them unset,
- /// as opposed to the default. Use SetDefaults() for default
- /// settings.
- ///
- public void ResetCharTypeTable()
- {
- Array.Clear(charTypes, 0, charTypes.Length);
- charTypes[StreamTokenizer.NChars] = (byte)CharTypeBits.Eof; // last entry for Eof
- }
+ // setup table
+ ResetCharTypeTable();
+ ParseNumbers = true;
+ ParseHexNumbers = true;
+ WordChars('A', 'Z');
+ WordChars('a', 'z');
+ WhitespaceChars(0, ' ');
+ QuoteChar('\'');
+ QuoteChar('"');
+ WordChars('0', '9');
- ///
- /// Specify that a particular character is a word character.
- /// Character table type manipulation method.
- /// This adds the type to the char(s), rather
- /// than overwriting other types.
- ///
- /// The character.
- public void WordChar(int c)
- {
- charTypes[c] |= (byte)CharTypeBits.Word;
- }
+ return (true);
+ }
- ///
- /// Specify that a range of characters are word characters.
- /// Character table type manipulation method.
- /// This adds the type to the char(s), rather
- /// than overwriting other types.
- ///
- /// First character.
- /// Last character.
- public void WordChars(int startChar, int endChar)
- {
- for (int i = startChar; i <= endChar; i++)
- {
- charTypes[i] |= (byte)CharTypeBits.Word;
- }
- }
+ ///
+ /// Apply settings which are commonly used for code parsing
+ /// C-style code, including C++, C#, and Java.
+ ///
+ ///
+ public bool SetupForCodeParse()
+ {
+ GrabWhitespace = true;
+ GrabComments = true;
+ SlashSlashComments = true;
+ DoUntermCheck = true;
+ SlashStarComments = true;
+ WordChar('_');
+ ParseNumbers = true;
+ ParseHexNumbers = true;
+ return (true);
+ }
- ///
- /// Specify that a string of characters are word characters.
- /// Character table type manipulation method.
- /// This adds the type to the char(s), rather
- /// than overwriting other types.
- ///
- ///
- public void WordChars(string s)
- {
- for (int i = 0; i < s.Length; i++)
- charTypes[s[i]] |= (byte)CharTypeBits.Word;
- }
+ #endregion
- ///
- /// Specify that a character is a whitespace character.
- /// Character table type manipulation method.
- /// This type is exclusive with other types.
- ///
- /// The character.
- public void WhitespaceChar(int c)
- {
- charTypes[c] = (byte)CharTypeBits.Whitespace;
- }
+ // ---------------------------------------------------------------------
- ///
- /// Specify that a range of characters are whitespace characters.
- /// Character table type manipulation method.
- /// This adds the characteristic to the char(s), rather
- /// than overwriting other characteristics.
- ///
- /// First character.
- /// Last character.
- public void WhitespaceChars(int startChar, int endChar)
- {
- for (int i = startChar; i <= endChar; i++)
- charTypes[i] = (byte)CharTypeBits.Whitespace;
- }
+ #region Character Table Setup
- ///
- /// Remove other type settings from a range of characters.
- /// Character table type manipulation method.
- ///
- ///
- ///
- public void OrdinaryChars(int startChar, int endChar)
- {
- for (int i = startChar; i <= endChar; i++)
- charTypes[i] = 0;
- }
+ // ---------------------------------------------------------------------
- ///
- /// Remove other type settings from a character.
- /// Character table type manipulation method.
- ///
- ///
- public void OrdinaryChar(int c)
- {
- charTypes[c] = 0;
- }
+ ///
+ /// Clear the character type settings. This leaves them unset,
+ /// as opposed to the default. Use SetDefaults() for default
+ /// settings.
+ ///
+ public void ResetCharTypeTable()
+ {
+ Array.Clear(CharTypes, 0, CharTypes.Length);
+ CharTypes[StreamTokenizer.NChars] = (byte) CharTypeBits.Eof; // last entry for Eof
+ }
- ///
- /// Specify that a particular character is a comment-starting character.
- /// Character table type manipulation method.
- ///
- ///
- public void CommentChar(int c)
- {
- charTypes[c] = (byte)CharTypeBits.Comment;
- }
+ ///
+ /// Specify that a particular character is a word character.
+ /// Character table type manipulation method.
+ /// This adds the type to the char(s), rather
+ /// than overwriting other types.
+ ///
+ /// The character.
+ public void WordChar(int c)
+ {
+ CharTypes[c] |= (byte) CharTypeBits.Word;
+ }
- ///
- /// Specify that a particular character is a quote character.
- /// Character table type manipulation method.
- ///
- ///
- public void QuoteChar(int c)
- {
- charTypes[c] = (byte)CharTypeBits.Quote;
- }
+ ///
+ /// Specify that a range of characters are word characters.
+ /// Character table type manipulation method.
+ /// This adds the type to the char(s), rather
+ /// than overwriting other types.
+ ///
+ /// First character.
+ /// Last character.
+ public void WordChars(int startChar, int endChar)
+ {
+ for (int i = startChar; i <= endChar; i++)
+ {
+ CharTypes[i] |= (byte) CharTypeBits.Word;
+ }
+ }
- #endregion
+ ///
+ /// Specify that a string of characters are word characters.
+ /// Character table type manipulation method.
+ /// This adds the type to the char(s), rather
+ /// than overwriting other types.
+ ///
+ ///
+ public void WordChars(string s)
+ {
+ for (int i = 0; i < s.Length; i++)
+ {
+ CharTypes[s[i]] |= (byte) CharTypeBits.Word;
+ }
+ }
- // ---------------------------------------------------------------------
- #region Utility Methods
- // ---------------------------------------------------------------------
+ ///
+ /// Specify that a character is a whitespace character.
+ /// Character table type manipulation method.
+ /// This type is exclusive with other types.
+ ///
+ /// The character.
+ public void WhitespaceChar(int c)
+ {
+ CharTypes[c] = (byte) CharTypeBits.Whitespace;
+ }
- ///
- /// Return a string representation of a character type setting.
- /// Since the type setting is bitwise encoded, a character
- /// can have more than one type.
- ///
- /// The character type byte.
- /// The string representation of the type flags.
- public string CharTypeToString(byte ctype)
- {
- StringBuilder str = new StringBuilder();
+ ///
+ /// Specify that a range of characters are whitespace characters.
+ /// Character table type manipulation method.
+ /// This sets the characteristic on the char(s), overwriting
+ /// any other characteristics (the type is exclusive with other types).
+ ///
+ /// First character.
+ /// Last character.
+ public void WhitespaceChars(int startChar, int endChar)
+ {
+ for (int i = startChar; i <= endChar; i++)
+ {
+ CharTypes[i] = (byte) CharTypeBits.Whitespace;
+ }
+ }
- if (IsCharType(ctype, CharTypeBits.Quote)) str.Append('q');
- if (IsCharType(ctype, CharTypeBits.Comment)) str.Append('m');
- if (IsCharType(ctype, CharTypeBits.Whitespace)) str.Append('w');
- if (IsCharType(ctype, CharTypeBits.Digit)) str.Append('d');
- if (IsCharType(ctype, CharTypeBits.Word)) str.Append('a');
- if (IsCharType(ctype, CharTypeBits.Eof)) str.Append('e');
- if (str.Length == 0)
- {
- str.Append('c');
- }
- return(str.ToString());
- }
+ ///
+ /// Remove other type settings from a range of characters.
+ /// Character table type manipulation method.
+ ///
+ ///
+ ///
+ public void OrdinaryChars(int startChar, int endChar)
+ {
+ for (int i = startChar; i <= endChar; i++)
+ {
+ CharTypes[i] = 0;
+ }
+ }
- ///
- /// Check whether the specified char type byte has a
- /// particular type flag set.
- ///
- /// The char type byte.
- /// The CharTypeBits entry to compare to.
- /// bool - true or false
- public bool IsCharType(byte ctype, CharTypeBits type)
- {
- return((ctype & (byte)type) != 0);
- }
+ ///
+ /// Remove other type settings from a character.
+ /// Character table type manipulation method.
+ ///
+ ///
+ public void OrdinaryChar(int c)
+ {
+ CharTypes[c] = 0;
+ }
- ///
- /// Check whether the specified char has a
- /// particular type flag set.
- ///
- /// The character.
- /// The CharTypeBits entry to compare to.
- /// bool - true or false
- public bool IsCharType(char c, CharTypeBits type)
- {
- return((charTypes[c] & (byte)type) != 0);
- }
+ ///
+ /// Specify that a particular character is a comment-starting character.
+ /// Character table type manipulation method.
+ ///
+ ///
+ public void CommentChar(int c)
+ {
+ CharTypes[c] = (byte) CharTypeBits.Comment;
+ }
- ///
- /// Check whether the specified char has a
- /// particular type flag set.
- ///
- /// The character.
- /// The CharTypeBits entry to compare to.
- /// bool - true or false
- public bool IsCharType(int c, CharTypeBits type)
- {
- return((charTypes[c] & (byte)type) != 0);
- }
+ ///
+ /// Specify that a particular character is a quote character.
+ /// Character table type manipulation method.
+ ///
+ ///
+ public void QuoteChar(int c)
+ {
+ CharTypes[c] = (byte) CharTypeBits.Quote;
+ }
- #endregion
+ #endregion
- // ---------------------------------------------------------------------
- #region Standard Methods
- // ---------------------------------------------------------------------
+ // ---------------------------------------------------------------------
- ///
- /// Display the state of this object.
- ///
- public void Display()
- {
- Display(String.Empty);
- }
+ #region Utility Methods
- ///
- /// Display the state of this object, with a per-line prefix.
- ///
- /// The pre-line prefix.
- public void Display(string prefix)
- {
- }
- #endregion
- }
+ // ---------------------------------------------------------------------
- ///
- /// A StreamTokenizer similar to Java's. This breaks an input stream
- /// (coming from a TextReader) into Tokens based on various settings. The settings
- /// are stored in the TokenizerSettings property, which is a
- /// StreamTokenizerSettings instance.
- ///
- ///
- ///
- /// This is configurable in that you can modify TokenizerSettings.CharTypes[] array
- /// to specify which characters are which type, along with other settings
- /// such as whether to look for comments or not.
- ///
- ///
- /// WARNING: This is not internationalized. This treats all characters beyond
- /// the 7-bit ASCII range (decimal 127) as Word characters.
- ///
- ///
- /// There are two main ways to use this: 1) Parse the entire stream at
- /// once and get an ArrayList of Tokens (see the Tokenize* methods),
- /// and 2) call NextToken() successively.
- /// This reads from a TextReader, which you can set directly, and this
- /// also provides some convenient methods to parse files and strings.
- /// This returns an Eof token if the end of the input is reached.
- ///
- ///
- /// Here's an example of the NextToken() endCapStyle of use:
- ///
- /// StreamTokenizer tokenizer = new StreamTokenizer();
- /// tokenizer.GrabWhitespace = true;
- /// tokenizer.Verbosity = VerbosityLevel.Debug; // just for debugging
- /// tokenizer.TextReader = File.OpenText(fileName);
- /// Token token;
- /// while (tokenizer.NextToken(out token)) log.Info("Token = '{0}'", token);
- ///
- ///
- ///
- /// Here's an example of the Tokenize... endCapStyle of use:
- ///
- /// StreamTokenizer tokenizer = new StreamTokenizer("some string");
- /// ArrayList tokens = new ArrayList();
- /// if (!tokenizer.Tokenize(tokens))
- /// {
- /// // error handling
- /// }
- /// foreach (Token t in tokens) Console.WriteLine("t = {0}", t);
- ///
- ///
- ///
- /// Comment delimiters are hardcoded (// and /*), not affected by char type table.
- ///
- ///
- /// This sets line numbers in the tokens it produces. These numbers are normally
- /// the line on which the token starts.
- /// There is one known caveat, and that is that when GrabWhitespace setting
- /// is true, and a whitespace token contains a newline, that token's line number
- /// will be set to the following line rather than the line on which the token
- /// started.
- ///
- ///
- public class StreamTokenizer
- {
- // ----------------------------------------------------------------
- #region Constants
- // ----------------------------------------------------------------
+ ///
+ /// Return a string representation of a character type setting.
+ /// Since the type setting is bitwise encoded, a character
+ /// can have more than one type.
+ ///
+ /// The character type byte.
+ /// The string representation of the type flags.
+ public string CharTypeToString(byte ctype)
+ {
+ StringBuilder str = new StringBuilder();
- ///
- /// This is the number of characters in the character table.
- ///
- public static readonly int NChars = 128;
- private static readonly int Eof = NChars;
- #endregion
+ if (IsCharType(ctype, CharTypeBits.Quote))
+ {
+ str.Append('q');
+ }
+ if (IsCharType(ctype, CharTypeBits.Comment))
+ {
+ str.Append('m');
+ }
+ if (IsCharType(ctype, CharTypeBits.Whitespace))
+ {
+ str.Append('w');
+ }
+ if (IsCharType(ctype, CharTypeBits.Digit))
+ {
+ str.Append('d');
+ }
+ if (IsCharType(ctype, CharTypeBits.Word))
+ {
+ str.Append('a');
+ }
+ if (IsCharType(ctype, CharTypeBits.Eof))
+ {
+ str.Append('e');
+ }
+ if (str.Length == 0)
+ {
+ str.Append('c');
+ }
+ return (str.ToString());
+ }
- // ----------------------------------------------------------------
- #region Private Fields
- // ----------------------------------------------------------------
+ ///
+ /// Check whether the specified char type byte has a
+ /// particular type flag set.
+ ///
+ /// The char type byte.
+ /// The CharTypeBits entry to compare to.
+ /// bool - true or false
+ public bool IsCharType(byte ctype, CharTypeBits type)
+ {
+ return ((ctype & (byte) type) != 0);
+ }
- // A class for verbosity/message handling
- private Logger log;
+ ///
+ /// Check whether the specified char has a
+ /// particular type flag set.
+ ///
+ /// The character.
+ /// The CharTypeBits entry to compare to.
+ /// bool - true or false
+ public bool IsCharType(char c, CharTypeBits type)
+ {
+ return ((CharTypes[c] & (byte) type) != 0);
+ }
- // The TextReader we're reading from
- private TextReader textReader;
+ ///
+ /// Check whether the specified char has a
+ /// particular type flag set.
+ ///
+ /// The character.
+ /// The CharTypeBits entry to compare to.
+ /// bool - true or false
+ public bool IsCharType(int c, CharTypeBits type)
+ {
+ return ((CharTypes[c] & (byte) type) != 0);
+ }
- // buffered wrap of reader
- //private BufferedTextReader bufferedReader; // was slower
+ #endregion
- // keep track of current line number during parse
- private int lineNumber;
+ // ---------------------------------------------------------------------
- // used to back up in the stream
- private CharBuffer backString;
+ #region Standard Methods
- // used to collect characters of the current (next to be
- // emitted) token
- private CharBuffer nextTokenSb;
+ // ---------------------------------------------------------------------
- // for speed, construct these once and re-use
- private CharBuffer tmpSb;
- private CharBuffer expSb;
+ ///
+ /// Display the state of this object.
+ ///
+ public void Display()
+ {
+ Display(String.Empty);
+ }
- #endregion
+ ///
+ /// Display the state of this object, with a per-line prefix (currently a no-op: the body is empty).
+ ///
+ /// The pre-line prefix.
+ public void Display(string prefix) {}
- // ----------------------------------------------------------------------
- #region Properties
- // ----------------------------------------------------------------------
+ #endregion
+ }
- ///
- /// This is the TextReader that this object will read from.
- /// Set this to set the input reader for the parse.
- ///
- public TextReader TextReader
- {
- get { return(textReader); }
- set { textReader = value; }
- }
+ ///
+ /// A StreamTokenizer similar to Java's. This breaks an input stream
+ /// (coming from a TextReader) into Tokens based on various settings. The settings
+ /// are stored in the TokenizerSettings property, which is a
+ /// StreamTokenizerSettings instance.
+ ///
+ ///
+ ///
+ /// This is configurable in that you can modify TokenizerSettings.CharTypes[] array
+ /// to specify which characters are which type, along with other settings
+ /// such as whether to look for comments or not.
+ ///
+ ///
+ /// WARNING: This is not internationalized. This treats all characters beyond
+ /// the 7-bit ASCII range (decimal 127) as Word characters.
+ ///
+ ///
+ /// There are two main ways to use this: 1) Parse the entire stream at
+ /// once and get an ArrayList of Tokens (see the Tokenize* methods),
+ /// and 2) call NextToken() successively.
+ /// This reads from a TextReader, which you can set directly, and this
+ /// also provides some convenient methods to parse files and strings.
+ /// This returns an Eof token if the end of the input is reached.
+ ///
+ ///
+ /// Here's an example of the NextToken() style of use:
+ ///
+ /// StreamTokenizer tokenizer = new StreamTokenizer();
+ /// tokenizer.GrabWhitespace = true;
+ /// tokenizer.Verbosity = VerbosityLevel.Debug; // just for debugging
+ /// tokenizer.TextReader = File.OpenText(fileName);
+ /// Token token;
+ /// while (tokenizer.NextToken(out token)) log.Info("Token = '{0}'", token);
+ ///
+ ///
+ ///
+ /// Here's an example of the Tokenize... style of use:
+ ///
+ /// StreamTokenizer tokenizer = new StreamTokenizer("some string");
+ /// ArrayList tokens = new ArrayList();
+ /// if (!tokenizer.Tokenize(tokens))
+ /// {
+ /// // error handling
+ /// }
+ /// foreach (Token t in tokens) Console.WriteLine("t = {0}", t);
+ ///
+ ///
+ ///
+ /// Comment delimiters are hardcoded (// and /*), not affected by char type table.
+ ///
+ ///
+ /// This sets line numbers in the tokens it produces. These numbers are normally
+ /// the line on which the token starts.
+ /// There is one known caveat, and that is that when GrabWhitespace setting
+ /// is true, and a whitespace token contains a newline, that token's line number
+ /// will be set to the following line rather than the line on which the token
+ /// started.
+ ///
+ ///
+ public class StreamTokenizer
+ {
+ // ----------------------------------------------------------------
- private StreamTokenizerSettings settings;
- ///
- /// The settings which govern the behavior of the tokenization.
- ///
- public StreamTokenizerSettings Settings { get { return(settings); } }
+ #region Constants
- ///
- /// The verbosity level for this object's Logger.
- ///
- public VerbosityLevel Verbosity
- {
- get { return(log.Verbosity); }
- set { log.Verbosity = value; }
- }
+ // ----------------------------------------------------------------
- #endregion
+ ///
+ /// This is the number of characters in the character table.
+ ///
+ public static readonly int NChars = 128;
- // ---------------------------------------------------------------------
- #region Constructors/Destructor
- // ---------------------------------------------------------------------
+ private static readonly int Eof = NChars;
- ///
- /// Default constructor.
- ///
- public StreamTokenizer()
- {
- Initialize();
- }
+ #endregion
- ///
- /// Construct and set this object's TextReader to the one specified.
- ///
- /// The TextReader to read from.
- public StreamTokenizer(TextReader sr)
- {
- Initialize();
- textReader = sr;
- }
+ // ----------------------------------------------------------------
- ///
- /// Construct and set a string to tokenize.
- ///
- /// The string to tokenize.
- public StreamTokenizer(string str)
- {
- Initialize();
- textReader = new StringReader(str);
- }
+ #region Private Fields
- ///
- /// Utility function, things common to constructors.
- ///
- void Initialize()
- {
- log = new Logger("StreamTokenizer");
- log.Verbosity = VerbosityLevel.Warn;
- backString = new CharBuffer(32);
- nextTokenSb = new CharBuffer(1024);
+ // ----------------------------------------------------------------
- InitializeStream();
- settings = new StreamTokenizerSettings();
- settings.SetDefaults();
+ // A class for verbosity/message handling
+ private Logger log;
- expSb = new CharBuffer();
- tmpSb = new CharBuffer();
- }
+ // The TextReader we're reading from is held by the TextReader auto-property (no backing field)
- ///
- /// Clear the stream settings.
- ///
- void InitializeStream()
- {
- lineNumber = 1; // base 1 line numbers
- textReader = null;
- }
+ // buffered wrap of reader
+ //private BufferedTextReader bufferedReader; // was slower
- #endregion
+ // keep track of current line number during parse
+ private int lineNumber;
- // ---------------------------------------------------------------------
- #region Standard Methods
- // ---------------------------------------------------------------------
+ // used to back up in the stream
+ private CharBuffer backString;
- ///
- /// Display the state of this object.
- ///
- public void Display()
- {
- Display(String.Empty);
- }
+ // used to collect characters of the current (next to be
+ // emitted) token
+ private CharBuffer nextTokenSb;
- ///
- /// Display the state of this object, with a per-line prefix.
- ///
- /// The pre-line prefix.
- public void Display(string prefix)
- {
- log.WriteLine(prefix + "StreamTokenizer display:");
- log.WriteLine(prefix + " textReader: {0}", (textReader == null ? "null" : "non-null"));
- log.WriteLine(prefix + " backString: {0}", backString);
+ // for speed, construct these once and re-use
+ private CharBuffer tmpSb;
+ private CharBuffer expSb;
- if (settings != null) settings.Display(prefix + " ");
- }
+ #endregion
- #endregion
+ // ----------------------------------------------------------------------
- // ---------------------------------------------------------------------
- #region NextToken (the state machine)
- // ---------------------------------------------------------------------
+ #region Properties
- ///
- /// The states of the state machine.
- ///
- private enum NextTokenState
- {
- Start,
- Whitespace,
- Word,
- Quote,
- EndQuote,
- MaybeNumber, // could be number or word
- MaybeComment, // after first slash, might be comment or not
- MaybeHex, // after 0, may be hex
- HexGot0x, // after 0x, may be hex
- HexNumber,
- LineComment,
- BlockComment,
- EndBlockComment,
- Char,
- Eol,
- Eof,
- Invalid
- }
+ // ----------------------------------------------------------------------
- ///
- /// Pick the next state given just a single character. This is used
- /// at the start of a new token.
- ///
- /// The type of the character.
- /// The character.
- /// The state.
- private NextTokenState PickNextState(byte ctype, int c)
- {
- return(PickNextState(ctype, c, NextTokenState.Start));
- }
+ /// <summary>
+ /// This is the TextReader that this object will read from.
+ /// Set this to set the input reader for the parse.
+ /// </summary>
+ public TextReader TextReader { get; set; }
- ///
- /// Pick the next state given just a single character. This is used
- /// at the start of a new token.
- ///
- /// The type of the character.
- /// The character.
- /// Exclude this state from the possible next state.
- /// The state.
- private NextTokenState PickNextState(byte ctype, int c, NextTokenState excludeState)
- {
- if (c == '/')
- {
- return(NextTokenState.MaybeComment); // overrides all other cats
- }
- else if ((excludeState != NextTokenState.MaybeHex)
- && settings.ParseHexNumbers && (c == '0'))
- {
- return(NextTokenState.MaybeHex);
- }
- else if ((excludeState != NextTokenState.MaybeNumber) && settings.ParseNumbers
- && (settings.IsCharType(ctype, CharTypeBits.Digit) || (c == '-') || (c == '.')))
- {
- return(NextTokenState.MaybeNumber);
- }
- else if (settings.IsCharType(ctype, CharTypeBits.Word)) return(NextTokenState.Word);
- else if (settings.GrabEol && (c == 10)) return(NextTokenState.Eol);
- else if (settings.IsCharType(ctype, CharTypeBits.Whitespace)) return(NextTokenState.Whitespace);
- else if (settings.IsCharType(ctype, CharTypeBits.Comment)) return(NextTokenState.LineComment);
- else if (settings.IsCharType(ctype, CharTypeBits.Quote)) return(NextTokenState.Quote);
- else if ((c == Eof) || (settings.IsCharType(ctype, CharTypeBits.Eof))) return(NextTokenState.Eof);
- return(NextTokenState.Char);
- }
+ /// <summary>
+ /// The settings which govern the behavior of the tokenization. The setter is private, so callers can only read the reference.
+ /// </summary>
+ public StreamTokenizerSettings Settings { get; private set; }
- ///
- /// Read the next character from the stream, or from backString
- /// if we backed up.
- ///
- /// The next character.
- private int GetNextChar()
- {
- int c;
+ /// <summary>
+ /// The verbosity level for this object's Logger; reads and writes pass straight through to log.Verbosity.
+ /// </summary>
+ public VerbosityLevel Verbosity
+ {
+ get
+ {
+ return (log.Verbosity);
+ }
+ set
+ {
+ log.Verbosity = value;
+ }
+ }
- // consume from backString if possible
- if (backString.Length > 0)
- {
- c = backString[0];
- backString.Remove(0, 1);
- #if DEBUG
+ #endregion
+
+ // ---------------------------------------------------------------------
+
+ #region Constructors/Destructor
+
+ // ---------------------------------------------------------------------
+
+ /// <summary>
+ /// Default constructor. Only runs Initialize(), which leaves TextReader null, so a reader must be set before parsing.
+ /// </summary>
+ public StreamTokenizer()
+ {
+ Initialize();
+ }
+
+ /// <summary>
+ /// Construct and set this object's TextReader to the one specified.
+ /// </summary>
+ /// <param name="sr">The TextReader to read from.</param>
+ public StreamTokenizer(TextReader sr)
+ {
+ Initialize(); // must come first: it resets TextReader to null
+ TextReader = sr;
+ }
+
+ /// <summary>
+ /// Construct and set a string to tokenize; the string is wrapped in a StringReader.
+ /// </summary>
+ /// <param name="str">The string to tokenize.</param>
+ public StreamTokenizer(string str)
+ {
+ Initialize(); // must come first: it resets TextReader to null
+ TextReader = new StringReader(str);
+ }
+
+ /// <summary>
+ /// Utility function, things common to constructors: creates the logger and buffers, resets the stream state and installs default settings.
+ /// </summary>
+ private void Initialize()
+ {
+ log = new Logger("StreamTokenizer");
+ log.Verbosity = VerbosityLevel.Warn;
+ backString = new CharBuffer(32);
+ nextTokenSb = new CharBuffer(1024);
+
+ InitializeStream(); // line number back to 1, TextReader cleared
+ Settings = new StreamTokenizerSettings();
+ Settings.SetDefaults();
+
+ expSb = new CharBuffer();
+ tmpSb = new CharBuffer();
+ }
+
+ /// <summary>
+ /// Clear the stream state: reset the line counter and detach the current TextReader.
+ /// </summary>
+ private void InitializeStream()
+ {
+ lineNumber = 1; // base 1 line numbers
+ TextReader = null;
+ }
+
+ #endregion
+
+ // ---------------------------------------------------------------------
+
+ #region Standard Methods
+
+ // ---------------------------------------------------------------------
+
+ /// <summary>
+ /// Display the state of this object; delegates to Display(string) with an empty prefix.
+ /// </summary>
+ public void Display()
+ {
+ Display(String.Empty);
+ }
+
+ /// <summary>
+ /// Display the state of this object, with a per-line prefix: writes whether a TextReader is set, the backString contents, then the Settings.
+ /// </summary>
+ /// <param name="prefix">The per-line prefix.</param>
+ public void Display(string prefix)
+ {
+ log.WriteLine(prefix + "StreamTokenizer display:");
+ log.WriteLine(prefix + " textReader: {0}", (TextReader == null ? "null" : "non-null"));
+ log.WriteLine(prefix + " backString: {0}", backString);
+
+ if (Settings != null)
+ {
+ Settings.Display(prefix + " ");
+ }
+ }
+
+ #endregion
+
+ // ---------------------------------------------------------------------
+
+ #region NextToken (the state machine)
+
+ // ---------------------------------------------------------------------
+
+ /// <summary>
+ /// The states of the state machine run by NextToken().
+ /// </summary>
+ private enum NextTokenState
+ {
+ Start, // no token in progress; the next char picks the state
+ Whitespace,
+ Word,
+ Quote,
+ EndQuote, // one char past the closing quote
+ MaybeNumber, // could be number or word
+ MaybeComment, // after first slash, might be comment or not
+ MaybeHex, // after 0, may be hex
+ HexGot0x, // after 0x, may be hex
+ HexNumber,
+ LineComment,
+ BlockComment,
+ EndBlockComment, // just saw the closing "*/" of a block comment
+ Char,
+ Eol, // newline, only chosen when Settings.GrabEol is set
+ Eof,
+ Invalid
+ }
+
+ /// <summary>
+ /// Pick the next state given just a single character. This is used
+ /// at the start of a new token; no state is excluded (Start is passed as the exclusion).
+ /// </summary>
+ /// <param name="ctype">The type of the character.</param>
+ /// <param name="c">The character.</param>
+ /// <returns>The state.</returns>
+ private NextTokenState PickNextState(byte ctype, int c)
+ {
+ return (PickNextState(ctype, c, NextTokenState.Start));
+ }
+
+ /// <summary>
+ /// Pick the next state given just a single character, optionally ruling out the
+ /// MaybeHex or MaybeNumber state (NextToken does this when it restarts after a failed hex or number parse).
+ /// </summary>
+ /// <param name="ctype">The type of the character.</param>
+ /// <param name="c">The character.</param>
+ /// <param name="excludeState">Exclude this state from the possible next state; only MaybeHex and MaybeNumber are checked.</param>
+ /// <returns>The state; Char when no other rule matches.</returns>
+ private NextTokenState PickNextState(byte ctype, int c, NextTokenState excludeState)
+ {
+ if (c == '/')
+ {
+ return (NextTokenState.MaybeComment); // overrides all other cats
+ }
+ else if ((excludeState != NextTokenState.MaybeHex)
+ && Settings.ParseHexNumbers && (c == '0'))
+ {
+ return (NextTokenState.MaybeHex);
+ }
+ else if ((excludeState != NextTokenState.MaybeNumber) && Settings.ParseNumbers
+ && (Settings.IsCharType(ctype, CharTypeBits.Digit) || (c == '-') || (c == '.')))
+ {
+ return (NextTokenState.MaybeNumber);
+ }
+ else if (Settings.IsCharType(ctype, CharTypeBits.Word))
+ {
+ return (NextTokenState.Word);
+ }
+ else if (Settings.GrabEol && (c == 10)) // 10 is '\n'; tested before the Whitespace rule
+ {
+ return (NextTokenState.Eol);
+ }
+ else if (Settings.IsCharType(ctype, CharTypeBits.Whitespace))
+ {
+ return (NextTokenState.Whitespace);
+ }
+ else if (Settings.IsCharType(ctype, CharTypeBits.Comment))
+ {
+ return (NextTokenState.LineComment);
+ }
+ else if (Settings.IsCharType(ctype, CharTypeBits.Quote))
+ {
+ return (NextTokenState.Quote);
+ }
+ else if ((c == Eof) || (Settings.IsCharType(ctype, CharTypeBits.Eof)))
+ {
+ return (NextTokenState.Eof);
+ }
+ return (NextTokenState.Char);
+ }
+
+ /// <summary>
+ /// Read the next character from backString if we backed up, otherwise from the TextReader.
+ /// Carriage returns (13) are skipped and each newline (10) read from the reader increments lineNumber.
+ /// </summary>
+ /// <returns>The next character, or Eof if TextReader is null, the read throws, or the reader returns a negative value.</returns>
+ private int GetNextChar()
+ {
+ int c;
+
+ // consume from backString if possible (lineNumber is not touched on this path)
+ if (backString.Length > 0)
+ {
+ c = backString[0];
+ backString.Remove(0, 1);
+#if DEBUG
log.Debug("Backup char '{0}'", (char)c);
#endif
- return(c);
- }
+ return (c);
+ }
- if (textReader == null) return(Eof);
+ if (TextReader == null)
+ {
+ return (Eof);
+ }
- try
- {
- while((c = textReader.Read()) == 13) {} // skip LF (13)
- }
- catch(Exception)
- {
- return(Eof);
- }
+ try
+ {
+ while ((c = TextReader.Read()) == 13) {} // skip CR (13)
+ }
+ catch (Exception) // any read failure is reported as end of input
+ {
+ return (Eof);
+ }
- if (c == 10)
- {
- lineNumber++;
- #if DEBUG
+ if (c == 10) // newline: advance the line counter
+ {
+ lineNumber++;
+#if DEBUG
log.Debug("Line number incremented to {0}", lineNumber);
#endif
- }
- else if (c < 0)
- {
- c = Eof;
- }
+ }
+ else if (c < 0) // negative result from Read() is mapped to Eof
+ {
+ c = Eof;
+ }
- #if DEBUG
+#if DEBUG
log.Debug("Read char '{0}' ({1})", (char)c, c);
#endif
- return(c);
- }
+ return (c);
+ }
- ///
- /// Get the next token. The last token will be an EofToken unless
- /// there's an unterminated quote or unterminated block comment
- /// and Settings.DoUntermCheck is true, in which case this throws
- /// an exception of type StreamTokenizerUntermException or sub-class.
- ///
- /// The output token.
- /// bool - true for success, false for failure.
- public bool NextToken(out Token token)
- {
- token = null;
- int thisChar = 0; // current character
- byte ctype; // type of this character
+ ///
+ /// Get the next token. The last token will be an EofToken unless
+ /// there's an unterminated quote or unterminated block comment
+ /// and Settings.DoUntermCheck is true, in which case this throws
+ /// an exception of type StreamTokenizerUntermException or sub-class.
+ ///
+ /// The output token.
+ /// bool - true for success, false for failure.
+ public bool NextToken(out Token token)
+ {
+ token = null;
+ int thisChar = 0; // current character
+ byte ctype; // type of this character
- NextTokenState state = NextTokenState.Start;
- int prevChar = 0; // previous character
- byte prevCtype = (byte)CharTypeBits.Eof;
+ NextTokenState state = NextTokenState.Start;
+ int prevChar = 0; // previous character
+ byte prevCtype = (byte) CharTypeBits.Eof;
- // get previous char from nextTokenSb if there
- // (nextTokenSb is a StringBuilder containing the characters
- // of the next token to be emitted)
- if (nextTokenSb.Length > 0)
- {
- prevChar = nextTokenSb[nextTokenSb.Length - 1];
- prevCtype = settings.CharTypes[prevChar];
- state = PickNextState(prevCtype, prevChar);
- }
+ // get previous char from nextTokenSb if there
+ // (nextTokenSb is a StringBuilder containing the characters
+ // of the next token to be emitted)
+ if (nextTokenSb.Length > 0)
+ {
+ prevChar = nextTokenSb[nextTokenSb.Length - 1];
+ prevCtype = Settings.CharTypes[prevChar];
+ state = PickNextState(prevCtype, prevChar);
+ }
- // extra state for number parse
- int seenDot = 0; // how many .'s in the number
- int seenE = 0; // how many e's or E's have we seen in the number
- bool seenDigit = false; // seen any digits (numbers can start with -)
+ // extra state for number parse
+ int seenDot = 0; // how many .'s in the number
+ int seenE = 0; // how many e's or E's have we seen in the number
+ bool seenDigit = false; // seen any digits (numbers can start with -)
- // lineNumber can change with each GetNextChar()
- // tokenLineNumber is the line on which the token started
- int tokenLineNumber = lineNumber;
+ // lineNumber can change with each GetNextChar()
+ // tokenLineNumber is the line on which the token started
+ int tokenLineNumber = lineNumber;
- // State Machine: Produces a single token.
- // Enter a state based on a single character.
- // Generally, being in a state means we're currently collecting chars
- // in that type of token.
- // We do state machine until it builds a token (Eof is a token), then
- // return that token.
- thisChar = prevChar; // for first iteration, since prevChar is set to this
- bool done = false; // optimization
- while (!done)
- {
- prevChar = thisChar;
- thisChar = GetNextChar();
- if (thisChar >= settings.CharTypes.Length)
- {
- // greater than 7-bit ascii, treat as word character
- ctype = (byte)CharTypeBits.Word;
- }
- else ctype = settings.CharTypes[thisChar];
+ // State Machine: Produces a single token.
+ // Enter a state based on a single character.
+ // Generally, being in a state means we're currently collecting chars
+ // in that type of token.
+ // We do state machine until it builds a token (Eof is a token), then
+ // return that token.
+ thisChar = prevChar; // for first iteration, since prevChar is set to this
+ bool done = false; // optimization
+ while (!done)
+ {
+ prevChar = thisChar;
+ thisChar = GetNextChar();
+ if (thisChar >= Settings.CharTypes.Length)
+ {
+ // greater than 7-bit ascii, treat as word character
+ ctype = (byte) CharTypeBits.Word;
+ }
+ else
+ {
+ ctype = Settings.CharTypes[thisChar];
+ }
- #if DEBUG
+#if DEBUG
log.Debug("Before switch: state = {0}, thisChar = '{1}'", state, (char)thisChar);
#endif
- // see if we need to change states, or emit a token
- switch(state)
- {
- case NextTokenState.Start:
- // RESET
- state = PickNextState(ctype, thisChar);
- tokenLineNumber = lineNumber;
- break;
+ // see if we need to change states, or emit a token
+ switch (state)
+ {
+ case NextTokenState.Start:
+ // RESET
+ state = PickNextState(ctype, thisChar);
+ tokenLineNumber = lineNumber;
+ break;
- case NextTokenState.Char:
- token = new CharToken((char)prevChar, tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- break;
+ case NextTokenState.Char:
+ token = new CharToken((char) prevChar, tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ break;
- case NextTokenState.Word:
- if ((!settings.IsCharType(ctype, CharTypeBits.Word))
- && (!settings.IsCharType(ctype, CharTypeBits.Digit)))
- {
- // end of word, emit
- token = new WordToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- break;
+ case NextTokenState.Word:
+ if ((!Settings.IsCharType(ctype, CharTypeBits.Word))
+ && (!Settings.IsCharType(ctype, CharTypeBits.Digit)))
+ {
+ // end of word, emit
+ token = new WordToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ break;
- case NextTokenState.Whitespace:
- if (!settings.IsCharType(ctype, CharTypeBits.Whitespace)
- || (settings.GrabEol && (thisChar == 10)))
- {
- // end of whitespace, emit
- if (settings.GrabWhitespace)
- {
- token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- else
- {
- // RESET
- nextTokenSb.Length = 0;
- tokenLineNumber = lineNumber;
- state = PickNextState(ctype, thisChar);
- }
- }
- break;
+ case NextTokenState.Whitespace:
+ if (!Settings.IsCharType(ctype, CharTypeBits.Whitespace)
+ || (Settings.GrabEol && (thisChar == 10)))
+ {
+ // end of whitespace, emit
+ if (Settings.GrabWhitespace)
+ {
+ token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ else
+ {
+ // RESET
+ nextTokenSb.Length = 0;
+ tokenLineNumber = lineNumber;
+ state = PickNextState(ctype, thisChar);
+ }
+ }
+ break;
- case NextTokenState.EndQuote:
- // we're now 1 char after end of quote
- token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- break;
+ case NextTokenState.EndQuote:
+ // we're now 1 char after end of quote
+ token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ break;
- case NextTokenState.Quote:
- // looking for end quote matching char that started the quote
- if (thisChar == nextTokenSb[0])
- {
- // handle escaped backslashes: count the immediately prior backslashes
- // - even (including 0) means it's not escaped
- // - odd means it is escaped
- int backSlashCount = 0;
- for (int i = nextTokenSb.Length - 1; i >= 0; i--)
- {
- if (nextTokenSb[ i ] == '\\') backSlashCount++;
- else break;
- }
+ case NextTokenState.Quote:
+ // looking for end quote matching char that started the quote
+ if (thisChar == nextTokenSb[0])
+ {
+ // handle escaped backslashes: count the immediately prior backslashes
+ // - even (including 0) means it's not escaped
+ // - odd means it is escaped
+ int backSlashCount = 0;
+ for (int i = nextTokenSb.Length - 1; i >= 0; i--)
+ {
+ if (nextTokenSb[i] == '\\')
+ {
+ backSlashCount++;
+ }
+ else
+ {
+ break;
+ }
+ }
- if ((backSlashCount % 2) == 0)
- {
- state = NextTokenState.EndQuote;
- }
- }
+ if ((backSlashCount%2) == 0)
+ {
+ state = NextTokenState.EndQuote;
+ }
+ }
- if ((state != NextTokenState.EndQuote) && (thisChar == Eof))
- {
- if (settings.DoUntermCheck)
- {
- nextTokenSb.Length = 0;
- throw new StreamTokenizerUntermQuoteException("Unterminated quote");
- }
+ if ((state != NextTokenState.EndQuote) && (thisChar == Eof))
+ {
+ if (Settings.DoUntermCheck)
+ {
+ nextTokenSb.Length = 0;
+ throw new StreamTokenizerUntermQuoteException("Unterminated quote");
+ }
- token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- break;
+ token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ break;
- case NextTokenState.MaybeComment:
- if (thisChar == Eof)
- {
- token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- else
- {
- // if we get the right char, we're in a comment
- if (settings.SlashSlashComments && (thisChar == '/'))
- state = NextTokenState.LineComment;
- else if (settings.SlashStarComments && (thisChar == '*'))
- state = NextTokenState.BlockComment;
- else
- {
- token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- }
- break;
+ case NextTokenState.MaybeComment:
+ if (thisChar == Eof)
+ {
+ token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ else
+ {
+ // if we get the right char, we're in a comment
+ if (Settings.SlashSlashComments && (thisChar == '/'))
+ {
+ state = NextTokenState.LineComment;
+ }
+ else if (Settings.SlashStarComments && (thisChar == '*'))
+ {
+ state = NextTokenState.BlockComment;
+ }
+ else
+ {
+ token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ }
+ break;
- case NextTokenState.LineComment:
- if (thisChar == Eof)
- {
- if (settings.GrabComments)
- {
- token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- else
- {
- // RESET
- nextTokenSb.Length = 0;
- tokenLineNumber = lineNumber;
- state = PickNextState(ctype, thisChar);
- }
- }
- else
- {
- if (thisChar == '\n')
- {
- if (settings.GrabComments)
- {
- token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- else
- {
- // RESET
- nextTokenSb.Length = 0;
- tokenLineNumber = lineNumber;
- state = PickNextState(ctype, thisChar);
- }
- }
- }
- break;
+ case NextTokenState.LineComment:
+ if (thisChar == Eof)
+ {
+ if (Settings.GrabComments)
+ {
+ token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ else
+ {
+ // RESET
+ nextTokenSb.Length = 0;
+ tokenLineNumber = lineNumber;
+ state = PickNextState(ctype, thisChar);
+ }
+ }
+ else
+ {
+ if (thisChar == '\n')
+ {
+ if (Settings.GrabComments)
+ {
+ token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ else
+ {
+ // RESET
+ nextTokenSb.Length = 0;
+ tokenLineNumber = lineNumber;
+ state = PickNextState(ctype, thisChar);
+ }
+ }
+ }
+ break;
- case NextTokenState.BlockComment:
- if (thisChar == Eof)
- {
- if (settings.DoUntermCheck)
- {
- nextTokenSb.Length = 0;
- throw new StreamTokenizerUntermCommentException("Unterminated comment.");
- }
+ case NextTokenState.BlockComment:
+ if (thisChar == Eof)
+ {
+ if (Settings.DoUntermCheck)
+ {
+ nextTokenSb.Length = 0;
+ throw new StreamTokenizerUntermCommentException("Unterminated comment.");
+ }
- if (settings.GrabComments)
- {
- token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- else
- {
- // RESET
- nextTokenSb.Length = 0;
- tokenLineNumber = lineNumber;
- state = PickNextState(ctype, thisChar);
- }
- }
- else
- {
- if ((thisChar == '/') && (prevChar == '*'))
- {
- state = NextTokenState.EndBlockComment;
- }
- }
- break;
+ if (Settings.GrabComments)
+ {
+ token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ else
+ {
+ // RESET
+ nextTokenSb.Length = 0;
+ tokenLineNumber = lineNumber;
+ state = PickNextState(ctype, thisChar);
+ }
+ }
+ else
+ {
+ if ((thisChar == '/') && (prevChar == '*'))
+ {
+ state = NextTokenState.EndBlockComment;
+ }
+ }
+ break;
- // special case for 2-character token termination
- case NextTokenState.EndBlockComment:
- if (settings.GrabComments)
- {
- token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- else
- {
- // RESET
- nextTokenSb.Length = 0;
- tokenLineNumber = lineNumber;
- state = PickNextState(ctype, thisChar);
- }
- break;
+ // special case for 2-character token termination
+ case NextTokenState.EndBlockComment:
+ if (Settings.GrabComments)
+ {
+ token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ else
+ {
+ // RESET
+ nextTokenSb.Length = 0;
+ tokenLineNumber = lineNumber;
+ state = PickNextState(ctype, thisChar);
+ }
+ break;
- case NextTokenState.MaybeHex:
- // previous char was 0
- if (thisChar != 'x')
- {
- // back up and try non-hex
- // back up to the 0
- nextTokenSb.Append((char)thisChar);
- backString.Append(nextTokenSb);
- nextTokenSb.Length = 0;
+ case NextTokenState.MaybeHex:
+ // previous char was 0
+ if (thisChar != 'x')
+ {
+ // back up and try non-hex
+ // back up to the 0
+ nextTokenSb.Append((char) thisChar);
+ backString.Append(nextTokenSb);
+ nextTokenSb.Length = 0;
- // reset state and don't choose MaybeNumber state.
- // pull char from backString
- thisChar = backString[0];
- backString.Remove(0, 1);
- state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
- NextTokenState.MaybeHex);
- #if DEBUG
+ // reset state and don't choose MaybeNumber state.
+ // pull char from backString
+ thisChar = backString[0];
+ backString.Remove(0, 1);
+ state = PickNextState(Settings.CharTypes[thisChar], (int) thisChar,
+ NextTokenState.MaybeHex);
+#if DEBUG
log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar,
state);
#endif
- }
- else state = NextTokenState.HexGot0x;
- break;
+ }
+ else
+ {
+ state = NextTokenState.HexGot0x;
+ }
+ break;
- case NextTokenState.HexGot0x:
- if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
- {
- // got 0x but now a non-hex char
- // back up to the 0
- nextTokenSb.Append((char)thisChar);
- backString.Append(nextTokenSb);
- nextTokenSb.Length = 0;
+ case NextTokenState.HexGot0x:
+ if (!Settings.IsCharType(ctype, CharTypeBits.HexDigit))
+ {
+ // got 0x but now a non-hex char
+ // back up to the 0
+ nextTokenSb.Append((char) thisChar);
+ backString.Append(nextTokenSb);
+ nextTokenSb.Length = 0;
- // reset state and don't choose MaybeNumber state.
- // pull char from backString
- thisChar = backString[0];
- backString.Remove(0, 1);
- state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
- NextTokenState.MaybeHex);
- #if DEBUG
+ // reset state and don't choose MaybeNumber state.
+ // pull char from backString
+ thisChar = backString[0];
+ backString.Remove(0, 1);
+ state = PickNextState(Settings.CharTypes[thisChar], (int) thisChar,
+ NextTokenState.MaybeHex);
+#if DEBUG
log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar,
state);
#endif
- }
- else state = NextTokenState.HexNumber;
- break;
+ }
+ else
+ {
+ state = NextTokenState.HexNumber;
+ }
+ break;
- case NextTokenState.HexNumber:
- if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
- {
- // emit the hex number we've collected
- #if DEBUG
+ case NextTokenState.HexNumber:
+ if (!Settings.IsCharType(ctype, CharTypeBits.HexDigit))
+ {
+ // emit the hex number we've collected
+#if DEBUG
log.Debug("Emit hex IntToken from string '{0}'", nextTokenSb);
#endif
- token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- }
- break;
+ token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ break;
- case NextTokenState.MaybeNumber:
- //
- // Determine whether or not to stop collecting characters for
- // the number parse. We terminate when it's clear it's not
- // a number or no longer a number.
- //
- bool term = false;
+ case NextTokenState.MaybeNumber:
+ //
+ // Determine whether or not to stop collecting characters for
+ // the number parse. We terminate when it's clear it's not
+ // a number or no longer a number.
+ //
+ bool term = false;
- if (settings.IsCharType(ctype, CharTypeBits.Digit)
- || settings.IsCharType(prevChar, CharTypeBits.Digit)) seenDigit = true;
+ if (Settings.IsCharType(ctype, CharTypeBits.Digit)
+ || Settings.IsCharType(prevChar, CharTypeBits.Digit))
+ {
+ seenDigit = true;
+ }
- // term conditions
- if (thisChar == '.')
- {
- seenDot++;
- if (seenDot > 1) term = true; // more than one dot, it aint a number
- }
- else if (((thisChar == 'e') || (thisChar == 'E')))
- {
- seenE++;
- if (!seenDigit) term = true; // e before any digits is bad
- else if (seenE > 1) term = true; // more than 1 e is bad
- else
- {
- term = true; // done regardless
+ // term conditions
+ if (thisChar == '.')
+ {
+ seenDot++;
+ if (seenDot > 1)
+ {
+ term = true; // more than one dot, it aint a number
+ }
+ }
+ else if (((thisChar == 'e') || (thisChar == 'E')))
+ {
+ seenE++;
+ if (!seenDigit)
+ {
+ term = true; // e before any digits is bad
+ }
+ else if (seenE > 1)
+ {
+ term = true; // more than 1 e is bad
+ }
+ else
+ {
+ term = true; // done regardless
- // scan the exponent, put its characters into
- // nextTokenSb, if there are any
- char c;
- expSb.Clear();
- expSb.Append((char)thisChar);
- if (GrabInt(expSb, true, out c))
- {
- // we got a good exponent, tack it on
- nextTokenSb.Append(expSb);
- thisChar = c; // and continue after the exponent's characters
- }
- }
- }
- else if (thisChar == Eof) term = true;
- // or a char that can't be in a number
- else if ((!settings.IsCharType(ctype, CharTypeBits.Digit)
- && (thisChar != 'e') && (thisChar != 'E')
- && (thisChar != '-') && (thisChar != '.'))
- || ((thisChar == '+') && (seenE == 0)))
- {
- // it's not a normal number character
- term = true;
- }
- // or a dash not after e
- else if ((thisChar == '-') && (!((prevChar == 'e') || (prevChar == 'E')))) term = true;
+ // scan the exponent, put its characters into
+ // nextTokenSb, if there are any
+ char c;
+ expSb.Clear();
+ expSb.Append((char) thisChar);
+ if (GrabInt(expSb, true, out c))
+ {
+ // we got a good exponent, tack it on
+ nextTokenSb.Append(expSb);
+ thisChar = c; // and continue after the exponent's characters
+ }
+ }
+ }
+ else if (thisChar == Eof)
+ {
+ term = true;
+ }
+ // or a char that can't be in a number
+ else if ((!Settings.IsCharType(ctype, CharTypeBits.Digit)
+ && (thisChar != 'e') && (thisChar != 'E')
+ && (thisChar != '-') && (thisChar != '.'))
+ || ((thisChar == '+') && (seenE == 0)))
+ {
+ // it's not a normal number character
+ term = true;
+ }
+ // or a dash not after e
+ else if ((thisChar == '-') && (!((prevChar == 'e') || (prevChar == 'E'))))
+ {
+ term = true;
+ }
- if (term)
- {
- // we are terminating a number, or it wasn't a number
- if (seenDigit)
- {
- if ((nextTokenSb.IndexOf('.') >= 0)
- || (nextTokenSb.IndexOf('e') >= 0)
- || (nextTokenSb.IndexOf('E') >= 0)
- || (nextTokenSb.Length >= 19) // probably too large for Int64, use float
- )
- {
- token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber);
- #if DEBUG
+ if (term)
+ {
+ // we are terminating a number, or it wasn't a number
+ if (seenDigit)
+ {
+ if ((nextTokenSb.IndexOf('.') >= 0)
+ || (nextTokenSb.IndexOf('e') >= 0)
+ || (nextTokenSb.IndexOf('E') >= 0)
+ || (nextTokenSb.Length >= 19) // probably too large for Int64, use float
+ )
+ {
+ token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber);
+#if DEBUG
log.Debug("Emit FloatToken from string '{0}'", nextTokenSb);
#endif
- }
- else
- {
- #if DEBUG
+ }
+ else
+ {
+#if DEBUG
log.Debug("Emit IntToken from string '{0}'", nextTokenSb);
#endif
- token = new IntToken(nextTokenSb.ToString(), tokenLineNumber);
- }
- done = true;
- nextTokenSb.Length = 0;
- }
- else
- {
- // -whatever or -.whatever
- // didn't see any digits, must have gotten here by a leading -
- // and no digits after it
- // back up to -, pick next state excluding numbers
- nextTokenSb.Append((char)thisChar);
- backString.Append(nextTokenSb);
- nextTokenSb.Length = 0;
+ token = new IntToken(nextTokenSb.ToString(), tokenLineNumber);
+ }
+ done = true;
+ nextTokenSb.Length = 0;
+ }
+ else
+ {
+ // -whatever or -.whatever
+ // didn't see any digits, must have gotten here by a leading -
+ // and no digits after it
+ // back up to -, pick next state excluding numbers
+ nextTokenSb.Append((char) thisChar);
+ backString.Append(nextTokenSb);
+ nextTokenSb.Length = 0;
- // restart on the - and don't choose MaybeNumber state
- // pull char from backString
- thisChar = backString[0];
- backString.Remove(0, 1);
- state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
- NextTokenState.MaybeNumber);
- #if DEBUG
+ // restart on the - and don't choose MaybeNumber state
+ // pull char from backString
+ thisChar = backString[0];
+ backString.Remove(0, 1);
+ state = PickNextState(Settings.CharTypes[thisChar], (int) thisChar,
+ NextTokenState.MaybeNumber);
+#if DEBUG
log.Debug("MaybeNumber: Next state on '{0}' is {1}", (char)thisChar,
state);
#endif
- }
- }
- break;
+ }
+ }
+ break;
- case NextTokenState.Eol:
- // tokenLineNumber - 1 because the newline char is on the previous line
- token = new EolToken(tokenLineNumber - 1);
- done = true;
- nextTokenSb.Length = 0;
- break;
+ case NextTokenState.Eol:
+ // tokenLineNumber - 1 because the newline char is on the previous line
+ token = new EolToken(tokenLineNumber - 1);
+ done = true;
+ nextTokenSb.Length = 0;
+ break;
- case NextTokenState.Eof:
- token = new EofToken(tokenLineNumber);
- done = true;
- nextTokenSb.Length = 0;
- return(false);
+ case NextTokenState.Eof:
+ token = new EofToken(tokenLineNumber);
+ done = true;
+ nextTokenSb.Length = 0;
+ return (false);
- case NextTokenState.Invalid:
- default:
- // not a good sign, some unrepresented state?
- log.Error("NextToken: Hit unrepresented state {0}", state);
- return(false);
- }
+ case NextTokenState.Invalid:
+ default:
+ // not a good sign, some unrepresented state?
+ log.Error("NextToken: Hit unrepresented state {0}", state);
+ return (false);
+ }
- // use a StringBuilder to accumulate characters which are part of this token
- if (thisChar != Eof) nextTokenSb.Append((char)thisChar);
- #if DEBUG
+ // use a StringBuilder to accumulate characters which are part of this token
+ if (thisChar != Eof)
+ {
+ nextTokenSb.Append((char) thisChar);
+ }
+#if DEBUG
log.Debug("After switch: state = {0}, nextTokenSb = '{1}', backString = '{2}'",
state, nextTokenSb, backString);
#endif
- }
+ }
- #if DEBUG
+#if DEBUG
log.Debug("Got token {0}", token.ToDebugString());
#endif
- return(true);
- }
+ return (true);
+ }
- ///
- /// Starting from current stream location, scan forward
- /// over an int. Determine whether it's an integer or not. If so,
- /// push the integer characters to the specified CharBuffer.
- /// If not, put them in backString (essentially leave the
- /// stream as it was) and return false.
- ///
- /// If it was an int, the stream is left 1 character after the
- /// end of the int, and that character is output in the thisChar parameter.
- ///
- /// The formats for integers are: 1, +1, and -1
- /// The + and - signs are included in the output buffer.
- ///
- /// The CharBuffer to append to.
- /// Whether or not to consider + to be part
- /// of an integer.
- /// The last character read by this method.
- /// true for parsed an int, false for not an int
- private bool GrabInt(CharBuffer sb, bool allowPlus, out char thisChar)
- {
- tmpSb.Clear(); // use tmp CharBuffer
+ /// <summary>
+ /// Starting from the current stream location, scan forward
+ /// over an int. Determine whether it's an integer or not. If so,
+ /// append the integer characters to the specified CharBuffer.
+ /// If not, put the characters read into backString (essentially leave
+ /// the stream as it was) and return false.
+ ///
+ /// If it was an int, the stream is left 1 character after the
+ /// end of the int, and that character is output in the thisChar parameter.
+ ///
+ /// The formats for integers are: 1, +1, and -1
+ /// The + and - signs are included in the output buffer.
+ /// </summary>
+ /// <param name="sb">The CharBuffer to append to; only modified when an int is found.</param>
+ /// <param name="allowPlus">Whether or not to consider + to be part
+ /// of an integer.</param>
+ /// <param name="thisChar">The last character read by this method.</param>
+ /// <returns>true if an int was parsed, false if not</returns>
+ private bool GrabInt(CharBuffer sb, bool allowPlus, out char thisChar)
+ {
+ tmpSb.Clear(); // stage the candidate in the shared tmp CharBuffer so sb is only touched on success
- // first character can be -, maybe can be + depending on arg
- thisChar = (char)GetNextChar();
- if (thisChar == Eof)
- {
- return(false);
- }
- else if (thisChar == '+')
- {
- if (allowPlus)
- {
- tmpSb.Append(thisChar);
- }
- else
- {
- backString.Append(thisChar);
- return(false);
- }
- }
- else if (thisChar == '-')
- {
- tmpSb.Append(thisChar);
- }
- else if (settings.IsCharType(thisChar, CharTypeBits.Digit))
- {
- // a digit, back this out so we can handle it in loop below
- backString.Append(thisChar);
- }
- else
- {
- // not a number starter
- backString.Append(thisChar);
- return(false);
- }
+ // first character may be '-', '+' (only when allowPlus is true), or a digit
+ thisChar = (char) GetNextChar();
+ if (thisChar == Eof)
+ {
+ return (false); // hit Eof on the first read; nothing is pushed back
+ }
+ else if (thisChar == '+')
+ {
+ if (allowPlus)
+ {
+ tmpSb.Append(thisChar);
+ }
+ else
+ {
+ backString.Append(thisChar); // '+' not permitted here: hand it back and give up
+ return (false);
+ }
+ }
+ else if (thisChar == '-')
+ {
+ tmpSb.Append(thisChar);
+ }
+ else if (Settings.IsCharType(thisChar, CharTypeBits.Digit))
+ {
+ // a digit: push it back so the digit loop below handles it
+ // (presumably GetNextChar serves backString first - confirm)
+ backString.Append(thisChar);
+ }
+ else
+ {
+ // not a number starter: push it back and report failure
+ backString.Append(thisChar);
+ return (false);
+ }
- // rest of chars have to be digits
- bool gotInt = false;
- while(((thisChar = (char)GetNextChar()) != Eof)
- && (settings.IsCharType(thisChar, CharTypeBits.Digit)))
- {
- gotInt = true;
- tmpSb.Append(thisChar);
- }
+ // rest of chars have to be digits; the first non-digit (or Eof) ends the scan and stays in thisChar
+ bool gotInt = false; // becomes true once at least one digit has been consumed
+ while (((thisChar = (char) GetNextChar()) != Eof)
+ && (Settings.IsCharType(thisChar, CharTypeBits.Digit)))
+ {
+ gotInt = true;
+ tmpSb.Append(thisChar);
+ }
- if (gotInt)
- {
- sb.Append(tmpSb);
- #if DEBUG
+ if (gotInt)
+ {
+ sb.Append(tmpSb); // commit the staged sign and digits to the caller's buffer
+#if DEBUG
log.Debug("Grabbed int {0}, sb = {1}", tmpSb, sb);
#endif
- return(true);
- }
- else
- {
- // didn't get any chars after first
- backString.Append(tmpSb); // put + or - back on
- if (thisChar != Eof) backString.Append(thisChar);
- return(false);
- }
- }
+ return (true);
+ }
+ else
+ {
+ // no digits were read by the loop above
+ backString.Append(tmpSb); // put + or - back on
+ if (thisChar != Eof)
+ {
+ backString.Append(thisChar); // then the terminating char, preserving the order they were read in
+ }
+ return (false);
+ }
+ }
- #endregion
+ #endregion
- // ---------------------------------------------------------------------
- #region Tokenize wrapper methods
- // ---------------------------------------------------------------------
+ // ---------------------------------------------------------------------
- ///
- /// Parse the rest of the stream and put all the tokens
- /// in the input ArrayList. This resets the line number to 1.
- ///
- /// The ArrayList to append to.
- /// bool - true for success
- public bool Tokenize(ArrayList tokens)
- {
- Token token;
- this.lineNumber = 1;
+ #region Tokenize wrapper methods
- while (NextToken(out token))
- {
- if (token == null) throw new NullReferenceException(
- "StreamTokenizer: Tokenize: Got a null token from NextToken.");
- tokens.Add(token);
- }
+ // ---------------------------------------------------------------------
- // add the last token returned (EOF)
- tokens.Add(token);
- return(true);
- }
+ /// <summary>
+ /// Drain the tokenizer: call NextToken until it reports false, appending
+ /// every token it produces to the supplied ArrayList. The line number is
+ /// reset to 1 before scanning starts.
+ /// </summary>
+ /// <remarks>
+ /// The token handed back by the final, false-returning NextToken call is
+ /// appended as well (normally the EOF token).
+ /// </remarks>
+ /// <param name="tokens">The ArrayList to append to.</param>
+ /// <returns>bool - true for success; this method has no other return value.</returns>
+ /// <exception cref="NullReferenceException">NextToken returned true but produced a null token.</exception>
+ public bool Tokenize(ArrayList tokens)
+ {
+     lineNumber = 1;
+     Token current;
- ///
- /// Parse all tokens from the specified TextReader, put
- /// them into the input ArrayList.
- ///
- /// The TextReader to read from.
- /// The ArrayList to append to.
- /// bool - true for success, false for failure.
- public bool TokenizeReader(TextReader tr, ArrayList tokens)
- {
- textReader = tr;
- return(Tokenize(tokens));
- }
+     for (;;)
+     {
+         // stop as soon as NextToken signals there is nothing more to read
+         if (!NextToken(out current))
+         {
+             break;
+         }
+         if (current == null)
+         {
+             throw new NullReferenceException(
+                 "StreamTokenizer: Tokenize: Got a null token from NextToken.");
+         }
+         tokens.Add(current);
+     }
- ///
- /// Parse all tokens from the specified file, put
- /// them into the input ArrayList.
- ///
- /// The file to read.
- /// The ArrayList to put tokens in.
- /// bool - true for success, false for failure.
- public bool TokenizeFile(string fileName, ArrayList tokens)
- {
- FileInfo fi = new FileInfo(fileName);
- FileStream fr = null;
- try
- {
- fr = fi.Open(FileMode.Open, FileAccess.Read, FileShare.None);
- textReader = new StreamReader(fr);
- }
- catch (DirectoryNotFoundException)
- {
- }
- try
- {
- if (!Tokenize(tokens))
- {
- log.Error("Unable to parse tokens from file {0}", fileName);
- textReader.Close();
- if (fr != null) fr.Close();
- return(false);
- }
- }
- catch(StreamTokenizerUntermException e)
- {
- textReader.Close();
- if (fr != null) fr.Close();
- throw e;
- }
+     // the terminating token (EOF) is part of the output too
+     tokens.Add(current);
+     return true;
+ }
- if (textReader != null) textReader.Close();
- if (fr != null) fr.Close();
- return(true);
- }
+ /// <summary>
+ /// Make the given TextReader the tokenizer's input, then tokenize
+ /// everything it yields into the supplied ArrayList.
+ /// </summary>
+ /// <param name="tr">The TextReader to read from.</param>
+ /// <param name="tokens">The ArrayList to append to.</param>
+ /// <returns>bool - the result of Tokenize (true for success, false for failure).</returns>
+ public bool TokenizeReader(TextReader tr, ArrayList tokens)
+ {
+     // install the caller's reader as the current input source first
+     TextReader = tr;
+     bool succeeded = Tokenize(tokens);
+     return succeeded;
+ }
- ///
- /// Parse all tokens from the specified string, put
- /// them into the input ArrayList.
- ///
- ///
- /// The ArrayList to put tokens in.
- /// bool - true for success, false for failure.
- public bool TokenizeString(string str, ArrayList tokens)
- {
- textReader = new StringReader(str);
- return(Tokenize(tokens));
- }
+ /// <summary>
+ /// Parse all tokens from the specified file, put
+ /// them into the input ArrayList.
+ /// </summary>
+ /// <remarks>
+ /// The file is opened for exclusive read access. The reader and the
+ /// underlying stream are closed on every exit path, including when
+ /// tokenizing throws. Exceptions raised while tokenizing (for example
+ /// StreamTokenizerUntermException) propagate unchanged, with their
+ /// original stack trace.
+ /// </remarks>
+ /// <param name="fileName">The file to read.</param>
+ /// <param name="tokens">The ArrayList to put tokens in.</param>
+ /// <returns>bool - true for success, false for failure (including
+ /// when the file's directory does not exist).</returns>
+ public bool TokenizeFile(string fileName, ArrayList tokens)
+ {
+     FileInfo fi = new FileInfo(fileName);
+     FileStream fr;
+     try
+     {
+         fr = fi.Open(FileMode.Open, FileAccess.Read, FileShare.None);
+     }
+     catch (DirectoryNotFoundException)
+     {
+         // Nothing was opened, so there is nothing to tokenize or close.
+         // Falling through would tokenize whatever reader was installed
+         // earlier (or dereference null), so report failure instead.
+         log.Error("Unable to open file {0}", fileName);
+         return (false);
+     }
+     StreamReader reader = null;
+     try
+     {
+         reader = new StreamReader(fr);
+         TextReader = reader;
+         if (!Tokenize(tokens))
+         {
+             log.Error("Unable to parse tokens from file {0}", fileName);
+             return (false);
+         }
+         return (true);
+     }
- ///
- /// Parse all tokens from the specified Stream, put
- /// them into the input ArrayList.
- ///
- ///
- /// The ArrayList to put tokens in.
- /// bool - true for success, false for failure.
- public bool TokenizeStream(Stream s, ArrayList tokens)
- {
- textReader = new StreamReader(s);
- return(Tokenize(tokens));
- }
+     finally
+     {
+         // Runs on success, on failure and on any exception, so the
+         // exclusively-opened file is always released. No catch/rethrow
+         // is needed: the exception keeps its original stack trace.
+         if (reader != null)
+         {
+             reader.Close();
+         }
+         fr.Close();
+     }
+ }
- ///
- /// Tokenize a file completely and return the tokens in a Token[].
- ///
- /// The file to tokenize.
- /// A Token[] with all tokens.
- public Token[] TokenizeFile(string fileName)
- {
- ArrayList list = new ArrayList();
- if (!TokenizeFile(fileName, list))
- {
- return(null);
- }
- else
- {
- if (list.Count > 0)
- {
- return((Token[])list.ToArray(typeof(Token)));
- }
- else return(null);
- }
- }
- #endregion
- }
-}
+ /// <summary>
+ /// Tokenize the contents of a string: wrap it in a StringReader, make
+ /// that the tokenizer's input, and collect all tokens into the
+ /// supplied ArrayList.
+ /// </summary>
+ /// <param name="str">The text to tokenize.</param>
+ /// <param name="tokens">The ArrayList to put tokens in.</param>
+ /// <returns>bool - the result of Tokenize (true for success, false for failure).</returns>
+ public bool TokenizeString(string str, ArrayList tokens)
+ {
+     StringReader source = new StringReader(str);
+     TextReader = source;
+     return Tokenize(tokens);
+ }
+ /// <summary>
+ /// Tokenize the contents of a Stream: wrap it in a StreamReader, make
+ /// that the tokenizer's input, and collect all tokens into the
+ /// supplied ArrayList.
+ /// </summary>
+ /// <param name="s">The Stream to read from.</param>
+ /// <param name="tokens">The ArrayList to put tokens in.</param>
+ /// <returns>bool - the result of Tokenize (true for success, false for failure).</returns>
+ public bool TokenizeStream(Stream s, ArrayList tokens)
+ {
+     StreamReader source = new StreamReader(s);
+     TextReader = source;
+     bool succeeded = Tokenize(tokens);
+     return succeeded;
+ }
+ /// <summary>
+ /// Tokenize a file completely and return the tokens in a Token[].
+ /// </summary>
+ /// <param name="fileName">The file to tokenize.</param>
+ /// <returns>A Token[] with all tokens, or null when tokenizing
+ /// fails or produces no tokens.</returns>
+ public Token[] TokenizeFile(string fileName)
+ {
+     ArrayList collected = new ArrayList();
+     bool parsed = TokenizeFile(fileName, collected);
+     // both the failure case and the empty case map to null
+     if (!parsed || collected.Count == 0)
+     {
+         return null;
+     }
+     return (Token[]) collected.ToArray(typeof(Token));
+ }
+
+ #endregion
+ }
+}
\ No newline at end of file