Beispiel #1
0
		/// <summary>
		/// Build an Array of a particular type from a list of tokens.
		/// Each token is converted with Token.ConvertToType, so the Type
		/// must be one that can be built with Convert.ChangeType.
		/// Parsing stops at the first EofToken, at a token equal to
		/// <paramref name="endToken"/>, after <paramref name="maxLength"/>
		/// elements, or when the token list runs out - whichever comes first.
		/// WARNING: This will throw an exception if any tokens cannot be
		/// converted.
		/// </summary>
		/// <param name="tokens">The ArrayList of tokens.</param>
		/// <param name="i">The starting (and ending) index.  This is
		/// modified.  On return it is the index of the token that stopped
		/// the parse (the terminator, or the first token not consumed
		/// because maxLength was reached), or the index of the last token
		/// in the list if the list was exhausted.</param>
		/// <param name="type">The Type of the array elements.</param>
		/// <param name="endToken">An optional end Token to look for.
		/// Parsing stops when a token equal to this is found.
		/// If this is null, then it is not used.</param>
		/// <param name="maxLength">The maximum number of array elements
		/// to parse.  If this is negative, then it is not used.</param>
		/// <param name="log">A Logger to use for messages.</param>
		/// <returns>The Array, or null for error (start index at or past
		/// the end of the token list).</returns>
		public static Array BuildArray(ArrayList tokens, ref int i, Type type,
			Token endToken, int maxLength, Logger log)
		{
			int len = tokens.Count;
			if (i >= len)
			{
				log.Error("BuildArray: Input index too large.");
				return(null);
			}

			// put the objects into an array list first, since we don't
			// know length
			ArrayList list = new ArrayList();

			// allow null endToken specified
			if (endToken == null) endToken = new EofToken();

			// Look at each token BEFORE stepping past it.  The previous
			// read-ahead form tested the index after incrementing it, which
			// silently dropped the final token whenever the list ended
			// without an EofToken/endToken.
			int next = i; // index of the next token to examine
			while ((next < len)
				&& ((maxLength < 0) || (list.Count < maxLength)))
			{
				Token token = (Token)tokens[next];
				bool isElement = (!(token is EofToken)) && (token != endToken);
				if (!isElement) break;

				list.Add(token.ConvertToType(type));
				next++;
			}

			// never leave the caller's index past the end of the list
			i = (next < len) ? next : (len - 1);

			return(list.ToArray(type));
		}
Beispiel #2
0
        /// <summary>
        /// Build an Array of a particular type from a list of tokens.
        /// The Type must be one that can be built with Convert.ChangeType
        /// (each token is converted through Token.ConvertToType).
        /// Elements are collected until an EofToken is met, a token equal
        /// to <paramref name="endToken"/> is met, <paramref name="maxLength"/>
        /// elements have been collected, or there are no more tokens.
        /// WARNING: This will throw an exception if any tokens cannot be
        /// converted.
        /// </summary>
        /// <param name="tokens">The ArrayList of tokens.</param>
        /// <param name="i">The starting (and ending) index.  This is
        /// modified.  It is left on the token that ended the parse (the
        /// terminator, or the first token left unconsumed once maxLength
        /// was reached); if the list was used up it is left on the last
        /// token of the list.</param>
        /// <param name="type">The Type of the array elements.</param>
        /// <param name="endToken">An optional end Token to look for.
        /// Parsing stops when a token equal to this is found.
        /// If this is null, then it is not used.</param>
        /// <param name="maxLength">The maximum number of array elements
        /// to parse.  If this is negative, then it is not used.</param>
        /// <param name="log">A Logger to use for messages.</param>
        /// <returns>The Array, or null for error (the start index is not
        /// inside the token list).</returns>
        public static Array BuildArray(ArrayList tokens, ref int i, Type type,
                                       Token endToken, int maxLength, Logger log)
        {
            int len = tokens.Count;

            if (i >= len)
            {
                log.Error("BuildArray: Input index too large.");
                return(null);
            }

            // put the objects into an array list first, since we don't
            // know length
            ArrayList list = new ArrayList();

            // allow null endToken specified
            if (endToken == null)
            {
                endToken = new EofToken();
            }

            // Examine the token at 'cursor' and only then advance.  Reading
            // ahead and checking the index afterwards (the old form) lost
            // the last token of a list that had no terminating token.
            int cursor = i;

            while (cursor < len)
            {
                if ((maxLength >= 0) && (list.Count >= maxLength))
                {
                    break;      // collected as many elements as requested
                }

                Token token = (Token)tokens[cursor];

                if ((token is EofToken) || !(token != endToken))
                {
                    break;      // terminator: leave the index on it
                }

                list.Add(token.ConvertToType(type));
                cursor++;
            }

            // keep the caller's index inside the list when it was exhausted
            if (cursor >= len)
            {
                cursor = len - 1;
            }
            i = cursor;

            return(list.ToArray(type));
        }
		/// <summary>
		/// Get the next token.  The last token will be an EofToken unless
		/// there's an unterminated quote or unterminated block comment
		/// and Settings.DoUntermCheck is true, in which case this throws
		/// an exception of type StreamTokenizerUntermException or sub-class.
		/// </summary>
		/// <remarks>
		/// The character that terminates a token is appended to nextTokenSb
		/// before returning, so the following call starts from that character
		/// instead of reading a fresh one.
		/// </remarks>
		/// <param name="token">The output token.  It is an EofToken when the
		/// end of input is reached, and null when an unrepresented state is
		/// hit.</param>
		/// <returns>bool - true when a token other than EofToken was produced;
		/// false when the EofToken was produced or the state machine hit an
		/// unrepresented state.</returns>
		/// <exception cref="StreamTokenizerUntermQuoteException">End of input
		/// inside a quote while settings.DoUntermCheck is true.</exception>
		/// <exception cref="StreamTokenizerUntermCommentException">End of input
		/// inside a block comment while settings.DoUntermCheck is true.</exception>
		public bool NextToken(out Token token)
		{
			token = null;
			int thisChar = 0; // current character
			byte ctype; // type of this character

			NextTokenState state = NextTokenState.Start;
			int prevChar = 0; // previous character
			byte prevCtype = (byte)CharTypeBits.Eof;

			// get previous char from nextTokenSb if there
			// (nextTokenSb is a StringBuilder containing the characters
			//  of the next token to be emitted)
			if (nextTokenSb.Length > 0) 
			{
				prevChar = nextTokenSb[nextTokenSb.Length - 1];

				// Apply the same rule as the main loop below: a character that
				// lies beyond the CharTypes table is treated as a word character.
				// Indexing the table directly here would go out of range when the
				// character left over from the previous call is outside the table.
				if (prevChar >= settings.CharTypes.Length)
				{
					prevCtype = (byte)CharTypeBits.Word;
				}
				else prevCtype = settings.CharTypes[prevChar];

				state = PickNextState(prevCtype, prevChar);
			}

			// extra state for number parse
			int seenDot = 0; // how many .'s in the number
			int seenE = 0; // how many e's or E's have we seen in the number
			bool seenDigit = false; // seen any digits (numbers can start with -)

			// lineNumber can change with each GetNextChar()
			// tokenLineNumber is the line on which the token started
			int tokenLineNumber = lineNumber;

			// State Machine: Produces a single token.
			// Enter a state based on a single character.
			// Generally, being in a state means we're currently collecting chars 
			// in that type of token.
			// We do state machine until it builds a token (Eof is a token), then
			// return that token.
			thisChar = prevChar;  // for first iteration, since prevChar is set to this 
			bool done = false; // optimization
			while (!done)
			{
				prevChar = thisChar;
				thisChar = GetNextChar();
				if (thisChar >= settings.CharTypes.Length)
				{
					// greater than 7-bit ascii, treat as word character
					ctype = (byte)CharTypeBits.Word;
				}
				else ctype = settings.CharTypes[thisChar];

				#if DEBUG
				log.Debug("Before switch: state = {0}, thisChar = '{1}'", state, (char)thisChar);
				#endif

				// see if we need to change states, or emit a token
				switch(state)
				{
					case NextTokenState.Start:
						// RESET
						state = PickNextState(ctype, thisChar);
						tokenLineNumber = lineNumber;
						break;

					case NextTokenState.Char:
						// single-character token: the character itself was read on
						// the previous step, so it is prevChar here
						token = new CharToken((char)prevChar, tokenLineNumber);
						done = true;
						nextTokenSb.Length = 0;
						break;

					case NextTokenState.Word:
						if ((!settings.IsCharType(ctype, CharTypeBits.Word))
							&& (!settings.IsCharType(ctype, CharTypeBits.Digit)))
						{
							// end of word, emit
							token = new WordToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						break;

					case NextTokenState.Whitespace:
						// a newline (char 10) also ends the whitespace run when
						// settings.GrabEol is set
						if (!settings.IsCharType(ctype, CharTypeBits.Whitespace)
							|| (settings.GrabEol && (thisChar == 10)))
						{
							// end of whitespace, emit
							if (settings.GrabWhitespace)
							{
								token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// RESET
								nextTokenSb.Length = 0;
								tokenLineNumber = lineNumber;
								state = PickNextState(ctype, thisChar);
							}
						}
						break;

					case NextTokenState.EndQuote:
						// we're now 1 char after end of quote
						token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
						done = true;
						nextTokenSb.Length = 0;
						break;

					case NextTokenState.Quote:
						// looking for end quote matching char that started the quote
						if (thisChar == nextTokenSb[0])
						{
							// handle escaped backslashes: count the immediately prior backslashes 
							// - even (including 0) means it's not escaped 
							// - odd means it is escaped 
							int backSlashCount = 0; 
							for (int i = nextTokenSb.Length - 1; i >= 0; i--)
							{ 
								if (nextTokenSb[ i ] == '\\') backSlashCount++; 
								else break; 
							} 

							if ((backSlashCount % 2) == 0) 
							{ 
								state = NextTokenState.EndQuote;
							}
						}

						if ((state != NextTokenState.EndQuote) && (thisChar == Eof))
						{
							// input ended inside the quote
							if (settings.DoUntermCheck) 
							{
								nextTokenSb.Length = 0;
								throw new StreamTokenizerUntermQuoteException("Unterminated quote");
							}

							// no unterminated check: emit what was collected
							token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						break;

					case NextTokenState.MaybeComment:
						if (thisChar == Eof)
						{
							// input ended before a comment could start: emit the
							// collected text as a CharToken
							token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						else
						{
							// if we get the right char, we're in a comment
							if (settings.SlashSlashComments && (thisChar == '/')) 
								state = NextTokenState.LineComment;
							else if (settings.SlashStarComments && (thisChar == '*')) 
								state = NextTokenState.BlockComment;
							else
							{
								// not a comment after all
								token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
						}
						break;

					case NextTokenState.LineComment:
						// a line comment ends at end of input or at '\n'; it is
						// emitted only when settings.GrabComments is set, otherwise
						// it is discarded and the machine restarts on thisChar
						if (thisChar == Eof)
						{
							if (settings.GrabComments) 
							{
								token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// RESET
								nextTokenSb.Length = 0;
								tokenLineNumber = lineNumber;
								state = PickNextState(ctype, thisChar);
							}
						}
						else
						{
							if (thisChar == '\n')
							{
								if (settings.GrabComments) 
								{
									token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
									done = true;
									nextTokenSb.Length = 0;
								}
								else
								{
									// RESET
									nextTokenSb.Length = 0;
									tokenLineNumber = lineNumber;
									state = PickNextState(ctype, thisChar);
								}
							}
						}
						break;

					case NextTokenState.BlockComment:
						if (thisChar == Eof)
						{
							// input ended inside the block comment
							if (settings.DoUntermCheck) 
							{
								nextTokenSb.Length = 0;
								throw new StreamTokenizerUntermCommentException("Unterminated comment.");
							}

							if (settings.GrabComments) 
							{
								token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// RESET
								nextTokenSb.Length = 0;
								tokenLineNumber = lineNumber;
								state = PickNextState(ctype, thisChar);
							}
						}
						else
						{
							// "*/" closes the comment; the closing '/' is still
							// appended below, the token is emitted on the next step
							if ((thisChar == '/') && (prevChar == '*'))
							{
								state = NextTokenState.EndBlockComment;
							}
						}
						break;

					// special case for 2-character token termination
					case NextTokenState.EndBlockComment:
						if (settings.GrabComments) 
						{
							token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						else
						{
							// RESET
							nextTokenSb.Length = 0;
							tokenLineNumber = lineNumber;
							state = PickNextState(ctype, thisChar);
						}
						break;

					case NextTokenState.MaybeHex:
						// previous char was 0
						if (thisChar != 'x')
						{
							// back up and try non-hex
							// back up to the 0
							nextTokenSb.Append((char)thisChar);
							backString.Append(nextTokenSb);
							nextTokenSb.Length = 0;

							// reset state and don't choose MaybeNumber state.
							// pull char from backString
							thisChar = backString[0];
							backString.Remove(0, 1);
							state = PickNextState(settings.CharTypes[thisChar], (int)thisChar, 
								NextTokenState.MaybeHex);
							#if DEBUG
							log.Debug("MaybeHex: Next state on '{0}' is {1}", (char)thisChar,
								state);
							#endif
						}
						else state = NextTokenState.HexGot0x;
						break;

					case NextTokenState.HexGot0x:
						if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
						{
							// got 0x but now a non-hex char
							// back up to the 0
							nextTokenSb.Append((char)thisChar);
							backString.Append(nextTokenSb);
							nextTokenSb.Length = 0;

							// reset state and don't choose MaybeNumber state.
							// pull char from backString
							thisChar = backString[0];
							backString.Remove(0, 1);
							state = PickNextState(settings.CharTypes[thisChar], (int)thisChar, 
								NextTokenState.MaybeHex);
							#if DEBUG
							log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar,
								state);
							#endif
						}
						else state = NextTokenState.HexNumber;
						break;

					case NextTokenState.HexNumber:
						if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
						{
							// emit the hex number we've collected
							#if DEBUG
							log.Debug("Emit hex IntToken from string '{0}'", nextTokenSb);
							#endif
							token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						break;

					case NextTokenState.MaybeNumber:
						//
						// Determine whether or not to stop collecting characters for
						// the number parse.  We terminate when it's clear it's not
						// a number or no longer a number.
						//
						bool term = false;

						if (settings.IsCharType(ctype, CharTypeBits.Digit)  
							|| settings.IsCharType(prevChar, CharTypeBits.Digit)) seenDigit = true;

						// term conditions
						if (thisChar == '.') 
						{ 
							seenDot++; 
							if (seenDot > 1) term = true;  // more than one dot, it aint a number
						}
						else if (((thisChar == 'e') || (thisChar == 'E')))
						{
							seenE++;
							if (!seenDigit) term = true;  // e before any digits is bad
							else if (seenE > 1) term = true;  // more than 1 e is bad
							else
							{
								term = true; // done regardless

								// scan the exponent, put its characters into
								// nextTokenSb, if there are any
								char c;
								expSb.Clear();
								expSb.Append((char)thisChar);
								if (GrabInt(expSb, true, out c))
								{
									// we got a good exponent, tack it on
									nextTokenSb.Append(expSb);
									thisChar = c; // and continue after the exponent's characters
								}
							}
						}
						else if (thisChar == Eof) term = true;  
							// or a char that can't be in a number
						else if ((!settings.IsCharType(ctype, CharTypeBits.Digit) 
							&& (thisChar != 'e') && (thisChar != 'E') 
							&& (thisChar != '-') && (thisChar != '.')) 
							|| ((thisChar == '+') && (seenE == 0)))
						{
							// it's not a normal number character
							term = true;
						}
						// or a dash not after e
						else if ((thisChar == '-') && (!((prevChar == 'e') || (prevChar == 'E')))) term = true;

						if (term)
						{
							// we are terminating a number, or it wasn't a number
							if (seenDigit)
							{
								// a dot, an exponent, or 19+ characters selects FloatToken
								if ((nextTokenSb.IndexOf('.') >= 0)
									|| (nextTokenSb.IndexOf('e') >= 0)
									|| (nextTokenSb.IndexOf('E') >= 0)
									|| (nextTokenSb.Length >= 19) // probably too large for Int64, use float
									)
								{
									token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber);
									#if DEBUG
									log.Debug("Emit FloatToken from string '{0}'", nextTokenSb);
									#endif
								}
								else 
								{
									#if DEBUG
									log.Debug("Emit IntToken from string '{0}'", nextTokenSb);
									#endif
									token = new IntToken(nextTokenSb.ToString(), tokenLineNumber);
								}
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// -whatever or -.whatever
								// didn't see any digits, must have gotten here by a leading -
								// and no digits after it
								// back up to -, pick next state excluding numbers
								nextTokenSb.Append((char)thisChar);
								backString.Append(nextTokenSb);
								nextTokenSb.Length = 0;

								// restart on the - and don't choose MaybeNumber state
								// pull char from backString
								thisChar = backString[0];
								backString.Remove(0, 1);
								state = PickNextState(settings.CharTypes[thisChar], (int)thisChar, 
									NextTokenState.MaybeNumber);
								#if DEBUG
								log.Debug("MaybeNumber: Next state on '{0}' is {1}", (char)thisChar,
									state);
								#endif
							}
						}
						break;

					case NextTokenState.Eol:
						// tokenLineNumber - 1 because the newline char is on the previous line
						token = new EolToken(tokenLineNumber - 1);
						done = true;
						nextTokenSb.Length = 0;
						break;

					case NextTokenState.Eof:
						// end of input: hand back an EofToken but report false
						token = new EofToken(tokenLineNumber);
						done = true;
						nextTokenSb.Length = 0;
						return(false);

					case NextTokenState.Invalid:
					default:
						// not a good sign, some unrepresented state?
						log.Error("NextToken: Hit unrepresented state {0}", state);
						return(false);
				}

				// use a StringBuilder to accumulate characters which are part of this token
				// (when a token was just emitted, this stores the terminating
				//  character as the seed for the next call)
				if (thisChar != Eof) nextTokenSb.Append((char)thisChar);
				#if DEBUG
				log.Debug("After switch: state = {0}, nextTokenSb = '{1}', backString = '{2}'", 
					state, nextTokenSb, backString);
				#endif
			}

			#if DEBUG
			log.Debug("Got token {0}", token.ToDebugString());
			#endif
			return(true);
		}