コード例 #1
0
ファイル: Token.cs プロジェクト: pilgrimzh/NetTopologySuite
        /// <summary>
        /// Parse a string known to be a hex string.  This is faster
        /// than Parse which doesn't assume the number is Hex.  The value is
        /// converted as a 32-bit integer first; if it does not fit, the
        /// conversion is retried as a 64-bit integer.
        /// </summary>
        /// <param name="s">The hex number as a string.</param>
        /// <param name="lineNumber">The line where this token was found.</param>
        /// <returns>A new IntToken set to the value in the input string.</returns>
        /// <exception cref="FormatException">
        /// The input contains characters that are not valid hex digits.
        /// </exception>
        /// <exception cref="OverflowException">
        /// The value does not fit in a 64-bit integer either.
        /// </exception>
        public static IntToken ParseHex(string s, int lineNumber)
        {
            try
            {
                return new IntToken(Convert.ToInt32(s, 16), lineNumber);
            }
            catch (OverflowException)
            {
                // Only an overflow means "valid hex, but too wide for 32 bits".
                // Any other failure (bad digits, empty string) would fail the
                // 64-bit conversion in the same way, so it is left to propagate
                // instead of being swallowed and retried.
                return new IntToken(Convert.ToInt64(s, 16), lineNumber);
            }
        }
コード例 #2
0
ファイル: Token.cs プロジェクト: ExRam/DotSpatial-PCL
		/// <summary>
		/// Parse a string known to be a hex string.  This is faster
		/// than Parse which doesn't assume the number is Hex.  The value is
		/// converted as a 32-bit integer first; if it does not fit, the
		/// conversion is retried as a 64-bit integer.
		/// </summary>
		/// <param name="s">The hex number as a string.</param>
		/// <param name="lineNumber">The line where this token was found.</param>
		/// <returns>A new IntToken set to the value in the input string.</returns>
		/// <exception cref="FormatException">
		/// The input contains characters that are not valid hex digits.
		/// </exception>
		/// <exception cref="OverflowException">
		/// The value does not fit in a 64-bit integer either.
		/// </exception>
		public static IntToken ParseHex(string s, int lineNumber)
		{
			try
			{
				return new IntToken(Convert.ToInt32(s, 16), lineNumber);
			}
			catch (OverflowException)
			{
				// Only an overflow means "valid hex, but too wide for 32 bits".
				// Any other failure (bad digits, empty string) would fail the
				// 64-bit conversion in the same way, so it is left to propagate
				// instead of being swallowed and retried.
				return new IntToken(Convert.ToInt64(s, 16), lineNumber);
			}
		}
コード例 #3
0
		/// <summary>
		/// Get the next token.  The last token will be an EofToken unless
		/// there's an unterminated quote or unterminated block comment
		/// and Settings.DoUntermCheck is true, in which case this throws
		/// an exception of type StreamTokenizerUntermException or sub-class.
		/// </summary>
		/// <remarks>
		/// The character that terminates one token is left in nextTokenSb and
		/// becomes the first character of the token produced by the next call.
		/// </remarks>
		/// <param name="token">
		/// The output token.  Left null only when an unrepresented state is hit.
		/// </param>
		/// <returns>
		/// true when a token other than the EofToken was produced; false when
		/// the EofToken was produced (it is still returned through
		/// <paramref name="token"/>) or when the state machine reached an
		/// unrepresented state.
		/// </returns>
		/// <exception cref="StreamTokenizerUntermQuoteException">
		/// End of input was reached inside a quote and settings.DoUntermCheck is true.
		/// </exception>
		/// <exception cref="StreamTokenizerUntermCommentException">
		/// End of input was reached inside a block comment and settings.DoUntermCheck is true.
		/// </exception>
		public bool NextToken(out Token token)
		{
			token = null;
			int thisChar = 0; // current character
			byte ctype; // type of this character

			NextTokenState state = NextTokenState.Start;
			int prevChar = 0; // previous character
			byte prevCtype = (byte)CharTypeBits.Eof;

			// get previous char from nextTokenSb if there
			// (nextTokenSb is a StringBuilder containing the characters
			//  of the next token to be emitted)
			if (nextTokenSb.Length > 0)
			{
				prevChar = nextTokenSb[nextTokenSb.Length - 1];

				// The carried-over character may lie outside the character-type
				// table (it was classified as a word character by the range
				// check in the main loop below).  Apply the same check here
				// rather than indexing past the end of the table.
				if (prevChar >= settings.CharTypes.Length)
				{
					prevCtype = (byte)CharTypeBits.Word;
				}
				else prevCtype = settings.CharTypes[prevChar];

				state = PickNextState(prevCtype, prevChar);
			}

			// extra state for number parse
			int seenDot = 0; // how many .'s in the number
			int seenE = 0; // how many e's or E's have we seen in the number
			bool seenDigit = false; // seen any digits (numbers can start with -)

			// lineNumber can change with each GetNextChar()
			// tokenLineNumber is the line on which the token started
			int tokenLineNumber = lineNumber;

			// State Machine: Produces a single token.
			// Enter a state based on a single character.
			// Generally, being in a state means we're currently collecting chars
			// in that type of token.
			// We do state machine until it builds a token (Eof is a token), then
			// return that token.
			thisChar = prevChar;  // for first iteration, since prevChar is set to this
			bool done = false; // optimization
			while (!done)
			{
				prevChar = thisChar;
				thisChar = GetNextChar();
				if (thisChar >= settings.CharTypes.Length)
				{
					// greater than 7-bit ascii, treat as word character
					ctype = (byte)CharTypeBits.Word;
				}
				else ctype = settings.CharTypes[thisChar];

				#if DEBUG
				log.Debug("Before switch: state = {0}, thisChar = '{1}'", state, (char)thisChar);
				#endif

				// see if we need to change states, or emit a token
				switch(state)
				{
					case NextTokenState.Start:
						// RESET
						state = PickNextState(ctype, thisChar);
						tokenLineNumber = lineNumber;
						break;

					case NextTokenState.Char:
						// single-character token: emit the character read on
						// the previous iteration
						token = new CharToken((char)prevChar, tokenLineNumber);
						done = true;
						nextTokenSb.Length = 0;
						break;

					case NextTokenState.Word:
						if ((!settings.IsCharType(ctype, CharTypeBits.Word))
							&& (!settings.IsCharType(ctype, CharTypeBits.Digit)))
						{
							// end of word, emit
							token = new WordToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						break;

					case NextTokenState.Whitespace:
						if (!settings.IsCharType(ctype, CharTypeBits.Whitespace)
							|| (settings.GrabEol && (thisChar == 10)))
						{
							// end of whitespace, emit
							if (settings.GrabWhitespace)
							{
								token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// RESET
								nextTokenSb.Length = 0;
								tokenLineNumber = lineNumber;
								state = PickNextState(ctype, thisChar);
							}
						}
						break;

					case NextTokenState.EndQuote:
						// we're now 1 char after end of quote
						token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
						done = true;
						nextTokenSb.Length = 0;
						break;

					case NextTokenState.Quote:
						// looking for end quote matching char that started the quote
						if (thisChar == nextTokenSb[0])
						{
							// handle escaped backslashes: count the immediately prior backslashes
							// - even (including 0) means it's not escaped
							// - odd means it is escaped
							int backSlashCount = 0;
							for (int i = nextTokenSb.Length - 1; i >= 0; i--)
							{
								if (nextTokenSb[ i ] == '\\') backSlashCount++;
								else break;
							}

							if ((backSlashCount % 2) == 0)
							{
								state = NextTokenState.EndQuote;
							}
						}

						if ((state != NextTokenState.EndQuote) && (thisChar == Eof))
						{
							if (settings.DoUntermCheck)
							{
								nextTokenSb.Length = 0;
								throw new StreamTokenizerUntermQuoteException("Unterminated quote");
							}

							// unterminated quote tolerated: emit what was collected
							token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						break;

					case NextTokenState.MaybeComment:
						if (thisChar == Eof)
						{
							token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						else
						{
							// if we get the right char, we're in a comment
							if (settings.SlashSlashComments && (thisChar == '/'))
								state = NextTokenState.LineComment;
							else if (settings.SlashStarComments && (thisChar == '*'))
								state = NextTokenState.BlockComment;
							else
							{
								// not a comment after all, emit the collected text as a CharToken
								token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
						}
						break;

					case NextTokenState.LineComment:
						if (thisChar == Eof)
						{
							if (settings.GrabComments)
							{
								token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// RESET
								nextTokenSb.Length = 0;
								tokenLineNumber = lineNumber;
								state = PickNextState(ctype, thisChar);
							}
						}
						else
						{
							// a line comment ends at the newline
							if (thisChar == '\n')
							{
								if (settings.GrabComments)
								{
									token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
									done = true;
									nextTokenSb.Length = 0;
								}
								else
								{
									// RESET
									nextTokenSb.Length = 0;
									tokenLineNumber = lineNumber;
									state = PickNextState(ctype, thisChar);
								}
							}
						}
						break;

					case NextTokenState.BlockComment:
						if (thisChar == Eof)
						{
							if (settings.DoUntermCheck)
							{
								nextTokenSb.Length = 0;
								throw new StreamTokenizerUntermCommentException("Unterminated comment.");
							}

							if (settings.GrabComments)
							{
								token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// RESET
								nextTokenSb.Length = 0;
								tokenLineNumber = lineNumber;
								state = PickNextState(ctype, thisChar);
							}
						}
						else
						{
							// "*/" closes the block comment
							if ((thisChar == '/') && (prevChar == '*'))
							{
								state = NextTokenState.EndBlockComment;
							}
						}
						break;

					// special case for 2-character token termination
					case NextTokenState.EndBlockComment:
						if (settings.GrabComments)
						{
							token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						else
						{
							// RESET
							nextTokenSb.Length = 0;
							tokenLineNumber = lineNumber;
							state = PickNextState(ctype, thisChar);
						}
						break;

					case NextTokenState.MaybeHex:
						// previous char was 0
						if (thisChar != 'x')
						{
							// back up and try non-hex
							// back up to the 0
							nextTokenSb.Append((char)thisChar);
							backString.Append(nextTokenSb);
							nextTokenSb.Length = 0;

							// reset state and don't choose MaybeNumber state.
							// pull char from backString
							thisChar = backString[0];
							backString.Remove(0, 1);
							state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
								NextTokenState.MaybeHex);
							#if DEBUG
							log.Debug("MaybeHex: Next state on '{0}' is {1}", (char)thisChar,
								state);
							#endif
						}
						else state = NextTokenState.HexGot0x;
						break;

					case NextTokenState.HexGot0x:
						if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
						{
							// got 0x but now a non-hex char
							// back up to the 0
							nextTokenSb.Append((char)thisChar);
							backString.Append(nextTokenSb);
							nextTokenSb.Length = 0;

							// reset state and don't choose MaybeNumber state.
							// pull char from backString
							thisChar = backString[0];
							backString.Remove(0, 1);
							state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
								NextTokenState.MaybeHex);
							#if DEBUG
							log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar,
								state);
							#endif
						}
						else state = NextTokenState.HexNumber;
						break;

					case NextTokenState.HexNumber:
						if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
						{
							// emit the hex number we've collected
							#if DEBUG
							log.Debug("Emit hex IntToken from string '{0}'", nextTokenSb);
							#endif
							token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber);
							done = true;
							nextTokenSb.Length = 0;
						}
						break;

					case NextTokenState.MaybeNumber:
						//
						// Determine whether or not to stop collecting characters for
						// the number parse.  We terminate when it's clear it's not
						// a number or no longer a number.
						//
						bool term = false;

						if (settings.IsCharType(ctype, CharTypeBits.Digit)
							|| settings.IsCharType(prevChar, CharTypeBits.Digit)) seenDigit = true;

						// term conditions
						if (thisChar == '.')
						{
							seenDot++;
							if (seenDot > 1) term = true;  // more than one dot, it aint a number
						}
						else if (((thisChar == 'e') || (thisChar == 'E')))
						{
							seenE++;
							if (!seenDigit) term = true;  // e before any digits is bad
							else if (seenE > 1) term = true;  // more than 1 e is bad
							else
							{
								term = true; // done regardless

								// scan the exponent, put its characters into
								// nextTokenSb, if there are any
								char c;
								expSb.Clear();
								expSb.Append((char)thisChar);
								if (GrabInt(expSb, true, out c))
								{
									// we got a good exponent, tack it on
									nextTokenSb.Append(expSb);
									thisChar = c; // and continue after the exponent's characters
								}
							}
						}
						else if (thisChar == Eof) term = true;
							// or a char that can't be in a number
						else if ((!settings.IsCharType(ctype, CharTypeBits.Digit)
							&& (thisChar != 'e') && (thisChar != 'E')
							&& (thisChar != '-') && (thisChar != '.'))
							|| ((thisChar == '+') && (seenE == 0)))
						{
							// it's not a normal number character
							term = true;
						}
						// or a dash not after e
						else if ((thisChar == '-') && (!((prevChar == 'e') || (prevChar == 'E')))) term = true;

						if (term)
						{
							// we are terminating a number, or it wasn't a number
							if (seenDigit)
							{
								if ((nextTokenSb.IndexOf('.') >= 0)
									|| (nextTokenSb.IndexOf('e') >= 0)
									|| (nextTokenSb.IndexOf('E') >= 0)
									|| (nextTokenSb.Length >= 19) // probably too large for Int64, use float
									)
								{
									token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber);
									#if DEBUG
									log.Debug("Emit FloatToken from string '{0}'", nextTokenSb);
									#endif
								}
								else
								{
									#if DEBUG
									log.Debug("Emit IntToken from string '{0}'", nextTokenSb);
									#endif
									token = new IntToken(nextTokenSb.ToString(), tokenLineNumber);
								}
								done = true;
								nextTokenSb.Length = 0;
							}
							else
							{
								// -whatever or -.whatever
								// didn't see any digits, must have gotten here by a leading -
								// and no digits after it
								// back up to -, pick next state excluding numbers
								nextTokenSb.Append((char)thisChar);
								backString.Append(nextTokenSb);
								nextTokenSb.Length = 0;

								// restart on the - and don't choose MaybeNumber state
								// pull char from backString
								thisChar = backString[0];
								backString.Remove(0, 1);
								state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
									NextTokenState.MaybeNumber);
								#if DEBUG
								log.Debug("MaybeNumber: Next state on '{0}' is {1}", (char)thisChar,
									state);
								#endif
							}
						}
						break;

					case NextTokenState.Eol:
						// tokenLineNumber - 1 because the newline char is on the previous line
						token = new EolToken(tokenLineNumber - 1);
						done = true;
						nextTokenSb.Length = 0;
						break;

					case NextTokenState.Eof:
						// the EofToken is handed back through the out parameter,
						// but the method reports false for it
						token = new EofToken(tokenLineNumber);
						done = true;
						nextTokenSb.Length = 0;
						return(false);

					case NextTokenState.Invalid:
					default:
						// not a good sign, some unrepresented state?
						log.Error("NextToken: Hit unrepresented state {0}", state);
						return(false);
				}

				// use a StringBuilder to accumulate characters which are part of this token
				// (when a token was just emitted, this character starts the next one)
				if (thisChar != Eof) nextTokenSb.Append((char)thisChar);
				#if DEBUG
				log.Debug("After switch: state = {0}, nextTokenSb = '{1}', backString = '{2}'",
					state, nextTokenSb, backString);
				#endif
			}

			#if DEBUG
			log.Debug("Got token {0}", token.ToDebugString());
			#endif
			return(true);
		}