/// <summary>
/// Build an Array of a particular type from a list of tokens.
/// Each token is converted with <c>Token.ConvertToType(type)</c>.
/// Parsing stops at the first of: an EofToken, a token equal to
/// <paramref name="endToken"/>, <paramref name="maxLength"/> elements
/// collected, or the end of the token list.
/// WARNING: any exception raised by a token's conversion is not caught
/// here and propagates to the caller.
/// </summary>
/// <param name="tokens">The ArrayList of tokens.</param>
/// <param name="i">The starting index. On return it is the index of the
/// first token that was NOT consumed (the end token, the EofToken, or the
/// token following the last converted one); this equals
/// <c>tokens.Count</c> when every remaining token was consumed.</param>
/// <param name="type">The Type of the array elements.</param>
/// <param name="endToken">An optional end Token to look for.
/// Parsing stops when a token equal to this is found.
/// If this is null, then it is not used.</param>
/// <param name="maxLength">The maximum number of array elements
/// to parse. If this is negative, then it is not used.</param>
/// <param name="log">A Logger to use for messages.</param>
/// <returns>The Array, or null if the starting index is already at or
/// past the end of the token list.</returns>
public static Array BuildArray(ArrayList tokens, ref int i, Type type,
    Token endToken, int maxLength, Logger log)
{
    int len = tokens.Count;
    if (i >= len)
    {
        log.Error("BuildArray: Input index too large.");
        return(null);
    }

    // put the objects into an array list first, since we don't
    // know length
    ArrayList list = new ArrayList();

    // allow null endToken specified
    if (endToken == null) endToken = new EofToken();

    int arrayLength = 0;

    // Look at the token BEFORE advancing the index. The previous version
    // advanced first and then tested (i < len), which silently dropped the
    // final token whenever the list did not end with an EofToken or the
    // end token.
    while ((i < len) && ((maxLength < 0) || (arrayLength < maxLength)))
    {
        Token token = (Token)tokens[i];
        bool isElement = (!(token is EofToken)) && (token != endToken);
        if (!isElement) break;

        Object o = token.ConvertToType(type);
        list.Add(o);
        arrayLength++;
        i++;
    }

    return(list.ToArray(type));
}
/// <summary>
/// Tells whether the given token marks the end of the token stream.
/// </summary>
/// <param name="token">The token to inspect.</param>
/// <returns>true when the token is an EofToken, false otherwise
/// (including when it is null).</returns>
private static bool IsAtEndOfTokens(Token token)
{
    if (token is EofToken)
    {
        return true;
    }

    return false;
}
/// <summary>
/// Get the next token. The last token will be an EofToken unless
/// there's an unterminated quote or unterminated block comment
/// and Settings.DoUntermCheck is true, in which case this throws
/// an exception of type StreamTokenizerUntermException or sub-class.
/// </summary>
/// <param name="token">The output token. Still null if the state machine
/// hits an unrepresented state.</param>
/// <returns>bool - true when a token other than the EofToken was produced.
/// false when the EofToken was produced (token is then that EofToken) or
/// when an unrepresented state was hit.</returns>
/// <exception cref="StreamTokenizerUntermQuoteException">End of input inside
/// a quote while settings.DoUntermCheck is true.</exception>
/// <exception cref="StreamTokenizerUntermCommentException">End of input inside
/// a block comment while settings.DoUntermCheck is true.</exception>
public bool NextToken(out Token token)
{
    token = null;
    int thisChar = 0; // current character
    byte ctype; // type of this character
    NextTokenState state = NextTokenState.Start;
    int prevChar = 0; // previous character
    byte prevCtype = (byte)CharTypeBits.Eof;

    // get previous char from nextTokenSb if there
    // (nextTokenSb holds the characters of the next token to be emitted)
    if (nextTokenSb.Length > 0)
    {
        prevChar = nextTokenSb[nextTokenSb.Length - 1];

        // Classify the buffered character with the same rule the main loop
        // uses below: anything beyond the CharTypes table is a word
        // character. Indexing the table directly here could run past its
        // end when the previous call left such a character in the buffer.
        if (prevChar >= settings.CharTypes.Length)
        {
            prevCtype = (byte)CharTypeBits.Word;
        }
        else prevCtype = settings.CharTypes[prevChar];

        state = PickNextState(prevCtype, prevChar);
    }

    // extra state for number parse
    int seenDot = 0; // how many .'s in the number
    int seenE = 0; // how many e's or E's have we seen in the number
    bool seenDigit = false; // seen any digits (numbers can start with -)

    // lineNumber can change with each GetNextChar()
    // tokenLineNumber is the line on which the token started
    int tokenLineNumber = lineNumber;

    // State Machine: Produces a single token.
    // Enter a state based on a single character.
    // Generally, being in a state means we're currently collecting chars
    // in that type of token.
    // We do state machine until it builds a token (Eof is a token), then
    // return that token.
    thisChar = prevChar; // for first iteration, since prevChar is set to this
    bool done = false; // optimization
    while (!done)
    {
        prevChar = thisChar;
        thisChar = GetNextChar();
        if (thisChar >= settings.CharTypes.Length)
        {
            // beyond the CharTypes table, treat as word character
            ctype = (byte)CharTypeBits.Word;
        }
        else ctype = settings.CharTypes[thisChar];

#if DEBUG
        log.Debug("Before switch: state = {0}, thisChar = '{1}'", state, (char)thisChar);
#endif

        // see if we need to change states, or emit a token
        switch(state)
        {
            case NextTokenState.Start:
                // RESET
                state = PickNextState(ctype, thisChar);
                tokenLineNumber = lineNumber;
                break;

            case NextTokenState.Char:
                token = new CharToken((char)prevChar, tokenLineNumber);
                done = true;
                nextTokenSb.Length = 0;
                break;

            case NextTokenState.Word:
                if ((!settings.IsCharType(ctype, CharTypeBits.Word))
                    && (!settings.IsCharType(ctype, CharTypeBits.Digit)))
                {
                    // end of word, emit
                    token = new WordToken(nextTokenSb.ToString(), tokenLineNumber);
                    done = true;
                    nextTokenSb.Length = 0;
                }
                break;

            case NextTokenState.Whitespace:
                if (!settings.IsCharType(ctype, CharTypeBits.Whitespace)
                    || (settings.GrabEol && (thisChar == 10)))
                {
                    // end of whitespace, emit
                    if (settings.GrabWhitespace)
                    {
                        token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber);
                        done = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // RESET
                        nextTokenSb.Length = 0;
                        tokenLineNumber = lineNumber;
                        state = PickNextState(ctype, thisChar);
                    }
                }
                break;

            case NextTokenState.EndQuote:
                // we're now 1 char after end of quote
                token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
                done = true;
                nextTokenSb.Length = 0;
                break;

            case NextTokenState.Quote:
                // looking for end quote matching char that started the quote
                if (thisChar == nextTokenSb[0])
                {
                    // handle escaped backslashes: count the immediately prior backslashes
                    // - even (including 0) means it's not escaped
                    // - odd means it is escaped
                    int backSlashCount = 0;
                    for (int i = nextTokenSb.Length - 1; i >= 0; i--)
                    {
                        if (nextTokenSb[ i ] == '\\') backSlashCount++;
                        else break;
                    }

                    if ((backSlashCount % 2) == 0)
                    {
                        state = NextTokenState.EndQuote;
                    }
                }

                if ((state != NextTokenState.EndQuote) && (thisChar == Eof))
                {
                    if (settings.DoUntermCheck)
                    {
                        nextTokenSb.Length = 0;
                        throw new StreamTokenizerUntermQuoteException("Unterminated quote");
                    }

                    token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
                    done = true;
                    nextTokenSb.Length = 0;
                }
                break;

            case NextTokenState.MaybeComment:
                if (thisChar == Eof)
                {
                    token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
                    done = true;
                    nextTokenSb.Length = 0;
                }
                else
                {
                    // if we get the right char, we're in a comment
                    if (settings.SlashSlashComments && (thisChar == '/'))
                        state = NextTokenState.LineComment;
                    else if (settings.SlashStarComments && (thisChar == '*'))
                        state = NextTokenState.BlockComment;
                    else
                    {
                        token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
                        done = true;
                        nextTokenSb.Length = 0;
                    }
                }
                break;

            case NextTokenState.LineComment:
                if (thisChar == Eof)
                {
                    if (settings.GrabComments)
                    {
                        token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                        done = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // RESET
                        nextTokenSb.Length = 0;
                        tokenLineNumber = lineNumber;
                        state = PickNextState(ctype, thisChar);
                    }
                }
                else
                {
                    if (thisChar == '\n')
                    {
                        if (settings.GrabComments)
                        {
                            token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                            done = true;
                            nextTokenSb.Length = 0;
                        }
                        else
                        {
                            // RESET
                            nextTokenSb.Length = 0;
                            tokenLineNumber = lineNumber;
                            state = PickNextState(ctype, thisChar);
                        }
                    }
                }
                break;

            case NextTokenState.BlockComment:
                if (thisChar == Eof)
                {
                    if (settings.DoUntermCheck)
                    {
                        nextTokenSb.Length = 0;
                        throw new StreamTokenizerUntermCommentException("Unterminated comment.");
                    }

                    if (settings.GrabComments)
                    {
                        token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                        done = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // RESET
                        nextTokenSb.Length = 0;
                        tokenLineNumber = lineNumber;
                        state = PickNextState(ctype, thisChar);
                    }
                }
                else
                {
                    if ((thisChar == '/') && (prevChar == '*'))
                    {
                        state = NextTokenState.EndBlockComment;
                    }
                }
                break;

            // special case for 2-character token termination
            case NextTokenState.EndBlockComment:
                if (settings.GrabComments)
                {
                    token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                    done = true;
                    nextTokenSb.Length = 0;
                }
                else
                {
                    // RESET
                    nextTokenSb.Length = 0;
                    tokenLineNumber = lineNumber;
                    state = PickNextState(ctype, thisChar);
                }
                break;

            case NextTokenState.MaybeHex:
                // previous char was 0
                if (thisChar != 'x')
                {
                    // back up and try non-hex
                    // back up to the 0
                    nextTokenSb.Append((char)thisChar);
                    backString.Append(nextTokenSb);
                    nextTokenSb.Length = 0;

                    // reset state and don't choose MaybeNumber state.
                    // pull char from backString
                    thisChar = backString[0];
                    backString.Remove(0, 1);
                    state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
                        NextTokenState.MaybeHex);
#if DEBUG
                    log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar, state);
#endif
                }
                else state = NextTokenState.HexGot0x;
                break;

            case NextTokenState.HexGot0x:
                if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
                {
                    // got 0x but now a non-hex char
                    // back up to the 0
                    nextTokenSb.Append((char)thisChar);
                    backString.Append(nextTokenSb);
                    nextTokenSb.Length = 0;

                    // reset state and don't choose MaybeNumber state.
                    // pull char from backString
                    thisChar = backString[0];
                    backString.Remove(0, 1);
                    state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
                        NextTokenState.MaybeHex);
#if DEBUG
                    log.Debug("HexGot0x: Next state on '{0}' is {1}", (char)thisChar, state);
#endif
                }
                else state = NextTokenState.HexNumber;
                break;

            case NextTokenState.HexNumber:
                if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
                {
                    // emit the hex number we've collected
#if DEBUG
                    log.Debug("Emit hex IntToken from string '{0}'", nextTokenSb);
#endif
                    token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber);
                    done = true;
                    nextTokenSb.Length = 0;
                }
                break;

            case NextTokenState.MaybeNumber:
                //
                // Determine whether or not to stop collecting characters for
                // the number parse. We terminate when it's clear it's not
                // a number or no longer a number.
                //
                bool term = false;

                if (settings.IsCharType(ctype, CharTypeBits.Digit)
                    || settings.IsCharType(prevChar, CharTypeBits.Digit)) seenDigit = true;

                // term conditions
                if (thisChar == '.')
                {
                    seenDot++;
                    if (seenDot > 1) term = true; // more than one dot, it aint a number
                }
                else if (((thisChar == 'e') || (thisChar == 'E')))
                {
                    seenE++;
                    if (!seenDigit) term = true; // e before any digits is bad
                    else if (seenE > 1) term = true; // more than 1 e is bad
                    else
                    {
                        term = true; // done regardless

                        // scan the exponent, put its characters into
                        // nextTokenSb, if there are any
                        char c;
                        expSb.Clear();
                        expSb.Append((char)thisChar);
                        if (GrabInt(expSb, true, out c))
                        {
                            // we got a good exponent, tack it on
                            nextTokenSb.Append(expSb);
                            thisChar = c; // and continue after the exponent's characters
                        }
                    }
                }
                else if (thisChar == Eof) term = true;
                // or a char that can't be in a number
                else if ((!settings.IsCharType(ctype, CharTypeBits.Digit)
                    && (thisChar != 'e') && (thisChar != 'E')
                    && (thisChar != '-') && (thisChar != '.'))
                    || ((thisChar == '+') && (seenE == 0)))
                {
                    // it's not a normal number character
                    term = true;
                }
                // or a dash not after e
                else if ((thisChar == '-') && (!((prevChar == 'e') || (prevChar == 'E')))) term = true;

                if (term)
                {
                    // we are terminating a number, or it wasn't a number
                    if (seenDigit)
                    {
                        if ((nextTokenSb.IndexOf('.') >= 0)
                            || (nextTokenSb.IndexOf('e') >= 0)
                            || (nextTokenSb.IndexOf('E') >= 0)
                            || (nextTokenSb.Length >= 19) // probably too large for Int64, use float
                            )
                        {
                            token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber);
#if DEBUG
                            log.Debug("Emit FloatToken from string '{0}'", nextTokenSb);
#endif
                        }
                        else
                        {
#if DEBUG
                            log.Debug("Emit IntToken from string '{0}'", nextTokenSb);
#endif
                            token = new IntToken(nextTokenSb.ToString(), tokenLineNumber);
                        }
                        done = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // -whatever or -.whatever
                        // didn't see any digits, must have gotten here by a leading -
                        // and no digits after it
                        // back up to -, pick next state excluding numbers
                        nextTokenSb.Append((char)thisChar);
                        backString.Append(nextTokenSb);
                        nextTokenSb.Length = 0;

                        // restart on the - and don't choose MaybeNumber state
                        // pull char from backString
                        thisChar = backString[0];
                        backString.Remove(0, 1);
                        state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
                            NextTokenState.MaybeNumber);
#if DEBUG
                        log.Debug("MaybeNumber: Next state on '{0}' is {1}", (char)thisChar, state);
#endif
                    }
                }
                break;

            case NextTokenState.Eol:
                // tokenLineNumber - 1 because the newline char is on the previous line
                token = new EolToken(tokenLineNumber - 1);
                done = true;
                nextTokenSb.Length = 0;
                break;

            case NextTokenState.Eof:
                // the EofToken is handed back through the out parameter, but
                // the return value is false so callers can stop looping
                token = new EofToken(tokenLineNumber);
                done = true;
                nextTokenSb.Length = 0;
                return(false);

            case NextTokenState.Invalid:
            default:
                // not a good sign, some unrepresented state?
                log.Error("NextToken: Hit unrepresented state {0}", state);
                return(false);
        }

        // accumulate characters which are part of the next token; the
        // character that terminated this token is kept for the next call
        if (thisChar != Eof) nextTokenSb.Append((char)thisChar);
#if DEBUG
        log.Debug("After switch: state = {0}, nextTokenSb = '{1}', backString = '{2}'",
            state, nextTokenSb, backString);
#endif
    }

#if DEBUG
    log.Debug("Got token {0}", token.ToDebugString());
#endif
    return(true);
}
/// <summary>
/// Given a Token[] and a reference int, skip forward
/// in the token array until a WordToken is found,
/// and leave the reference int at that index.
/// </summary>
/// <param name="tokens">The token array.</param>
/// <param name="i">The start index, and the result index. Left unchanged
/// when it is outside the array on entry; equal to <c>tokens.Length</c>
/// when the search runs off the end.</param>
/// <returns>bool - true for success, false when the start index is outside
/// the array or the end of the tokens was hit.</returns>
public static bool SkipToWord(Token[] tokens, ref int i)
{
    // An empty array or an out-of-range start index would otherwise make
    // the first tokens[i] access throw instead of reporting failure.
    if ((i < 0) || (i >= tokens.Length)) return(false);

    while (!(tokens[i] is WordToken))
    {
        i++;
        if (i >= tokens.Length) return(false);
    }
    return(true);
}
/// <summary>
/// Find matching closing character.
/// The matchable pairs of characters are parenthesis (),
/// square brackets [], and curly braces {}.
/// Given a Token[] and a reference int containing the index
/// in the Token[] of an opening character, skip forward
/// in the token array until the matching closing character is found,
/// honouring nesting of the same pair.
/// </summary>
/// <remarks>
/// This implicitly skips matching characters in quotes and
/// comments if they are hidden in the tokens. So if you grab
/// comments and quotes when you tokenize, the characters in those
/// tokens are not looked at by this function.
/// </remarks>
/// <param name="tokens">The token array.</param>
/// <param name="i">The start index, and the result index. On success it is
/// the index of the matching closing character. It is left unchanged when
/// <paramref name="c"/> is not an opening character, when the index is
/// outside the array, or when the token at the index is not
/// <paramref name="c"/>. It is set to <c>tokens.Length</c> when the end of
/// the tokens is reached without a match.</param>
/// <param name="c">The start character whose match is to be found.</param>
/// <returns>bool - true for success, false for an invalid start or for
/// hit the end of the tokens.</returns>
public static bool FindMatch(Token[] tokens, ref int i, char c)
{
    char endChar;
    if (c == '(') endChar = ')';
    else if (c == '{') endChar = '}';
    else if (c == '[') endChar = ']';
    else return(false);

    // the start index must be inside the array
    if ((i < 0) || (i >= tokens.Length)) return(false);

    // i'th token must be the start character
    if (tokens[i] != c)
    {
        return(false);
    }

    int nestLevel = 1; // count first one

    // Scan the tokens after the opening character. Success is decided the
    // moment the nest level drops to 0, BEFORE looking at the array bound,
    // so a closing character in the very last slot is still a match (it was
    // previously reported as failure), and an opening character in the last
    // slot no longer reads past the end of the array.
    for (int pos = i + 1; pos < tokens.Length; pos++)
    {
        if (tokens[pos] == c) nestLevel++;
        else if (tokens[pos] == endChar)
        {
            nestLevel--;
            if (nestLevel == 0)
            {
                i = pos;
                return(true);
            }
        }
    }

    // ran out of tokens without closing the pair
    i = tokens.Length;
    return(false);
}
/// <summary>
/// Build a copy of a token array that leaves out every token which
/// Equals() any of the tokens in a drop list. The relative order of
/// the remaining tokens is preserved.
/// </summary>
/// <param name="tokens">The token array.</param>
/// <param name="dropTokens">The tokens to drop.</param>
/// <returns>A new Token[] holding the tokens that were kept.</returns>
public static Token[] DropTokens(Token[] tokens, Token[] dropTokens)
{
    ArrayList kept = new ArrayList();

    foreach (Token candidate in tokens)
    {
        // compare against every entry of the drop list
        bool matched = false;
        foreach (Token unwanted in dropTokens)
        {
            if (candidate.Equals(unwanted))
            {
                matched = true;
            }
        }

        if (matched)
        {
            continue;
        }
        kept.Add(candidate);
    }

    // copy to array
    return((Token[])kept.ToArray(typeof(Token)));
}
/// <summary>
/// Given a Token[] and a reference int, skip forward
/// in the token array until an EolToken is found,
/// and leave the reference int at that index.
/// </summary>
/// <param name="tokens">The token array.</param>
/// <param name="i">The start index, and the result index. Left unchanged
/// when it is outside the array on entry; equal to <c>tokens.Length</c>
/// when the search runs off the end.</param>
/// <returns>bool - true for success, false when the start index is outside
/// the array or the end of the tokens was hit.</returns>
public static bool SkipToEol(Token[] tokens, ref int i)
{
    bool startInRange = (i >= 0) && (i < tokens.Length);
    if (!startInRange)
    {
        return false;
    }

    // advance the caller's index directly so it ends on the EolToken,
    // or at tokens.Length when none is found
    for (; i < tokens.Length; i++)
    {
        if (tokens[i] is EolToken)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Given a Token[] and a reference int, skip forward
/// in the token array past any WhitespaceTokens,
/// and leave the reference int at the first token that is
/// not a WhitespaceToken.
/// </summary>
/// <param name="tokens">The token array.</param>
/// <param name="i">The start index, and the result index. Left unchanged
/// when it is outside the array on entry; equal to <c>tokens.Length</c>
/// when only whitespace remained.</param>
/// <returns>bool - true for success, false when the start index is outside
/// the array or the end of the tokens was hit.</returns>
public static bool SkipWs(Token[] tokens, ref int i)
{
    // An empty array or an out-of-range start index would otherwise make
    // the first tokens[i] access throw instead of reporting failure.
    if ((i < 0) || (i >= tokens.Length)) return(false);

    while (tokens[i] is WhitespaceToken)
    {
        i++;
        if (i >= tokens.Length) return(false);
    }
    return(true);
}
/// <summary>
/// Given a Token[], a reference int and a char, skip forward
/// in the token array until a token matches the char,
/// and leave the reference int at that index.
/// </summary>
/// <param name="tokens">The token array.</param>
/// <param name="i">The start index, and the result index. Left unchanged
/// when it is outside the array on entry; equal to <c>tokens.Length</c>
/// when the search runs off the end.</param>
/// <param name="c">The char to look for.</param>
/// <returns>bool - true for success, false when the start index is outside
/// the array or the end of the tokens was hit.</returns>
public static bool SkipToChar(Token[] tokens, ref int i, char c)
{
    // An empty array or an out-of-range start index would otherwise make
    // the first tokens[i] access throw instead of reporting failure.
    if ((i < 0) || (i >= tokens.Length)) return(false);

    while (tokens[i] != c)
    {
        i++;
        if (i >= tokens.Length) return(false);
    }
    return(true);
}
/// <summary>
/// Given a Token[], a reference int and a string, skip forward
/// in the token array until a token matches the string
/// and leave the reference int at that index.
/// </summary>
/// <param name="tokens">The token array.</param>
/// <param name="i">The start index, and the result index. Left unchanged
/// when it is outside the array on entry; equal to <c>tokens.Length</c>
/// when the search runs off the end.</param>
/// <param name="s">The string to look for.</param>
/// <returns>bool - true for success, false when the start index is outside
/// the array or the end of the tokens was hit.</returns>
public static bool SkipToStringValue(Token[] tokens, ref int i, string s)
{
    // An empty array or an out-of-range start index would otherwise make
    // the first tokens[i] access throw instead of reporting failure.
    if ((i < 0) || (i >= tokens.Length)) return(false);

    while (tokens[i] != s)
    {
        i++;
        if (i >= tokens.Length) return(false);
    }
    return(true);
}