/// <summary>
/// Factory for a token that represents a string literal.
/// </summary>
/// <param name="rawString">Text passed to the <c>LexerToken</c> constructor as the token's raw content.</param>
/// <param name="startCharIdx">Start character index passed to the <c>LexerToken</c> constructor.</param>
/// <param name="endCharIdx">End character index passed to the <c>LexerToken</c> constructor.</param>
/// <returns>The new token with its <c>tokenType</c> set to <c>TYPE.STRING</c>.</returns>
public static LexerToken new_StringToken(string rawString, int startCharIdx, int endCharIdx)
{
    LexerToken token = new LexerToken(rawString, startCharIdx, endCharIdx);

    // Tag the freshly built token as a string literal before handing it back.
    token.tokenType = TYPE.STRING;

    return token;
}
/// <summary>
/// Reads a double-quoted string literal that begins at <c>charIdx</c> and appends
/// the resulting STRING token to <c>output</c>.
/// On return <c>charIdx</c> points just past the closing quote.
/// </summary>
/// <remarks>
/// A backslash escapes the following character. The sequences <c>\n</c>, <c>\t</c>,
/// <c>\r</c> and <c>\0</c> are translated to the corresponding control characters;
/// any other escaped character (including <c>\"</c> and <c>\\</c>) is kept verbatim,
/// so an escaped quote no longer terminates the literal and is no longer dropped from it.
/// </remarks>
/// <exception cref="LexerFailure">The character at <c>charIdx</c> is not a double quote.</exception>
/// <exception cref="LexerSourceException">The source ends before a closing double quote is found.</exception>
private void readStringLiteral()
{
    if (source[charIdx] != '\"')
    {
        throw new LexerFailure("readStringLiteral must start on a double quote character.");
    }

    StringBuilder stringLiteral = new StringBuilder();
    int startStringCharIdx = charIdx;    // Record the location of the first "
    charIdx++;

    bool isEscaped = false;
    bool endQuoteFound = false;
    for (; charIdx < source.Length; charIdx++)
    {
        char c = source[charIdx];    // The current character under consideration

        if (isEscaped)
        {
            // The previous character was a backslash: translate the escape
            // sequence instead of discarding the character.
            switch (c)
            {
                case 'n':
                    stringLiteral.Append('\n');
                    break;
                case 't':
                    stringLiteral.Append('\t');
                    break;
                case 'r':
                    stringLiteral.Append('\r');
                    break;
                case '0':
                    stringLiteral.Append('\0');
                    break;
                default:
                    // Covers \" and \\ as well as unknown escapes: keep the character itself.
                    stringLiteral.Append(c);
                    break;
            }
            isEscaped = false;
            continue;
        }

        if (c == '\"')
        {
            endQuoteFound = true;
            break;
        }

        if (c == '\\')
        {
            isEscaped = true;
            continue;
        }

        stringLiteral.Append(c);
    }

    if (!endQuoteFound)
    {
        // NOTE(review): the start position reported is one before the opening quote;
        // kept as in the original -- confirm against LexerSourceException's convention.
        throw new LexerSourceException("Beginning \" missing end \".", source, startStringCharIdx - 1, charIdx);
    }

    charIdx++;    // Move the index to point after the last " in the string.
    int endStringCharIdx = charIdx;
    output.Add(LexerToken.new_StringToken(stringLiteral.ToString(), startStringCharIdx, endStringCharIdx));
}
/// <summary>
/// Creates the exception for a problem associated with a specific token.
/// </summary>
/// <param name="msg">Message forwarded to the base exception constructor.</param>
/// <param name="badToken">The token stored in the <c>badToken</c> member of this exception.</param>
public RuntimeLexerSourceException(string msg, LexerToken badToken)
    : base(msg)
{
    // Keep the token so whoever handles the exception can inspect it.
    this.badToken = badToken;
}
/// <summary>
/// Creates a <c>LexerToken</c> whose <c>tokenType</c> is <c>TYPE.STRING</c>.
/// </summary>
/// <param name="rawString">Raw text forwarded to the <c>LexerToken</c> constructor.</param>
/// <param name="startCharIdx">Start character index forwarded to the constructor.</param>
/// <param name="endCharIdx">End character index forwarded to the constructor.</param>
/// <returns>The newly constructed string token.</returns>
public static LexerToken new_StringToken(string rawString, int startCharIdx, int endCharIdx)
{
    // Construct and classify in one expression; the initializer runs after the constructor.
    return new LexerToken(rawString, startCharIdx, endCharIdx)
    {
        tokenType = TYPE.STRING
    };
}
/// <summary>
/// Walks <paramref name="tokens"/> and writes a PNode for each recognized item via
/// <c>writePNode</c>: words found in the runtime's word-lookup trie, raw-word objects
/// for tokens that do not resolve to a word, string-literal objects for STRING tokens,
/// and the runtime's quadquote word for quadquote tokens.
/// </summary>
/// <param name="runtime">Stored in <c>this.runtime</c>; supplies the word-lookup trie root and the Quadquote word.</param>
/// <param name="who">Stored in <c>this.who</c>; not otherwise used in this method.</param>
/// <param name="tokens">Tokens to process; stored in <c>sourceTokenList</c> and indexed by the member <c>tokenIdx</c>.</param>
/// <returns><c>outputRoot</c>, after <c>finalCheckAfterParsing()</c> has run.</returns>
/// <exception cref="RuntimeLexerFailure">
/// A token inside a quadquote expression has a type other than <c>LexerToken.TYPE.UNCLASSIFIED</c>.
/// </exception>
public PNode parseTokensIntoPNodes(ProseRuntime runtime, ProseClient who, List <LexerToken> tokens)
{
    // Save the parameters
    this.who = who;
    this.runtime = runtime;
    sourceTokenList = tokens;
    // presumably resets tokenIdx and the other parsing members -- verify in setupForParsing
    setupForParsing();

    // currWordLookupNode: trie node reached by the raw words consumed for the word in progress.
    // wordLookupRoot:     trie root; the search returns here whenever a word is finished.
    // lastGoodNode:       most recent node on the current path that carries a Word (Value != null).
    Trie <RawWord, Word> .Node currWordLookupNode, wordLookupRoot, lastGoodNode;
    wordLookupRoot = runtime.getWordLookupRoot();
    currWordLookupNode = wordLookupRoot;
    lastGoodNode = null;
    // Index of the token at which lastGoodNode was recorded.
    int lastGoodNodeTokenIdx = -1;
    // Record whether or not we're in the process of building up a word.
    bool isBuildingWord = false;
    // Index of the last token already accounted for in the output; the raw-word
    // dump loops below start at lastProcessedTokenIdx + 1.
    int lastProcessedTokenIdx = -1;

    // There is no increment in the loop header: every path through the body is
    // responsible for moving tokenIdx itself.
    while (tokenIdx < tokens.Count)
    {
        LexerToken token = tokens[tokenIdx];

        //
        // Deal With Quadquotes.
        //
        if (token.rawWord == ProseLanguage.Raw.Quadquote)
        {
            // First clean up any word we may be building and write it to output.
            if (isBuildingWord)
            {
                if (lastGoodNode == null)
                {
                    // If we're inside a quadquote block then this is fine: even if we don't have
                    // a legitimate word we can still wrap rawwords.
                    // Outside a quadquote block nothing is flushed or reset here; the
                    // exception that used to cover that case is disabled (see below).
                    if (insideQuadquoteExpression)
                    {
                        // If there is no last good match, then take the raw words we've passed
                        // and dump them all into raw word objects.
                        for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                        {
                            writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                        }
                        // Update everything so we continue after this point
                        lastGoodNodeTokenIdx = tokenIdx - 1;
                        lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                        currWordLookupNode = wordLookupRoot;
                        isBuildingWord = false;
                        // tokenIdx is left alone: the quadquote token itself is handled just below.
                    }
                    // Disabled alternative:
                    // else {
                    //     throw new RuntimeLexerSourceException("Unrecognized word or symbol.", tokens[tokenIdx-1]);
                    // }
                }
                else
                {
                    // A complete word was pending: emit it.
                    writePNode(new PNode(lastGoodNode.Value));
                    // Reset everything so we're looking for a new word again.
                    lastGoodNode = null;
                    currWordLookupNode = wordLookupRoot;
                    isBuildingWord = false;
                    lastGoodNodeTokenIdx = tokenIdx;
                    lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                }
            }

            // Output a quadquote
            writePNode(new PNode(runtime.Quadquote));
            // Toggle our quad-quote-state.
            insideQuadquoteExpression = !insideQuadquoteExpression;
            lastQuadquoteIdx = tokenIdx;
            lastProcessedTokenIdx = tokenIdx;
            // Continue
            tokenIdx++;
            continue;
        }

        if (insideQuadquoteExpression)
        {
            // Only UNCLASSIFIED tokens are accepted inside a quadquote expression.
            if (token.tokenType != LexerToken.TYPE.UNCLASSIFIED)
            {
                throw new RuntimeLexerFailure("Static Lexer Failed Token Classification.");
            }

            //
            // This code is essentially copied from the LexerToken.TYPE.UNCLASSIFIED block below.
            // The main difference is in the "no last good match" path: the current token is
            // retried from the trie root instead of being dumped together with the pending ones.
            //
            isBuildingWord = true;
            // Try to continue the current word matching.
            Trie <RawWord, Word> .Node nodeForThisRawWord = currWordLookupNode.getChildNode(token.rawWord);
            // If we can't continue this way...
            if (nodeForThisRawWord == null)
            {
                //...then whatever our last good match was is the correct word.
                if (lastGoodNode == null)
                {
                    // If there is no last good match, then take the raw words we've passed
                    // and dump them all into raw word objects.
                    for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                    {
                        writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                    }
                    lastProcessedTokenIdx = tokenIdx - 1;

                    // Restart the lookup for the current token from the trie root.
                    currWordLookupNode = wordLookupRoot.getChildNode(token.rawWord);
                    // If there's no node at all, we have to deal with it now
                    if (currWordLookupNode == null)
                    {
                        // The token cannot start any word: emit it raw and move on.
                        writePNode(new PNode(new RawWordObject(token.rawWord)));
                        lastGoodNodeTokenIdx = tokenIdx;
                        isBuildingWord = false;
                        currWordLookupNode = wordLookupRoot;
                        lastProcessedTokenIdx = tokenIdx;
                        tokenIdx++;
                    }
                    else
                    {
                        // The token starts a new candidate word; keep building from its node.
                        // Note that lastGoodNode is not updated here even if that node has a Value.
                        isBuildingWord = true;
                        lastGoodNodeTokenIdx = tokenIdx - 1;
                        tokenIdx++;
                    }
                    continue;
                }

                writePNode(new PNode(lastGoodNode.Value));
                // Reset everything so we're looking for a new word again.
                lastGoodNode = null;
                currWordLookupNode = wordLookupRoot;
                isBuildingWord = false;
                lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                // Move the head back to the spot after the last token in the word
                tokenIdx = lastGoodNodeTokenIdx + 1;
                continue;
            }

            // If adding this raw word makes a word, then record it as good
            if (nodeForThisRawWord.Value != null)
            {
                lastGoodNode = nodeForThisRawWord;
                lastGoodNodeTokenIdx = tokenIdx;
            }
            // NOTE(review): tokenIdx is not advanced on this path, so the next iteration
            // looks up the same token again, this time under the child node -- confirm
            // that this is intended.
            currWordLookupNode = nodeForThisRawWord;
            continue;
        }
        else
        {
            // NOTE(review): only UNCLASSIFIED and STRING are handled. A token of any other
            // type falls out of the switch with tokenIdx unchanged, so the loop would not
            // make progress -- confirm no other TYPE value can reach this point.
            switch (token.tokenType)
            {
            case LexerToken.TYPE.UNCLASSIFIED:
            {
                isBuildingWord = true;
                // Try to continue the current word matching.
                Trie <RawWord, Word> .Node nodeForThisRawWord = currWordLookupNode.getChildNode(token.rawWord);
                // If we can't continue this way...
                if (nodeForThisRawWord == null)
                {
                    //...then whatever our last good match was is the correct word.
                    if (lastGoodNode == null)
                    {
                        // Disabled alternative:
                        //throw new RuntimeLexerSourceException("Unrecognized word or symbol.", token);
                        // Dump everything into raw words.
                        tokenIdx++;    // Include this word
                        for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                        {
                            writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                        }
                        // Update everything so we continue after this point
                        lastGoodNodeTokenIdx = tokenIdx - 1;
                        lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                        currWordLookupNode = wordLookupRoot;
                        isBuildingWord = false;
                        continue;
                    }

                    writePNode(new PNode(lastGoodNode.Value));
                    // Reset everything so we're looking for a new word again.
                    lastGoodNode = null;
                    currWordLookupNode = wordLookupRoot;
                    isBuildingWord = false;
                    lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                    // Move the head back to the spot after the last token in the word
                    tokenIdx = lastGoodNodeTokenIdx + 1;
                    continue;
                }

                // If adding this raw word makes a word, then record it as good
                if (nodeForThisRawWord.Value != null)
                {
                    lastGoodNode = nodeForThisRawWord;
                    lastGoodNodeTokenIdx = tokenIdx;
                }
                // NOTE(review): as in the quadquote branch, tokenIdx is not advanced here,
                // so the same token is examined again under the child node -- confirm intent.
                currWordLookupNode = nodeForThisRawWord;
                continue;
            }
            // Not reached: every path through the block above ends in continue.
            break;

            case LexerToken.TYPE.STRING:
            {
                // First clean up any word we may be building and write it to output.
                if (isBuildingWord)
                {
                    // The pending word is only usable if it ended on the token right
                    // before this string literal.
                    if (lastGoodNode == null || lastGoodNodeTokenIdx != tokenIdx - 1)
                    {
                        // Disabled alternative:
                        //throw new RuntimeLexerSourceException("Unrecognized word or symbol.", tokens[tokenIdx-1]);
                        // Just take all the words up until now and dump them into raw words.
                        for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                        {
                            writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                        }
                        // Update everything so we continue after this point
                        lastGoodNodeTokenIdx = tokenIdx - 1;
                        lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                        currWordLookupNode = wordLookupRoot;
                        isBuildingWord = false;
                    }
                    else
                    {
                        writePNode(new PNode(lastGoodNode.Value));
                        // Reset everything so we're looking for a new word again.
                        lastGoodNode = null;
                        currWordLookupNode = wordLookupRoot;
                        isBuildingWord = false;
                        lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                    }
                }

                // Now write the string literal object to output
                writePNode(new PNode(new StringLiteralObject(token.rawWord.AsString)));
                lastProcessedTokenIdx = tokenIdx;
                // Continue
                tokenIdx++;
                continue;
            }
            // Not reached: the block above always ends in continue.
            break;
            }
        }
    }

    // The locals tracking a word in progress (lastGoodNode, isBuildingWord, ...) are not
    // visible to finalCheckAfterParsing, so anything still pending here is not flushed by it.
    finalCheckAfterParsing();
    return(outputRoot);
}