예제 #1
0
        /// <summary>
        /// Factory for a lexer token that is classified as a string literal.
        /// </summary>
        /// <param name="rawString">Text forwarded to the <c>LexerToken</c> constructor.</param>
        /// <param name="startCharIdx">Start character index forwarded to the constructor.</param>
        /// <param name="endCharIdx">End character index forwarded to the constructor.</param>
        /// <returns>The new token with its <c>tokenType</c> set to <c>TYPE.STRING</c>.</returns>
        public static LexerToken new_StringToken(string rawString, int startCharIdx, int endCharIdx)
        {
            //	Construct first, then tag the token type via an object initializer.
            return new LexerToken(rawString, startCharIdx, endCharIdx)
            {
                tokenType = TYPE.STRING
            };
        }
예제 #2
0
        /// <summary>
        /// Reads a double-quoted string literal starting at <c>charIdx</c> and appends a
        /// string token for it to <c>output</c>.
        /// </summary>
        /// <remarks>
        /// On entry <c>source[charIdx]</c> must be the opening quote.  On success
        /// <c>charIdx</c> is left one past the closing quote.  A backslash escapes the
        /// character that follows it: <c>\n</c>, <c>\t</c> and <c>\r</c> become the
        /// corresponding control characters, and any other escaped character (including
        /// <c>\"</c> and <c>\\</c>) is taken literally.
        /// </remarks>
        /// <exception cref="LexerFailure">The current character is not an opening quote.</exception>
        /// <exception cref="LexerSourceException">The source ends before a closing quote is found.</exception>
        private void readStringLiteral()
        {
            if (source[charIdx] != '"')
            {
                throw new LexerFailure("readStringLiteral was called at a position that is not an opening quote.");
            }

            StringBuilder stringLiteral      = new StringBuilder();
            int           startStringCharIdx = charIdx;                 //	Record the location of the first "

            charIdx++;
            bool isEscaped     = false;
            bool endQuoteFound = false;

            for (; charIdx < source.Length; charIdx++)
            {
                char c = source[charIdx];                               //	The current character under consideration

                if (isEscaped)
                {
                    //	Previously the escaped character was discarded, so \" and \\ vanished
                    //	from the literal.  Translate the common escapes and keep everything
                    //	else verbatim.
                    switch (c)
                    {
                    case 'n':
                        stringLiteral.Append('\n');
                        break;

                    case 't':
                        stringLiteral.Append('\t');
                        break;

                    case 'r':
                        stringLiteral.Append('\r');
                        break;

                    default:
                        stringLiteral.Append(c);
                        break;
                    }
                    isEscaped = false;
                }
                else if (c == '"')
                {
                    //	Unescaped quote terminates the literal; charIdx stays on the quote.
                    endQuoteFound = true;
                    break;
                }
                else if (c == '\\')
                {
                    //	Remember that the next character is escaped; the backslash itself is not stored.
                    isEscaped = true;
                }
                else
                {
                    stringLiteral.Append(c);
                }
            }

            if (!endQuoteFound)
            {
                //	NOTE(review): the start position passed here is one before the opening quote,
                //	while the token below uses the quote's own index -- confirm which one
                //	LexerSourceException expects.
                throw new LexerSourceException("Beginning \" missing end \".",
                                               source, startStringCharIdx - 1, charIdx);
            }

            charIdx++;                  //	Move the index to point after the last " in the string.
            int endStringCharIdx = charIdx;

            output.Add(LexerToken.new_StringToken(stringLiteral.ToString(), startStringCharIdx, endStringCharIdx));
        }
 /// <summary>
 /// Creates the exception from a message and the token it refers to.
 /// </summary>
 /// <param name="msg">Message forwarded to the base-class constructor.</param>
 /// <param name="badToken">Token stored in the <c>badToken</c> member.</param>
 public RuntimeLexerSourceException(string msg, LexerToken badToken)
     : base(msg)
 {
     this.badToken = badToken;
 }
예제 #4
0
 /// <summary>
 /// Builds the exception, passing the message to the base class and keeping the token.
 /// </summary>
 /// <param name="msg">Message handed to the base-class constructor.</param>
 /// <param name="badToken">Token assigned to the <c>badToken</c> member.</param>
 public RuntimeLexerSourceException(string msg, LexerToken badToken) : base(msg)
 {
     //	Keep the token alongside the message.
     this.badToken = badToken;
 }
예제 #5
0
파일: LexerToken.cs 프로젝트: FizzyP/Prose
 /// <summary>
 /// Creates a <c>LexerToken</c> and marks it as a string token.
 /// </summary>
 /// <param name="rawString">Text passed through to the <c>LexerToken</c> constructor.</param>
 /// <param name="startCharIdx">Start character index passed through to the constructor.</param>
 /// <param name="endCharIdx">End character index passed through to the constructor.</param>
 /// <returns>The token, with <c>tokenType</c> equal to <c>TYPE.STRING</c>.</returns>
 public static LexerToken new_StringToken(string rawString, int startCharIdx, int endCharIdx)
 {
     var literalToken = new LexerToken(rawString, startCharIdx, endCharIdx);

     //	Classify the freshly built token before handing it back.
     literalToken.tokenType = TYPE.STRING;

     return literalToken;
 }
예제 #6
0
        /// <summary>
        /// Converts a list of lexer tokens into PNodes, writing each one with <c>writePNode</c>
        /// and returning <c>outputRoot</c>.
        /// </summary>
        /// <remarks>
        /// Words are recognised by walking the trie obtained from <c>runtime.getWordLookupRoot()</c>
        /// one raw word at a time.  <c>lastGoodNode</c> remembers the most recent trie node that
        /// carried a non-null <c>Value</c>; when the walk can go no further that value is written
        /// and the token index is rewound to just after it.  Tokens that match no word are wrapped
        /// in <c>RawWordObject</c>s.  Quadquote tokens toggle <c>insideQuadquoteExpression</c>, and
        /// string tokens are written as <c>StringLiteralObject</c>s.
        /// NOTE(review): <c>tokenIdx</c>, <c>insideQuadquoteExpression</c>, <c>lastQuadquoteIdx</c> and
        /// <c>outputRoot</c> are not initialised here -- presumably <c>setupForParsing()</c> resets
        /// them; confirm.
        /// </remarks>
        /// <param name="runtime">Runtime supplying the word-lookup trie and the Quadquote word; stored in <c>this.runtime</c>.</param>
        /// <param name="who">Client stored in <c>this.who</c>.</param>
        /// <param name="tokens">Tokens to process; stored in <c>sourceTokenList</c>.</param>
        /// <returns>The value of <c>outputRoot</c> after <c>finalCheckAfterParsing()</c> has run.</returns>
        public PNode parseTokensIntoPNodes(ProseRuntime runtime, ProseClient who, List <LexerToken> tokens)
        {
            //	Save the parameters
            this.who        = who;
            this.runtime    = runtime;
            sourceTokenList = tokens;

            setupForParsing();

            //	currWordLookupNode: current position in the word trie.
            //	wordLookupRoot:     trie root, used to restart matching.
            //	lastGoodNode:       last node reached whose Value was non-null (null if none yet).
            Trie <RawWord, Word> .Node currWordLookupNode, wordLookupRoot, lastGoodNode;
            wordLookupRoot     = runtime.getWordLookupRoot();
            currWordLookupNode = wordLookupRoot;
            lastGoodNode       = null;
            //	Token index at which lastGoodNode was recorded.
            int lastGoodNodeTokenIdx = -1;
            //	Record whether or not we're in the process of building up a word.
            bool isBuildingWord        = false;
            //	Index of the last token whose output has already been written.
            int  lastProcessedTokenIdx = -1;

            while (tokenIdx < tokens.Count)
            {
                LexerToken token = tokens[tokenIdx];

                //
                //	Deal With Quadquotes.
                //
                if (token.rawWord == ProseLanguage.Raw.Quadquote)
                {
                    //	First clean up any word we may be building and write it to output.
                    if (isBuildingWord)
                    {
                        if (lastGoodNode == null)
                        {
//							//	If we're inside a quadquote block then this is fine: even if we don't have
//							//	a legitimate word we can still wrap rawwords.  Otherwise it's an error.
//							if (insideQuadquoteExpression)
                            {
                                //	If there is no last good match, then take the raw words we've passed
                                //	and dump them all into raw word objects.
                                for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                                {
                                    writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                                }
                                //	Update everything so we continue after this point
                                lastGoodNodeTokenIdx  = tokenIdx - 1;
                                lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                                currWordLookupNode    = wordLookupRoot;
                                isBuildingWord        = false;
                                //	Don't bother updating tokenIdx because we need to look at the word again.
                            }
//							else {
//								throw new RuntimeLexerSourceException("Unrecognized word or symbol.", tokens[tokenIdx-1]);
//							}
                        }
                        else
                        {
                            //	NOTE(review): unlike the STRING case below, this branch does not check
                            //	that the last good match ended at tokenIdx - 1 -- confirm tokens after
                            //	the match cannot be lost here.
                            writePNode(new PNode(lastGoodNode.Value));
                            //	Reset everything so we're looking for a new word again.
                            lastGoodNode          = null;
                            currWordLookupNode    = wordLookupRoot;
                            isBuildingWord        = false;
                            lastGoodNodeTokenIdx  = tokenIdx;
                            lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                        }
                    }

                    //	Output a quadquote
                    writePNode(new PNode(runtime.Quadquote));
                    //	Toggle our quad-quote-state.
                    insideQuadquoteExpression = !insideQuadquoteExpression;
                    lastQuadquoteIdx          = tokenIdx;
                    lastProcessedTokenIdx     = tokenIdx;
                    //	Continue
                    tokenIdx++;
                    continue;
                }



                if (insideQuadquoteExpression)
                {
                    //	Only unclassified tokens are accepted inside a quadquote expression.
                    if (token.tokenType != LexerToken.TYPE.UNCLASSIFIED)
                    {
                        throw new RuntimeLexerFailure("Static Lexer Failed Token Classification.");
                    }

                    //
                    //	This code is essentially copied from the LexerToken.TYPE.UNCLASSIFIED block below.
                    //	The only major difference is that instead of throwing an exception we wrap unknown
                    //	text inside raw word objects.
                    //
                    isBuildingWord = true;
                    //	Try to continue the current word matching.
                    Trie <RawWord, Word> .Node nodeForThisRawWord = currWordLookupNode.getChildNode(token.rawWord);
                    //	If we can't continue this way...
                    if (nodeForThisRawWord == null)
                    {
                        //...then whatever our last good match was is the correct word.
                        if (lastGoodNode == null)
                        {
                            //	If there is no last good match, then take the raw words we've passed
                            //	and dump them all into raw word objects.
                            for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                            {
                                writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                            }
                            lastProcessedTokenIdx = tokenIdx - 1;
                            //	Update everything so we continue after this point
                            //	Don't bother updating tokenIdx because we need to look at the word again.
                            //	Do update currWordLookupNode
                            currWordLookupNode = wordLookupRoot.getChildNode(token.rawWord);
                            //	If there's no node at all, we have to deal with it now
                            if (currWordLookupNode == null)
                            {
                                //	The token cannot even start a word: emit it as a raw word and move on.
                                writePNode(new PNode(new RawWordObject(token.rawWord)));
                                lastGoodNodeTokenIdx  = tokenIdx;
                                isBuildingWord        = false;
                                currWordLookupNode    = wordLookupRoot;
                                lastProcessedTokenIdx = tokenIdx;
                                tokenIdx++;
                            }
                            else
                            {
                                //	The token starts a new candidate word; keep matching from its node.
                                isBuildingWord       = true;
                                lastGoodNodeTokenIdx = tokenIdx - 1;
                                tokenIdx++;
                            }
                            continue;
                        }

                        writePNode(new PNode(lastGoodNode.Value));
                        //	Reset everything so we're looking for a new word again.
                        lastGoodNode          = null;
                        currWordLookupNode    = wordLookupRoot;
                        isBuildingWord        = false;
                        lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                        //	Move the head back to the spot after the last token in the word
                        tokenIdx = lastGoodNodeTokenIdx + 1;
                        continue;
                    }

                    //	If adding this raw word makes a word, then record it as good
                    if (nodeForThisRawWord.Value != null)
                    {
                        lastGoodNode         = nodeForThisRawWord;
                        lastGoodNodeTokenIdx = tokenIdx;
                    }
                    currWordLookupNode = nodeForThisRawWord;
                    //	NOTE(review): tokenIdx is not advanced on this path, so the next iteration
                    //	looks up the same token again from the deeper node -- confirm this is intended.
                    continue;
                }
                else
                {
                    //	NOTE(review): there is no default case; a token of any other type would leave
                    //	tokenIdx unchanged and the loop would revisit it -- confirm no other TYPE
                    //	values can reach this switch.
                    switch (token.tokenType)
                    {
                    case LexerToken.TYPE.UNCLASSIFIED:
                    {
                        isBuildingWord = true;
                        //	Try to continue the current word matching.
                        Trie <RawWord, Word> .Node nodeForThisRawWord = currWordLookupNode.getChildNode(token.rawWord);
                        //	If we can't continue this way...
                        if (nodeForThisRawWord == null)
                        {
                            //...then whatever our last good match was is the correct word.
                            if (lastGoodNode == null)
                            {
                                //throw new RuntimeLexerSourceException("Unrecognized word or symbol.", token);
                                //	Dump everything into raw words.
                                tokenIdx++;                                     //	Include this word
                                for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                                {
                                    writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                                }
                                //	Update everything so we continue after this point
                                lastGoodNodeTokenIdx  = tokenIdx - 1;
                                lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                                currWordLookupNode    = wordLookupRoot;
                                isBuildingWord        = false;
                                continue;
                            }
                            writePNode(new PNode(lastGoodNode.Value));
                            //	Reset everything so we're looking for a new word again.
                            lastGoodNode          = null;
                            currWordLookupNode    = wordLookupRoot;
                            isBuildingWord        = false;
                            lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                            //	Move the head back to the spot after the last token in the word
                            tokenIdx = lastGoodNodeTokenIdx + 1;
                            continue;
                        }

                        //	If adding this raw word makes a word, then record it as good
                        if (nodeForThisRawWord.Value != null)
                        {
                            lastGoodNode         = nodeForThisRawWord;
                            lastGoodNodeTokenIdx = tokenIdx;
                        }
                        currWordLookupNode = nodeForThisRawWord;
                        //	NOTE(review): tokenIdx is not advanced on this path either; the same token is
                        //	looked up again on the next iteration -- confirm this is intended.
                        continue;
                    }
                    //	Not reached: every path through the block above ends in continue.
                    break;

                    case LexerToken.TYPE.STRING:
                    {
                        //	First clean up any word we may be building and write it to output.
                        if (isBuildingWord)
                        {
                            //	A match is only usable if it ended on the token immediately before this string.
                            if (lastGoodNode == null || lastGoodNodeTokenIdx != tokenIdx - 1)
                            {
                                //throw new RuntimeLexerSourceException("Unrecognized word or symbol.", tokens[tokenIdx-1]);
                                //	Just take all the words up until now and dump them into raw words.
                                for (int i = lastProcessedTokenIdx + 1; i < tokenIdx; i++)
                                {
                                    writePNode(new PNode(new RawWordObject(tokens[i].rawWord)));
                                }
                                //	Update everything so we continue after this point
                                lastGoodNodeTokenIdx  = tokenIdx - 1;
                                lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                                currWordLookupNode    = wordLookupRoot;
                                isBuildingWord        = false;
                            }
                            else
                            {
                                writePNode(new PNode(lastGoodNode.Value));
                                //	Reset everything so we're looking for a new word again.
                                lastGoodNode          = null;
                                currWordLookupNode    = wordLookupRoot;
                                isBuildingWord        = false;
                                lastProcessedTokenIdx = lastGoodNodeTokenIdx;
                            }
                        }

                        //	Now write the string literal object to output
                        writePNode(new PNode(new StringLiteralObject(token.rawWord.AsString)));
                        lastProcessedTokenIdx = tokenIdx;
                        //	Continue
                        tokenIdx++;
                        continue;
                    }
                    //	Not reached: the block above always ends in continue.
                    break;
                    }
                }
            }


            //	All tokens consumed: run the final check, then hand back the output root.
            finalCheckAfterParsing();
            return(outputRoot);
        }