Example #1
0
        public void EmbeddingTest()
        {
            // Lexicalize an embedding whose embed part contains an escaped quote
            WaebricLexer lexer = new WaebricLexer(new StringReader("\"pre<\"\\\">\">post\""));
            lexer.LexicalizeStream();

            TokenIterator tokens = lexer.GetTokenIterator();

            // The whole input must collapse into exactly one EMBEDDING token
            Assert.AreEqual(1, tokens.GetSize());
            Assert.AreEqual(TokenType.EMBEDDING, tokens.Peek(1).GetType());

            // Unpack the embedding and compare its inner tokens against the expected sequence
            EmbeddingToken parsedToken     = (EmbeddingToken)tokens.NextToken();
            TokenIterator  embeddingTokens = parsedToken.GetTokenIterator();

            String[] expected = { "\"", "pre", "<", "\\\">", ">", "post", "\"" };

            Assert.AreEqual(expected.Length, embeddingTokens.GetSize());
            for (int position = 0; position < expected.Length; position++)
            {
                Assert.AreEqual(expected[position], embeddingTokens.Peek(position + 1).GetValue().ToString());
            }
        }
Example #2
0
        public void ComplexEmbeddingTest()
        {
            // Lexicalize an embedding that contains a markup call with a quoted
            // attribute value and a quoted text argument
            WaebricLexer lexer = new WaebricLexer(new StringReader("\"<a(href=\"http://www.microsoft.com\") \"Microsoft Corp\">\""));
            lexer.LexicalizeStream();

            TokenIterator tokens = lexer.GetTokenIterator();

            // The whole input must collapse into exactly one EMBEDDING token
            Assert.AreEqual(1, tokens.GetSize());
            Assert.AreEqual(TokenType.EMBEDDING, tokens.Peek(1).GetType());

            // Unpack the embedding and compare its inner tokens against the expected sequence
            EmbeddingToken embeddingToken  = (EmbeddingToken)tokens.NextToken();
            TokenIterator  embeddingTokens = embeddingToken.GetTokenIterator();

            String[] expected =
            {
                "\"", "", "<", "a", "(", "href", "=",
                "http://www.microsoft.com", ")", "Microsoft Corp", ">", "\""
            };

            Assert.AreEqual(expected.Length, embeddingTokens.GetSize());
            for (int position = 0; position < expected.Length; position++)
            {
                Assert.AreEqual(expected[position], embeddingTokens.Peek(position + 1).GetValue().ToString());
            }
        }
Example #3
0
        /// <summary>
        /// Lexicalizes an embedding: a quoted string that carries one or more
        /// &lt;...&gt; embeds (e.g. "pre&lt;expr&gt;post"). Emits a single
        /// EmbeddingToken onto TokenStream whose inner token list contains the
        /// opening quote, the pre-text, the lexicalized embed content, the
        /// post-text and the closing quote.
        /// </summary>
        /// <param name="text">Text already consumed before this method was called;
        /// used as the initial buffer content (the embedding's pre-text)</param>
        private void LexicalizeEmbedding(String text)
        {
            List <Token> embeddingTokens = new List <Token>();
            String       buffer          = "";
            char         currentChar     = '\0';
            char         previousChar    = '\0';
            bool         embedded        = false; // true while inside a <...> embed section
            bool         quoted          = false; // true while inside a nested "..." string

            //Add " token (the embedding's opening quote)
            embeddingTokens.Add(new Token('"', TokenType.SYMBOL, tokenizer.GetScannedLines()));

            //Add text to buffer
            buffer = text;

            // Remember the line the embedding started on; tokens produced from an
            // embed section are tagged with this line number (see LexicalizeBuffer)
            int tempLinenumber = tokenizer.GetScannedLines();

            //Scan until end of embedding found
            currentChar = tokenizer.GetCharacterValue();
            do
            {
                if (CurrentToken == StreamTokenizer.EOF)
                {   //Abrupt stop of stream
                    throw new StreamTokenizerException("Unclosed embedding", tokenizer.GetScannedLines());
                }
                // An unescaped quote toggles the nested-string state, so that '<'
                // and '>' inside quoted values are not mistaken for embed markers
                if (currentChar == '"' && previousChar != '\\')
                {
                    quoted = !quoted;
                }
                if (currentChar == '<' && !quoted)
                {
                    // Detected start of embed, process pre-text
                    embeddingTokens.Add(new Token(buffer, TokenType.TEXT, tokenizer.GetScannedLines()));
                    buffer   = ""; // Clean buffer
                    embedded = true;
                }

                // NOTE(review): presumably tokenizer.ToString() yields the raw text
                // of the current token — confirm the StreamTokenizer override
                buffer      += tokenizer.ToString();
                previousChar = currentChar;

                if (currentChar == '>' && !quoted)
                {
                    // Detected end of embed, process content (buffer now holds
                    // everything from '<' up to and including '>')
                    LexicalizeBuffer(embeddingTokens, buffer, tempLinenumber);
                    buffer   = "";
                    embedded = false;
                }

                // Advance to the next token before re-testing the loop condition
                CurrentToken = tokenizer.NextToken();
                currentChar  = tokenizer.GetCharacterValue();
            } while ((currentChar != '"' || previousChar == '\\') || embedded);
            // Loop ends on an unescaped '"' that is not inside an embed section

            if (!buffer.Equals(""))
            {
                // Process post text (text between the last '>' and the closing quote)
                embeddingTokens.Add(new Token(buffer, TokenType.TEXT, tokenizer.GetScannedLines()));
            }

            if (CurrentToken != StreamTokenizer.EOF)
            {
                // Attach closure quote
                embeddingTokens.Add(new Token('"', TokenType.SYMBOL, tokenizer.GetScannedLines()));

                //Skip " token
                CurrentToken = tokenizer.NextToken();
            }

            // Create token from buffered content
            Token embedding = new EmbeddingToken(embeddingTokens, TokenType.EMBEDDING, tokenizer.GetScannedLines());

            TokenStream.Add(embedding);
        }