/// <summary>
/// Lexicalizes a buffer with a nested lexer and appends the resulting tokens to the given list.
/// </summary>
/// <param name="tokens">Token list that receives the newly scanned tokens</param>
/// <param name="buffer">Text to lexicalize; a null or empty buffer is ignored</param>
/// <param name="line">Line number passed to the nested lexer through SetLine before scanning</param>
private void LexicalizeBuffer(List<Token> tokens, String buffer, int line)
{
    if (String.IsNullOrEmpty(buffer))
    {
        //No data to scan
        return;
    }

    //The reader is only needed while the nested lexer runs, so release it deterministically
    using (StringReader stringReader = new StringReader(buffer))
    {
        //Create new lexer and lexicalize buffer
        WaebricLexer lexer = new WaebricLexer(stringReader);
        lexer.SetLine(line);
        lexer.LexicalizeStream();

        //Append everything the nested lexer produced, keeping the original order
        tokens.AddRange(lexer.GetTokenList());
    }
}
/// <summary>
/// Lexicalizes a quote. Text is collected until a non-escaped closing " is found and then
/// added to the token stream as a single TEXT token.
/// Two situations end the scan early:
/// - end of file: the " was not the start of a quoted part, so a single " SYMBOL token is
///   added and the collected text is lexicalized again as ordinary input;
/// - a '<' outside comment text: the collected text is handed to LexicalizeEmbedding.
/// </summary>
/// <exception cref="StreamTokenizerException">
/// Thrown when the text following a "comment" keyword is rejected by IsCorrectString.
/// </exception>
private void LexicalizeQuote()
{
    //Store current line number location for backtracking
    int tempLine = tokenizer.GetScannedLines();

    //Hold previous char for recognizing escape chars
    char previousChar = '\0';
    bool isCommentText = false;

    //Skip " token, only text is interesting
    CurrentToken = tokenizer.NextToken();

    //Ignore comments, due urls, etc
    tokenizer.SetIgnoreComments(true);

    //Check if this text is comment text, i.e. the previous token is the "comment" keyword.
    //The stream can still be empty when the quote is the first thing scanned, so guard the
    //lookup instead of indexing position -1.
    Token[] tempArray = TokenStream.ToArray();
    if (tempArray.Length > 0)
    {
        Token lastToken = tempArray[tempArray.Length - 1];
        if (lastToken.GetType() == TokenType.KEYWORD && lastToken.GetValue().ToString() == "comment")
        {
            isCommentText = true;
        }
    }

    //Retrieve possible quoted text
    StringBuilder stringBuilder = new StringBuilder();
    tokenizer.SetIgnoreNumeric(true);

    //Scan until non escaped " found
    //NOTE(review): a " that follows an escaped backslash (\\) is also treated as escaped here - confirm this is intended
    while (tokenizer.GetCharacterValue() != '\"' || previousChar == '\\')
    {
        if (CurrentToken == StreamTokenizer.EOF)
        {
            // End of file, so it wasn't a quoted part but just a single "
            tokenizer.SetIgnoreComments(false);
            tokenizer.SetIgnoreNumeric(false);

            //First add a single quote as token
            TokenStream.Add(new Token("\"", TokenType.SYMBOL, tempLine));

            //Second, scan remaining string
            WaebricLexer tempLexer = new WaebricLexer(new StringReader(stringBuilder.ToString()));
            tempLexer.LexicalizeStream();
            List<Token> tempTokenList = tempLexer.GetTokenList();

            //Add all tokens to stream, shifting their line numbers by the line of the quote
            foreach (Token currentToken in tempTokenList)
            {
                TokenStream.Add(new Token(currentToken.GetValue(), currentToken.GetType(), (currentToken.GetLine() + tempLine)));
            }

            return; //Lexicalizing done
        }
        else if (tokenizer.GetCharacterValue() == '<' && !isCommentText)
        {
            //Embedding found, so lexicalize embedding
            LexicalizeEmbedding(stringBuilder.ToString());
            tokenizer.SetIgnoreComments(false);
            tokenizer.SetIgnoreNumeric(false);
            return;
        }

        //Get next part and add it to stringBuilder
        stringBuilder.Append(tokenizer.ToString());
        previousChar = tokenizer.GetCharacterValue();
        CurrentToken = tokenizer.NextToken();
    }

    //Closing " found, restore normal tokenizer behaviour
    tokenizer.SetIgnoreComments(false);
    tokenizer.SetIgnoreNumeric(false);

    //Check if string is correct quote text
    string quotedText = stringBuilder.ToString();
    if (isCommentText && !IsCorrectString(quotedText))
    {
        throw new StreamTokenizerException("String Text containts non valid characters", tempLine);
    }

    TokenStream.Add(new Token(quotedText, TokenType.TEXT, tempLine));

    //Skip " token, only text is interesting
    CurrentToken = tokenizer.NextToken();
}
/// <summary>
/// Lexicalizes a buffer with a nested lexer and appends the resulting tokens to the given list.
/// </summary>
/// <param name="tokens">Token list that receives the newly scanned tokens</param>
/// <param name="buffer">Text to lexicalize; a null or empty buffer is ignored</param>
/// <param name="line">Line number passed to the nested lexer through SetLine before scanning</param>
private void LexicalizeBuffer(List<Token> tokens, String buffer, int line)
{
    if (String.IsNullOrEmpty(buffer))
    {
        //No data to scan
        return;
    }

    //The reader is only needed while the nested lexer runs, so release it deterministically
    using (StringReader stringReader = new StringReader(buffer))
    {
        //Create new lexer and lexicalize buffer
        WaebricLexer lexer = new WaebricLexer(stringReader);
        lexer.SetLine(line);
        lexer.LexicalizeStream();

        //Append everything the nested lexer produced, keeping the original order
        tokens.AddRange(lexer.GetTokenList());
    }
}
/// <summary>
/// Lexicalizes a small multi-line module (module declaration, blank line, site mapping, end)
/// and checks that the lexer produces 13 tokens for it.
/// </summary>
public void TestComplexStream()
{
    WaebricLexer lexer = new WaebricLexer(new StringReader("module test\n\nsite site/index.html : home()\nend"));
    lexer.LexicalizeStream();

    //AreEqual reports the expected and actual count on failure, unlike IsTrue on a comparison
    Assert.AreEqual(13, lexer.GetTokenList().Count);
}
/// <summary>
/// Lexicalizes a quote. Text is collected until a non-escaped closing " is found and then
/// added to the token stream as a single TEXT token.
/// Two situations end the scan early:
/// - end of file: the " was not the start of a quoted part, so a single " SYMBOL token is
///   added and the collected text is lexicalized again as ordinary input;
/// - a '<' outside comment text: the collected text is handed to LexicalizeEmbedding.
/// </summary>
/// <exception cref="StreamTokenizerException">
/// Thrown when the text following a "comment" keyword is rejected by IsCorrectString.
/// </exception>
private void LexicalizeQuote()
{
    //Store current line number location for backtracking
    int tempLine = tokenizer.GetScannedLines();

    //Hold previous char for recognizing escape chars
    char previousChar = '\0';
    bool isCommentText = false;

    //Skip " token, only text is interesting
    CurrentToken = tokenizer.NextToken();

    //Ignore comments, due urls, etc
    tokenizer.SetIgnoreComments(true);

    //Check if this text is comment text, i.e. the previous token is the "comment" keyword.
    //The stream can still be empty when the quote is the first thing scanned, so guard the
    //lookup instead of indexing position -1.
    Token[] tempArray = TokenStream.ToArray();
    if (tempArray.Length > 0)
    {
        Token lastToken = tempArray[tempArray.Length - 1];
        if (lastToken.GetType() == TokenType.KEYWORD && lastToken.GetValue().ToString() == "comment")
        {
            isCommentText = true;
        }
    }

    //Retrieve possible quoted text
    StringBuilder stringBuilder = new StringBuilder();
    tokenizer.SetIgnoreNumeric(true);

    //Scan until non escaped " found
    //NOTE(review): a " that follows an escaped backslash (\\) is also treated as escaped here - confirm this is intended
    while (tokenizer.GetCharacterValue() != '\"' || previousChar == '\\')
    {
        if (CurrentToken == StreamTokenizer.EOF)
        {
            // End of file, so it wasn't a quoted part but just a single "
            tokenizer.SetIgnoreComments(false);
            tokenizer.SetIgnoreNumeric(false);

            //First add a single quote as token
            TokenStream.Add(new Token("\"", TokenType.SYMBOL, tempLine));

            //Second, scan remaining string
            WaebricLexer tempLexer = new WaebricLexer(new StringReader(stringBuilder.ToString()));
            tempLexer.LexicalizeStream();
            List<Token> tempTokenList = tempLexer.GetTokenList();

            //Add all tokens to stream, shifting their line numbers by the line of the quote
            foreach (Token currentToken in tempTokenList)
            {
                TokenStream.Add(new Token(currentToken.GetValue(), currentToken.GetType(), (currentToken.GetLine() + tempLine)));
            }

            return; //Lexicalizing done
        }
        else if (tokenizer.GetCharacterValue() == '<' && !isCommentText)
        {
            //Embedding found, so lexicalize embedding
            LexicalizeEmbedding(stringBuilder.ToString());
            tokenizer.SetIgnoreComments(false);
            tokenizer.SetIgnoreNumeric(false);
            return;
        }

        //Get next part and add it to stringBuilder
        stringBuilder.Append(tokenizer.ToString());
        previousChar = tokenizer.GetCharacterValue();
        CurrentToken = tokenizer.NextToken();
    }

    //Closing " found, restore normal tokenizer behaviour
    tokenizer.SetIgnoreComments(false);
    tokenizer.SetIgnoreNumeric(false);

    //Check if string is correct quote text
    string quotedText = stringBuilder.ToString();
    if (isCommentText && !IsCorrectString(quotedText))
    {
        throw new StreamTokenizerException("String Text containts non valid characters", tempLine);
    }

    TokenStream.Add(new Token(quotedText, TokenType.TEXT, tempLine));

    //Skip " token, only text is interesting
    CurrentToken = tokenizer.NextToken();
}