/// <summary>
/// Adds a pattern to the tokenizer and reports a test failure if
/// the addition unexpectedly succeeds (the pattern is expected to
/// be rejected with a <c>ParserCreationException</c>).
/// </summary>
/// <param name="tokenizer">the tokenizer</param>
/// <param name="pattern">the pattern to add</param>
private void FailAddPattern(Tokenizer tokenizer, TokenPattern pattern)
{
    try
    {
        tokenizer.AddPattern(pattern);
        // Reaching this line means the pattern was accepted, which is a failure here
        Fail("could add pattern " + pattern.Name);
    }
    catch (ParserCreationException)
    {
        // Failure was expected
    }
}
/// <summary>
/// Adds a pattern to the tokenizer and reports a test failure if
/// it failed.
/// </summary>
/// <param name="tokenizer">the tokenizer</param>
/// <param name="pattern">the pattern to add</param>
private void AddPattern(Tokenizer tokenizer, TokenPattern pattern)
{
    try
    {
        tokenizer.AddPattern(pattern);
    }
    catch (ParserCreationException e)
    {
        // The pattern was expected to be accepted; surface the reason it wasn't
        Fail("couldn't add pattern " + pattern.Name + ": " + e.Message);
    }
}
/// <summary>
/// Verifies that a pattern added without the separator requirement
/// matches occurrences even when they are embedded in other text.
/// </summary>
public void ParsePatternWithoutSeparators()
{
    // Arrange: integer pattern, case-sensitive, separators NOT required
    var sut = new Tokenizer();
    sut.AddPattern(IntPatternMatcher.Default, true, false, TestTag);
    sut.AddToken("Alsing", false, true, TestTag);

    // "1337", the "3" in "0v3r", and nothing else should carry the tag
    const string text = @"The quick brown 1337 fox jumped 0v3r the little pig";
    sut.Text = text;

    // Act
    var allTokens = sut.Tokenize();
    var taggedCount = allTokens.Count(t => t.HasTag(TestTag));

    // Assert: three tagged matches expected
    Assert.AreEqual(3, taggedCount);
}
/// <summary>
/// Verifies that a pattern added with the separator requirement only
/// matches occurrences delimited by separators, with correct casing.
/// </summary>
public void ParsePattern()
{
    // Arrange: integer pattern, case-sensitive, separators required
    var sut = new Tokenizer();
    sut.AddPattern(IntPatternMatcher.Default, true, true, TestTag);
    sut.AddToken("Alsing", false, true, TestTag);

    // Only two tokens should be found ("1337" and "1234"); the rest
    // are either wrong casing or do not have separators next to them
    const string text = @"The quick brown 1337 fox jumped 0v3r the little pig 1234";
    sut.Text = text;

    // Act
    var allTokens = sut.Tokenize();
    var taggedCount = allTokens.Count(t => t.HasTag(TestTag));

    // Assert: two tagged matches expected
    Assert.AreEqual(2, taggedCount);
}
/// <summary>
/// Tokenizes the given JSON file.
/// NOTE(review): the patterns below — "(* *)" comments, "|", "?", "=" —
/// look like an EBNF grammar rather than JSON; confirm the intended input.
/// </summary>
/// <param name="text">The JSON file contents.</param>
/// <returns>The list of tokens.</returns>
public static TokenList <TokenType> Tokenize(string text)
{
    // Prepare the Tokenizer object
    Tokenizer <TokenType> tokenizer = new Tokenizer <TokenType>();

    // Register the tokenization rules
    tokenizer.AddPattern(TokenType.NewLine, "\r\n|\r|\n");
    tokenizer.AddPattern(TokenType.Comma, ",");
    tokenizer.AddPattern(TokenType.Semicolon, ";");
    tokenizer.AddPattern(TokenType.Equal, "=");
    tokenizer.AddPattern(TokenType.Minus, "-");
    tokenizer.AddPattern(TokenType.Or, @"\|");
    tokenizer.AddPattern(TokenType.Question, @"\?");
    tokenizer.AddPattern(TokenType.OpenComment, @"\(\*");
    tokenizer.AddPattern(TokenType.CloseComment, @"\*\)");
    tokenizer.AddPattern(TokenType.OpenParen, @"\(");
    tokenizer.AddPattern(TokenType.CloseParen, @"\)");
    tokenizer.AddPattern(TokenType.OpenBrace, "{");
    tokenizer.AddPattern(TokenType.CloseBrace, "}");
    tokenizer.AddPattern(TokenType.OpenBracket, @"\[");
    tokenizer.AddPattern(TokenType.CloseBracket, @"\]");
    tokenizer.AddPattern(TokenType.String, @"(""((?<=\\)""|[^\r\n""])*"")|('([^'])*')");
    tokenizer.AddPattern(TokenType.Space, @" +");
    tokenizer.AddPattern(TokenType.Name, @"([^\s=""{}\[\]()\|*,;?-]|[ ])+");

    // Event raised just before a token is appended to the list
    // - setting e.Cancel = true drops the token
    bool commentStarted = false;
    tokenizer.BeforeAddToken += (object sender, BeforeAddTokenEventArgs <TokenType> e) =>
    {
        // Strip comments at the tokenizer level: drop everything from
        // OpenComment through CloseComment (flat toggle — presumably
        // comments do not nest; TODO confirm)
        if (e.TokenMatch.Type == TokenType.OpenComment)
        {
            commentStarted = true;
            e.Cancel = true;
            return;
        }
        if (e.TokenMatch.Type == TokenType.CloseComment)
        {
            commentStarted = false;
            e.Cancel = true;
            return;
        }
        if (commentStarted)
        {
            e.Cancel = true;
            return;
        }

        // Newlines are dropped
        if (e.TokenMatch.Type == TokenType.NewLine)
        {
            e.Cancel = true;
            return;
        }

        // Spaces are dropped
        if (e.TokenMatch.Type == TokenType.Space)
        {
            e.Cancel = true;
            return;
        }

        // Names: trim surrounding spaces (the Name pattern can match
        // embedded spaces, so leading/trailing ones are stripped here)
        if (e.TokenMatch.Type == TokenType.Name)
        {
            e.TokenMatch.Text = e.TokenMatch.Text.Trim(' ');
            return;
        }
    };

    // Split the text into tokens
    TokenList <TokenType> tokens = tokenizer.Tokenize(text);

    // NOTE: "///*" / "//*/" below is a comment toggle — the debug dump
    // is currently ACTIVE; change "///*" to "/*" to disable it.
    ///*
    // Dump the parsed tokens (for debugging)
    foreach (Token <TokenType> token in tokens)
    {
        Debug.WriteLine(string.Format(
            "token: ({0},{1}): {2}: {3}",
            token.LineNumber, token.LineIndex, token.Type, token.Text
        ));
    }
    //*/

    return(tokens);
}
/// <summary>
/// Adds a tokenization pattern to the Tokenizer.
/// </summary>
/// <param name="name">Name of the pattern.</param>
/// <param name="patternText">Regular-expression pattern text.</param>
public void AddTokenizerPattern(string name, string patternText) =>
    Tokenizer.AddPattern(new GeneratedParser.TokenType(name), patternText);