예제 #1
0
 /// <summary>
 /// Attempts to add a pattern that is expected to be rejected, and
 /// reports a test failure if the tokenizer accepted it.
 /// </summary>
 /// <param name="tokenizer">The tokenizer under test.</param>
 /// <param name="pattern">The pattern expected to be rejected.</param>
 private void FailAddPattern(Tokenizer tokenizer, TokenPattern pattern)
 {
     try
     {
         tokenizer.AddPattern(pattern);
         Fail("could add pattern " + pattern.Name);
     }
     catch (ParserCreationException)
     {
         // Expected: the pattern was rejected.
     }
 }
예제 #2
0
 /// <summary>
 /// Adds a pattern to the tokenizer and reports a test failure if the
 /// tokenizer rejected it.
 /// </summary>
 /// <param name="tokenizer">The tokenizer under test.</param>
 /// <param name="pattern">The pattern to add.</param>
 private void AddPattern(Tokenizer tokenizer, TokenPattern pattern)
 {
     try
     {
         tokenizer.AddPattern(pattern);
     }
     catch (ParserCreationException ex)
     {
         Fail("couldn't add pattern " + pattern.Name + ": " +
              ex.Message);
     }
 }
예제 #3
0
        /// <summary>
        /// Verifies that a pattern added WITHOUT the separator requirement
        /// matches digits even when they are embedded inside a word.
        /// </summary>
        public void ParsePatternWithoutSeparators()
        {
            // Arrange: int pattern with requireSeparators = false,
            // plus a case-sensitive "Alsing" literal token.
            var tokenizer = new Tokenizer();
            tokenizer.AddPattern(IntPatternMatcher.Default, true, false, TestTag);
            tokenizer.AddToken("Alsing", false, true, TestTag);

            const string text = @"The quick brown 1337 fox jumped 0v3r the little pig";
            //                                    XXXX            X X

            // Act
            tokenizer.Text = text;
            var allTokens = tokenizer.Tokenize();

            // Assert: "1337" plus the "0" and "3" embedded in "0v3r" are
            // all matched, since no separators are required around them.
            var taggedCount = allTokens
                .Where(token => token.HasTag(TestTag))
                .ToList()
                .Count;
            Assert.AreEqual(3, taggedCount);
        }
예제 #4
0
        /// <summary>
        /// Verifies that a pattern added WITH the separator requirement
        /// only matches digit runs that are delimited by separators.
        /// </summary>
        public void ParsePattern()
        {
            // Arrange: int pattern with requireSeparators = true,
            // plus a case-sensitive "Alsing" literal token.
            var tokenizer = new Tokenizer();
            tokenizer.AddPattern(IntPatternMatcher.Default, true, true, TestTag);
            tokenizer.AddToken("Alsing", false, true, TestTag);

            const string text = @"The quick brown 1337 fox jumped 0v3r the little pig 1234";
            //                                    XXXX            - -                 XXXX

            // only two tokens should be found, the rest
            // are either wrong casing or do not have separators next to them

            // Act
            tokenizer.Text = text;
            var allTokens = tokenizer.Tokenize();

            // Assert: only "1337" and "1234" qualify; the digits inside
            // "0v3r" are not surrounded by separators.
            var taggedCount = allTokens
                .Where(token => token.HasTag(TestTag))
                .ToList()
                .Count;
            Assert.AreEqual(2, taggedCount);
        }
예제 #5
0
        /// <summary>
        /// Tokenizes the given text into a typed token list.
        /// NOTE(review): the original summary said "JSON file", but the
        /// patterns below ((* ... *) comments, =, |, ?, [ ] groups) look
        /// like an EBNF-style grammar rather than JSON — confirm intended
        /// input format against the callers.
        /// </summary>
        /// <param name="text">Source text to tokenize.</param>
        /// <returns>The token list, with comments, newlines and spaces removed.</returns>
        public static TokenList <TokenType> Tokenize(string text)
        {
            // Set up the Tokenizer object
            Tokenizer <TokenType> tokenizer = new Tokenizer <TokenType>();

            // Register the tokenization rules.
            // NOTE(review): presumably patterns are tried in registration
            // order, so OpenComment "(*" / CloseComment "*)" must precede
            // OpenParen "(" / CloseParen ")" — confirm Tokenizer semantics.
            tokenizer.AddPattern(TokenType.NewLine, "\r\n|\r|\n");
            tokenizer.AddPattern(TokenType.Comma, ",");
            tokenizer.AddPattern(TokenType.Semicolon, ";");
            tokenizer.AddPattern(TokenType.Equal, "=");
            tokenizer.AddPattern(TokenType.Minus, "-");
            tokenizer.AddPattern(TokenType.Or, @"\|");
            tokenizer.AddPattern(TokenType.Question, @"\?");
            tokenizer.AddPattern(TokenType.OpenComment, @"\(\*");
            tokenizer.AddPattern(TokenType.CloseComment, @"\*\)");
            tokenizer.AddPattern(TokenType.OpenParen, @"\(");
            tokenizer.AddPattern(TokenType.CloseParen, @"\)");
            tokenizer.AddPattern(TokenType.OpenBrace, "{");
            tokenizer.AddPattern(TokenType.CloseBrace, "}");
            tokenizer.AddPattern(TokenType.OpenBracket, @"\[");
            tokenizer.AddPattern(TokenType.CloseBracket, @"\]");
            tokenizer.AddPattern(TokenType.String, @"(""((?<=\\)""|[^\r\n""])*"")|('([^'])*')");
            tokenizer.AddPattern(TokenType.Space, @" +");
            tokenizer.AddPattern(TokenType.Name, @"([^\s=""{}\[\]()\|*,;?-]|[ ])+");

            // Event raised just before a token is appended to the list;
            // setting e.Cancel = true drops the token.
            bool commentStarted = false;    // true while inside a (* ... *) comment

            tokenizer.BeforeAddToken += (object sender, BeforeAddTokenEventArgs <TokenType> e) => {
                // Strip comments at the tokenizer level: swallow both
                // delimiters and everything between them.
                if (e.TokenMatch.Type == TokenType.OpenComment)
                {
                    commentStarted = true;
                    e.Cancel       = true;
                    return;
                }
                if (e.TokenMatch.Type == TokenType.CloseComment)
                {
                    commentStarted = false;
                    e.Cancel       = true;
                    return;
                }
                if (commentStarted)
                {
                    e.Cancel = true;
                    return;
                }

                // Drop newlines
                if (e.TokenMatch.Type == TokenType.NewLine)
                {
                    e.Cancel = true;
                    return;
                }
                // Drop spaces
                if (e.TokenMatch.Type == TokenType.Space)
                {
                    e.Cancel = true;
                    return;
                }
                // Names: trim surrounding spaces (the Name pattern above
                // can match embedded " " characters)
                if (e.TokenMatch.Type == TokenType.Name)
                {
                    e.TokenMatch.Text = e.TokenMatch.Text.Trim(' ');
                    return;
                }
            };

            // Perform the tokenization
            TokenList <TokenType> tokens = tokenizer.Tokenize(text);

            // Comment toggle: "///*" keeps the debug dump below ACTIVE;
            // deleting one leading "/" (making it "/*") comments it out
            // up to the matching "//*/" line.
            ///*
            // Dump the parsed tokens (debug only)
            foreach (Token <TokenType> token in tokens)
            {
                Debug.WriteLine(string.Format(
                                    "token: ({0},{1}): {2}: {3}",
                                    token.LineNumber, token.LineIndex,
                                    token.Type, token.Text
                                    ));
            }
            //*/

            return(tokens);
        }
예제 #6
0
 /// <summary>
 /// Registers an additional token pattern with the underlying Tokenizer.
 /// </summary>
 /// <param name="name">Name of the new token type.</param>
 /// <param name="patternText">Regular-expression pattern text.</param>
 public void AddTokenizerPattern(string name, string patternText)
 {
     var tokenType = new GeneratedParser.TokenType(name);
     Tokenizer.AddPattern(tokenType, patternText);
 }