Beispiel #1
0
            // Builds the analysis chain for this analyzer:
            //   WikipediaTokenizer -> SopTokenFilter -> WordDelimiterFilter -> SopTokenFilter.
            // fieldName is not consulted; reader is the text source handed to the tokenizer.
            // The returned components pair the tokenizer with the outermost filter.
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new WikipediaTokenizer(reader);

                // A SopTokenFilter sits on each side of the delimiter filter.
                TokenStream beforeDelimiter = new SopTokenFilter(source);

                // `table` and `protWords` are declared outside this method; -50 is passed
                // through unchanged as the filter's configuration argument.
                TokenStream delimited = new WordDelimiterFilter(TEST_VERSION_CURRENT, beforeDelimiter, table, -50, protWords);
                TokenStream afterDelimiter = new SopTokenFilter(delimited);

                return new TokenStreamComponents(source, afterDelimiter);
            }
Beispiel #2
0
        // Runs CheckAnalysisConsistency over one fixed input string using an analyzer whose
        // chain is WikipediaTokenizer -> SopTokenFilter -> WordDelimiterFilter -> SopTokenFilter,
        // configured with a fixed character-type table, a fixed flag value and a fixed
        // protected-word set.
        public virtual void TestCuriousWikipediaString()
        {
            // Words given to the WordDelimiterFilter as its final (protected words) argument.
            var protectedTerms = new JCG.HashSet<string>
            {
                "rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha"
            };
            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, protectedTerms, false);

            // The table is written as signed bytes and then viewed as byte[] by casting through Array.
            sbyte[] signedTable = new sbyte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 };
            byte[] table = (byte[])(Array)signedTable;

            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                Tokenizer source = new WikipediaTokenizer(reader);
                TokenStream beforeDelimiter = new SopTokenFilter(source);

                // The raw integer -50 is boxed and unboxed to obtain the flags value.
                WordDelimiterFlags rawFlags = (WordDelimiterFlags)(object)-50;
                TokenStream delimited = new WordDelimiterFilter(TEST_VERSION_CURRENT, beforeDelimiter, table, rawFlags, protWords);
                TokenStream afterDelimiter = new SopTokenFilter(delimited);

                return new TokenStreamComponents(source, afterDelimiter);
            });

            // Input mixes BMP characters, a private-use character, a surrogate pair and an HTML close tag.
            const string curiousText = "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb";
            CheckAnalysisConsistency(Random, analyzer, false, curiousText);
        }