コード例 #1
0
        /// <summary>
        /// Runs a fixed analysis-consistency check against an anonymous analyzer whose token chain is
        /// MockTokenizer -> CommonGramsFilter and whose reader chain is MockCharFilter -> MappingCharFilter.
        /// </summary>
        public virtual void Test()
        {
            // Word set handed to the CommonGramsFilter below.
            CharArraySet commonTerms = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            commonTerms.add("jjp");
            commonTerms.add("wlmwoknt");
            commonTerms.add("tcgyreo");

            // Character-level replacements used by the MappingCharFilter below.
            NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
            mappings.Add("mtqlpi", "");
            mappings.Add("mwoknt", "jjp");
            mappings.Add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = mappings.Build();

            Analyzer analyzer = Analyzer.NewAnonymous(
                createComponents: (field, source) =>
                {
                    // The tokenizer reads through a wrapper taken from TestRandomChains.
                    // NOTE(review): the -65 argument looks like a captured random value - confirm how MockTokenizer treats it.
                    var guarded = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(source);
                    Tokenizer tokenizer = new MockTokenizer(guarded, MockTokenFilter.ENGLISH_STOPSET, false, -65);
                    TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonTerms);

                    return new TokenStreamComponents(tokenizer, grams);
                },
                initReader: (field, source) =>
                {
                    // Wrap the incoming reader in a MockCharFilter first, then apply the char map on top of it.
                    source = new MockCharFilter(source, 0);

                    return new MappingCharFilter(charMap, source);
                });

            CheckAnalysisConsistency(Random, analyzer, false, "wmgddzunizdomqyj");
        }
コード例 #2
0
            /// <summary>
            /// Builds the token chain for this analyzer: a MockTokenizer feeding a CommonGramsFilter.
            /// </summary>
            /// <param name="fieldName">Field name; not used by this implementation.</param>
            /// <param name="reader">Input reader, wrapped before being handed to the tokenizer.</param>
            /// <returns>Components pairing the tokenizer with the CommonGramsFilter built on top of it.</returns>
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                // The tokenizer reads through a wrapper taken from TestRandomChains.
                var guarded = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader);

                Tokenizer tokenizer = new MockTokenizer(guarded, MockTokenFilter.ENGLISH_STOPSET, false, -65);
                TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, cas);

                return new TokenStreamComponents(tokenizer, grams);
            }
コード例 #3
0
            /// <summary>
            /// Builds the token chain: whitespace MockTokenizer -> CommonGramsFilter -> CommonGramsQueryFilter.
            /// </summary>
            /// <param name="fieldName">Field name; not used by this implementation.</param>
            /// <param name="reader">Input reader passed to the tokenizer.</param>
            /// <returns>Components pairing the tokenizer with the CommonGramsQueryFilter at the end of the chain.</returns>
            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
                CommonGramsQueryFilter queryGrams = new CommonGramsQueryFilter(grams);

                return new TokenStreamComponents(tokenizer, queryGrams);
            }
コード例 #4
0
 /// <summary>
 /// Feeds mixed-case input through a CommonGramsFilter and checks the exact token sequence.
 /// The expected output has no "How_The", "like_A", "A_B" or "B_thing?" bigrams, so the
 /// capitalised words "The", "A" and "B" are expected NOT to match the common-word set
 /// (case-sensitive matching), while lowercase "s" and "a" still produce bigrams.
 /// The contents of commonWords are defined outside this method.
 /// </summary>
 public virtual void testCaseSensitive()
 {
     const string text = "How The s a brown s cow d like A B thing?";
     string[] expected = new[]
     {
         "How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s",
         "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"
     };

     var tokenizer = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
     TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);

     assertTokenStreamContents(grams, expected);
 }
コード例 #5
0
        /// <summary>
        /// Verifies the token sequence a CommonGramsFilter emits for mixed-case input.
        /// Lowercase "s" and "a" yield bigrams, whereas capitalised "The", "A" and "B" do not
        /// trigger any on their own (no "How_The", "like_A", "A_B", "B_thing?"), so the expectations
        /// describe case-sensitive matching against commonWords (declared outside this method).
        /// </summary>
        public virtual void testCaseSensitive()
        {
            const string sentence = "How The s a brown s cow d like A B thing?";

            var whitespaceTokens = new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false);
            TokenFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, whitespaceTokens, commonWords);

            assertTokenStreamContents(
                commonGrams,
                new[]
                {
                    "How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s",
                    "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"
                });
        }
コード例 #6
0
        /// <summary>
        /// Checks CommonGramsQueryFilter on the two-word input "the of": the chain is expected
        /// to emit exactly one token, the bigram "the_of".
        /// </summary>
        public virtual void TestFirstAndLastStopWord()
        {
            const string query = "the of";
            string[] expected = new[] { "the_of" };

            var tokenizer = new MockTokenizer(new StringReader(query), MockTokenizer.WHITESPACE, false);
            var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

            assertTokenStreamContents(queryGrams, expected);
        }
コード例 #7
0
        /// <summary>
        /// Checks CommonGramsQueryFilter on a single-word query: the input "monster" is expected
        /// to come out of the chain unchanged as the only token.
        /// </summary>
        public virtual void testOneWordQuery()
        {
            const string query = "monster";
            string[] expected = new[] { "monster" };

            var tokenizer = new MockTokenizer(new StringReader(query), MockTokenizer.WHITESPACE, false);
            var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

            assertTokenStreamContents(queryGrams, expected);
        }
コード例 #8
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testQueryReset() throws Exception
        /// <summary>
        /// Checks that a CommonGramsQueryFilter chain can be reused: after reading two tokens and
        /// closing the filter, the tokenizer gets a fresh reader over the same text, the filter is
        /// reset, and the first token is expected to be "How_the" again.
        /// </summary>
        public virtual void testQueryReset()
        {
            const string text = "How the s a brown s cow d like A B thing?";

            var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
            var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            var queryGrams = new CommonGramsQueryFilter(grams);

            // The term attribute is requested from the tokenizer but inspected while pulling tokens
            // from the outer filter. NOTE(review): presumably the chain shares attributes - confirm.
            CharTermAttribute termAtt = tokenizer.addAttribute(typeof(CharTermAttribute));

            // First pass: read the first two tokens, then close.
            queryGrams.reset();
            assertTrue(queryGrams.incrementToken());
            assertEquals("How_the", termAtt.ToString());
            assertTrue(queryGrams.incrementToken());
            assertEquals("the_s", termAtt.ToString());
            queryGrams.close();

            // Second pass: new reader over the same text, reset, and the stream starts over.
            tokenizer.Reader = new StringReader(text);
            queryGrams.reset();
            assertTrue(queryGrams.incrementToken());
            assertEquals("How_the", termAtt.ToString());
        }
コード例 #9
0
 /// <summary>
 /// Builds one constructor argument per entry of <paramref name="paramTypes"/>.
 /// </summary>
 /// <param name="random">Random source forwarded to NewRandomArg.</param>
 /// <param name="stream">Token stream used for TokenStream parameters and wrapped for CommonGramsFilter parameters.</param>
 /// <param name="paramTypes">Parameter types, in order, of the constructor being targeted.</param>
 /// <returns>An array of the same length as <paramref name="paramTypes"/>, filled position by position.</returns>
 static object[] NewFilterArgs(Random random, TokenStream stream, Type[] paramTypes)
 {
     int count = paramTypes.Length;
     object[] result = new object[count];

     for (int index = 0; index < count; index++)
     {
         Type wanted = paramTypes[index];

         // A TokenStream parameter receives the supplied stream itself.
         if (wanted == typeof(TokenStream))
         {
             result[index] = stream;
             continue;
         }

         // TODO: fix this one, that's broken: CommonGramsQueryFilter takes this one explicitly
         if (wanted == typeof(CommonGramsFilter))
         {
             CharArraySet words = NewRandomArg <CharArraySet>(random, typeof(CharArraySet));
             result[index] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, words);
             continue;
         }

         // Every other parameter type gets a randomly generated argument.
         result[index] = NewRandomArg <object>(random, wanted);
     }

     return result;
 }
コード例 #10
0
        /// <summary>
        /// Lets the base factory build its filter over <paramref name="input"/>, casts the result
        /// to CommonGramsFilter, and returns it wrapped in a CommonGramsQueryFilter.
        /// </summary>
        /// <param name="input">Token stream to filter.</param>
        /// <returns>A CommonGramsQueryFilter wrapping the filter produced by the base class.</returns>
        public override TokenFilter create(TokenStream input)
        {
            // The direct cast is intentional: it throws if the base class returns any other filter type.
            return new CommonGramsQueryFilter((CommonGramsFilter)base.create(input));
        }
コード例 #11
0
 /// <summary>
 /// Builds the token chain: whitespace MockTokenizer -> CommonGramsFilter -> CommonGramsQueryFilter.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Input reader passed to the tokenizer.</param>
 /// <returns>Components pairing the tokenizer with the outermost query filter.</returns>
 protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

     // The CommonGramsFilter is built over the tokenizer and wrapped by the query filter straight away.
     return new TokenStreamComponents(
         source,
         new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords)));
 }
コード例 #12
0
        /// <summary>
        /// Checks that a CommonGramsFilter can be reused: after reading four tokens and closing the
        /// filter, the tokenizer gets a fresh reader over the same text, the filter is reset, and
        /// the first token is expected to be "How" again.
        /// </summary>
        public virtual void testReset()
        {
            const string text = "How the s a brown s cow d like A B thing?";

            var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
            var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            CharTermAttribute termAtt = grams.addAttribute(typeof(CharTermAttribute));

            // First pass: the first four tokens alternate between plain words and bigrams.
            grams.reset();
            assertTrue(grams.incrementToken());
            assertEquals("How", termAtt.ToString());
            assertTrue(grams.incrementToken());
            assertEquals("How_the", termAtt.ToString());
            assertTrue(grams.incrementToken());
            assertEquals("the", termAtt.ToString());
            assertTrue(grams.incrementToken());
            assertEquals("the_s", termAtt.ToString());
            grams.close();

            // Second pass: new reader over the same text, reset, and the stream starts over.
            tokenizer.Reader = new StringReader(text);
            grams.reset();
            assertTrue(grams.incrementToken());
            assertEquals("How", termAtt.ToString());
        }
コード例 #13
0
 /// <summary>
 /// Checks CommonGramsQueryFilter on a query made of the single word "the": the chain is
 /// expected to emit that word unchanged as the only token.
 /// </summary>
 public virtual void testOneWordQueryStopWord()
 {
     const string query = "the";
     string[] expected = new[] { "the" };

     var tokenizer = new MockTokenizer(new StringReader(query), MockTokenizer.WHITESPACE, false);
     var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
     TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

     assertTokenStreamContents(queryGrams, expected);
 }