コード例 #1
0
ファイル: CustomAnalyzer.cs プロジェクト: Neo4Net/Neo4Net
        /// <summary>
        /// Builds the analysis chain for a field: a <c>WhitespaceTokenizer</c> whose
        /// output is wrapped in a <c>LowerCaseFilter</c>. Sets <c>Called</c> to
        /// <c>true</c> before the chain is constructed.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not read by this method.</param>
        /// <returns>Components pairing the tokenizer with the lower-casing filter.</returns>
        protected internal override TokenStreamComponents CreateComponents(string fieldName)
        {
            // Flag the invocation first, ahead of any object construction.
            Called = true;

            Tokenizer whitespace = new WhitespaceTokenizer();
            var lowered = new LowerCaseFilter(whitespace);
            return new TokenStreamComponents(whitespace, lowered);
        }
コード例 #2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRetainMockAttribute() throws Exception
        /// <summary>
        /// Checks that <c>MockRetainAttribute.Retain</c> is true on every token produced
        /// when a <c>MockRetainAttributeFilter</c> is wrapped by a
        /// <c>DictionaryCompoundWordTokenFilter</c>.
        /// </summary>
        public virtual void testRetainMockAttribute()
        {
            CharArraySet subwords = makeDictionary("abc", "d", "efg");
            var text = new StringReader("abcdefg");
            Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, text);

            // Chain: tokenizer -> MockRetainAttributeFilter -> DictionaryCompoundWordTokenFilter.
            TokenStream chain = new MockRetainAttributeFilter(source);
            chain = new DictionaryCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                chain,
                subwords,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
                false);
            MockRetainAttribute retained = chain.addAttribute(typeof(MockRetainAttribute));

            chain.reset();
            // Advance one token per iteration; the attribute is inspected after each advance.
            for (bool more = chain.incrementToken(); more; more = chain.incrementToken())
            {
                assertTrue("Custom attribute value was lost", retained.Retain);
            }
        }
コード例 #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testReset() throws Exception
        /// <summary>
        /// Checks that a <c>DictionaryCompoundWordTokenFilter</c> can be reused: after
        /// reading two tokens, calling end/close, giving the tokenizer a fresh reader and
        /// resetting, the first token is again the full input word.
        /// </summary>
        public virtual void testReset()
        {
            const string compound = "Rindfleischüberwachungsgesetz";

            CharArraySet dictionary = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");
            Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(compound));
            DictionaryCompoundWordTokenFilter splitter = new DictionaryCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                source,
                dictionary,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
                false);
            CharTermAttribute term = splitter.getAttribute(typeof(CharTermAttribute));

            // First pass: the original word, then the first dictionary match.
            splitter.reset();
            assertTrue(splitter.incrementToken());
            assertEquals(compound, term.ToString());
            assertTrue(splitter.incrementToken());
            assertEquals("Rind", term.ToString());
            splitter.end();
            splitter.close();

            // Second pass over a fresh reader must start from the beginning again.
            source.Reader = new StringReader(compound);
            splitter.reset();
            assertTrue(splitter.incrementToken());
            assertEquals(compound, term.ToString());
        }
コード例 #4
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testQueryReset() throws Exception
        /// <summary>
        /// Checks that a <c>CommonGramsQueryFilter</c> stacked on a <c>CommonGramsFilter</c>
        /// restarts correctly: after two tokens ("How_the", "the_s"), a close, a new reader
        /// on the tokenizer and a reset, the first token is "How_the" again.
        /// </summary>
        public virtual void testQueryReset()
        {
            const string sentence = "How the s a brown s cow d like A B thing?";

            var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(sentence));
            var commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            var queryFilter = new CommonGramsQueryFilter(commonGrams);

            // The term attribute is obtained from the tokenizer and read after each advance of the outer filter.
            CharTermAttribute termText = tokenizer.addAttribute(typeof(CharTermAttribute));

            // First pass.
            queryFilter.reset();
            assertTrue(queryFilter.incrementToken());
            assertEquals("How_the", termText.ToString());
            assertTrue(queryFilter.incrementToken());
            assertEquals("the_s", termText.ToString());
            queryFilter.close();

            // Second pass over the same text.
            tokenizer.Reader = new StringReader(sentence);
            queryFilter.reset();
            assertTrue(queryFilter.incrementToken());
            assertEquals("How_the", termText.ToString());
        }
コード例 #5
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testRandomRealisticWhiteSpace() throws java.io.IOException
        /// <summary>
        /// Randomized test of <c>StemmerOverrideFilter</c>: builds a random map of
        /// whitespace-free keys to non-empty replacement values, registers every entry with
        /// a <c>StemmerOverrideFilter.Builder</c>, feeds a random subset of the keys
        /// (space separated) through a whitespace tokenizer, the override filter and a
        /// <c>PorterStemFilter</c>, and asserts that the emitted tokens are the mapped values.
        /// </summary>
        public virtual void testRandomRealisticWhiteSpace()
        {
            IDictionary<string, string> map = new Dictionary<string, string>();
            int numTerms = atLeast(50);
            for (int i = 0; i < numTerms; i++)
            {
                string randomRealisticUnicodeString = TestUtil.randomRealisticUnicodeString(random());
                char[] charArray = randomRealisticUnicodeString.ToCharArray();

                // Named "key" rather than "builder": C# rejects a nested local that shares a
                // name with a local declared later in the enclosing scope (CS0136).
                StringBuilder key = new StringBuilder();
                for (int j = 0; j < charArray.Length;)
                {
                    // Walk by code point: a well-formed surrogate pair is two chars wide,
                    // anything else (including a lone surrogate) is one char wide.
                    bool isPair = char.IsHighSurrogate(charArray[j])
                                  && j + 1 < charArray.Length
                                  && char.IsLowSurrogate(charArray[j + 1]);
                    int width = isPair ? 2 : 1;

                    // Every Unicode whitespace character lives in the BMP, so a surrogate
                    // pair is never whitespace and is always kept.
                    // NOTE(review): .NET's char.IsWhiteSpace and Java's Character.isWhitespace
                    // disagree on a few code points (e.g. U+00A0) — confirm this matches
                    // what WhitespaceTokenizer splits on in this port.
                    if (isPair || !char.IsWhiteSpace(charArray[j]))
                    {
                        key.Append(charArray, j, width);
                    }
                    j += width;
                }

                if (key.Length > 0)
                {
                    string value = TestUtil.randomSimpleString(random());
                    // Replacement values are never empty; fall back to "a".
                    map[key.ToString()] = value.Length == 0 ? "a" : value;
                }
            }

            // Guarantee at least one override entry.
            if (map.Count == 0)
            {
                map["booked"] = "books";
            }

            StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
            StringBuilder input = new StringBuilder();
            // Declared as List<string> so ToArray() is the instance method and needs no LINQ import.
            List<string> output = new List<string>();
            foreach (KeyValuePair<string, string> entry in map)
            {
                builder.add(entry.Key, entry.Value);
                // Include a random subset of keys in the input, but always at least one.
                if (random().nextBoolean() || output.Count == 0)
                {
                    input.Append(entry.Key).Append(" ");
                    output.Add(entry.Value);
                }
            }

            Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input.ToString()));
            TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(tokenizer, builder.build()));
            assertTokenStreamContents(stream, output.ToArray());
        }
コード例 #6
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testReset() throws Exception
 /// <summary>
 /// Checks that a <c>ShingleFilter</c> (constructed with 2) over a whitespace tokenizer
 /// produces the same tokens on a second pass after the tokenizer is given a fresh reader.
 /// </summary>
 public virtual void testReset()
 {
     const string sentence = "please divide this sentence";

     Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(sentence));
     TokenStream shingles = new ShingleFilter(source, 2);

     // Expectations are identical for both passes.
     // NOTE(review): local names assume Lucene's assertTokenStreamContents parameter
     // order (terms, start offsets, end offsets, types, position increments) — confirm.
     string[] terms = new string[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" };
     int[] starts = new int[] { 0, 0, 7, 7, 14, 14, 19 };
     int[] ends = new int[] { 6, 13, 13, 18, 18, 27, 27 };
     string[] types = new string[]
     {
         TypeAttribute.DEFAULT_TYPE, "shingle",
         TypeAttribute.DEFAULT_TYPE, "shingle",
         TypeAttribute.DEFAULT_TYPE, "shingle",
         TypeAttribute.DEFAULT_TYPE
     };
     int[] increments = new int[] { 1, 0, 1, 0, 1, 0, 1 };

     assertTokenStreamContents(shingles, terms, starts, ends, types, increments);

     // Re-point the tokenizer at the same text and verify the filter again.
     source.Reader = new StringReader(sentence);
     assertTokenStreamContents(shingles, terms, starts, ends, types, increments);
 }
コード例 #7
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testReset() throws Exception
 /// <summary>
 /// Checks that an <c>NGramTokenFilter</c> constructed with (1, 1) yields the same
 /// single-character tokens on a second pass after the tokenizer receives a new reader.
 /// </summary>
 public virtual void testReset()
 {
     const string text = "abcde";

     WhitespaceTokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
     NGramTokenFilter unigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, source, 1, 1);

     // Same expectations for both passes.
     // NOTE(review): names assume the helper's parameter order is
     // (terms, start offsets, end offsets, position increments) — confirm.
     string[] expectedTerms = new string[] { "a", "b", "c", "d", "e" };
     int[] expectedStarts = new int[] { 0, 0, 0, 0, 0 };
     int[] expectedEnds = new int[] { 5, 5, 5, 5, 5 };
     int[] expectedIncrements = new int[] { 1, 0, 0, 0, 0 };

     assertTokenStreamContents(unigrams, expectedTerms, expectedStarts, expectedEnds, expectedIncrements);

     source.Reader = new StringReader(text);
     assertTokenStreamContents(unigrams, expectedTerms, expectedStarts, expectedEnds, expectedIncrements);
 }
コード例 #8
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testRetainMockAttribute() throws Exception
 /// <summary>
 /// Asserts that <c>MockRetainAttribute.Retain</c> stays true for each token emitted by a
 /// <c>DictionaryCompoundWordTokenFilter</c> that wraps a <c>MockRetainAttributeFilter</c>.
 /// </summary>
 public virtual void testRetainMockAttribute()
 {
     CharArraySet words = makeDictionary("abc", "d", "efg");
     Tokenizer whitespace = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));

     // Inner stage first, then the compound-word filter on top of it.
     TokenStream inner = new MockRetainAttributeFilter(whitespace);
     TokenStream outer = new DictionaryCompoundWordTokenFilter(
         TEST_VERSION_CURRENT,
         inner,
         words,
         CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
         false);

     MockRetainAttribute retainFlag = outer.addAttribute(typeof(MockRetainAttribute));
     outer.reset();

     while (true)
     {
         if (!outer.incrementToken())
         {
             break;
         }
         assertTrue("Custom attribute value was lost", retainFlag.Retain);
     }
 }
コード例 #9
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testReset() throws Exception
        /// <summary>
        /// Exercises reuse of a <c>DictionaryCompoundWordTokenFilter</c>: consumes the first
        /// two tokens, ends and closes the filter, swaps in a new reader, resets, and expects
        /// the first token to be the whole input word once more.
        /// </summary>
        public virtual void testReset()
        {
            const string word = "Rindfleischüberwachungsgesetz";
            CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");

            var firstReader = new StringReader(word);
            Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, firstReader);
            var compoundFilter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
                false);

            CharTermAttribute currentTerm = compoundFilter.getAttribute(typeof(CharTermAttribute));

            // Initial run: full word followed by its first sub-word.
            compoundFilter.reset();
            assertTrue(compoundFilter.incrementToken());
            assertEquals(word, currentTerm.ToString());
            assertTrue(compoundFilter.incrementToken());
            assertEquals("Rind", currentTerm.ToString());

            // Finish the stream before reusing it.
            compoundFilter.end();
            compoundFilter.close();

            // Reuse with a new reader over the same word.
            tokenizer.Reader = new StringReader(word);
            compoundFilter.reset();
            assertTrue(compoundFilter.incrementToken());
            assertEquals(word, currentTerm.ToString());
        }
コード例 #10
0
 /// <summary>
 /// Builds the analysis chain: a <c>WhitespaceTokenizer</c> (created with
 /// <c>Version.LUCENE_CURRENT</c>) over <paramref name="reader"/>, wrapped in a
 /// <c>ChineseFilter</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not read by this method.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>Components pairing the tokenizer with the filter that wraps it.</returns>
 public override TokenStreamComponents createComponents(string fieldName, Reader reader)
 {
     Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
     var filtered = new ChineseFilter(source);
     return new TokenStreamComponents(source, filtered);
 }
コード例 #11
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testReset() throws Exception
 /// <summary>
 /// Checks that a front-side <c>EdgeNGramTokenFilter</c> constructed with (1, 3) emits
 /// "a", "ab", "abc" both on the first pass and again after the tokenizer is given a
 /// new reader.
 /// </summary>
 public virtual void testReset()
 {
     const string text = "abcde";

     WhitespaceTokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
     EdgeNGramTokenFilter prefixes = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, source, EdgeNGramTokenFilter.Side.FRONT, 1, 3);

     // Shared by both passes.
     // NOTE(review): names assume the helper takes (terms, start offsets, end offsets) — confirm.
     string[] grams = new string[] { "a", "ab", "abc" };
     int[] startOffsets = new int[] { 0, 0, 0 };
     int[] endOffsets = new int[] { 5, 5, 5 };

     assertTokenStreamContents(prefixes, grams, startOffsets, endOffsets);

     source.Reader = new StringReader(text);
     assertTokenStreamContents(prefixes, grams, startOffsets, endOffsets);
 }
コード例 #12
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testReset() throws Exception
        /// <summary>
        /// Checks that a <c>CommonGramsFilter</c> restarts correctly: the first pass yields
        /// "How", "How_the", "the", "the_s"; after close, a new reader on the tokenizer and
        /// a reset, the first token is "How" again.
        /// </summary>
        public virtual void testReset()
        {
            const string sentence = "How the s a brown s cow d like A B thing?";

            var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(sentence));
            var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            CharTermAttribute text = grams.addAttribute(typeof(CharTermAttribute));

            // First pass: single words interleaved with their common-gram pairs.
            grams.reset();

            assertTrue(grams.incrementToken());
            assertEquals("How", text.ToString());

            assertTrue(grams.incrementToken());
            assertEquals("How_the", text.ToString());

            assertTrue(grams.incrementToken());
            assertEquals("the", text.ToString());

            assertTrue(grams.incrementToken());
            assertEquals("the_s", text.ToString());

            grams.close();

            // Second pass starts over from the first word.
            tokenizer.Reader = new StringReader(sentence);
            grams.reset();

            assertTrue(grams.incrementToken());
            assertEquals("How", text.ToString());
        }