コード例 #1
0
        /// <summary>
        /// Feeds the letters of a fixed sentence through a <c>ShingleFilter</c> and then an
        /// <c>EdgeNGramTokenFilter</c> constructed with the arguments 7 and 10, and checks the
        /// resulting stream against fixed expectations via <c>AssertTokenStreamContents</c>.
        /// </summary>
        public virtual void TestGraphs()
        {
            var text = new StringReader("abc d efgh ij klmno p q");

            // Build the chain inside-out: tokenizer -> shingles -> edge n-grams.
            TokenStream chain = new EdgeNGramTokenFilter(
                TEST_VERSION_CURRENT,
                new ShingleFilter(new LetterTokenizer(TEST_VERSION_CURRENT, text)),
                7,
                10);

            // Expected values, in the argument order AssertTokenStreamContents takes them.
            string[] expectedTerms = { "efgh ij", "ij klmn", "ij klmno", "klmno p" };
            int[] expectedStartOffsets = { 6, 11, 11, 14 };
            int[] expectedEndOffsets = { 13, 19, 19, 21 };
            int[] expectedPosIncrements = { 3, 1, 0, 1 };
            int[] expectedPosLengths = { 2, 2, 2, 2 };

            AssertTokenStreamContents(
                chain,
                expectedTerms,
                expectedStartOffsets,
                expectedEndOffsets,
                expectedPosIncrements,
                expectedPosLengths,
                23);
        }
コード例 #2
0
 /// <summary>
 /// Lazily splits <paramref name="value"/> into the terms emitted by a <c>LetterTokenizer</c>.
 /// </summary>
 /// <param name="value">Text to tokenize; passed straight to a <see cref="StringReader"/>.</param>
 /// <returns>
 /// The text of each token, in the order the tokenizer produces them. Because this is an
 /// iterator, no work happens (and no resources are opened) until enumeration starts.
 /// </returns>
 private static IEnumerable<string> Tokenize(string value)
 {
     using (StringReader stringReader = new StringReader(value))
     using (LetterTokenizer letterTokenizer = new LetterTokenizer(LuceneVersion.LUCENE_48, stringReader))
     {
         // The lookup does not depend on the current token, so do it once instead of per iteration.
         ICharTermAttribute termAttribute = letterTokenizer.GetAttribute<ICharTermAttribute>();

         letterTokenizer.Reset();
         while (letterTokenizer.IncrementToken())
         {
             yield return termAttribute.ToString();
         }

         // Finish the consumer workflow (Reset -> IncrementToken* -> End -> Dispose).
         // Only reached when the caller enumerates to the end; Dispose still runs otherwise.
         letterTokenizer.End();
     }
 }
コード例 #3
0
        /// <summary>
        /// Tokenizes <paramref name="inputString"/> with a <c>LetterTokenizer</c> and returns the
        /// terms that pass a <c>LengthFilter</c> constructed with the bounds 2 and
        /// <see cref="int.MaxValue"/>.
        /// </summary>
        /// <param name="inputString">Text to tokenize; handed directly to a <see cref="StringReader"/>.</param>
        /// <returns>An <see cref="ArrayList"/> holding each surviving term, in stream order.</returns>
        public static ArrayList removePunctuation(String inputString)
        {
            ArrayList tokenizedString = new ArrayList();

            StringReader reader            = new StringReader(inputString);
            Tokenizer    standardTokenizer = new LetterTokenizer(reader);

            // The using block guarantees the stream is disposed even if Reset, IncrementToken
            // or End throws; previously Dispose only ran on the success path.
            using (TokenStream tokenStream = new LengthFilter(standardTokenizer, 2, int.MaxValue))
            {
                var termAttribute = tokenStream.GetAttribute<ITermAttribute>();

                tokenStream.Reset();

                while (tokenStream.IncrementToken())
                {
                    tokenizedString.Add(termAttribute.Term);
                }

                tokenStream.End();
            }

            return tokenizedString;
        }
コード例 #4
0
 /// <summary>
 /// Creates the components for a field from a single <c>LetterTokenizer</c> over
 /// <paramref name="reader"/>; the same tokenizer instance is passed as both constructor
 /// arguments of <c>TokenStreamComponents</c>.
 /// </summary>
 /// <param name="fieldName">Not used by this implementation.</param>
 /// <param name="reader">Character source handed to the tokenizer.</param>
 /// <returns>A new <c>TokenStreamComponents</c> wrapping the tokenizer.</returns>
 protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
 {
     // No filters are layered on top, so the tokenizer is supplied twice.
     Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);

     TokenStreamComponents components = new TokenStreamComponents(letters, letters);
     return components;
 }
コード例 #5
0
            /// <summary>
            /// Wraps <paramref name="reader"/> in a <c>LetterTokenizer</c> and returns components
            /// built from that one tokenizer, supplied for both constructor arguments.
            /// </summary>
            /// <param name="fieldName">Ignored here; no per-field behavior is applied.</param>
            /// <param name="reader">Input text source for the tokenizer.</param>
            /// <returns>The <c>TokenStreamComponents</c> built around the tokenizer.</returns>
            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
            {
                Tokenizer source = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
                Tokenizer sink = source; // same instance: the chain has no filters

                return new TokenStreamComponents(source, sink);
            }