Emits the entire input as a single token.
Inheritance: Tokenizer
 /// <summary>
 /// Builds a stemmer-override map without adding any entries and checks that the
 /// input "book" comes out of the override filter plus Porter stemmer as "book".
 /// </summary>
 public virtual void TestNoOverrides()
 {
     StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(true);
     Tokenizer source = new KeywordTokenizer(new StringReader("book"));

     // Chain: keyword tokenizer -> override filter (empty map) -> Porter stemmer.
     var overridden = new StemmerOverrideFilter(source, overrides.Build());
     TokenStream stemmed = new PorterStemFilter(overridden);

     AssertTokenStreamContents(stemmed, new[] { "book" });
 }
 /// <summary>
 /// Registers the override "boOkEd" -> "books" on a builder constructed with <c>true</c>
 /// and checks that the differently-cased input "BooKeD" is emitted as "books".
 /// </summary>
 public virtual void TestIgnoreCase()
 {
     // The override filter rewrites the token to "books" and also marks it with
     // KeywordAttribute, so the Porter stemmer that follows leaves it unchanged.
     StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(true);
     overrides.Add("boOkEd", "books");

     Tokenizer source = new KeywordTokenizer(new StringReader("BooKeD"));
     var overridden = new StemmerOverrideFilter(source, overrides.Build());
     TokenStream stemmed = new PorterStemFilter(overridden);

     AssertTokenStreamContents(stemmed, new[] { "books" });
 }
 /// <summary>
 /// Runs 10000 iterations in which a random Unicode string is tokenized as a single
 /// keyword token and passed through a <c>CodepointCountFilter</c> with random bounds.
 /// A token is expected exactly when the string's code point count lies in [min, max].
 /// </summary>
 public virtual void TestRandomStrings()
 {
     for (int i = 0; i < 10000; i++)
     {
         // Keep the three random draws in this order so seeded runs stay reproducible.
         string text = TestUtil.RandomUnicodeString(Random(), 100);
         int min = TestUtil.NextInt(Random(), 0, 100);
         int max = TestUtil.NextInt(Random(), 0, 100);
         int count = Character.CodePointCount(text, 0, text.Length);

         // The two bounds are drawn independently; swap them so that min <= max.
         if (min > max)
         {
             int temp = min;
             min = max;
             max = temp;
         }

         bool expected = count >= min && count <= max;
         TokenStream stream = new CodepointCountFilter(
             TEST_VERSION_CURRENT, new KeywordTokenizer(new StringReader(text)), min, max);
         try
         {
             stream.Reset();
             assertEquals(expected, stream.IncrementToken());
             stream.End();
         }
         finally
         {
             // Release the stream even when the assertion above throws.
             stream.Dispose();
         }
     }
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>TrimFilter</c>. A random boolean decides whether offsets are updated; when
 /// they are, the filter is created with <c>LuceneVersion.LUCENE_43</c> instead of
 /// <c>TEST_VERSION_CURRENT</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the trim filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     bool updateOffsets = Random().nextBoolean();

     LuceneVersion version;
     if (updateOffsets)
     {
         version = LuceneVersion.LUCENE_43;
     }
     else
     {
         version = TEST_VERSION_CURRENT;
     }

     var trimmed = new TrimFilter(version, source, updateOffsets);
     return new TokenStreamComponents(source, trimmed);
 }
 /// <summary>
 /// Fills an override map with random non-empty keys and random values (an empty
 /// value is replaced by "a"), builds a <c>StemmerOverrideMap</c> from it, and for a
 /// random subset of the entries checks that tokenizing the key yields the value.
 /// </summary>
 public virtual void TestRandomRealisticKeyword()
 {
     IDictionary<string, string> overrides = new Dictionary<string, string>();
     int termCount = AtLeast(50);
     for (int i = 0; i < termCount; i++)
     {
         string key = TestUtil.RandomRealisticUnicodeString(Random());
         if (key.Length == 0)
         {
             // Empty keys are skipped; no value is drawn for them.
             continue;
         }

         string value = TestUtil.RandomSimpleString(Random());
         if (value.Length == 0)
         {
             value = "a";
         }
         overrides[key] = value;
     }

     // Guarantee at least one entry to test against.
     if (overrides.Count == 0)
     {
         overrides["booked"] = "books";
     }

     StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(Random().nextBoolean());
     foreach (KeyValuePair<string, string> pair in overrides)
     {
         builder.Add(pair.Key, pair.Value);
     }
     StemmerOverrideFilter.StemmerOverrideMap overrideMap = builder.Build();

     foreach (KeyValuePair<string, string> pair in overrides)
     {
         // Only a random subset of the entries is verified.
         if (!Random().nextBoolean())
         {
             continue;
         }

         Tokenizer source = new KeywordTokenizer(new StringReader(pair.Key));
         TokenStream stemmed = new PorterStemFilter(new StemmerOverrideFilter(source, overrideMap));
         AssertTokenStreamContents(stemmed, new[] { pair.Value });
     }
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>LengthFilter</c> constructed with the bounds 0 and 5.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the length filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var lengthLimited = new LengthFilter(TEST_VERSION_CURRENT, source, 0, 5);
     return new TokenStreamComponents(source, lengthLimited);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>PortugueseLightStemFilter</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the stem filter wrapping it.</returns>
 public override Analyzer.TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var stemmed = new PortugueseLightStemFilter(source);
     return new Analyzer.TokenStreamComponents(source, stemmed);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/>, a
 /// <c>LowerCaseFilter</c>, and a <c>GermanStemFilter</c>, in that order.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the outermost (stem) filter.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var lowerCased = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
     var stemmed = new GermanStemFilter(lowerCased);
     return new TokenStreamComponents(source, stemmed);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by an <c>EnglishMinimalStemFilter</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the stem filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var stemmed = new EnglishMinimalStemFilter(source);
     return new TokenStreamComponents(source, stemmed);
 }
Example #10
0
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>SnowballFilter</c> configured with the enclosing scope's <c>lang</c> value.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the Snowball filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var stemmed = new SnowballFilter(source, lang);
     return new TokenStreamComponents(source, stemmed);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>NorwegianLightStemFilter</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the stem filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var stemmed = new NorwegianLightStemFilter(source);
     return new TokenStreamComponents(source, stemmed);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>TurkishLowerCaseFilter</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the lower-case filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var lowerCased = new TurkishLowerCaseFilter(source);
     return new TokenStreamComponents(source, lowerCased);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>ScandinavianFoldingFilter</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the folding filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var folded = new ScandinavianFoldingFilter(source);
     return new TokenStreamComponents(source, folded);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>PatternReplaceFilter</c> constructed with the compiled pattern "a", the
 /// replacement "b", and <c>true</c> as its final argument.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the replace filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     Regex pattern = new Regex("a", RegexOptions.Compiled);
     var replaced = new PatternReplaceFilter(source, pattern, "b", true);
     return new TokenStreamComponents(source, replaced);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>HyphenationCompoundWordTokenFilter</c> that uses the enclosing scope's
 /// <c>hyphenator</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the compound-word filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     return new TokenStreamComponents(
         source,
         new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, source, hyphenator));
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>DictionaryCompoundWordTokenFilter</c> that uses the enclosing scope's
 /// <c>dict</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the compound-word filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var decompounded = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, source, dict);
     return new TokenStreamComponents(source, decompounded);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by an <c>EdgeNGramTokenFilter</c> created with <c>LuceneVersion.LUCENE_43</c>,
 /// <c>Side.BACK</c>, and gram sizes 2 to 15.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the n-gram filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     // Warnings 612/618 (use of obsolete members) are suppressed for this constructor call only.
     #pragma warning disable 612, 618
     var backGrams = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, source, EdgeNGramTokenFilter.Side.BACK, 2, 15);
     #pragma warning restore 612, 618
     return new TokenStreamComponents(source, backGrams);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by an <c>EdgeNGramTokenFilter</c> created with <c>TEST_VERSION_CURRENT</c>,
 /// <c>Side.FRONT</c>, and gram sizes 2 to 15.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the n-gram filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     // Warnings 612/618 (use of obsolete members) are suppressed for this constructor call only.
     #pragma warning disable 612, 618
     var frontGrams = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, source, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
     #pragma warning restore 612, 618
     return new TokenStreamComponents(source, frontGrams);
 }
 /// <summary>
 /// Tokenizes a random Unicode string as one keyword token, runs it through an
 /// <c>EdgeNGramTokenFilter</c> with random gram sizes, and checks every emitted gram.
 /// Each gram must span the offsets of the whole input and equal the prefix of the
 /// input that contains exactly <c>i</c> code points.
 /// </summary>
 public virtual void TestSupplementaryCharacters()
 {
     string s = TestUtil.RandomUnicodeString(Random(), 10);
     int codePointCount = Character.CodePointCount(s, 0, s.Length);
     int minGram = TestUtil.NextInt(Random(), 1, 3);
     int maxGram = TestUtil.NextInt(Random(), minGram, 10);
     TokenStream tk = new KeywordTokenizer(new StringReader(s));
     tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
     try
     {
         ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
         IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
         tk.Reset();

         // The expected grams cannot be longer than the input, measured in code points.
         int largestGram = Math.Min(codePointCount, maxGram);
         for (int i = minGram; i <= largestGram; ++i)
         {
             assertTrue(tk.IncrementToken());
             assertEquals(0, offsetAtt.StartOffset());
             assertEquals(s.Length, offsetAtt.EndOffset());
             // Convert the code point count i into a UTF-16 index so surrogate pairs stay intact.
             int end = Character.OffsetByCodePoints(s, 0, i);
             assertEquals(s.Substring(0, end), termAtt.ToString());
         }
         assertFalse(tk.IncrementToken());
         tk.End();
     }
     finally
     {
         // Release the stream even when one of the assertions above throws.
         tk.Dispose();
     }
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>SynonymFilter</c> that uses the enclosing scope's <c>map</c> and
 /// <c>ignoreCase</c> values.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the synonym filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var withSynonyms = new SynonymFilter(source, map, ignoreCase);
     return new TokenStreamComponents(source, withSynonyms);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by an <c>ASCIIFoldingFilter</c> whose boolean argument is chosen at random.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the folding filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     bool randomFlag = Random().nextBoolean();
     var folded = new ASCIIFoldingFilter(source, randomFlag);
     return new TokenStreamComponents(source, folded);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>WordDelimiterFilter</c> that uses the enclosing scope's <c>flags</c> and
 /// <c>protectedWords</c> values.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the word-delimiter filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var delimited = new WordDelimiterFilter(TEST_VERSION_CURRENT, source, flags, protectedWords);
     return new TokenStreamComponents(source, delimited);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>HyphenatedWordsFilter</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the hyphenated-words filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var joined = new HyphenatedWordsFilter(source);
     return new TokenStreamComponents(source, joined);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>HunspellStemFilter</c> that uses the enclosing scope's <c>dictionary</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the stem filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var stemmed = new HunspellStemFilter(source, dictionary);
     return new TokenStreamComponents(source, stemmed);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>SnowballFilter</c> configured with the enclosing scope's
 /// <c>snowballLanguage</c> value.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the Snowball filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var stemmed = new SnowballFilter(source, snowballLanguage);
     return new TokenStreamComponents(source, stemmed);
 }
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by a <c>CapitalizationFilter</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the capitalization filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var capitalized = new CapitalizationFilter(source);
     return new TokenStreamComponents(source, capitalized);
 }
Example #27
0
 /// <summary>
 /// Builds a chain of a <c>KeywordTokenizer</c> over <paramref name="reader"/> followed
 /// by an <c>ElisionFilter</c> that uses <c>FrenchAnalyzer.DEFAULT_ARTICLES</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Source of the text to analyze.</param>
 /// <returns>The tokenizer together with the elision filter wrapping it.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new KeywordTokenizer(reader);
     var elided = new ElisionFilter(source, FrenchAnalyzer.DEFAULT_ARTICLES);
     return new TokenStreamComponents(source, elided);
 }