/// <summary>
/// With no overrides registered, the StemmerOverrideFilter must pass tokens
/// through untouched: "book" survives the downstream PorterStemFilter unchanged
/// (it is already a stem), proving the empty override map is a no-op.
/// Originally declared in Java as: throws java.io.IOException.
/// </summary>
public virtual void testNoOverrides()
{
    StemmerOverrideFilter.Builder noOverrides = new StemmerOverrideFilter.Builder(true);
    Tokenizer source = new KeywordTokenizer(new StringReader("book"));
    TokenStream chain = new PorterStemFilter(new StemmerOverrideFilter(source, noOverrides.build()));
    assertTokenStreamContents(chain, new string[] { "book" });
}
/// <summary>
/// Builds the analysis chain under test: a whole-input
/// <see cref="KeywordTokenizer"/> feeding a
/// <see cref="HyphenationCompoundWordTokenFilter"/> driven by the shared
/// <c>hyphenator</c> field.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenFilter compounds = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, source, hyphenator);
    return new TokenStreamComponents(source, compounds);
}
/// <summary>
/// Registers the override "booked" → "books" and checks that the override
/// filter both rewrites the token and marks it with KeywordAttribute, so the
/// downstream PorterStemFilter leaves "books" alone instead of stemming it.
/// Originally declared in Java as: throws java.io.IOException.
/// </summary>
public virtual void testOverride()
{
    // Make "booked" stem to "books"; the keyword marking protects it from Porter.
    StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder();
    overrides.add("booked", "books");
    Tokenizer source = new KeywordTokenizer(new StringReader("booked"));
    TokenStream chain = new PorterStemFilter(new StemmerOverrideFilter(source, overrides.build()));
    assertTokenStreamContents(chain, new string[] { "books" });
}
/// <summary>
/// Fuzz test for <see cref="CodepointCountFilter"/>: for 10,000 random Unicode
/// strings with random [min, max] bounds, the filter must emit the single
/// keyword token exactly when the string's code-point count lies in range.
/// Originally declared in Java as: throws java.io.IOException.
/// </summary>
public virtual void testRandomStrings()
{
    for (int iteration = 0; iteration < 10000; iteration++)
    {
        string text = TestUtil.randomUnicodeString(random(), 100);
        int lower = TestUtil.Next(random(), 0, 100);
        int upper = TestUtil.Next(random(), 0, 100);
        int codePoints = text.codePointCount(0, text.Length);
        // Normalize so lower <= upper before applying the filter.
        if (lower > upper)
        {
            int swap = lower;
            lower = upper;
            upper = swap;
        }
        bool shouldPass = codePoints >= lower && codePoints <= upper;
        TokenStream stream = new KeywordTokenizer(new StringReader(text));
        stream = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, lower, upper);
        stream.reset();
        assertEquals(shouldPass, stream.incrementToken());
        stream.end();
        stream.close();
    }
}
/// <summary>
/// Randomized check of <see cref="CodepointCountFilter"/> acceptance: a token
/// is kept iff its code-point count falls within the (sorted) random bounds.
/// Repeats 10,000 times with fresh random inputs each iteration.
/// Originally declared in Java as: throws java.io.IOException.
/// </summary>
public virtual void testRandomStrings()
{
    for (int round = 0; round < 10000; round++)
    {
        string input = TestUtil.randomUnicodeString(random(), 100);
        int boundA = TestUtil.Next(random(), 0, 100);
        int boundB = TestUtil.Next(random(), 0, 100);
        int cpCount = input.codePointCount(0, input.Length);
        // Order the two random bounds into a valid [min, max] interval.
        int minBound = boundA <= boundB ? boundA : boundB;
        int maxBound = boundA <= boundB ? boundB : boundA;
        bool expectToken = cpCount >= minBound && cpCount <= maxBound;
        TokenStream ts = new KeywordTokenizer(new StringReader(input));
        ts = new CodepointCountFilter(TEST_VERSION_CURRENT, ts, minBound, maxBound);
        ts.reset();
        assertEquals(expectToken, ts.incrementToken());
        ts.end();
        ts.close();
    }
}
/// <summary>
/// Analysis chain: keyword-tokenize the whole input, lowercase it, then apply
/// <see cref="GermanStemFilter"/> (which expects lowercase input).
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream lowercased = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
    return new TokenStreamComponents(source, new GermanStemFilter(lowercased));
}
/// <summary>
/// Analysis chain (reader-less API variant): one keyword token for the entire
/// input, folded to lowercase by <see cref="LowerCaseFilter"/>.
/// </summary>
protected internal override TokenStreamComponents CreateComponents(string fieldName)
{
    KeywordTokenizer tokenizer = new KeywordTokenizer();
    TokenStream lowered = new LowerCaseFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, lowered);
}
/// <summary>
/// Analysis chain: whole-input keyword token passed through
/// <see cref="SwedishLightStemFilter"/>.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new SwedishLightStemFilter(source));
}
/// <summary>
/// Analysis chain: keyword tokenizer filtered by
/// <see cref="CodepointCountFilter"/>, keeping only tokens of 0–5 code points.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream bounded = new CodepointCountFilter(TEST_VERSION_CURRENT, source, 0, 5);
    return new TokenStreamComponents(source, bounded);
}
/// <summary>
/// Analysis chain: whole-input keyword token stemmed by
/// <see cref="IndonesianStemFilter"/> with its default settings.
/// </summary>
public override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new IndonesianStemFilter(source));
}
/// <summary>
/// Builds a random override map (at least 50 candidate entries; guaranteed
/// non-empty via a "booked" → "books" fallback) and spot-checks that, for a
/// random subset of keys, the StemmerOverrideFilter emits exactly the mapped
/// value even with PorterStemFilter downstream.
/// Originally declared in Java as: throws java.io.IOException.
/// </summary>
public virtual void testRandomRealisticKeyword()
{
    IDictionary<string, string> overrides = new Dictionary<string, string>();
    int candidates = atLeast(50);
    for (int i = 0; i < candidates; i++)
    {
        string key = TestUtil.randomRealisticUnicodeString(random());
        if (key.Length > 0)
        {
            string mapped = TestUtil.randomSimpleString(random());
            // Never map to the empty string; substitute "a" instead.
            overrides[key] = mapped.Length == 0 ? "a" : mapped;
        }
    }
    // Random generation may have produced nothing usable; seed one known pair.
    if (overrides.Count == 0)
    {
        overrides["booked"] = "books";
    }
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
    ISet<KeyValuePair<string, string>> pairs = overrides.SetOfKeyValuePairs();
    foreach (KeyValuePair<string, string> pair in pairs)
    {
        builder.add(pair.Key, pair.Value);
    }
    StemmerOverrideMap compiled = builder.build();
    foreach (KeyValuePair<string, string> pair in pairs)
    {
        // Only verify a random subset to keep the test fast.
        if (random().nextBoolean())
        {
            Tokenizer source = new KeywordTokenizer(new StringReader(pair.Key));
            TokenStream chain = new PorterStemFilter(new StemmerOverrideFilter(source, compiled));
            assertTokenStreamContents(chain, new string[] { pair.Value });
        }
    }
}
/// <summary>
/// Analysis chain: keyword tokenizer followed by
/// <see cref="ScandinavianNormalizationFilter"/>.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new ScandinavianNormalizationFilter(source));
}
/// <summary>
/// Analysis chain: keyword tokenizer stemmed by <see cref="SnowballFilter"/>
/// for the language named by the <c>snowballLanguage</c> field.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new SnowballFilter(source, snowballLanguage));
}
/// <summary>
/// Analysis chain: keyword tokenizer feeding an
/// <see cref="EdgeNGramTokenFilter"/> pinned to <c>Version.LUCENE_43</c>,
/// producing BACK-side grams of 2–15 characters.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream grams = new EdgeNGramTokenFilter(Version.LUCENE_43, source, EdgeNGramTokenFilter.Side.BACK, 2, 15);
    return new TokenStreamComponents(source, grams);
}
/// <summary>
/// Analysis chain: keyword tokenizer stemmed by <see cref="SnowballFilter"/>
/// for the language held in the <c>lang</c> field.
/// </summary>
protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new Analyzer.TokenStreamComponents(source, new SnowballFilter(source, lang));
}
/// <summary>
/// Analysis chain: keyword tokenizer feeding an
/// <see cref="EdgeNGramTokenFilter"/> producing FRONT-side grams of 2–15
/// characters under <c>TEST_VERSION_CURRENT</c>.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream grams = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, source, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
    return new TokenStreamComponents(source, grams);
}
/// <summary>
/// Analysis chain: keyword tokenizer stemmed by
/// <see cref="IndonesianStemFilter"/> with derivational stemming disabled
/// (the <c>false</c> argument).
/// </summary>
public override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new IndonesianStemFilter(source, false));
}
/// <summary>
/// Analysis chain: keyword tokenizer followed by <see cref="ElisionFilter"/>
/// configured with the default French article set.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream elided = new ElisionFilter(source, FrenchAnalyzer.DEFAULT_ARTICLES);
    return new TokenStreamComponents(source, elided);
}
/// <summary>
/// Analysis chain: keyword tokenizer followed by <see cref="SynonymFilter"/>
/// using the shared <c>map</c> and <c>ignoreCase</c> fields.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream synonyms = new SynonymFilter(source, map, ignoreCase);
    return new TokenStreamComponents(source, synonyms);
}
/// <summary>
/// Analysis chain: keyword tokenizer followed by
/// <see cref="CollationKeyFilter"/> driven by the <c>_collator</c> field.
/// </summary>
public override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new CollationKeyFilter(source, _collator));
}
/// <summary>
/// Analysis chain: keyword tokenizer stemmed by
/// <see cref="PortugueseLightStemFilter"/>.
/// </summary>
protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new Analyzer.TokenStreamComponents(source, new PortugueseLightStemFilter(source));
}
/// <summary>
/// Analysis chain: keyword tokenizer stemmed by
/// <see cref="HunspellStemFilter"/> using the shared <c>dictionary</c> field.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new HunspellStemFilter(source, dictionary));
}
/// <summary>
/// Analysis chain: keyword tokenizer followed by
/// <see cref="PatternReplaceFilter"/> replacing every "a" with "b"
/// (replaceAll = true).
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream replaced = new PatternReplaceFilter(source, Pattern.compile("a"), "b", true);
    return new TokenStreamComponents(source, replaced);
}
/// <summary>
/// Analysis chain: keyword tokenizer feeding a
/// <see cref="DictionaryCompoundWordTokenFilter"/> backed by the shared
/// <c>dict</c> field.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream compounds = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, source, dict);
    return new TokenStreamComponents(source, compounds);
}
/// <summary>
/// Analysis chain: keyword tokenizer followed by
/// <see cref="ScandinavianFoldingFilter"/>.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new ScandinavianFoldingFilter(source));
}
/// <summary>
/// Analysis chain: whole-input keyword token decomposed by
/// <see cref="DictionaryCompoundWordTokenFilter"/> using the shared
/// <c>dict</c> field.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer keyword = new KeywordTokenizer(reader);
    return new TokenStreamComponents(keyword, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, keyword, dict));
}
/// <summary>
/// Analysis chain: keyword tokenizer split by
/// <see cref="WordDelimiterFilter"/> configured from the shared <c>flags</c>
/// and <c>protectedWords</c> fields.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream delimited = new WordDelimiterFilter(TEST_VERSION_CURRENT, source, flags, protectedWords);
    return new TokenStreamComponents(source, delimited);
}
/// <summary>
/// Analysis chain: keyword tokenizer folded by
/// <see cref="ASCIIFoldingFilter"/>; the preserveOriginal flag is chosen at
/// random per analyzer construction.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream folded = new ASCIIFoldingFilter(source, random().nextBoolean());
    return new TokenStreamComponents(source, folded);
}
/// <summary>
/// Analysis chain: keyword tokenizer normalized by
/// <see cref="IndicNormalizationFilter"/>.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new IndicNormalizationFilter(source));
}
/// <summary>
/// Analysis chain: whole-input keyword token restricted by
/// <see cref="CodepointCountFilter"/> to 0–5 code points.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer keyword = new KeywordTokenizer(reader);
    return new TokenStreamComponents(keyword, new CodepointCountFilter(TEST_VERSION_CURRENT, keyword, 0, 5));
}
/// <summary>
/// Analysis chain: whole-input keyword token decomposed by
/// <see cref="HyphenationCompoundWordTokenFilter"/> using the shared
/// <c>hyphenator</c> field.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer keyword = new KeywordTokenizer(reader);
    TokenFilter hyphenated = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, keyword, hyphenator);
    return new TokenStreamComponents(keyword, hyphenated);
}
/// <summary>
/// Analysis chain: keyword tokenizer, lowercased, then stemmed by
/// <see cref="GermanStemFilter"/> (which requires lowercase input).
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer keyword = new KeywordTokenizer(reader);
    TokenStream lowered = new LowerCaseFilter(TEST_VERSION_CURRENT, keyword);
    return new TokenStreamComponents(keyword, new GermanStemFilter(lowered));
}
/// <summary>
/// Checks <see cref="NGramTokenFilter"/> against inputs that may contain
/// supplementary (surrogate-pair) characters: every emitted gram must equal
/// the corresponding code-point slice of the input, and offsets must always
/// span the whole input (KeywordTokenizer emits a single token).
/// Originally declared in Java as: throws java.io.IOException.
/// </summary>
public virtual void testSupplementaryCharacters()
{
    string input = TestUtil.randomUnicodeString(random(), 10);
    int codePointCount = input.codePointCount(0, input.Length);
    int minGram = TestUtil.Next(random(), 1, 3);
    int maxGram = TestUtil.Next(random(), minGram, 10);
    TokenStream stream = new KeywordTokenizer(new StringReader(input));
    stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, minGram, maxGram);
    CharTermAttribute termAtt = stream.addAttribute(typeof(CharTermAttribute));
    OffsetAttribute offsetAtt = stream.addAttribute(typeof(OffsetAttribute));
    stream.reset();
    for (int start = 0; start < codePointCount; ++start)
    {
        for (int end = start + minGram; end <= Math.Min(codePointCount, start + maxGram); ++end)
        {
            assertTrue(stream.incrementToken());
            // Offsets always cover the entire input token, regardless of gram bounds.
            assertEquals(0, offsetAtt.startOffset());
            assertEquals(input.Length, offsetAtt.endOffset());
            // Convert code-point positions to UTF-16 indices before slicing,
            // so surrogate pairs are never split.
            int sliceStart = char.offsetByCodePoints(input, 0, start);
            int sliceEnd = char.offsetByCodePoints(input, 0, end);
            assertEquals(input.Substring(sliceStart, sliceEnd - sliceStart), termAtt.ToString());
        }
    }
    // All grams consumed: the stream must be exhausted.
    assertFalse(stream.incrementToken());
}
/// <summary>
/// Analysis chain: keyword tokenizer stemmed by
/// <see cref="PortugueseMinimalStemFilter"/>.
/// </summary>
protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new Analyzer.TokenStreamComponents(source, new PortugueseMinimalStemFilter(source));
}
/// <summary>
/// Analysis chain: keyword tokenizer stemmed by <see cref="HindiStemFilter"/>.
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = new KeywordTokenizer(reader);
    return new TokenStreamComponents(source, new HindiStemFilter(source));
}
/// <summary>
/// Analysis chain: keyword tokenizer with <see cref="PatternReplaceFilter"/>
/// rewriting all occurrences of "a" to "b".
/// </summary>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer keyword = new KeywordTokenizer(reader);
    return new TokenStreamComponents(keyword, new PatternReplaceFilter(keyword, Pattern.compile("a"), "b", true));
}