/// <summary>
/// Feeds random data through a <see cref="SynonymFilter"/> whose synonym map is
/// populated with randomly generated entries.
/// </summary>
/// <remarks>
/// Every iteration builds a fresh <see cref="SynonymMap"/> from at least ten random,
/// non-empty string pairs, wraps it in an anonymous analyzer
/// (MockTokenizer -> SynonymFilter -> RemoveDuplicatesTokenFilter) and hands that
/// analyzer to <c>CheckRandomData</c> together with the value 200.
/// </remarks>
public virtual void TestRandomStrings()
{
    int iterationCount = AtLeast(10);
    for (int iteration = 0; iteration < iterationCount; iteration++)
    {
        SynonymMap.Builder mapBuilder = new SynonymMap.Builder(Random.nextBoolean());

        int entryCount = AtLeast(10);
        for (int entry = 0; entry < entryCount; entry++)
        {
            // The three random draws happen in the same order as the arguments are passed.
            var first = RandomNonEmptyString();
            var second = RandomNonEmptyString();
            var flag = Random.nextBoolean();
            Add(mapBuilder, first, second, flag);
        }

        SynonymMap map = mapBuilder.Build();
        bool ignoreCase = Random.nextBoolean();

        Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
        {
            Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
            TokenStream withSynonyms = new SynonymFilter(source, map, ignoreCase);
            return new TokenStreamComponents(source, new RemoveDuplicatesTokenFilter(withSynonyms));
        });

        CheckRandomData(Random, analyzer, 200);
    }
}
/// <summary>
/// Checks the tokens produced when a <see cref="LimitTokenPositionFilter"/> constructed
/// with the value 3 wraps a <see cref="SynonymFilter"/> that injects single-word and
/// multi-word synonyms.
/// </summary>
/// <remarks>
/// The scenario is executed twice, once with <c>consumeAll</c> set to <c>true</c> and
/// once with it set to <c>false</c>.
/// </remarks>
public virtual void TestMaxPosition3WithSynomyms()
{
    string[] expectedTerms =
    {
        "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"
    };
    int[] expectedPositionIncrements = { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 };

    foreach (bool consumeAll in new[] { true, false })
    {
        MockTokenizer source = new MockTokenizer(
            new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
        // if we are consuming all tokens, we can use the checks, otherwise we can't
        source.EnableChecks = consumeAll;

        SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
        synonyms.Add(new CharsRef("one"), new CharsRef("first"), true);
        synonyms.Add(new CharsRef("one"), new CharsRef("alpha"), true);
        synonyms.Add(new CharsRef("one"), new CharsRef("beguine"), true);

        // A single CharsRef is reused for both multi-word synonyms: each Join call
        // targets it immediately before the corresponding Add.
        CharsRef joined = new CharsRef();
        SynonymMap.Builder.Join(new string[] { "and", "indubitably", "single", "only" }, joined);
        synonyms.Add(new CharsRef("one"), joined, true);
        SynonymMap.Builder.Join(new string[] { "dopple", "ganger" }, joined);
        synonyms.Add(new CharsRef("two"), joined, true);

        SynonymMap map = synonyms.Build();
        TokenStream withSynonyms = new SynonymFilter(source, map, true);
        TokenStream limited = new LimitTokenPositionFilter(withSynonyms, 3, consumeAll);

        // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted,
        // since its position is greater than 3.
        AssertTokenStreamContents(limited, expectedTerms, expectedPositionIncrements);
    }
}
/// <summary>
/// Builds the analysis chain for a field: a <see cref="MockTokenizer"/> over
/// <paramref name="reader"/>, followed by a <see cref="SynonymFilter"/> and a
/// <see cref="RemoveDuplicatesTokenFilter"/>.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components pairing the tokenizer with the end of the filter chain.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

    // Synonyms are injected first, then duplicates are stripped from the result.
    TokenStream withSynonyms = new SynonymFilter(source, map, ignoreCase);
    RemoveDuplicatesTokenFilter deduplicated = new RemoveDuplicatesTokenFilter(withSynonyms);

    return new TokenStreamComponents(source, deduplicated);
}
/// <summary>
/// Creates the token stream for a field: standard tokenization, lower-casing, two
/// stop-word passes and finally synonym injection.
/// </summary>
/// <remarks>
/// The first stop filter uses <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c>; the second
/// uses this analyzer's own <c>stoptable</c>.
/// </remarks>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The <see cref="SynonymFilter"/> that ends the chain.</returns>
public override TokenStream TokenStream(String fieldName, System.IO.TextReader reader)
{
    TokenStream tokenized = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader);
    TokenStream standardized = new StandardFilter(tokenized);
    TokenStream lowerCased = new LowerCaseFilter(standardized);

    TokenStream withoutEnglishStops = new StopFilter(true, lowerCased, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    TokenStream withoutCustomStops = new StopFilter(true, withoutEnglishStops, stoptable);

    // Stemming is currently disabled; the previously tried variants were
    // GermanStemFilter(result, excltable) and PorterStemFilter(result).

    // Injects the synonyms.
    return new SynonymFilter(withoutCustomStops, SynonymEngine);
}
/// <summary>
/// Creates the token stream for a field: StandardTokenizer -> StandardFilter ->
/// LowerCaseFilter -> StopFilter (English stop words) -> SynonymFilter.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The <see cref="SynonymFilter"/> that ends the chain.</returns>
public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
{
    // Each stage is built in the same order the nested constructor calls would evaluate.
    var tokenizer = new StandardTokenizer(this.version, reader);
    var standardized = new StandardFilter(tokenizer);
    var lowerCased = new LowerCaseFilter(standardized);
    var withoutStops = new StopFilter(true, lowerCased, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    TokenStream result = new SynonymFilter(withoutStops, engine);
    return result;
}
/// <summary>
/// Creates the token stream for a field: standard tokenization, lower-casing,
/// English stop-word removal and synonym injection.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The <see cref="SynonymFilter"/> that ends the chain.</returns>
public override TokenStream TokenStream (string fieldName, System.IO.TextReader reader)
{
    // Tokenize the input, then normalize the StandardTokenizer output.
    TokenStream tokenized = new StandardTokenizer(reader);
    TokenStream standardized = new StandardFilter(tokenized);

    // Make sure everything is lower case.
    TokenStream lowerCased = new LowerCaseFilter(standardized);

    // Drop the default stop words provided by the StopAnalyzer class.
    TokenStream withoutStops = new StopFilter(lowerCased, StopAnalyzer.ENGLISH_STOP_WORDS);

    // Inject the synonyms and return the finished chain.
    return new SynonymFilter(withoutStops, SynonymEngine);
}
/// <summary>
/// Creates the token stream for a field: StandardTokenizer -> LowerCaseFilter ->
/// SynonymFilter.
/// </summary>
/// <remarks>
/// A new <c>SynonymEngine</c> instance is created on every call.
/// </remarks>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The <see cref="SynonymFilter"/> that ends the chain.</returns>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    TokenStream tokenized = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader);
    TokenStream lowerCased = new LowerCaseFilter(tokenized);

    return new SynonymFilter(lowerCased, new SynonymEngine());
}