/// <summary>
/// Runs a <see cref="SynonymFilter"/> wrapped in a <see cref="LimitTokenPositionFilter"/>
/// (constructed with a limit of 3) over the text "one two three four five", once with
/// <c>consumeAll</c> set to <c>true</c> and once with it set to <c>false</c>, and asserts
/// the emitted terms together with the expected integer array passed alongside them.
/// </summary>
public virtual void TestMaxPosition3WithSynomyms()
{
    bool[] consumeAllModes = { true, false };

    foreach (bool consumeAll in consumeAllModes)
    {
        StringReader input = new StringReader("one two three four five");

        // if we are consuming all tokens, we can use the checks, otherwise we can't
        MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false)
        {
            EnableChecks = consumeAll
        };

        SynonymMap.Builder mapBuilder = new SynonymMap.Builder(true);

        // Single-word synonyms registered for "one", in this order.
        string[] singleWordSynonyms = { "first", "alpha", "beguine" };
        foreach (string synonym in singleWordSynonyms)
        {
            mapBuilder.Add(new CharsRef("one"), new CharsRef(synonym), true);
        }

        // The same CharsRef instance is handed to Join for both multi-word synonyms.
        CharsRef joined = new CharsRef();

        string[] oneWords = { "and", "indubitably", "single", "only" };
        SynonymMap.Builder.Join(oneWords, joined);
        mapBuilder.Add(new CharsRef("one"), joined, true);

        string[] twoWords = { "dopple", "ganger" };
        SynonymMap.Builder.Join(twoWords, joined);
        mapBuilder.Add(new CharsRef("two"), joined, true);

        SynonymMap synonyms = mapBuilder.Build();

        TokenStream withSynonyms = new SynonymFilter(tokenizer, synonyms, true);
        TokenStream limited = new LimitTokenPositionFilter(withSynonyms, 3, consumeAll);

        string[] expectedTerms =
        {
            "one", "first", "alpha", "beguine", "and",
            "two", "indubitably", "dopple",
            "three", "single", "ganger"
        };
        // NOTE(review): presumably the position increment of each expected term - confirm
        // against the AssertTokenStreamContents overload being called.
        int[] expectedIncrements =
        {
            1, 0, 0, 0, 0,
            1, 0, 0,
            1, 0, 0
        };

        // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
        AssertTokenStreamContents(limited, expectedTerms, expectedIncrements);
    }
}
/// <summary>
/// Adds a synonym rule to <paramref name="b"/> after replacing every run of one or more
/// space characters in <paramref name="input"/> and <paramref name="output"/> with a
/// single U+0000 character.
/// </summary>
/// <param name="b">Builder that receives the rule.</param>
/// <param name="input">Input phrase; runs of spaces are replaced with U+0000.</param>
/// <param name="output">Output phrase; runs of spaces are replaced with U+0000.</param>
/// <param name="keepOrig">Passed through unchanged as the third argument of <c>b.Add</c>.</param>
private void Add(SynonymMap.Builder b, string input, string output, bool keepOrig)
{
    string separatedInput = Regex.Replace(input, " +", "\u0000");
    CharsRef inputRef = new CharsRef(separatedInput);

    string separatedOutput = Regex.Replace(output, " +", "\u0000");
    CharsRef outputRef = new CharsRef(separatedOutput);

    b.Add(inputRef, outputRef, keepOrig);
}