Call Add() until you have added all the mappings, then call Build() to get an FSTSynonymMap. @lucene.experimental
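For orientation, here is a minimal sketch of that workflow, mirroring how the tests below wire a SynonymMap into an analysis chain. The sample mappings ("fast" -> "quick", "dns" -> "domain name service") are illustrative only and do not appear in the tests:

// A minimal, hypothetical Builder workflow; the mappings are illustrative only.
SynonymMap.Builder builder = new SynonymMap.Builder(true); // true = dedup repeated rules
builder.Add(new CharsRef("fast"), new CharsRef("quick"), true); // true = also keep the original token

// Multi-word phrases must first be joined into a single CharsRef:
CharsRef multiWord = new CharsRef();
SynonymMap.Builder.Join(new string[] { "domain", "name", "service" }, multiWord);
builder.Add(new CharsRef("dns"), multiWord, true);

SynonymMap map = builder.Build(); // builds the FST-backed map

// The finished map then backs a SynonymFilter in a token stream, as the tests below do:
Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true)); // true = ignoreCase
});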
public virtual void TestEmptyTerm()
{
    Random random = Random;
    int numIters = AtLeast(10);
    for (int i = 0; i < numIters; i++)
    {
        b = new SynonymMap.Builder(random.NextBoolean());
        int numEntries = AtLeast(10);
        for (int j = 0; j < numEntries; j++)
        {
            Add(RandomNonEmptyString(), RandomNonEmptyString(), random.NextBoolean());
        }
        SynonymMap map = b.Build();
        bool ignoreCase = random.NextBoolean();
        Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        });
        CheckAnalysisConsistency(random, analyzer, random.NextBoolean(), "");
    }
}
public virtual void TestMatching()
{
    b = new SynonymMap.Builder(true);
    const bool keepOrig = false;
    Add("a b", "ab", keepOrig);
    Add("a c", "ac", keepOrig);
    Add("a", "aa", keepOrig);
    Add("b", "bb", keepOrig);
    Add("z x c v", "zxcv", keepOrig);
    Add("x c", "xc", keepOrig);
    SynonymMap map = b.Build();
    Analyzer a = new AnalyzerAnonymousInnerClassHelper6(this, map);
    CheckOneTerm(a, "$", "$");
    CheckOneTerm(a, "a", "aa");
    CheckOneTerm(a, "b", "bb");
    AssertAnalyzesTo(a, "a $", new string[] { "aa", "$" }, new int[] { 1, 1 });
    AssertAnalyzesTo(a, "$ a", new string[] { "$", "aa" }, new int[] { 1, 1 });
    AssertAnalyzesTo(a, "a a", new string[] { "aa", "aa" }, new int[] { 1, 1 });
    AssertAnalyzesTo(a, "z x c v", new string[] { "zxcv" }, new int[] { 1 });
    AssertAnalyzesTo(a, "z x c $", new string[] { "z", "xc", "$" }, new int[] { 1, 1, 1 });
}
public virtual void TestRandom2GraphAfter()
{
    int numIters = AtLeast(3);
    Random random = Random;
    for (int i = 0; i < numIters; i++)
    {
        b = new SynonymMap.Builder(random.NextBoolean());
        int numEntries = AtLeast(10);
        for (int j = 0; j < numEntries; j++)
        {
            Add(RandomNonEmptyString(), RandomNonEmptyString(), random.NextBoolean());
        }
        SynonymMap map = b.Build();
        bool ignoreCase = random.NextBoolean();
        Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
            TokenStream syns = new SynonymFilter(tokenizer, map, ignoreCase);
            TokenStream graph = new MockGraphTokenFilter(Random, syns);
            return new TokenStreamComponents(tokenizer, graph);
        });
        CheckRandomData(random, analyzer, 100);
    }
}
public virtual void TestRandomHuge()
{
    Random random = Random;
    int numIters = AtLeast(3);
    for (int i = 0; i < numIters; i++)
    {
        b = new SynonymMap.Builder(random.NextBoolean());
        int numEntries = AtLeast(10);
        if (Verbose)
        {
            Console.WriteLine("TEST: iter=" + i + " numEntries=" + numEntries);
        }
        for (int j = 0; j < numEntries; j++)
        {
            Add(RandomNonEmptyString(), RandomNonEmptyString(), random.NextBoolean());
        }
        SynonymMap map = b.Build();
        bool ignoreCase = random.NextBoolean();
        Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        });
        CheckRandomData(random, analyzer, 100, 1024);
    }
}
public virtual void TestBasic2()
{
    b = new SynonymMap.Builder(true);
    const bool keepOrig = false;
    Add("aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
    Add("bbb", "bbbb1 bbbb2", keepOrig);
    tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
    tokensIn.Reset();
    assertTrue(tokensIn.IncrementToken());
    assertFalse(tokensIn.IncrementToken());
    tokensIn.End();
    tokensIn.Dispose();
    tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
    termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
    posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
    posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
    offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();
#pragma warning disable 162 // unreachable code: keepOrig is a compile-time constant
    if (keepOrig)
    {
        Verify("xyzzy bbb pot of gold", "xyzzy bbb/bbbb1 pot/bbbb2 of gold");
        Verify("xyzzy aaa pot of gold", "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold");
    }
    else
    {
        Verify("xyzzy bbb pot of gold", "xyzzy bbbb1 pot/bbbb2 of gold");
        Verify("xyzzy aaa pot of gold", "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold");
    }
#pragma warning restore 162
}
public virtual void TestIncludeOrig()
{
    b = new SynonymMap.Builder(true);
    const bool keepOrig = true;
    Add("a b", "ab", keepOrig);
    Add("a c", "ac", keepOrig);
    Add("a", "aa", keepOrig);
    Add("b", "bb", keepOrig);
    Add("z x c v", "zxcv", keepOrig);
    Add("x c", "xc", keepOrig);
    SynonymMap map = b.Build();
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    });
    AssertAnalyzesTo(a, "$", new string[] { "$" }, new int[] { 1 });
    AssertAnalyzesTo(a, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });
    AssertAnalyzesTo(a, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });
    AssertAnalyzesTo(a, "$ a", new string[] { "$", "a", "aa" }, new int[] { 1, 1, 0 });
    AssertAnalyzesTo(a, "a $", new string[] { "a", "aa", "$" }, new int[] { 1, 0, 1 });
    AssertAnalyzesTo(a, "$ a !", new string[] { "$", "a", "aa", "!" }, new int[] { 1, 1, 0, 1 });
    AssertAnalyzesTo(a, "a a", new string[] { "a", "aa", "a", "aa" }, new int[] { 1, 0, 1, 0 });
    AssertAnalyzesTo(a, "b", new string[] { "b", "bb" }, new int[] { 1, 0 });
    AssertAnalyzesTo(a, "z x c v", new string[] { "z", "zxcv", "x", "c", "v" }, new int[] { 1, 0, 1, 1, 1 });
    AssertAnalyzesTo(a, "z x c $", new string[] { "z", "x", "xc", "c", "$" }, new int[] { 1, 1, 0, 1, 1 });
}
public virtual void TestMaxPosition3WithSynomyms()
{
    foreach (bool consumeAll in new bool[] { true, false })
    {
        MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
        // if we are consuming all tokens, we can use the checks, otherwise we can't
        tokenizer.EnableChecks = consumeAll;
        SynonymMap.Builder builder = new SynonymMap.Builder(true);
        builder.Add(new CharsRef("one"), new CharsRef("first"), true);
        builder.Add(new CharsRef("one"), new CharsRef("alpha"), true);
        builder.Add(new CharsRef("one"), new CharsRef("beguine"), true);
        CharsRef multiWordCharsRef = new CharsRef();
        SynonymMap.Builder.Join(new string[] { "and", "indubitably", "single", "only" }, multiWordCharsRef);
        builder.Add(new CharsRef("one"), multiWordCharsRef, true);
        SynonymMap.Builder.Join(new string[] { "dopple", "ganger" }, multiWordCharsRef);
        builder.Add(new CharsRef("two"), multiWordCharsRef, true);
        SynonymMap synonymMap = builder.Build();
        TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
        stream = new LimitTokenPositionFilter(stream, 3, consumeAll);
        // "only", the 4th word of the multi-word synonym "and indubitably single only",
        // is not emitted, since its position is greater than 3:
        AssertTokenStreamContents(stream,
            new string[] { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" },
            new int[] { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 });
    }
}
public virtual void TestBasic()
{
    b = new SynonymMap.Builder(true);
    Add("a", "foo", true);
    Add("a b", "bar fee", true);
    Add("b c", "dog collar", true);
    Add("c d", "dog harness holder extras", true);
    Add("m c e", "dog barks loudly", false);
    Add("i j k", "feep", true);
    Add("e f", "foo bar", false);
    Add("e f", "baz bee", false);
    Add("z", "boo", false);
    Add("y", "bee", true);
    tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
    tokensIn.Reset();
    assertTrue(tokensIn.IncrementToken());
    assertFalse(tokensIn.IncrementToken());
    tokensIn.End();
    tokensIn.Dispose();
    tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
    termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
    posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
    posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
    offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();
    Verify("a b c", "a/bar b/fee c");

    // syn output extends beyond input tokens
    Verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");

    Verify("a b a", "a/bar b/fee a/foo");

    // outputs that add to one another:
    Verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");

    // two outputs for same input
    Verify("e f", "foo/baz bar/bee");

    // verify multi-word / single-output offsets:
    Verify("g i j k g", "g i/feep:7_3 j k g");

    // mixed keepOrig true/false:
    Verify("a m c e x", "a/foo dog barks loudly x");
    Verify("c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x");
    assertTrue(tokensOut.CaptureCount > 0);

    // no captureStates when no syns matched
    Verify("p q r s t", "p q r s t");
    assertEquals(0, tokensOut.CaptureCount);

    // no captureStates when only single-input syns, w/ no lookahead needed, matched
    Verify("p q z y t", "p q boo y/bee t");
    assertEquals(0, tokensOut.CaptureCount);
}
public virtual void TestRecursion()
{
    b = new SynonymMap.Builder(true);
    const bool keepOrig = false;
    Add("zoo", "zoo", keepOrig);
    SynonymMap map = b.Build();
    Analyzer a = new AnalyzerAnonymousInnerClassHelper9(this, map);
    AssertAnalyzesTo(a, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "$", "zoo" }, new int[] { 1, 1, 1, 1 });
}
public virtual void TestRepeatsOn()
{
    b = new SynonymMap.Builder(false);
    const bool keepOrig = false;
    Add("a b", "ab", keepOrig);
    Add("a b", "ab", keepOrig);
    Add("a b", "ab", keepOrig);
    SynonymMap map = b.Build();
    Analyzer a = new AnalyzerAnonymousInnerClassHelper8(this, map);
    AssertAnalyzesTo(a, "a b", new string[] { "ab", "ab", "ab" }, new int[] { 1, 0, 0 });
}
public virtual void TestMultiwordOffsets()
{
    b = new SynonymMap.Builder(true);
    const bool keepOrig = true;
    Add("national hockey league", "nhl", keepOrig);
    SynonymMap map = b.Build();
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    });
    AssertAnalyzesTo(a, "national hockey league",
        new string[] { "national", "nhl", "hockey", "league" },
        new int[] { 0, 0, 9, 16 },
        new int[] { 8, 22, 15, 22 },
        new int[] { 1, 0, 1, 1 });
}
public virtual void TestRecursion3()
{
    b = new SynonymMap.Builder(true);
    const bool keepOrig = true;
    Add("zoo zoo", "zoo", keepOrig);
    SynonymMap map = b.Build();
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    });
    AssertAnalyzesTo(a, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "zoo", "$", "zoo" }, new int[] { 1, 0, 1, 1, 1 });
}
public virtual void TestRandom2()
{
    int numIters = AtLeast(3);
    for (int i = 0; i < numIters; i++)
    {
        b = new SynonymMap.Builder(Random().NextBoolean());
        int numEntries = AtLeast(10);
        for (int j = 0; j < numEntries; j++)
        {
            Add(RandomNonEmptyString(), RandomNonEmptyString(), Random().NextBoolean());
        }
        SynonymMap map = b.Build();
        bool ignoreCase = Random().NextBoolean();
        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper100(this, map, ignoreCase);
        CheckRandomData(Random(), analyzer, 100);
    }
}
public virtual void TestDontKeepOrig()
{
    b = new SynonymMap.Builder(true);
    Add("a b", "foo", false);
    SynonymMap map = b.Build();
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
    });
    AssertAnalyzesTo(analyzer, "a b c",
        new string[] { "foo", "c" },
        new int[] { 0, 4 },
        new int[] { 3, 5 },
        null,
        new int[] { 1, 1 },
        new int[] { 1, 1 },
        true);
    CheckAnalysisConsistency(Random, analyzer, false, "a b c");
}
public virtual void TestOutputHangsOffEnd()
{
    b = new SynonymMap.Builder(true);
    const bool keepOrig = false;
    // b hangs off the end (no input token under it):
    Add("a", "a b", keepOrig);
    tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
    tokensIn.Reset();
    assertTrue(tokensIn.IncrementToken());
    assertFalse(tokensIn.IncrementToken());
    tokensIn.End();
    tokensIn.Dispose();
    tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
    termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
    posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
    offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();
    posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
    // Make sure endOffset inherits from previous input token:
    Verify("a", "a b:1");
}
public virtual void TestRandomStrings()
{
    int numIters = AtLeast(10);
    for (int i = 0; i < numIters; i++)
    {
        SynonymMap.Builder b = new SynonymMap.Builder(Random().NextBoolean());
        int numEntries = AtLeast(10);
        for (int j = 0; j < numEntries; j++)
        {
            Add(b, RandomNonEmptyString(), RandomNonEmptyString(), Random().NextBoolean());
        }
        SynonymMap map = b.Build();
        bool ignoreCase = Random().NextBoolean();
        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, map, ignoreCase);
        CheckRandomData(Random(), analyzer, 200);
    }
}
public virtual void TestDoKeepOrig()
{
    b = new SynonymMap.Builder(true);
    Add("a b", "foo", true);
    SynonymMap map = b.Build();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);
    AssertAnalyzesTo(analyzer, "a b c",
        new string[] { "a", "foo", "b", "c" },
        new int[] { 0, 0, 2, 4 },
        new int[] { 1, 3, 3, 5 },
        null,
        new int[] { 1, 0, 1, 1 },
        new int[] { 1, 2, 1, 1 },
        true);
    CheckAnalysisConsistency(Random(), analyzer, false, "a b c");
}
public virtual void TestRandom()
{
    int alphabetSize = TestUtil.NextInt32(Random, 2, 7);
    int docLen = AtLeast(3000);
    //final int docLen = 50;
    string document = GetRandomString('a', alphabetSize, docLen);
    if (Verbose)
    {
        Console.WriteLine("TEST: doc=" + document);
    }
    int numSyn = AtLeast(5);
    //final int numSyn = 2;
    IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
    IList<OneSyn> syns = new JCG.List<OneSyn>();
    bool dedup = Random.NextBoolean();
    if (Verbose)
    {
        Console.WriteLine(" dedup=" + dedup);
    }
    b = new SynonymMap.Builder(dedup);
    for (int synIDX = 0; synIDX < numSyn; synIDX++)
    {
        string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt32(Random, 1, 5)).Trim();
        if (!synMap.TryGetValue(synIn, out OneSyn s) || s is null)
        {
            s = new OneSyn();
            s.@in = synIn;
            syns.Add(s);
            s.@out = new JCG.List<string>();
            synMap[synIn] = s;
            s.keepOrig = Random.NextBoolean();
        }
        string synOut = GetRandomString('0', 10, TestUtil.NextInt32(Random, 1, 5)).Trim();
        s.@out.Add(synOut);
        Add(synIn, synOut, s.keepOrig);
        if (Verbose)
        {
            Console.WriteLine(" syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
        }
    }
    tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
    tokensIn.Reset();
    assertTrue(tokensIn.IncrementToken());
    assertFalse(tokensIn.IncrementToken());
    tokensIn.End();
    tokensIn.Dispose();
    tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
    termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
    posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
    posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
    offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();
    if (dedup)
    {
        PruneDups(syns);
    }
    string expected = SlowSynMatcher(document, syns, 5);
    if (Verbose)
    {
        Console.WriteLine("TEST: expected=" + expected);
    }
    Verify(document, expected);
}