/// <summary>
/// Parses the mapping rule <c>abcd=&gt;efgh</c> through an
/// <see cref="NGramTokenizerFactory"/> configured with min and max gram size 2,
/// then checks the resulting nested map: <c>ab</c> -> <c>bc</c> -> <c>cd</c>,
/// with <c>ef</c>, <c>fg</c> and <c>gh</c> as the tokens at the end of the chain.
/// </summary>
public virtual void TestBigramTokenizer()
{
    // Prepare the bi-gram tokenizer factory.
    IDictionary<string, string> factoryArgs = new Dictionary<string, string>();
    factoryArgs[AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM] = "4.4";
    factoryArgs["minGramSize"] = "2";
    factoryArgs["maxGramSize"] = "2";
    TokenizerFactory bigramFactory = new NGramTokenizerFactory(factoryArgs);

    // (ab)->(bc)->(cd)->[ef][fg][gh]
    IList<string> ruleLines = new List<string> { "abcd=>efgh" };
    SlowSynonymMap map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, bigramFactory);

    // Walk the chain one level at a time; every level has exactly one child.
    assertEquals(1, map.Submap.size());

    var abNode = GetSubSynonymMap(map, "ab");
    assertEquals(1, abNode.Submap.size());

    var bcNode = GetSubSynonymMap(abNode, "bc");
    assertEquals(1, bcNode.Submap.size());

    // The final key "cd" must carry each bi-gram of the replacement text.
    foreach (string expectedToken in new[] { "ef", "fg", "gh" })
    {
        AssertTokIncludes(bcNode, "cd", expectedToken);
    }
}
/// <summary>
/// Builds a <see cref="SlowSynonymMap"/> from the single rule <c>a b c,d</c>
/// (expansion enabled, no tokenizer factory), runs the whitespace-tokenized
/// input <c>a e</c> through a <see cref="SlowSynonymFilter"/>, and expects the
/// output tokens to be exactly <c>a</c>, <c>e</c>.
/// </summary>
public virtual void TestMultiWordSynonymsOld()
{
    IList<string> ruleLines = new JCG.List<string> { "a b c,d" };

    SlowSynonymMap map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);

    // Build the analysis chain step by step: reader -> tokenizer -> synonym filter.
    var input = new StringReader("a e");
    var tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    var filter = new SlowSynonymFilter(tokenizer, map);

    // This fails because ["e","e"] is the value of the token stream
    AssertTokenStreamContents(filter, new string[] { "a", "e" });
}
/// <summary>
/// Exercises <c>SlowSynonymFilterFactory.ParseRules</c> with expansion disabled
/// on comma-separated synonym groups, and checks that every entry of a group
/// maps to the token(s) of the group's first entry.
/// </summary>
public virtual void TestRead1waySynonymRules()
{
    // "a,b"
    //   (a)->[a]
    //   (b)->[a]
    IList<string> ruleLines = new List<string> { "a,b" };
    SlowSynonymMap map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", false, null);
    assertEquals(2, map.Submap.size());
    foreach (string source in new[] { "a", "b" })
    {
        AssertTokIncludes(map, source, "a");
    }

    // "a,b,c"
    //   (a)->[a]
    //   (b)->[a]
    //   (c)->[a]
    ruleLines = new List<string> { "a,b,c" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", false, null);
    assertEquals(3, map.Submap.size());
    foreach (string source in new[] { "a", "b", "c" })
    {
        AssertTokIncludes(map, source, "a");
    }

    // "a,b1 b2"
    //   (a)->[a]
    //   (b1)->(b2)->[a]
    ruleLines = new List<string> { "a,b1 b2" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", false, null);
    assertEquals(2, map.Submap.size());
    AssertTokIncludes(map, "a", "a");
    var b1Node = GetSubSynonymMap(map, "b1");
    assertEquals(1, b1Node.Submap.size());
    AssertTokIncludes(b1Node, "b2", "a");

    // "a1 a2,b"
    //   (a1)->(a2)->[a1][a2]
    //   (b)->[a1][a2]
    ruleLines = new List<string> { "a1 a2,b" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", false, null);
    assertEquals(2, map.Submap.size());
    var a1Node = GetSubSynonymMap(map, "a1");
    assertEquals(1, a1Node.Submap.size());
    string[] firstEntryTokens = { "a1", "a2" };
    foreach (string expectedToken in firstEntryTokens)
    {
        AssertTokIncludes(a1Node, "a2", expectedToken);
    }
    foreach (string expectedToken in firstEntryTokens)
    {
        AssertTokIncludes(map, "b", expectedToken);
    }
}
/// <summary>
/// Checks that a rule containing two mapping separators (<c>a=&gt;b=&gt;c</c>)
/// is rejected by <c>SlowSynonymFilterFactory.ParseRules</c> with an
/// <see cref="ArgumentException"/>; the test fails if no such exception is thrown.
/// </summary>
public virtual void TestInvalidMappingRules()
{
    SlowSynonymMap map = new SlowSynonymMap(true);
    IList<string> ruleLines = new List<string>(1) { "a=>b=>c" };

    bool rejected = false;
    try
    {
        SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);
    }
    catch (ArgumentException)
    {
        // Expected: the malformed rule must not be accepted.
        rejected = true;
    }

    if (!rejected)
    {
        fail("IllegalArgumentException must be thrown.");
    }
}
/// <summary>
/// Creates a <see cref="SlowSynonymFilterFactory"/> whose <c>synonyms</c>
/// argument is <c>something.txt</c>, informs it with a
/// <c>ResourceLoaderAnonymousInnerClassHelper</c>, and checks that the loaded
/// map has two top-level entries, <c>a</c> and <c>b</c>, each including both
/// <c>a</c> and <c>b</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the rule text served for "something.txt" comes from the
/// resource loader helper, which is defined outside this method - confirm there.
/// </remarks>
public virtual void TestLoadRules()
{
    IDictionary<string, string> factoryArgs = new Dictionary<string, string>
    {
        { "synonyms", "something.txt" },
    };

    SlowSynonymFilterFactory factory = new SlowSynonymFilterFactory(factoryArgs);
    factory.Inform(new ResourceLoaderAnonymousInnerClassHelper());

    SlowSynonymMap map = factory.SynonymMap;
    assertEquals(2, map.Submap.size());

    // Both keys must expand to both tokens: (a,a), (a,b), (b,a), (b,b).
    string[] terms = { "a", "b" };
    foreach (string source in terms)
    {
        foreach (string expectedToken in terms)
        {
            AssertTokIncludes(map, source, expectedToken);
        }
    }
}
/// <summary>
/// Exercises <c>SlowSynonymFilterFactory.ParseRules</c> on explicit
/// <c>=&gt;</c> mapping rules (expansion enabled, no tokenizer factory) and
/// checks the shape of the resulting <see cref="SlowSynonymMap"/>: single and
/// multiple sources, multiple targets, and multi-word sources that share a
/// prefix with a single-word source.
/// </summary>
public virtual void TestReadMappingRules()
{
    // "a=>b"
    //   (a)->[b]
    IList<string> ruleLines = new List<string> { "a=>b" };
    SlowSynonymMap map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);
    assertEquals(1, map.Submap.size());
    AssertTokIncludes(map, "a", "b");

    // "a,b=>c"
    //   (a)->[c]
    //   (b)->[c]
    ruleLines = new List<string> { "a,b=>c" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);
    assertEquals(2, map.Submap.size());
    foreach (string source in new[] { "a", "b" })
    {
        AssertTokIncludes(map, source, "c");
    }

    // "a=>b,c"
    //   (a)->[b][c]
    ruleLines = new List<string> { "a=>b,c" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);
    assertEquals(1, map.Submap.size());
    foreach (string expectedToken in new[] { "b", "c" })
    {
        AssertTokIncludes(map, "a", expectedToken);
    }

    // "a=>a1", "a b=>a2"
    //   (a)->(b)->[a2]
    //        [a1]
    ruleLines = new List<string> { "a=>a1", "a b=>a2" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);
    assertEquals(1, map.Submap.size());
    AssertTokIncludes(map, "a", "a1");
    var aNode = GetSubSynonymMap(map, "a");
    assertEquals(1, aNode.Submap.size());
    AssertTokIncludes(aNode, "b", "a2");

    // "a=>a1", "a b=>a2", "a c=>a3"
    //   (a)->(b)->[a2]
    //        (c)->[a3]
    //        [a1]
    ruleLines = new List<string> { "a=>a1", "a b=>a2", "a c=>a3" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);
    assertEquals(1, map.Submap.size());
    AssertTokIncludes(map, "a", "a1");
    aNode = GetSubSynonymMap(map, "a");
    assertEquals(2, aNode.Submap.size());
    AssertTokIncludes(aNode, "b", "a2");
    AssertTokIncludes(aNode, "c", "a3");

    // "a=>a1", "a b=>a2", "b=>b1", "b c=>b2"
    //   (a)->(b)->[a2]
    //        [a1]
    //   (b)->(c)->[b2]
    //        [b1]
    ruleLines = new List<string> { "a=>a1", "a b=>a2", "b=>b1", "b c=>b2" };
    map = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.ParseRules(ruleLines, map, "=>", ",", true, null);
    assertEquals(2, map.Submap.size());
    AssertTokIncludes(map, "a", "a1");
    aNode = GetSubSynonymMap(map, "a");
    assertEquals(1, aNode.Submap.size());
    AssertTokIncludes(aNode, "b", "a2");
    AssertTokIncludes(map, "b", "b1");
    var bNode = GetSubSynonymMap(map, "b");
    assertEquals(1, bNode.Submap.size());
    AssertTokIncludes(bNode, "c", "b2");
}