/// <summary>
/// Encodes <paramref name="input"/> with a <c>PhoneticEngine</c> that is configured from
/// string arguments. This code is similar in style to code found in Solr:
/// solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
/// <para/>
/// It is kept as a test helper to protect Solr from possible future
/// regressions in Commons-Codec.
/// </summary>
/// <param name="args">
/// Optional settings, looked up by the keys <c>"nameType"</c>, <c>"ruleType"</c> and
/// <c>"languageSet"</c>. Missing keys fall back to GENERIC, APPROX and automatic
/// language detection respectively. Enum names are parsed case-insensitively.
/// </param>
/// <param name="concat">Passed straight through to the <c>PhoneticEngine</c> constructor.</param>
/// <param name="input">The text to encode.</param>
/// <returns>The string produced by <c>PhoneticEngine.Encode</c>.</returns>
/// <exception cref="ArgumentException">
/// Thrown by <see cref="Enum.Parse(Type, string, bool)"/> when a supplied
/// <c>"nameType"</c> or <c>"ruleType"</c> value is not a member of the enum.
/// </exception>
private static String Encode(IDictionary<String, String> args, bool concat, String input)
{
    // PhoneticEngine = NameType + RuleType + concat
    // we use common-codec's defaults: GENERIC + APPROX + true
    String nameTypeArg;
    args.TryGetValue("nameType", out nameTypeArg);
    NameType nameType = (nameTypeArg == null)
        ? NameType.GENERIC
        : (NameType)Enum.Parse(typeof(NameType), nameTypeArg, true);

    String ruleTypeArg;
    args.TryGetValue("ruleType", out ruleTypeArg);
    RuleType ruleType = (ruleTypeArg == null)
        ? RuleType.APPROX
        : (RuleType)Enum.Parse(typeof(RuleType), ruleTypeArg, true);

    PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat);

    // LanguageSet: defaults to automagic, otherwise a comma-separated list.
    String languageSetArg;
    args.TryGetValue("languageSet", out languageSetArg);

    // Ordinal comparison mirrors the case-sensitive equals() of the Java original.
    if (languageSetArg == null || String.Equals(languageSetArg, "auto", StringComparison.Ordinal))
    {
        /*
         * org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (lines 96-98) does this:
         *
         * encoded = (languages == null)
         *     ? engine.encode(termAtt.toString())
         *     : engine.encode(termAtt.toString(), languages);
         *
         * Hence the two Encode overloads used here.
         */
        return engine.Encode(input);
    }

    LanguageSet languageSet = LanguageSet.From(
        new HashSet<String>(Arrays.AsList(languageSetArg.Split(',').TrimEnd())));
    return engine.Encode(input, languageSet);
}
/// <summary>
/// Encodes <paramref name="name"/> with an engine built from the given settings, checks
/// that the result equals <paramref name="phoneticExpected"/>, and then checks that no
/// group of '|'-separated alternatives has more than <paramref name="maxPhonemes"/> entries.
/// </summary>
/// <param name="name">The input handed to the engine.</param>
/// <param name="phoneticExpected">The exact encoding the engine is expected to return.</param>
/// <param name="nameType">Name type passed to the engine constructor.</param>
/// <param name="ruleType">Rule type passed to the engine constructor.</param>
/// <param name="concat">
/// Passed to the engine constructor. When true the whole output is checked as a single
/// group; when false the output is first split on '-' and each piece is checked on its own.
/// </param>
/// <param name="maxPhonemes">Upper bound passed to the engine and asserted per group.</param>
public void TestEncode(String name, String phoneticExpected, NameType nameType, RuleType ruleType, bool concat, int maxPhonemes)
{
    PhoneticEngine phoneticEngine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
    String phoneticActual = phoneticEngine.Encode(name);

    Assert.AreEqual(phoneticExpected, phoneticActual, "phoneme incorrect");

    // A concatenated result is one group; otherwise every '-'-separated word is its own group.
    String[] groups = concat
        ? new String[] { phoneticActual }
        : phoneticActual.Split('-').TrimEnd();

    Regex alternativeSeparator = new Regex("\\|");
    foreach (String group in groups)
    {
        String[] alternatives = alternativeSeparator.Split(group).TrimEnd();
        Assert.True(alternatives.Length <= maxPhonemes);
    }
}
/// <summary>
/// Rough timing run: encodes the fixed input "Angelo" <c>LOOP</c> times with a
/// GENERIC / APPROX / concat engine and writes the elapsed milliseconds to the console.
/// Makes no assertions.
/// </summary>
public void Test()
{
    PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
    String input = "Angelo";

    // Stopwatch reports real milliseconds. A DateTime.Ticks difference is in 100 ns
    // units, so printing it as "millis" would overstate the time by a factor of 10,000.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    for (int i = 0; i < LOOP; i++)
    {
        engine.Encode(input);
    }
    stopwatch.Stop();

    long totalMillis = stopwatch.ElapsedMilliseconds;
    Console.WriteLine(String.Format("Time for encoding {0} times the input '{1}': {2} millis.", LOOP, input, totalMillis));
}
/// <summary>
/// Rough timing run: encodes the fixed input "Angelo" <c>LOOP</c> times with a
/// GENERIC / APPROX / concat engine and writes the elapsed milliseconds, as measured
/// by a <see cref="Stopwatch"/>, to the console. Makes no assertions.
/// </summary>
public void Test()
{
    string input = "Angelo";
    PhoneticEngine phoneticEngine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);

    // Only the encoding loop is timed; engine construction above is excluded.
    Stopwatch timer = Stopwatch.StartNew();
    for (int iteration = 0; iteration < LOOP; iteration++)
    {
        phoneticEngine.Encode(input);
    }
    timer.Stop();

    long totalMillis = timer.ElapsedMilliseconds;
    string report = string.Format("Time for encoding {0} times the input '{1}': {2} millis.", LOOP, input, totalMillis);
    Console.WriteLine(report);
}