コード例 #1
0
        // Encodes `name` with a PhoneticEngine built from the supplied settings, asserts that
        // the result equals `phoneticExpected`, and then asserts that no group of
        // '|'-separated alternatives in the result is longer than `maxPhonemes`.
        public void TestEncode(String name, String phoneticExpected, NameType nameType,
                               RuleType ruleType, bool concat, int maxPhonemes)
        {
            String encoded = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes).Encode(name);

            Assert.AreEqual(phoneticExpected, encoded, "phoneme incorrect");

            // With concat the whole output is treated as a single group of alternatives;
            // without it, every '-'-separated word is checked as its own group.
            String[] groups = concat ? new[] { encoded } : encoded.Split('-').TrimEnd();
            Regex    pipe   = new Regex("\\|");

            foreach (String group in groups)
            {
                String[] alternatives = pipe.Split(group).TrimEnd();
                Assert.True(alternatives.Length <= maxPhonemes);
            }
        }
コード例 #2
0
        /**
         * Encodes the input with a PhoneticEngine configured from string-valued
         * arguments. This code is similar in style to code found in Solr:
         * solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
         *
         * Kept as a test helper to protect Solr from possible future
         * regressions in Commons-Codec.
         *
         * Keys read from args:
         *   "nameType"    - name of a NameType member, parsed case-insensitively;
         *                   NameType.GENERIC is used when the key is absent.
         *   "ruleType"    - name of a RuleType member, parsed case-insensitively;
         *                   RuleType.APPROX is used when the key is absent.
         *   "languageSet" - comma-separated list of languages; when the key is
         *                   absent or the value is exactly "auto", no language set
         *                   is passed to the engine.
         *
         * The concat flag is handed to the PhoneticEngine constructor unchanged.
         * Returns the string produced by engine.Encode for the input.
         */
        private static String Encode(IDictionary <String, String> args, bool concat, String input)
        {
            LanguageSet    languageSet;
            PhoneticEngine engine;

            // PhoneticEngine = NameType + RuleType + concat
            // nameType and ruleType fall back to GENERIC and APPROX; concat comes from the caller.
            String nameTypeArg;

            args.TryGetValue("nameType", out nameTypeArg);
            NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : (NameType)Enum.Parse(typeof(NameType), nameTypeArg, true);

            String ruleTypeArg;

            args.TryGetValue("ruleType", out ruleTypeArg);
            RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : (RuleType)Enum.Parse(typeof(RuleType), ruleTypeArg, true);

            engine = new PhoneticEngine(nameType, ruleType, concat);

            // LanguageSet: defaults to automagic, otherwise a comma-separated list.
            String languageSetArg;

            args.TryGetValue("languageSet", out languageSetArg);

            // Ordinal comparison: "auto" is a configuration keyword, not linguistic text.
            if (languageSetArg == null || languageSetArg.Equals("auto", StringComparison.Ordinal))
            {
                languageSet = null;
            }
            else
            {
                languageSet = LanguageSet.From(new HashSet <String>(Arrays.AsList(languageSetArg.Split(',').TrimEnd())));
            }

            /*
             *  org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (lines 96-98) does this:
             *
             *  encoded = (languages == null)
             *      ? engine.encode(termAtt.toString())
             *      : engine.encode(termAtt.toString(), languages);
             *
             *  Hence our approach, below:
             */
            if (languageSet == null)
            {
                return(engine.Encode(input));
            }
            else
            {
                return(engine.Encode(input, languageSet));
            }
        }
コード例 #3
0
        // Rough timing check: encodes the same input LOOP times with a
        // GENERIC/APPROX/concat engine and prints the elapsed wall-clock time
        // in milliseconds to the console. Makes no assertions.
        public void Test()
        {
            PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
            String         input  = "Angelo";

            // Use a Stopwatch rather than differences of DateTime.UtcNow.Ticks: ticks are
            // 100-nanosecond units, so reporting a tick delta as "millis" overstated the
            // elapsed time by a factor of 10,000.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            for (int i = 0; i < LOOP; i++)
            {
                engine.Encode(input);
            }
            stopwatch.Stop();
            long totalMillis = stopwatch.ElapsedMilliseconds;

            Console.WriteLine(String.Format("Time for encoding {0} times the input '{1}': {2} millis.", LOOP, input, totalMillis));
        }
コード例 #4
0
        // Rough timing check: runs engine.Encode on a fixed input LOOP times and
        // prints the elapsed milliseconds to the console. Makes no assertions.
        public void Test()
        {
            string         sample  = "Angelo";
            PhoneticEngine encoder = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);

            // StartNew() creates the stopwatch and starts it in one step.
            Stopwatch timer = Stopwatch.StartNew();
            for (int iteration = 0; iteration < LOOP; ++iteration)
            {
                encoder.Encode(sample);
            }
            timer.Stop();

            long elapsedMs = timer.ElapsedMilliseconds;
            Console.WriteLine("Time for encoding {0} times the input '{1}': {2} millis.", LOOP, sample, elapsedMs);
        }
コード例 #5
0
        /**
         * This code is similar in style to code found in Solr:
         * solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
         *
         * Making a JUnit test out of it to protect Solr from possible future
         * regressions in Commons-Codec.
         */
        private static string Encode(IDictionary <string, string> args, bool concat, string input)
        {
            LanguageSet    languageSet;
            PhoneticEngine engine;

            // PhoneticEngine = NameType + RuleType + concat
            // we use common-codec's defaults: GENERIC + APPROX + true
            args.TryGetValue("nameType", out string nameTypeArg);
            NameType nameType = (nameTypeArg is null) ? NameType.GENERIC : (NameType)Enum.Parse(typeof(NameType), nameTypeArg, true);

            args.TryGetValue("ruleType", out string ruleTypeArg);
            RuleType ruleType = (ruleTypeArg is null) ? RuleType.APPROX : (RuleType)Enum.Parse(typeof(RuleType), ruleTypeArg, true);

            engine = new PhoneticEngine(nameType, ruleType, concat);

            // LanguageSet: defaults to automagic, otherwise a comma-separated list.
            args.TryGetValue("languageSet", out string languageSetArg);
            if (languageSetArg is null || languageSetArg.Equals("auto", StringComparison.Ordinal))
            {
                languageSet = null;
            }