예제 #1
0
        /// <summary>
        /// Checks that a detector fed the text "d e" reports "en".
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably relies on language profiles registered by fixture
        /// setup that is not visible in this block - confirm.
        /// </remarks>
        public void testDetector3()
        {
            Detector detect = DetectorFactory.Create();

            detect.Append("d e");

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.AreEqual("en", detect.Detect());
        }
예제 #2
0
        /// <summary>
        /// Checks that a detector fed four U+3042 (hiragana "a") characters followed by
        /// a Latin "a" reports "ja".
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably relies on language profiles registered by fixture
        /// setup that is not visible in this block - confirm.
        /// </remarks>
        public void testDetector4()
        {
            Detector detect = DetectorFactory.Create();

            detect.Append("\u3042\u3042\u3042\u3042a");

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.AreEqual("ja", detect.Detect());
        }
예제 #3
0
        /// <summary>
        /// Checks that a detector fed the text "b d" reports "fr".
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably relies on language profiles registered by fixture
        /// setup that is not visible in this block - confirm.
        /// </remarks>
        public void testDetector2()
        {
            Detector detect = DetectorFactory.Create();

            detect.Append("b d");

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.AreEqual("fr", detect.Detect());
        }
예제 #4
0
        /// <summary>
        /// Language detection test for each file (--detectlang option).
        /// <pre>
        /// usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
        /// </pre>
        /// </summary>
        /// <remarks>
        /// Each file named in <c>arglist</c> is fed to a freshly created detector, and the
        /// detector's probability list is written to the console as "filename: probabilities".
        /// <see cref="IOException"/> and <see cref="LangDetectException"/> are reported through
        /// <c>Debug.WriteLine</c> and processing continues with the next file.
        /// </remarks>
        public void DetectLang()
        {
            // A true result from LoadProfile() aborts the run.
            // NOTE(review): presumably true signals a load failure - confirm against LoadProfile.
            if (LoadProfile())
            {
                return;
            }
            foreach (string filename in arglist)
            {
                try
                {
                    // The using block disposes the reader (and its underlying file stream)
                    // on every exit path, replacing the manual finally/Close sequence.
                    using (StreamReader strm = new StreamReader(File.OpenRead(filename)))
                    {
                        Detector detector = DetectorFactory.Create(GetDouble("alpha", DEFAULT_ALPHA));
                        if (HasOpt("--debug"))
                        {
                            detector.SetVerbose();
                        }
                        detector.Append(strm);
                        var probs = string.Join(" ", detector.GetProbabilities().Select((lang) => lang.ToString()));
                        Console.WriteLine("{0}: {1}", filename, probs);
                    }
                }
                catch (IOException e)
                {
                    // Also covers an IOException raised while disposing the reader.
                    Debug.WriteLine(e);
                }
                catch (LangDetectException e)
                {
                    Debug.WriteLine(e);
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Batch Test of Language Detection (--batchtest option).
        /// <pre>
        /// usage: --batchtest -d [profile directory] -a [alpha] -s [seed] [test data(s)]
        /// </pre>
        /// The format of test data(s):
        /// <pre>
        ///   [correct language name]\t[text body for test]\n
        /// </pre>
        /// </summary>
        /// <remarks>
        /// Lines without a tab, or with an empty language name before the tab, are skipped.
        /// After each input file is read, a per-language accuracy report and an overall total
        /// are written to the console; the tallies accumulate across the files processed so far.
        /// <see cref="IOException"/> and <see cref="LangDetectException"/> are reported through
        /// <c>Debug.WriteLine</c> and do not stop the run.
        /// </remarks>
        public void BatchTest()
        {
            // A true result from LoadProfile() aborts the run.
            // NOTE(review): presumably true signals a load failure - confirm against LoadProfile.
            if (LoadProfile())
            {
                return;
            }

            // Maps each expected language to the list of languages detected for its samples.
            Dictionary<string, List<string>> result = new Dictionary<string, List<string>>();

            foreach (string filename in arglist)
            {
                try
                {
                    // The using block disposes the reader on every exit path.
                    using (StreamReader strm = new StreamReader(File.OpenRead(filename)))
                    {
                        while (!strm.EndOfStream)
                        {
                            string line = strm.ReadLine();
                            int idx = line.IndexOf('\t');
                            if (idx <= 0)
                            {
                                // No tab, or nothing before it: not a valid sample line.
                                continue;
                            }
                            string correctLang = line.Substring(0, idx);
                            string text = line.Substring(idx + 1);

                            Detector detector = DetectorFactory.Create(GetDouble("alpha", DEFAULT_ALPHA));
                            detector.Append(text);
                            string lang = "";
                            try
                            {
                                lang = detector.Detect();
                            }
                            catch (Exception e)
                            {
                                // Best effort: a failed detection is tallied as the empty string.
                                Debug.WriteLine(e);
                            }

                            List<string> detectedForLang;
                            if (!result.TryGetValue(correctLang, out detectedForLang))
                            {
                                detectedForLang = new List<string>();
                                result[correctLang] = detectedForLang;
                            }
                            detectedForLang.Add(lang);

                            if (HasOpt("--debug"))
                            {
                                Console.WriteLine(correctLang + "," + lang + "," + (text.Length > 100 ? text.Substring(0, 100) : text));
                            }
                        }
                    }
                }
                catch (IOException e)
                {
                    // Also covers an IOException raised while disposing the reader.
                    Debug.WriteLine(e);
                }
                catch (LangDetectException e)
                {
                    Debug.WriteLine(e);
                }

                List<string> langlist = new List<string>(result.Keys);
                langlist.Sort();

                int totalCount = 0, totalCorrect = 0;
                foreach (string lang in langlist)
                {
                    // Tally how often each detected language occurred for this expected language.
                    Dictionary<string, int> resultCount = new Dictionary<string, int>();
                    List<string> list = result[lang];
                    foreach (string detectedLang in list)
                    {
                        int seen;
                        resultCount.TryGetValue(detectedLang, out seen); // seen stays 0 when absent
                        resultCount[detectedLang] = seen + 1;
                    }

                    int count = list.Count;
                    int correct;
                    resultCount.TryGetValue(lang, out correct); // 0 when never detected correctly
                    double rate = correct / (double)count;

                    // Dictionary.ToString() only yields the type name, so render the tallies explicitly.
                    List<string> tallies = new List<string>(resultCount.Count);
                    foreach (KeyValuePair<string, int> entry in resultCount)
                    {
                        tallies.Add(entry.Key + "=" + entry.Value);
                    }
                    string breakdown = "{" + string.Join(", ", tallies) + "}";

                    // .NET composite format items; printf-style "%s"/"%d" would be printed literally.
                    Console.WriteLine("{0} ({1}/{2}={3:F2}): {4}", lang, correct, count, rate, breakdown);
                    totalCorrect += correct;
                    totalCount += count;
                }
                Console.WriteLine("total: {0}/{1} = {2:F3}", totalCorrect, totalCount, totalCorrect / (double)totalCount);
            }
        }