예제 #1
0
 /// <summary>
 /// Creates a detector that takes its word/language probability map, language list
 /// and seed from the given factory and starts with an empty text buffer.
 /// Callers normally obtain instances via DetectorFactory.create().
 /// </summary>
 /// <param name="factory">DetectorFactory whose wordLangProbMap, langlist and seed are used by this detector</param>
 public Detector(DetectorFactory factory)
 {
     // State taken over from the factory.
     wordLangProbMap = factory.wordLangProbMap;
     langlist = factory.langlist;

     // Fresh, empty buffer for this detector.
     text = new StringBuilder();

     seed = factory.seed;
 }
예제 #2
0
        /// <summary>
        /// Loads the language profiles from the directory returned by <c>get("directory")</c>
        /// and, when <c>getLong("seed")</c> yields a value, passes it to <c>DetectorFactory.setSeed</c>.
        /// </summary>
        /// <returns>
        /// <c>false</c> when loading succeeded; <c>true</c> when a <c>LangDetectException</c> was caught.
        /// Note the inverted convention: callers bail out when this returns <c>true</c>.
        /// </returns>
        private bool loadProfile()
        {
            string profileDirectory = get("directory") + "/";

            try
            {
                DetectorFactory.loadProfile(profileDirectory);

                long? seed = getLong("seed");
                if (seed.HasValue)
                {
                    DetectorFactory.setSeed(seed.Value);
                }
                return false;
            }
            catch (LangDetectException e)
            {
                // Print the whole exception (type, message and stack trace) to stderr:
                // the stack trace alone does not say what went wrong, and error
                // diagnostics should not be mixed into the regular stdout output.
                System.Console.Error.WriteLine("ERROR: " + e);
                return true;
            }
        }
예제 #3
0
        /// <summary>
        /// Runs language detection on every file in the argument list (--detectlang option)
        /// and prints one "file:probabilities" line per file.
        /// <para />
        /// usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
        /// </summary>
        public void detectLang()
        {
            // loadProfile() returns true on failure, so only continue when it returns false.
            if (!loadProfile())
            {
                foreach (string path in arglist)
                {
                    using (StreamReader reader = new StreamReader(path, System.Text.Encoding.UTF8))
                    {
                        // A fresh detector is created for each input file.
                        var alpha = getDouble("alpha", DEFAULT_ALPHA);
                        Detector fileDetector = DetectorFactory.create(alpha);

                        bool verbose = hasOpt("--debug");
                        if (verbose)
                        {
                            fileDetector.setVerbose();
                        }

                        fileDetector.append(reader);

                        // NOTE(review): the printed text relies on ToString() of whatever
                        // getProbabilities() returns - confirm it renders something useful.
                        var probabilities = fileDetector.getProbabilities();
                        System.Console.WriteLine(path + ":" + probabilities);
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Batch Test of Language Detection (--batchtest option)
        /// <para />
        /// usage: --batchtest -d [profile directory] -a [alpha] -s [seed] [test data(s)]
        /// <para />
        /// The format of test data(s):
        /// <para />
        ///   [correct language name]\t[text body for test]\n
        /// <para />
        /// Lines without a tab, or with an empty language name, are skipped. After each
        /// input file a per-language summary and an overall total are printed; the counts
        /// are cumulative over all files processed so far.
        /// </summary>
        public void batchTest()
        {
            // loadProfile() returns true on failure.
            if (loadProfile())
            {
                return;
            }

            // Expected language -> every language detected for a line labelled with it.
            IDictionary<string, IList<string>> result = new Dictionary<string, IList<string>>();

            foreach (string filename in arglist)
            {
                using (StreamReader _is = new StreamReader(filename, System.Text.Encoding.UTF8))
                {
                    string line;
                    while ((line = _is.ReadLine()) != null)
                    {
                        int idx = line.IndexOf('\t');
                        if (idx <= 0)
                        {
                            continue;
                        }
                        string correctLang = line.Substring(0, idx);
                        string text = line.Substring(idx + 1);

                        Detector detector = DetectorFactory.create(getDouble("alpha", DEFAULT_ALPHA));
                        detector.append(text);
                        string detected = detector.detect();

                        IList<string> detectedForLang;
                        if (!result.TryGetValue(correctLang, out detectedForLang))
                        {
                            detectedForLang = new List<string>();
                            result[correctLang] = detectedForLang;
                        }
                        detectedForLang.Add(detected);

                        if (hasOpt("--debug"))
                        {
                            System.Console.WriteLine(correctLang + "," + detected + "," + (text.Length > 100 ? text.Substring(0, 100) : text));
                        }
                    }
                }

                // Summary of everything collected so far, printed once per input file.
                List<string> langlist = new List<string>(result.Keys);
                langlist.Sort();

                int totalCount = 0, totalCorrect = 0;
                foreach (string expectedLang in langlist)
                {
                    IList<string> list = result[expectedLang];

                    // Detected language -> number of lines detected as that language.
                    IDictionary<string, int> resultCount = new Dictionary<string, int>();
                    foreach (string detectedLang in list)
                    {
                        int seen;
                        resultCount.TryGetValue(detectedLang, out seen); // seen stays 0 when absent
                        resultCount[detectedLang] = seen + 1;
                    }

                    int count = list.Count;
                    int correct;
                    resultCount.TryGetValue(expectedLang, out correct); // 0 when never detected correctly
                    double rate = correct / (double)count;

                    // "0.00" always prints the ratio with two decimals; the previous "##"
                    // pattern rendered a 0..1 value as an empty string or "1".
                    System.Console.WriteLine(string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "{0} ({1}/{2}={3:0.00}): {4}",
                        expectedLang, correct, count, rate, formatResultCount(resultCount)));

                    totalCorrect += correct;
                    totalCount += count;
                }

                // Composite format placeholders instead of printf-style "%d"/"%.3f",
                // which string.Format prints literally.
                System.Console.WriteLine(string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "total: {0}/{1} = {2:0.000}",
                    totalCorrect, totalCount, totalCorrect / (double)totalCount));
            }
        }

        /// <summary>
        /// Renders detected-language counts as "{lang=count, ...}" ordered by language name.
        /// Needed because passing the dictionary itself to string.Format prints only its type name.
        /// </summary>
        /// <param name="resultCount">detected language mapped to its number of occurrences</param>
        /// <returns>the formatted counts, "{}" when the dictionary is empty</returns>
        private static string formatResultCount(IDictionary<string, int> resultCount)
        {
            List<string> keys = new List<string>(resultCount.Keys);
            keys.Sort(System.StringComparer.Ordinal);

            List<string> parts = new List<string>(keys.Count);
            foreach (string key in keys)
            {
                parts.Add(key + "=" + resultCount[key]);
            }
            return "{" + string.Join(", ", parts.ToArray()) + "}";
        }