/// <summary>
/// Language detection test for each file (--detectlang option).
/// Every file named in the argument list is read as UTF-8, fed to a fresh
/// detector, and reported on standard output as one "filename:probabilities" line.
/// <para />
/// usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
/// </summary>
public void detectLang()
{
    // A true result from loadProfile() aborts the command before any file is opened.
    bool stopEarly = loadProfile();
    if (stopEarly)
    {
        return;
    }

    foreach (string path in arglist)
    {
        using (StreamReader reader = new StreamReader(path, System.Text.Encoding.UTF8))
        {
            // A new detector is created per file, using the "alpha" option or DEFAULT_ALPHA.
            var alpha = getDouble("alpha", DEFAULT_ALPHA);
            Detector fileDetector = DetectorFactory.create(alpha);

            bool debugRequested = hasOpt("--debug");
            if (debugRequested)
            {
                fileDetector.setVerbose();
            }

            fileDetector.append(reader);

            string report = path + ":" + fileDetector.getProbabilities();
            System.Console.WriteLine(report);
        }
    }
}
/// <summary>
/// Batch Test of Language Detection (--batchtest option).
/// Each line of every test data file is detected with a fresh detector and the
/// detected language is recorded under the expected language. After each file a
/// cumulative report is printed: one line per expected language with its
/// correct/total counts, accuracy and detected-language breakdown, followed by
/// an overall total line.
/// <para />
/// usage: --batchtest -d [profile directory] -a [alpha] -s [seed] [test data(s)]
/// <para />
/// The format of test data(s):
/// <para />
/// [correct language name]\t[text body for test]\n
/// <para />
/// Lines without a tab, or with an empty language name before the tab, are skipped.
/// </summary>
public void batchTest()
{
    // A true result from loadProfile() aborts the command before any file is opened.
    if (loadProfile())
    {
        return;
    }

    // expected language -> detected language of every sample seen so far (across files)
    IDictionary<string, IList<string>> result = new Dictionary<string, IList<string>>();

    foreach (string filename in arglist)
    {
        using (StreamReader _is = new StreamReader(filename, System.Text.Encoding.UTF8))
        {
            while (!_is.EndOfStream)
            {
                string line = _is.ReadLine();
                int idx = line.IndexOf('\t');
                if (idx <= 0)
                {
                    // No tab, or nothing before it: not a "[lang]\t[text]" record.
                    continue;
                }

                string correctLang = line.Substring(0, idx);
                string text = line.Substring(idx + 1);

                Detector detector = DetectorFactory.create(getDouble("alpha", DEFAULT_ALPHA));
                detector.append(text);
                string lang = detector.detect();

                // Single lookup instead of ContainsKey + indexer.
                IList<string> detectedForLang;
                if (!result.TryGetValue(correctLang, out detectedForLang))
                {
                    detectedForLang = new List<string>();
                    result[correctLang] = detectedForLang;
                }
                detectedForLang.Add(lang);

                if (hasOpt("--debug"))
                {
                    System.Console.WriteLine(correctLang + "," + lang + "," + (text.Length > 100 ? text.Substring(0, 100) : text));
                }
            }
        }

        // Cumulative report, printed once per input file (the reader is already closed here).
        List<string> langlist = new List<string>(result.Keys);
        langlist.Sort();

        int totalCount = 0, totalCorrect = 0;
        foreach (string expectedLang in langlist)
        {
            // detected language -> number of samples detected as that language
            IDictionary<string, int> resultCount = new Dictionary<string, int>();
            int count = 0;
            foreach (string detectedLang in result[expectedLang])
            {
                ++count;
                int seen;
                resultCount.TryGetValue(detectedLang, out seen); // seen stays 0 when absent
                resultCount[detectedLang] = seen + 1;
            }

            int correct;
            resultCount.TryGetValue(expectedLang, out correct); // 0 when never detected correctly
            double rate = correct / (double)count;

            // Render the breakdown explicitly: formatting a Dictionary with "{n}"
            // would only print its type name.
            List<string> breakdown = new List<string>();
            foreach (KeyValuePair<string, int> entry in resultCount)
            {
                breakdown.Add(entry.Key + "=" + entry.Value);
            }
            string breakdownText = "{" + string.Join(", ", breakdown) + "}";

            System.Console.WriteLine(string.Format("{0} ({1}/{2}={3:F2}): {4}", lang_or(expectedLang), correct, count, rate, breakdownText));
            totalCorrect += correct;
            totalCount += count;
        }

        // .NET composite format items; printf-style "%d"/"%.3f" are not interpreted by string.Format.
        System.Console.WriteLine(string.Format("total: {0}/{1} = {2:F3}", totalCorrect, totalCount, totalCorrect / (double)totalCount));
    }
}

/// <summary>
/// Returns the given language label unchanged; exists only to keep the report
/// line's argument list readable.
/// </summary>
private static string lang_or(string label)
{
    return label;
}