/// <summary>
/// Checks that the detector reports "en" for the input text "d e".
/// </summary>
/// <remarks>
/// NOTE(review): relies on language profiles already being registered with
/// DetectorFactory (presumably by the fixture's setup, which is not visible here) - confirm.
/// </remarks>
public void testDetector3()
{
    Detector detect = DetectorFactory.Create();
    detect.Append("d e");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("en", detect.Detect());
}
/// <summary>
/// Checks that the detector reports "ja" for four U+3042 characters followed by
/// a single Latin "a".
/// </summary>
/// <remarks>
/// NOTE(review): relies on language profiles already being registered with
/// DetectorFactory (presumably by the fixture's setup, which is not visible here) - confirm.
/// </remarks>
public void testDetector4()
{
    Detector detect = DetectorFactory.Create();
    detect.Append("\u3042\u3042\u3042\u3042a");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("ja", detect.Detect());
}
/// <summary>
/// Checks that the detector reports "fr" for the input text "b d".
/// </summary>
/// <remarks>
/// NOTE(review): relies on language profiles already being registered with
/// DetectorFactory (presumably by the fixture's setup, which is not visible here) - confirm.
/// </remarks>
public void testDetector2()
{
    Detector detect = DetectorFactory.Create();
    detect.Append("b d");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("fr", detect.Detect());
}
/// <summary>
/// Language detection test for each file (--detectlang option).
/// </summary>
/// <remarks>
/// <code>
/// usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
/// </code>
/// For every file name in <c>arglist</c>, the file content is appended to a new detector
/// and the detector's probabilities are written to standard output as
/// <c>filename: p1 p2 ...</c>. An <see cref="IOException"/> or
/// <c>LangDetectException</c> raised while handling one file is written to the debug
/// output and processing continues with the next file.
/// </remarks>
public void DetectLang()
{
    // A true result from LoadProfile() aborts the command
    // (presumably it signals that the profiles could not be loaded - confirm).
    if (LoadProfile())
    {
        return;
    }

    foreach (string filename in arglist)
    {
        try
        {
            // The using block releases the reader on every exit path. It sits inside
            // the try so that an IOException raised while closing is logged below
            // instead of being silently discarded.
            using (StreamReader reader = new StreamReader(File.OpenRead(filename)))
            {
                Detector detector = DetectorFactory.Create(GetDouble("alpha", DEFAULT_ALPHA));
                if (HasOpt("--debug"))
                {
                    detector.SetVerbose();
                }

                detector.Append(reader);

                string probs = string.Join(" ", detector.GetProbabilities().Select(lang => lang.ToString()));
                Console.WriteLine("{0}: {1}", filename, probs);
            }
        }
        catch (IOException e)
        {
            Debug.WriteLine(e);
        }
        catch (LangDetectException e)
        {
            Debug.WriteLine(e);
        }
    }
}
/// <summary>
/// Batch test of language detection (--batchtest option).
/// </summary>
/// <remarks>
/// <code>
/// usage: --batchtest -d [profile directory] -a [alpha] -s [seed] [test data(s)]
/// </code>
/// The format of test data(s):
/// <code>
/// [correct language name]\t[text body for test]\n
/// </code>
/// Lines without a tab, or with a tab in the first column, are skipped. After each
/// input file a summary is printed: one line per expected language of the form
/// <c>lang (correct/count=rate): {detected=count, ...}</c>, followed by a
/// <c>total:</c> line. Results accumulate across files, so each summary covers
/// every file read so far.
/// </remarks>
public void BatchTest()
{
    // A true result from LoadProfile() aborts the command
    // (presumably it signals that the profiles could not be loaded - confirm).
    if (LoadProfile())
    {
        return;
    }

    // Expected language -> the language detected for each test line labelled with it.
    Dictionary<string, List<string>> result = new Dictionary<string, List<string>>();

    foreach (string filename in arglist)
    {
        try
        {
            // The using block releases the reader on every exit path. It sits inside
            // the try so that an IOException raised while closing is logged below
            // instead of being silently discarded.
            using (StreamReader reader = new StreamReader(File.OpenRead(filename)))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    int idx = line.IndexOf('\t');
                    if (idx <= 0)
                    {
                        continue;
                    }

                    string correctLang = line.Substring(0, idx);
                    string text = line.Substring(idx + 1);

                    Detector detector = DetectorFactory.Create(GetDouble("alpha", DEFAULT_ALPHA));
                    detector.Append(text);

                    // A failed detection is recorded as the empty string so the line
                    // still counts towards the totals (as a miss).
                    string detected = "";
                    try
                    {
                        detected = detector.Detect();
                    }
                    catch (Exception e)
                    {
                        Debug.WriteLine(e);
                    }

                    List<string> detectedForLang;
                    if (!result.TryGetValue(correctLang, out detectedForLang))
                    {
                        detectedForLang = new List<string>();
                        result[correctLang] = detectedForLang;
                    }
                    detectedForLang.Add(detected);

                    if (HasOpt("--debug"))
                    {
                        Console.WriteLine(correctLang + "," + detected + "," + (text.Length > 100 ? text.Substring(0, 100) : text));
                    }
                }
            }
        }
        catch (IOException e)
        {
            Debug.WriteLine(e);
        }
        catch (LangDetectException e)
        {
            Debug.WriteLine(e);
        }

        // Ordinal ordering keeps the report order independent of the current culture.
        List<string> langlist = new List<string>(result.Keys);
        langlist.Sort(StringComparer.Ordinal);

        int totalCount = 0, totalCorrect = 0;
        foreach (string expectedLang in langlist)
        {
            List<string> detectedList = result[expectedLang];

            // Detected language -> number of occurrences for this expected language.
            Dictionary<string, int> resultCount = new Dictionary<string, int>();
            foreach (string detectedLang in detectedList)
            {
                int seen;
                resultCount.TryGetValue(detectedLang, out seen); // seen stays 0 when absent
                resultCount[detectedLang] = seen + 1;
            }

            int count = detectedList.Count;
            int correct;
            resultCount.TryGetValue(expectedLang, out correct); // 0 when never detected correctly
            double rate = correct / (double)count;

            // A Dictionary formats as its type name, so the breakdown is rendered by hand.
            List<string> breakdown = new List<string>();
            foreach (KeyValuePair<string, int> entry in resultCount)
            {
                breakdown.Add(entry.Key + "=" + entry.Value);
            }
            string breakdownText = "{" + string.Join(", ", breakdown) + "}";

            // .NET composite format items; printf-style "%s"/"%d" would be printed literally.
            Console.WriteLine("{0} ({1}/{2}={3:F2}): {4}", expectedLang, correct, count, rate, breakdownText);

            totalCorrect += correct;
            totalCount += count;
        }

        Console.WriteLine("total: {0}/{1} = {2:F3}", totalCorrect, totalCount, totalCorrect / (double)totalCount);
    }
}