/// <summary>
/// Manual smoke test for the language detector: loads the prebuilt detector,
/// prints the language detected for a few sample sentences, and then saves
/// every language profile of the detector to an <c>.ldp</c> file.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    LanguageDetector langDet = LanguageDetector.GetLanguageDetectorPrebuilt();
    // Disabled alternative: build the detector from a local corpus instead.
    //LanguageDetector langDet = new LanguageDetector();
    //langDet.ReadCorpus(@"C:\Users\mIHA\Desktop\langdet");

    string[] samples =
    {
        "To je slovenski stavek. Čeprav ga naš detektor ne zazna pravilno. Mogoče šumniki pomagajo...",
        "I love you.",
        "Baš te volim.",
        "Je t'aime."
    };
    foreach (string sample in samples)
    {
        LanguageProfile p = langDet.FindMatchingLanguage(sample);
        // Other callers in this file null-check the result of FindMatchingLanguage,
        // so do the same here instead of risking a NullReferenceException.
        if (p == null)
        {
            Console.WriteLine("(no matching language)");
        }
        else
        {
            Console.WriteLine(p.Language);
        }
    }

    foreach (LanguageProfile pr in langDet.LanguageProfiles)
    {
        string fileName = string.Format(@"C:\Users\mIHA\Desktop\langdet\{0}.ldp", pr.Language);
        BinarySerializer ser = new BinarySerializer(fileName, FileMode.Create);
        try
        {
            pr.Save(ser);
        }
        finally
        {
            // Close the serializer even when Save throws, so the file handle is not leaked.
            ser.Close();
        }
    }

    // Disabled debugging aid: dump a profile-to-profile similarity table.
    //Console.WriteLine(langDet.GetLanguageProfile("et"));
    //StreamWriter w = new StreamWriter("c:\\krneki\\langSim.txt");
    //foreach (LanguageProfile p in langDet.LanguageProfiles)
    //{
    //    w.Write("{0}\t", p.Code);
    //}
    //w.WriteLine();
    //foreach (LanguageProfile p in langDet.LanguageProfiles)
    //{
    //    foreach (LanguageProfile p2 in langDet.LanguageProfiles)
    //    {
    //        //w.Write("{0}\t", Math.Max(p.CalcSpearman(p2), p2.CalcSpearman(p)));
    //    }
    //    w.WriteLine();
    //}
    //w.Close();
}
/// <summary>
/// Runs language detection on a document whose "contentType" feature equals "Text".
/// The text of the blocks selected by <c>mBlockSelector</c> is concatenated (one block
/// per line); if it is at least <c>mMinTextLen</c> characters long and a matching
/// profile is found, the "detectedLanguage" feature is set. For any non-empty text
/// the "detectedCharRange" feature is set as well.
/// </summary>
/// <param name="document">Document to inspect and annotate with features.</param>
public /*protected*/ override void ProcessDocument(Document document)
{
    string docContentType = document.Features.GetFeatureValue("contentType");
    if (docContentType != "Text")
    {
        // Documents of any other content type are left untouched.
        return;
    }

    StringBuilder buffer = new StringBuilder();
    try
    {
        // Gather the selected blocks, one per line.
        TextBlock[] selectedBlocks = document.GetAnnotatedBlocks(mBlockSelector);
        for (int i = 0; i < selectedBlocks.Length; i++)
        {
            buffer.AppendLine(selectedBlocks[i].Text);
        }
        string content = buffer.ToString();

        // Detection is attempted only when the text reaches the minimum length.
        LanguageProfile match = content.Length >= mMinTextLen
            ? mLanguageDetector.FindMatchingLanguage(content)
            : null;
        if (match != null)
        {
            document.Features.SetFeatureValue("detectedLanguage", match.Language.ToString());
        }

        if (content.Length > 0)
        {
            document.Features.SetFeatureValue("detectedCharRange", TextMiningUtils.GetCharRange(content));
        }
    }
    catch (Exception exception)
    {
        // Failures are logged rather than propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}