Example #1
0
        /// <summary>
        /// Demo entry point: runs the prebuilt language detector over a few sample sentences,
        /// prints the detected language of each one, and then saves every language profile
        /// held by the detector to its own ".ldp" file.
        /// </summary>
        /// <param name="args">
        /// Optional. When a first argument is supplied it is used as the output directory for
        /// the profile files; otherwise the original hard-coded directory is used.
        /// </param>
        static void Main(string[] args)
        {
            // To build the detector from a corpus instead of using the prebuilt one, create a
            // new LanguageDetector() and call ReadCorpus(<corpus directory>) on it.
            LanguageDetector langDet = LanguageDetector.GetLanguageDetectorPrebuilt();

            string[] samples = new string[]
            {
                "To je slovenski stavek. Čeprav ga naš detektor ne zazna pravilno. Mogoče šumniki pomagajo...",
                "I love you.",
                "Baš te volim.",
                "Je t'aime."
            };
            foreach (string sample in samples)
            {
                LanguageProfile p = langDet.FindMatchingLanguage(sample);
                // Other callers null-check this result, so guard here as well instead of
                // crashing with a NullReferenceException when no profile matches.
                if (p == null)
                {
                    Console.WriteLine("(no matching language)");
                }
                else
                {
                    Console.WriteLine(p.Language);
                }
            }

            // Default keeps the original output location; args[0] overrides it.
            string outputDir = (args != null && args.Length > 0)
                ? args[0]
                : @"C:\Users\mIHA\Desktop\langdet";

            foreach (LanguageProfile pr in langDet.LanguageProfiles)
            {
                string fileName = Path.Combine(outputDir, string.Format("{0}.ldp", pr.Language));
                BinarySerializer ser = new BinarySerializer(fileName, FileMode.Create);
                try
                {
                    pr.Save(ser);
                }
                finally
                {
                    // Always release the file, even when saving the profile fails.
                    ser.Close();
                }
            }
        }
Example #2
0
        /// <summary>
        /// Annotates a text document with language information. The text of all blocks
        /// selected by <c>mBlockSelector</c> is concatenated (one line per block); if the
        /// result is at least <c>mMinTextLen</c> characters long and a matching language
        /// profile is found, the "detectedLanguage" feature is set. For any non-empty text
        /// the "detectedCharRange" feature is set as well.
        /// </summary>
        /// <param name="document">
        /// The document to process. Documents whose "contentType" feature is not "Text" are
        /// left untouched.
        /// </param>
        /// <remarks>
        /// Exceptions raised while collecting or analysing the text are logged and not rethrown.
        /// </remarks>
        public /*protected*/ override void ProcessDocument(Document document)
        {
            string kind = document.Features.GetFeatureValue("contentType");
            if (kind != "Text") { return; }

            StringBuilder buffer = new StringBuilder();
            try
            {
                // Gather the text of every selected block, one block per line.
                TextBlock[] selected = document.GetAnnotatedBlocks(mBlockSelector);
                for (int i = 0; i < selected.Length; i++)
                {
                    buffer.AppendLine(selected[i].Text);
                }
                string content = buffer.ToString();

                // Language detection is only attempted on sufficiently long text.
                LanguageProfile match = content.Length >= mMinTextLen
                    ? mLanguageDetector.FindMatchingLanguage(content)
                    : null;
                if (match != null)
                {
                    document.Features.SetFeatureValue("detectedLanguage", match.Language.ToString());
                }

                // The character range is recorded for any non-empty text, regardless of length.
                if (content.Length != 0)
                {
                    document.Features.SetFeatureValue("detectedCharRange", TextMiningUtils.GetCharRange(content));
                }
            }
            catch (Exception error)
            {
                mLogger.Error("ProcessDocument", error);
            }
        }