//[Benchmark]
        /// <summary>
        /// Creates a detector and registers only the "eng" language with it.
        /// </summary>
        /// <returns>The newly created detector.</returns>
        public LanguageDetector EnglishLoad()
        {
            var englishOnly = new LanguageDetector();
            englishOnly.AddLanguages("eng");

            return englishOnly;
        }
Esempio n. 2
0
        /// <summary>
        /// Asserts that every text is detected as <paramref name="lang"/>, first with all languages
        /// loaded and then, for each entry of <paramref name="pairs"/>, with only that entry's
        /// languages plus <paramref name="lang"/> loaded.
        /// </summary>
        /// <param name="lang">Expected detection result.</param>
        /// <param name="texts">Texts to run through the detector.</param>
        /// <param name="pairs">Optional restricted language sets; skipped when null.</param>
        private void Test(string lang, string[] texts, string[][] pairs = null)
        {
            // Pass 1: detector that knows every language. The seed is fixed at 1.
            var allLanguages = new LanguageDetector { RandomSeed = 1 };
            allLanguages.AddAllLanguages();

            foreach (string sample in texts)
            {
                Assert.AreEqual(lang, allLanguages.Detect(sample));
            }

            if (pairs == null)
            {
                return;
            }

            // Pass 2: a fresh detector per restricted language set.
            foreach (string[] candidates in pairs)
            {
                var restricted = new LanguageDetector { RandomSeed = 1 };
                restricted.AddLanguages(candidates);
                restricted.AddLanguages(lang);

                foreach (string sample in texts)
                {
                    Assert.AreEqual(lang, restricted.Detect(sample));
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Handles an incoming message: rebuilds the room lookup, chooses a culture based on the
        /// detected language of the message text, fetches the access tokens and prompts for a subject.
        /// </summary>
        /// <param name="context">Dialog context used to show the subject prompt.</param>
        /// <param name="item">Awaitable that yields the incoming message activity.</param>
        public virtual async Task MessageReceivedAsync(IDialogContext context, IAwaitable <IMessageActivity> item)
        {
            // Rebuild the address -> name lookup from the room service on every call.
            _roomsDictionary = new Dictionary <string, string>();

            foreach (var entry in _roomService.GetRooms())
            {
                _roomsDictionary.Add(entry.Address, entry.Name);
            }

            var activity = await item;

            var languageDetector = new LanguageDetector();
            var defaultLanguage  = ConfigurationManager.AppSettings["BotDefaultLanguage"];
            var localLanguage    = ConfigurationManager.AppSettings["BotLocalLanguage"];

            languageDetector.AddLanguages(defaultLanguage, localLanguage);

            // Known issue (noted by the original author): Detect(...) gives null when the message
            // text is in Japanese. Any result that does not equal the default language selects
            // the local culture.
            if (Equals(defaultLanguage, languageDetector.Detect(activity.Text)))
            {
                _detectedCulture = ConfigurationManager.AppSettings["BotDefaultCulture"];
            }
            else
            {
                _detectedCulture = ConfigurationManager.AppSettings["BotLocalCulture"];
            }

            SetCulture(_detectedCulture);

            accessToken        = await GetAccessToken("graph");
            accessToken_office = await GetAccessToken("office");

            PromptDialog.Text(context, SubjectMessageReceivedAsync, Properties.Resources.Text_PleaseEnterSubject);
        }
Esempio n. 4
0
        /// <summary>
        /// Tries to detect the language of the text held in <paramref name="bytes"/>, decoding it
        /// as UTF-8, then ASCII, then <see cref="Encoding.Unicode"/>, and returning the first
        /// detection that does not throw.
        /// </summary>
        /// <param name="bytes">Raw text bytes.</param>
        /// <returns>
        /// The first successful detection result, or null when every attempt throws
        /// <c>NLangDetectException</c>.
        /// </returns>
        private string DetectLanguage(byte[] bytes)
        {
            // Order matters: it is the order in which decodings are attempted.
            var attempts = new Encoding[] { Encoding.UTF8, Encoding.ASCII, Encoding.Unicode };

            foreach (var attempt in attempts)
            {
                try
                {
                    return LanguageDetector.DetectLanguage(attempt.GetString(bytes));
                }
                catch (NLangDetectException)
                {
                    // Deliberately ignored: fall through to the next encoding.
                }
            }

            return null;
        }
Esempio n. 5
0
        /// <summary>
        /// Loads a language detector from the model store, runs it on
        /// <paramref name="testString"/> and writes the detected language to the console,
        /// prefixed with "CLD2".
        /// </summary>
        /// <param name="testString">Text whose language should be detected.</param>
        public static async Task Detect(string testString)
        {
            // Model storage: an online repository storage wrapped around the local
            // "catalyst-models" disk folder. Per the original author's note, the first run may take
            // longer because models have to be downloaded.
            Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));

            var detector = await LanguageDetector.FromStoreAsync(Language.Any, Version.Latest, "");

            var document = new Document(testString);
            detector.Process(document);

            Console.WriteLine($"CLD2\t{document.Language}");
        }
        /// <summary>
        /// Registers the GET and POST handlers at the root of the "/language-detector" module.
        /// </summary>
        /// <param name="languageDetector">Detector used by the POST handler.</param>
        public LanguageDetectModule(LanguageDetector languageDetector)
            : base("/language-detector")
        {
            // GET: render the view with an empty response model.
            Get["/"] = _ => View[LanguageDetectionViewName, new DetectedLanguageResponse()];

            // POST: bind the request, reject invalid models, otherwise detect and render the result.
            Post["/"] = _ =>
            {
                var request = this.Bind <LanguageDetectRequest>();

                if (ModelIsInvalid(request))
                {
                    return BadRequest();
                }

                DetectedLanguageResponse detection = languageDetector.DetectLanguage(request);

                return Negotiate
                       .WithContentType("text/html; charset=utf-8")
                       .WithModel(detection)
                       .WithView(LanguageDetectionViewName);
            };
        }
Esempio n. 7
0
        /// <summary>
        /// Learns a language profile from each "&lt;code&gt;-source.txt" file, builds a detector from
        /// those profiles, runs detection on each "&lt;code&gt;-sample.txt" file and prints every result
        /// with its score.
        /// </summary>
        static void Test()
        {
            const string testFilesPath = @"C:\Users\kevin\Desktop\data";

            // Single source of truth for the languages; the order is the order in which they are
            // learned, detected and reported ("Test 1" .. "Test 6").
            string[] codes = { "en", "nl", "es", "bg", "ru", "de" };

            var learner   = new LanguageLearner();
            var languages = new Dictionary <string, Dictionary <string, int> >();

            foreach (string code in codes)
            {
                languages.Add(code, learner.Learn(code, Path.Combine(testFilesPath, code + "-source.txt")));
            }

            var detector = new LanguageDetector(languages);

            // Run every detection before printing anything, as the original code did.
            // Results are held as object because Detect's return type is declared outside this
            // snippet; Console.WriteLine formats them the same way either way.
            var detected = new object[codes.Length];
            var scores   = new int[codes.Length];

            for (int i = 0; i < codes.Length; i++)
            {
                detected[i] = detector.Detect(Path.Combine(testFilesPath, codes[i] + "-sample.txt"), out scores[i]);
            }

            for (int i = 0; i < codes.Length; i++)
            {
                Console.WriteLine("Test {0}: {1} ({2}%)", i + 1, detected[i], scores[i]);
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Loads the global translation settings (writing them out and re-loading when the first
        /// load reports failure), then creates the translation cache, the individual translator
        /// back-ends and the language detector.
        /// </summary>
        /// <param name="logger">Logger stored on the instance and handed to each component.</param>
        public WebTranslator(ILog logger)
        {
            _Logger = logger;

            var settingsType   = typeof(GlobalTranslationSettings);
            var settingsLoaded = Helper.LoadStaticFromJson(settingsType, _TransaltionSettingsPath);

            if (!settingsLoaded)
            {
                // Load failed: save the current static values, then load them back.
                Helper.SaveStaticToJson(settingsType, _TransaltionSettingsPath);
                Helper.LoadStaticFromJson(settingsType, _TransaltionSettingsPath);
            }

            // Cache is pre-sized from the configured cache size.
            transaltionCache = new List <KeyValuePair <TranslationRequest, string> >(GlobalTranslationSettings.TranslationCacheSize);

            // Translator back-ends, created in the same order as before.
            _MultillectTranslator = new MultillectTranslator(_Logger);
            _GoogleTranslator     = new GoogleTranslator(_Logger);
            _YandexTranslator     = new YandexTranslator(_Logger);
            _DeepLTranslator      = new DeepLTranslator(_Logger);
            _PapagoTranslator     = new PapagoTranslator(_Logger);
            _BaiduTranslator      = new BaiduTranslater(_Logger);

            _LanguageDetector = new LanguageDetector(
                GlobalTranslationSettings.MaxSameLanguagePercent,
                GlobalTranslationSettings.NTextCatLanguageModelsPath,
                _Logger);
        }
        //[Benchmark]
        /// <summary>
        /// Creates a detector and registers the ten languages listed below with it.
        /// </summary>
        /// <returns>The newly created detector.</returns>
        public LanguageDetector AllLoad()
        {
            var multiLanguage = new LanguageDetector();
            multiLanguage.AddLanguages("spa", "fra", "deu", "jpn", "por", "ukr", "zho", "ita", "rus", "kor");

            return multiLanguage;
        }
Esempio n. 10
0
        /// <summary>
        /// Tries to detect the language of a slice of <paramref name="bytes"/>, decoding it as
        /// UTF-8, then ASCII, then <see cref="Encoding.Unicode"/>, and returning the first
        /// detection that does not throw.
        /// </summary>
        /// <param name="bytes">Buffer holding the text.</param>
        /// <param name="index">Offset of the first byte to decode.</param>
        /// <param name="count">Number of bytes to decode.</param>
        /// <returns>
        /// The first successful detection result, or null when every attempt throws
        /// <c>NLangDetectException</c> (each failure is logged at debug level).
        /// </returns>
        private string DetectLanguage(byte[] bytes, int index, int count)
        {
            // Order matters: it is the order in which decodings are attempted.
            var attempts = new Encoding[] { Encoding.UTF8, Encoding.ASCII, Encoding.Unicode };

            foreach (var attempt in attempts)
            {
                try
                {
                    return LanguageDetector.DetectLanguage(attempt.GetString(bytes, index, count));
                }
                catch (NLangDetectException ex)
                {
                    _logger.LogDebug(ex, "LanguageDetector.DetectLanguage threw a NLangDetectException.");
                }
            }

            return null;
        }
Esempio n. 11
0
        /// <summary>
        /// Sample entry point: loads both language detection models, prints their predictions for
        /// every short and long sample, then prints the ten highest-scoring FastText predictions
        /// for the long Spanish sample.
        /// </summary>
        public static async Task Main(string[] args)
        {
            Console.OutputEncoding = Encoding.UTF8;
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(builder => builder.AddConsole()));

            // Languages have to be registered before they can be used.
            Catalyst.Models.English.Register();

            // Two language detection models are demonstrated here:
            //  - one derived from Chrome's former Compact Language Detector 2 (https://github.com/CLD2Owners/cld2)
            //  - one derived from Facebook's FastText language detection dataset
            //    (see: https://fasttext.cc/blog/2017/10/02/blog-post.html)

            // Model storage lives in the local folder ./catalyst-models/
            Storage.Current = new DiskStorage("catalyst-models");

            var cld2     = await LanguageDetector.FromStoreAsync(Language.Any, Version.Latest, "");
            var fastText = await FastTextLanguageDetector.FromStoreAsync(Language.Any, Version.Latest, "");

            // Runs both detectors on one sample and prints the actual vs. detected languages.
            void Report(Language expected, string sample)
            {
                var fastTextDoc = new Document(sample);
                fastText.Process(fastTextDoc);

                var cld2Doc = new Document(sample);
                cld2.Process(cld2Doc);

                Console.WriteLine(sample);
                Console.WriteLine($"Actual:\t{expected}\nFT:\t{fastTextDoc.Language}\nCLD2\t{cld2Doc.Language}");
                Console.WriteLine();
            }

            // Detection on short and then longer samples. Expect lower precision on shorter texts,
            // as there is less information for the model to work with. It is also interesting to
            // see the kind of mistakes these models make, such as detecting Welsh as
            // Gaelic_Scottish_Gaelic.
            foreach (var (language, sample) in Data.ShortSamples)
            {
                Report(language, sample);
            }

            foreach (var (language, sample) in Data.LongSamples)
            {
                Report(language, sample);
            }

            // All predictions are also available through the Predict method.
            var spanishPredictions = fastText.Predict(new Document(Data.LongSamples[Language.Spanish]));

            Console.WriteLine($"\n\nTop 10 predictions and scores for the Spanish sample:");

            var topTen = spanishPredictions.OrderByDescending(prediction => prediction.Value).Take(10);
            foreach (var prediction in topTen)
            {
                Console.WriteLine($"{prediction.Key.ToString().PadRight(40)}\tScore: {prediction.Value:n2}");
            }
        }
        /// <summary>
        /// Detects the language of <paramref name="stringInput"/>, runs the matching fast-brain
        /// processing for "en", "es" or "ru", and falls back to slow-brain processing when the
        /// resulting probability is below 50.
        /// </summary>
        /// <param name="stringInput">Text to analyse.</param>
        /// <returns>The populated <c>Language</c> result object.</returns>
        public static Language MainProgram(string stringInput)
        {
            Stopwatch stopwatch = new Stopwatch();
            Language  lang      = new Language();

            // Started before detection; handed to the fast-brain processors below.
            stopwatch.Start();

            var learner        = new LanguageLearner();
            var knownLanguages = learner.Remember(knownLanguagesFile);
            var detector       = new LanguageDetector(knownLanguages);

            int score;
            var languageCode = detector.Detect(stringInput, out score);

            lang.inputString      = stringInput;
            lang.languageType     = languageCode;
            lang.probability      = score;
            lang.nGramProbability = score;

            switch (languageCode)
            {
            case "en":
                lang = fastBrainProcessEnglish(lang, stopwatch);
                break;

            case "es":
                lang = fastBrainProcessSpanish(lang, stopwatch);
                break;

            case "ru":
                lang = fastBrainProcessRussian(lang, stopwatch);
                break;

            default:
                // Unsupported code: nothing to do here. Per the original author's note, the
                // result is reported as "Undefined" when probability = 0%.
                break;
            }

            if (lang.probability < 50)
            {
                Stopwatch slowBrainTimer = Stopwatch.StartNew();

                // Slow Brain processes
                lang = SlowBrainProcess.SlowBrainProcessing(lang);

                slowBrainTimer.Stop();

                // Total elapsed whole milliseconds. The previous formula
                // (Elapsed.Seconds * 1000 + Elapsed.Milliseconds) used only the seconds and
                // milliseconds *components* of the TimeSpan and therefore silently dropped
                // minutes/hours for runs of 60 s or longer.
                double elapsedMilliseconds = slowBrainTimer.ElapsedMilliseconds;
                lang.slowBrainRuntime = elapsedMilliseconds;
            }

            return lang;
        }
Esempio n. 13
0
        /// <summary>
        /// Checks that detecting the language of a lyric containing <paramref name="text"/>
        /// yields the culture named by <paramref name="language"/>.
        /// </summary>
        /// <param name="text">Lyric text to analyse.</param>
        /// <param name="language">Name of the expected culture.</param>
        public void TestDetectLanguage(string text, string language)
        {
            var detector = new LanguageDetector(generateConfig());
            var lyric    = new Lyric { Text = text };

            var actual   = detector.DetectLanguage(lyric);
            var expected = new CultureInfo(language);

            // Expected value goes first so that a failure message labels the two values correctly.
            Assert.AreEqual(expected, actual);
        }
Esempio n. 14
0
        /// <summary>
        /// Creates (but doesn't start) a new Dexter process: builds the command line, picks the
        /// C# analyzer executable or the Java executor depending on the code model language, and
        /// wires up the output/error/disposed handlers.
        /// </summary>
        /// <param name="createUser">if true, this process will create new user account</param>
        /// <exception cref="DexterRuntimeException">The selected executable does not exist.</exception>
        private void CreateDexterProcess(bool createUser = false)
        {
            // Command-line fragments. Each one carries its own surrounding spaces.
            string configFlag = "";
            if (File.Exists(Configuration.DefaultConfigurationPath))
            {
                configFlag = " -f " + Configuration.DefaultConfigurationPath;
            }

            string createUserFlag      = createUser ? " -c " : "";
            string createXmlResultFlag = " -x ";

            string credentialsParams;
            if (configuration.standalone && !createUser)
            {
                credentialsParams = " -s ";
            }
            else
            {
                credentialsParams = " -u " + configuration.userName + " -p " + configuration.userPassword + " -h " + configuration.dexterServerIp + " -o " + configuration.dexterServerPort;
            }

            string resultFileFormatFlag = " -F xml";

            dexterProcess = new Process();

            // NOTE(review): the result is not used in this method; the call is kept as-is in case
            // fromConfiguration has side effects - confirm.
            DexterInfo dexterInfo = DexterInfo.fromConfiguration(configuration);

            string executable;
            string arguments;
            string workingDirectory;

            if (LanguageDetector.IsCodeModelLanguageCSharp())
            {
                if (!File.Exists(configuration.DexterCSPath))
                {
                    throw new DexterRuntimeException("DexterCS.exe not found in \"" + configuration.DexterCSPath + "\"");
                }

                executable       = configuration.DexterCSPath;
                arguments        = createUserFlag + createXmlResultFlag + configFlag + credentialsParams + resultFileFormatFlag;
                workingDirectory = Path.GetDirectoryName(configuration.DexterCSPath);
            }
            else
            {
                if (!File.Exists(configuration.DexterExecutorPath))
                {
                    throw new DexterRuntimeException("dexter-executor.jar not found in \"" + configuration.DexterExecutorPath + "\"");
                }

                // The Java branch does not pass the result-file-format flag.
                executable       = "java.exe";
                arguments        = "-jar " + configuration.DexterExecutorPath + createUserFlag + createXmlResultFlag + configFlag + credentialsParams;
                workingDirectory = Path.GetDirectoryName(configuration.DexterExecutorPath);
            }

            // No window, no shell, with both output streams redirected to the handlers below.
            dexterProcess.StartInfo = new ProcessStartInfo()
            {
                FileName               = executable,
                Arguments              = arguments,
                WorkingDirectory       = workingDirectory,
                CreateNoWindow         = true,
                UseShellExecute        = false,
                RedirectStandardOutput = true,
                RedirectStandardError  = true
            };

            dexterProcess.OutputDataReceived += OutputDataReceived;
            dexterProcess.ErrorDataReceived  += ErrorDataReceived;

            // Clear the field once the process object has been disposed.
            dexterProcess.Disposed += (s, e) => dexterProcess = null;
        }
        /// <summary>
        /// Checks that <c>LanguageDetector.GetLanguage</c> returns
        /// <paramref name="expectedLang"/> for the given text.
        /// </summary>
        /// <param name="text">
        /// Text to analyse. When it starts with ':', the remainder is passed to
        /// <c>TestConfig.ReadLangFile</c> and the result of that call is analysed instead.
        /// </param>
        /// <param name="expectedLang">Expected detection result.</param>
        public void detect_with_language_analyzer(string text, string expectedLang)
        {
            // Guard the index access: text[0] throws on an empty (or null) string.
            if (!string.IsNullOrEmpty(text) && text[0] == ':')
            {
                text = TestConfig.ReadLangFile(text.Substring(1));
            }

            var lang = LanguageDetector.GetLanguage(text);

            Assert.AreEqual(expectedLang, lang);
        }
        /// <summary>
        /// Prepares the two detectors used by the tests: one with only "eng" registered and one
        /// with the ten languages listed below.
        /// </summary>
        public LanguageDetectionTests()
        {
            // English-only detector.
            var englishOnly = new LanguageDetector();
            englishOnly.AddLanguages("eng");
            EnglishDetector = englishOnly;

            // Multi-language detector.
            var multiLanguage = new LanguageDetector();
            multiLanguage.AddLanguages("spa", "fra", "deu", "jpn", "por", "ukr", "zho", "ita", "rus", "kor");
            AllDetector = multiLanguage;
        }
Esempio n. 17
0
        /// <summary>
        /// Builds a detector from the languages remembered in
        /// <paramref name="knownLanguagesFile"/>, runs it on <paramref name="file"/> and prints
        /// the detected language code with its score.
        /// </summary>
        /// <param name="file">Value passed to the detector's Detect call.</param>
        /// <param name="knownLanguagesFile">Value passed to the learner's Remember call.</param>
        static void Detect(string file, string knownLanguagesFile)
        {
            var detector = new LanguageDetector(new LanguageLearner().Remember(knownLanguagesFile));

            int score;
            var languageCode = detector.Detect(file, out score);

            Console.WriteLine("The language code of the detected language is: {0} ({1}%)", languageCode, score);
        }
Esempio n. 18
0
        /// <summary>
        /// Console entry point: loads the language profiles, detects the language of a fixed
        /// English sentence, prints its language code and waits for a key press.
        /// </summary>
        public static void Main(string[] args)
        {
            const string profilesPath = @"~/../../../../languagedetector.net/profiles/profiles/";
            const string sampleText   = "This is a test to test the language.";

            Console.WriteLine("Starting app...");

            ILanguageDetector detector = new LanguageDetector();
            detector.LoadProfile(profilesPath);

            var detected = detector.Detect(sampleText);
            Console.WriteLine("Language picked up: {0}", detected.LanguageCode);

            Console.WriteLine("Done...Press any key to quit");
            Console.ReadKey();
        }
Esempio n. 19
0
        /// <summary>
        /// Detects the language of <paramref name="text"/> and returns it in an answer model.
        /// </summary>
        /// <param name="text">Text whose language should be detected.</param>
        /// <returns>
        /// An answer whose <c>Property</c> is the caption "Определен язык: " ("Language detected: ")
        /// and whose <c>Value</c> is that caption followed by the detection result.
        /// </returns>
        public AnswerModel <string> Execute(string text)
        {
            LanguageDetector detector = new LanguageDetector();
            detector.AddAllLanguages();

            // Previously this method asserted that the hard-coded word "Привет" is detected as
            // "lv" (a leftover unit-test check) and never looked at `text`, so the answer carried
            // no detection result at all.
            // NOTE(review): appending the result to the caption is the assumed intent - confirm
            // the expected Value format with the consumer of AnswerModel.
            var detectedLanguage = detector.Detect(text);

            AnswerModel <string> answer = new AnswerModel <string>();
            answer.Property = "Определен язык: ";
            answer.Value    = "Определен язык: " + detectedLanguage;

            return answer;
        }
Esempio n. 20
0
        /// <summary>
        /// Determines a character-set name for a text buffer, optionally detecting the text's
        /// language first when no language is supplied.
        /// </summary>
        /// <param name="bytes">Buffer holding the text.</param>
        /// <param name="count">Byte count passed through to the helper calls.</param>
        /// <param name="language">Language hint; when blank and detection is enabled it is detected from the buffer.</param>
        /// <param name="enableLanguageDetection">Whether a blank <paramref name="language"/> may be detected.</param>
        /// <returns>
        /// "utf-8", the charset reported by <c>DetectCharset</c> (unless it is blank or
        /// "windows-1252"), otherwise the result of <c>GetFileCharacterSetFromLanguage</c> for a
        /// non-blank language, otherwise null.
        /// </returns>
        public string GetDetectedEncodingName(byte[] bytes, int count, string language, bool enableLanguageDetection)
        {
            int start = 0;

            // Step 1: the initial encoding check wins outright when it reports UTF-8.
            var initialEncoding = GetInitialEncoding(bytes, count);
            if (initialEncoding != null && initialEncoding.Equals(Encoding.UTF8))
            {
                return "utf-8";
            }

            // Step 2: fill in a missing language, initializing the detector on first use.
            bool shouldDetectLanguage = enableLanguageDetection && string.IsNullOrWhiteSpace(language);
            if (shouldDetectLanguage)
            {
                if (!_langDetectInitialized)
                {
                    _langDetectInitialized = true;
                    LanguageDetector.Initialize(_json);
                }

                language = DetectLanguage(bytes, start, count);

                if (!string.IsNullOrWhiteSpace(language))
                {
                    _logger.LogDebug("Text language detected as {0}", language);
                }
            }

            // Step 3: charset detection. A "windows-1252" result is not returned directly; it
            // falls through to the language-based lookup below.
            var detectedCharset = DetectCharset(bytes, start, count, language);
            bool hasCharset     = !string.IsNullOrWhiteSpace(detectedCharset);

            if (hasCharset && string.Equals(detectedCharset, "utf-8", StringComparison.OrdinalIgnoreCase))
            {
                return "utf-8";
            }

            if (hasCharset && !string.Equals(detectedCharset, "windows-1252", StringComparison.OrdinalIgnoreCase))
            {
                return detectedCharset;
            }

            // Step 4: fall back to the language, if there is one.
            return string.IsNullOrWhiteSpace(language)
                ? null
                : GetFileCharacterSetFromLanguage(language);
        }
Esempio n. 21
0
        /// <summary>
        /// Decides whether <paramref name="word"/> is valid. With a detector, the word is valid
        /// when its detected language is non-null and equals <paramref name="languageName"/>;
        /// without one, it is valid when every character is a letter.
        /// </summary>
        /// <param name="word">Word to check.</param>
        /// <param name="languageDetector">Detector to use, or null for the letters-only check.</param>
        /// <param name="languageName">Language the detection result must equal.</param>
        private static bool IsValidWord(string word, LanguageDetector languageDetector, string languageName)
        {
            if (languageDetector == null)
            {
                // No detector available: accept only words made up entirely of letters.
                return word.All(c => Char.IsLetter(c));
            }

            var detectedLanguage = languageDetector.Detect(word);

            return detectedLanguage != null && detectedLanguage.Equals(languageName);
        }
Esempio n. 22
0
        /// <summary>
        /// Click handler: detects the language of the entered text, translates it into the
        /// language selected in the picker, shows the result and stores the translation.
        /// </summary>
        private void TranslateBtn_Clicked(object sender, EventArgs e)
        {
            // NOTE(review): this key is hard-coded in source. It should be moved to secure
            // configuration and rotated; left in place here to avoid changing how the app is wired.
            AzureAuthToken azureAuthToken = new AzureAuthToken("731cf5d466e543409989ce06c9499979");
            var            authToken      = azureAuthToken.GetAccessToken();

            var sourceText = toTranslate.Text;

            LanguageDetector languageDetector = new LanguageDetector(authToken);
            var sourceLanguage = languageDetector.Detect(sourceText);

            var targetLanguage = languagePicker.SelectedItem.ToString();

            Translator translator = new Translator(authToken);
            var        result     = translator.Translate(sourceText, sourceLanguage, targetLanguage);

            translated.Text = result;

            // FromLang is the language detected for FromText and ToLang is the picker selection.
            // The two values were previously stored the other way round.
            App.Database.SaveItem(new Models.Translation {
                FromLang = sourceLanguage, ToLang = targetLanguage, FromText = sourceText, ToText = result
            });
        }
        /// <summary>
        /// Stores the OAuth tokens, prepares the search options (recent results, no entities,
        /// count of 100) and creates a language detector with all languages added from the
        /// "Profiles" path under the application base directory.
        /// </summary>
        public TwitterRetriever(string accessToken, string accessTokenSecret, string consumerKey, string consumerSecret)
        {
            var oauth = new OAuthTokens();
            oauth.AccessToken       = accessToken;
            oauth.AccessTokenSecret = accessTokenSecret;
            oauth.ConsumerKey       = consumerKey;
            oauth.ConsumerSecret    = consumerSecret;
            tokens = oauth;

            so = new SearchOptions
            {
                ResultType      = SearchOptionsResultType.Recent,
                IncludeEntities = false,
                Count           = 100
            };

            detector = new LanguageDetector();

            string profilesDirectory = AppDomain.CurrentDomain.BaseDirectory + "Profiles";
            detector.AddAllLanguages(profilesDirectory);
        }
Esempio n. 24
0
        /**************************************************************************/

        /// <summary>
        /// Creates the analyzer with debug messages suppressed and a detector restricted to the
        /// fixed set of languages listed below.
        /// </summary>
        public MacroscopeAnalyzeTextLanguage()
        {
            this.SuppressDebugMsg = true;

            string[] languageCodes =
            {
                "en",    // English
                "es",    // Spanish
                "de",    // German
                "fr",    // French
                "it",    // Italian
                "ja",    // Japanese
                "no",    // Norwegian
                "pt",    // Portuguese
                "sv",    // Swedish
                "zh-cn", // Chinese Simplified
                "zh-tw"  // Chinese Traditional
            };

            this.DetectLanguage = new LanguageDetector();
            this.DetectLanguage.AddLanguages(languageCodes);
        }
Esempio n. 25
0
        /// <summary>
        /// Regression check for a single sample sentence: with default settings the detector
        /// must report "slk" and exactly one candidate; with a convergence threshold of 0.9 and
        /// 50 iterations it must still report "slk" but two candidates.
        /// </summary>
        public void Issue_2()
        {
            const string sample = "Výsledky kola švýcarské hokejové ligy";

            // Default settings, fixed seed.
            var defaults = new LanguageDetector { RandomSeed = 1 };
            defaults.AddAllLanguages();

            Assert.AreEqual("slk", defaults.Detect(sample));
            Assert.AreEqual(1, defaults.DetectAll(sample).Count());

            // Tuned settings, same seed.
            var tuned = new LanguageDetector
            {
                RandomSeed           = 1,
                ConvergenceThreshold = 0.9,
                MaxIterations        = 50
            };
            tuned.AddAllLanguages();

            Assert.AreEqual("slk", tuned.Detect(sample));
            Assert.AreEqual(2, tuned.DetectAll(sample).Count());
        }
Esempio n. 26
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Creates the analyzer with debug messages suppressed and a detector configured for the
        /// given language code.
        /// </summary>
        /// <param name="IsoLanguageCode">
        /// Language code to detect alongside English. A null/empty value or "x-default"
        /// (any casing) loads all languages instead.
        /// </param>
        public MacroscopeAnalyzeTextLanguage(string IsoLanguageCode)
        {
            this.SuppressDebugMsg = true;

            this.DetectLanguage = new LanguageDetector();

            this.DetectLanguage.RandomSeed           = 666;
            this.DetectLanguage.ProbabilityThreshold = 0.5;
            this.DetectLanguage.MaxTextLength        = 1024 * 8;

            // Normalize once, culture-invariantly. ToLower() uses the current culture, so under
            // e.g. a Turkish culture "IT" becomes "ıt" (dotless i) and the code no longer
            // matches any language.
            string languageCode = string.IsNullOrEmpty(IsoLanguageCode)
                ? null
                : IsoLanguageCode.ToLowerInvariant();

            if (languageCode == null || languageCode.Equals("x-default"))
            {
                this.DetectLanguage.AddAllLanguages();
                return;
            }

            // English is always added; the requested language is added on top of it.
            this.DetectLanguage.AddLanguages("en");

            if (languageCode.Equals("en"))
            {
                return;
            }

            try
            {
                this.DetectLanguage.AddLanguages(languageCode);
            }
            catch (Exception ex)
            {
                // Best effort: a failure to add the language is reported via DebugMsg and the
                // detector keeps working with English only.
                DebugMsg(string.Format("MacroscopeAnalyzeTextLanguage: {0}", ex.Message));
            }
        }
Esempio n. 27
0
        /// <summary>
        /// Detects the language of every lyric in the beatmap and assigns it to the lyric,
        /// wrapping the assignments in a single begin/end change on the change handler (when one
        /// is present). Does nothing when the beatmap has no lyrics.
        /// </summary>
        public void AutoDetectLyricLanguage()
        {
            var lyrics = beatmap.HitObjects.OfType <Lyric>().ToList();

            if (lyrics.Count == 0)
            {
                return;
            }

            // todo : should get the config from setting.
            var detector = new LanguageDetector(new LanguageDetectorConfig());

            changeHandler?.BeginChange();

            foreach (var lyric in lyrics)
            {
                lyric.Language = detector.DetectLanguage(lyric);
            }

            changeHandler?.EndChange();
        }
Esempio n. 28
0
        /// <summary>
        /// Sample entry point: obtains the prebuilt language detector, prints the matched
        /// language for a few sample sentences, then saves every language profile to an
        /// ".ldp" file.
        /// </summary>
        static void Main(string[] args)
        {
            // Alternative kept from the original (commented out there): create an empty
            // LanguageDetector and call ReadCorpus(...) on a corpus folder instead.
            LanguageDetector langDet = LanguageDetector.GetLanguageDetectorPrebuilt();

            string[] samples =
            {
                "To je slovenski stavek. Čeprav ga naš detektor ne zazna pravilno. Mogoče šumniki pomagajo...",
                "I love you.",
                "Baš te volim.",
                "Je t'aime."
            };

            foreach (string sample in samples)
            {
                LanguageProfile match = langDet.FindMatchingLanguage(sample);
                Console.WriteLine(match.Language);
            }

            foreach (LanguageProfile profile in langDet.LanguageProfiles)
            {
                BinarySerializer serializer = new BinarySerializer(string.Format(@"C:\Users\mIHA\Desktop\langdet\{0}.ldp", profile.Language), FileMode.Create);

                try
                {
                    profile.Save(serializer);
                }
                finally
                {
                    // Always release the output file, even when Save throws.
                    serializer.Close();
                }
            }
        }
        /// <summary>
        /// Verifies that each text is detected as <paramref name="lang"/>: once with every
        /// language loaded, and once per entry of <paramref name="pairs"/> with only that entry's
        /// languages plus <paramref name="lang"/> loaded.
        /// </summary>
        /// <param name="lang">Expected detection result.</param>
        /// <param name="texts">Texts to run through the detector.</param>
        /// <param name="pairs">Optional restricted language sets; null means none.</param>
        private void Test(string lang, string[] texts, string[][] pairs = null)
        {
            LanguageDetector full = new LanguageDetector();
            full.RandomSeed = 1;
            full.AddAllLanguages();

            foreach (string sample in texts)
            {
                Assert.AreEqual(lang, full.Detect(sample));
            }

            // A null `pairs` is treated as an empty list, so the loop body simply never runs.
            foreach (string[] languageSet in pairs ?? new string[0][])
            {
                LanguageDetector narrowed = new LanguageDetector();
                narrowed.RandomSeed = 1;
                narrowed.AddLanguages(languageSet);
                narrowed.AddLanguages(lang);

                foreach (string sample in texts)
                {
                    Assert.AreEqual(lang, narrowed.Detect(sample));
                }
            }
        }
 /// <summary>
 /// Forwards <paramref name="pathToJson"/> to
 /// <c>LanguageDetector.TransformJsonDataInModelData</c>.
 /// </summary>
 /// <param name="pathToJson">Path handed through unchanged.</param>
 public static void CreateLanguageDetector(string pathToJson)
     => LanguageDetector.TransformJsonDataInModelData(pathToJson);
Esempio n. 31
0
        /// <summary>
        /// Logs in to the Altmetric explorer, searches research outputs for
        /// <paramref name="searchTerm"/>, fetches the details of up to the first
        /// 10 outputs and collects their news (up to 3 per output), blog (first
        /// only) and video (first only) posts whose title is detected as "en"
        /// and whose URL answers a HEAD request with a success status.
        /// </summary>
        /// <param name="searchTerm">
        /// Raw search text. It is URL-escaped here before being placed in the
        /// query string; <c>null</c> is treated as an empty term.
        /// </param>
        /// <returns>
        /// The collected posts, each carrying the score of the output it was
        /// found under. Empty when the search yields no outputs.
        /// </returns>
        /// <exception cref="HttpRequestException">
        /// Propagated when the login page, the search or a detail fetch fails.
        /// Failures of the per-link HEAD probe are swallowed (link is skipped).
        /// </exception>
        public static async Task<List<Words>> Search(string searchTerm)
        {
            LanguageDetector detector = new LanguageDetector();
            detector.AddAllLanguages();

            List<Words> newsList = new List<Words>();

            // The client is scoped with using so it is released on the early
            // return and on exceptions as well as on the normal path.
            using (HttpClient client = new HttpClient())
            {
                string loginPage = await client.GetStringAsync("https://www.altmetric.com/explorer/login");

                Match  matchObject = Regex.Match(loginPage, @"name=""authenticity_token"" value=""(?<key>.+)""");
                string token       = matchObject.Success ? matchObject.Groups["key"].Value : string.Empty;

                // NOTE(review): the credentials below and the API key further down
                // are hard-coded in source; they should be loaded from configuration.
                Dictionary<string, string> formFields = new Dictionary<string, string>()
                {
                    { "email", "*****@*****.**" },
                    { "password", "bigdatachallenge" },
                    { "authenticity_token", token },
                    { "commit", "Sign in" }
                };

                using (FormUrlEncodedContent content = new FormUrlEncodedContent(formFields))
                using (await client.PostAsync("https://www.altmetric.com/explorer/login", content))
                {
                    // The login response itself is not inspected; presumably the call is
                    // made only for the session state the client keeps afterwards.
                }

                // Escape the term so characters such as '&' or '#' cannot break the query string.
                string escapedTerm   = Uri.EscapeDataString(searchTerm ?? string.Empty);
                string searchResults =
                    await client.GetStringAsync("https://www.altmetric.com/explorer/json_data/research_outputs?q=" + escapedTerm +
                                                "&scope=all");

                Console.WriteLine("A");

                var serializer = new System.Web.Script.Serialization.JavaScriptSerializer();
                serializer.MaxJsonLength = Int32.MaxValue;

                dynamic papersDict = serializer.DeserializeObject(searchResults);
                dynamic outputs    = papersDict["outputs"];

                if (outputs.Length == 0)
                {
                    return newsList;
                }

                Console.WriteLine("B");

                // Each detail request is mapped to the score of its own output. The tasks
                // finish in arbitrary order, so a completion counter cannot be used to
                // look the score up.
                List<Task<string>>            taskList    = new List<Task<string>>();
                Dictionary<Task<string>, int> scoreByTask = new Dictionary<Task<string>, int>();

                int outputCount = Math.Min(10, outputs.Length);

                for (int i = 0; i < outputCount; i++)
                {
                    string altId = outputs[i]["id"].ToString();
                    int    score = outputs[i]["score"];

                    Task<string> detailsTask = client.GetStringAsync("https://api.altmetric.com/v1/fetch/id/" + altId + "?key=ef2e9b9961415ba4b6510ec82c3e9cba");
                    taskList.Add(detailsTask);
                    scoreByTask.Add(detailsTask, score);
                }

                int counter = 0;

                while (taskList.Count > 0)
                {
                    Console.WriteLine(counter);
                    Task<string> firstFinishedTask = await Task.WhenAny(taskList);

                    taskList.Remove(firstFinishedTask);
                    int    score       = scoreByTask[firstFinishedTask];
                    string detailsText = await firstFinishedTask;

                    dynamic details = serializer.DeserializeObject(detailsText);

                    // Held as object so the helper calls below are bound at compile time.
                    object posts = details["posts"];

                    await AddEnglishReachablePostsAsync(client, detector, posts, "news", 3, score, WordType.Article, newsList);
                    await AddEnglishReachablePostsAsync(client, detector, posts, "blogs", 1, score, WordType.Blog, newsList);
                    await AddEnglishReachablePostsAsync(client, detector, posts, "video", 1, score, WordType.Video, newsList);

                    counter++;
                }
            }

            return newsList;
        }

        /// <summary>
        /// Appends to <paramref name="results"/> up to <paramref name="maxEntries"/>
        /// entries of <c>posts[kind]</c> that have a title detected as "en" and a URL
        /// that passes <see cref="IsReachableAsync"/>.
        /// </summary>
        private static async Task AddEnglishReachablePostsAsync(HttpClient client, LanguageDetector detector, dynamic posts,
                                                                string kind, int maxEntries, int score, WordType type,
                                                                List<Words> results)
        {
            if (!posts.ContainsKey(kind))
            {
                return;
            }

            dynamic entries = posts[kind];
            int     limit   = Math.Min(maxEntries, entries.Length);

            for (int j = 0; j < limit; j++)
            {
                dynamic entry = entries[j];

                // Entries lacking either key are skipped for every post kind.
                if (!entry.ContainsKey("title") || !entry.ContainsKey("url"))
                {
                    continue;
                }

                string title = entry["title"];
                string url   = entry["url"];

                if (title == null || url == null || detector.Detect(title) != "en")
                {
                    continue;
                }

                // A malformed link is skipped instead of aborting the whole search.
                Uri target;
                if (!Uri.TryCreate(url, UriKind.Absolute, out target))
                {
                    continue;
                }

                if (await IsReachableAsync(client, target))
                {
                    results.Add(new Words(title, url, score, type));
                }
            }
        }

        /// <summary>
        /// Sends a HEAD request to <paramref name="target"/> and reports whether it
        /// returned a success status. An <see cref="HttpRequestException"/> counts
        /// as "not reachable".
        /// </summary>
        private static async Task<bool> IsReachableAsync(HttpClient client, Uri target)
        {
            using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Head, target))
            {
                try
                {
                    using (HttpResponseMessage validityResponse = await client.SendAsync(request))
                    {
                        return validityResponse.IsSuccessStatusCode;
                    }
                }
                catch (HttpRequestException)
                {
                    // Best-effort probe: a link that cannot be reached is simply left out.
                    return false;
                }
            }
        }
        /// <summary>
        /// Checks detection of a fixed sample sentence under two configurations:
        /// with default settings it must be detected as "sk" with exactly one
        /// candidate from <c>DetectAll</c>; with a convergence threshold of 0.9
        /// and 50 iterations it must still be "sk" but with two candidates.
        /// </summary>
        public void Issue_2()
        {
            const string sample = "Výsledky kola švýcarské hokejové ligy";

            // Default settings, fixed seed.
            var defaults = new LanguageDetector { RandomSeed = 1 };
            defaults.AddAllLanguages();

            Assert.AreEqual("sk", defaults.Detect(sample));
            Assert.AreEqual(1, defaults.DetectAll(sample).Count());

            // Same seed, custom convergence threshold and iteration count.
            var tuned = new LanguageDetector
            {
                RandomSeed           = 1,
                ConvergenceThreshold = 0.9,
                MaxIterations        = 50
            };
            tuned.AddAllLanguages();

            Assert.AreEqual("sk", tuned.Detect(sample));
            Assert.AreEqual(2, tuned.DetectAll(sample).Count());
        }