Ejemplo n.º 1
0
        /// <summary>
        /// Asserts that every sample in <paramref name="texts"/> is detected as <paramref name="lang"/>.
        /// The check runs once with all languages loaded and then, when <paramref name="pairs"/> is
        /// supplied, once per entry with only that entry's languages plus <paramref name="lang"/> loaded.
        /// </summary>
        /// <param name="lang">Language code every sample is expected to be detected as.</param>
        /// <param name="texts">Sample texts to run through the detector.</param>
        /// <param name="pairs">Optional sets of language codes to load alongside <paramref name="lang"/>.</param>
        private void Test(string lang, string[] texts, string[][] pairs = null)
        {
            // Fixed seed on every detector instance used by this helper.
            var fullDetector = new LanguageDetector { RandomSeed = 1 };
            fullDetector.AddAllLanguages();

            for (int i = 0; i < texts.Length; i++)
            {
                Assert.AreEqual(lang, fullDetector.Detect(texts[i]));
            }

            if (pairs == null)
            {
                return;
            }

            foreach (string[] candidates in pairs)
            {
                // A fresh detector per entry so only the listed languages and the expected one are loaded.
                var restrictedDetector = new LanguageDetector { RandomSeed = 1 };
                restrictedDetector.AddLanguages(candidates);
                restrictedDetector.AddLanguages(lang);

                for (int i = 0; i < texts.Length; i++)
                {
                    Assert.AreEqual(lang, restrictedDetector.Detect(texts[i]));
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Handles the first incoming message of the dialog: rebuilds the room lookup, picks the
        /// conversation culture from the detected message language, fetches the access tokens and
        /// prompts the user for a subject.
        /// </summary>
        /// <param name="context">Dialog context used to issue the subject prompt.</param>
        /// <param name="item">Awaitable that yields the incoming message activity.</param>
        public virtual async Task MessageReceivedAsync(IDialogContext context, IAwaitable<IMessageActivity> item)
        {
            // Rebuild the address -> name lookup from the room service on every call.
            _roomsDictionary = new Dictionary<string, string>();

            foreach (var room in _roomService.GetRooms())
            {
                _roomsDictionary.Add(room.Address, room.Name);
            }

            var message = await item;

            var defaultLanguage = ConfigurationManager.AppSettings["BotDefaultLanguage"];
            var localLanguage   = ConfigurationManager.AppSettings["BotLocalLanguage"];

            var detector = new LanguageDetector();
            detector.AddLanguages(defaultLanguage, localLanguage);

            // Detection yields null when the text matches neither configured language (for example
            // Japanese input). Skip detection entirely for empty or missing text.
            string detectedLanguage = string.IsNullOrWhiteSpace(message.Text)
                ? null
                : detector.Detect(message.Text);

            // Only the two configured languages are loaded, so a non-null result that is not the default
            // language is the local one. An undetermined result falls back to the default culture rather
            // than being silently treated as the local language.
            bool useDefaultCulture = detectedLanguage == null || Equals(defaultLanguage, detectedLanguage);

            _detectedCulture = useDefaultCulture
                ? ConfigurationManager.AppSettings["BotDefaultCulture"]
                : ConfigurationManager.AppSettings["BotLocalCulture"];

            SetCulture(_detectedCulture);

            accessToken = await GetAccessToken("graph");

            accessToken_office = await GetAccessToken("office");

            PromptDialog.Text(context, SubjectMessageReceivedAsync, Properties.Resources.Text_PleaseEnterSubject);
        }
        //[Benchmark]
        /// <summary>
        /// Creates a detector and loads the ten non-English language profiles listed below.
        /// </summary>
        /// <returns>The newly created detector.</returns>
        public LanguageDetector AllLoad()
        {
            string[] languageCodes = { "spa", "fra", "deu", "jpn", "por", "ukr", "zho", "ita", "rus", "kor" };

            var detector = new LanguageDetector();
            detector.AddLanguages(languageCodes);

            return detector;
        }
        //[Benchmark]
        /// <summary>
        /// Creates a detector with only the "eng" language profile loaded.
        /// </summary>
        /// <returns>The newly created detector.</returns>
        public LanguageDetector EnglishLoad()
        {
            var englishOnly = new LanguageDetector();
            englishOnly.AddLanguages("eng");

            return englishOnly;
        }
        /// <summary>
        /// Prepares the two detectors used by this class: one with only "eng" loaded and one with
        /// the ten other language profiles loaded.
        /// </summary>
        public LanguageDetectionTests()
        {
            // English-only detector.
            var englishOnly = new LanguageDetector();
            englishOnly.AddLanguages("eng");
            EnglishDetector = englishOnly;

            // Detector covering the remaining language set.
            string[] otherLanguages = { "spa", "fra", "deu", "jpn", "por", "ukr", "zho", "ita", "rus", "kor" };

            var multiLanguage = new LanguageDetector();
            multiLanguage.AddLanguages(otherLanguages);
            AllDetector = multiLanguage;
        }
        /// <summary>
        /// Verifies that each text in <paramref name="texts"/> is detected as <paramref name="lang"/>,
        /// first against a detector with all languages loaded and then against one detector per entry
        /// of <paramref name="pairs"/> that has only that entry's languages plus <paramref name="lang"/>.
        /// </summary>
        /// <param name="lang">Expected detection result for every text.</param>
        /// <param name="texts">Texts to detect.</param>
        /// <param name="pairs">Optional language-code sets to test against; skipped when null.</param>
        private void Test(string lang, string[] texts, string[][] pairs = null)
        {
            var everything = new LanguageDetector();
            everything.RandomSeed = 1;
            everything.AddAllLanguages();

            foreach (string sample in texts)
            {
                Assert.AreEqual(lang, everything.Detect(sample));
            }

            // A missing pairs argument behaves like an empty list: the loop below does nothing.
            string[][] languageSets = pairs ?? new string[0][];

            foreach (string[] languageSet in languageSets)
            {
                var narrowed = new LanguageDetector();
                narrowed.RandomSeed = 1;
                narrowed.AddLanguages(languageSet);
                narrowed.AddLanguages(lang);

                foreach (string sample in texts)
                {
                    Assert.AreEqual(lang, narrowed.Detect(sample));
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Entry point: builds word-frequency lists for one language from a directory or an archive.
        /// </summary>
        /// <remarks>
        /// Expects two arguments: the input type (parsed case-insensitively into <see cref="InputType"/>)
        /// and the input path. The language name is the last path segment (for archives, the part before
        /// the first '.'). The log and the <c>_full.txt</c>, <c>_50k.txt</c> and <c>_ignored.txt</c> files
        /// are written to a directory named after the language, next to the input.
        /// </remarks>
        /// <param name="args">Command-line arguments: input type followed by input path.</param>
        static void Main(string[] args)
        {
            if (args == null || args.Length == 0)
            {
                Console.Error.WriteLine("you need to pass directly path to start the process");
                return;
            }

            InputType inputType;

            // Show the usage text for an unparsable or Unknown type and for a missing path argument,
            // instead of failing later with an IndexOutOfRangeException on args[1].
            if (!Enum.TryParse(args[0], true, out inputType) || inputType == InputType.Unknown || args.Length < 2)
            {
                Console.Error.WriteLine("Expected input in one of the following format");
                Console.Error.WriteLine(@"FrequencyListBuilder directory c:\MyContentDir\en");
                Console.Error.WriteLine(@"FrequencyListBuilder archive c:\MyContentDir\en.tar.gz");
                return;
            }

            string pathInput         = args[1];
            string nameWithExtension = Path.GetFileName(pathInput);
            string languageName;
            string extension = null;

            if (inputType == InputType.Directory)
            {
                languageName = nameWithExtension;
            }
            else
            {
                // Everything from the first '.' onwards is the (possibly compound) extension, e.g. ".tar.gz".
                int pos = nameWithExtension.IndexOf('.');

                if (pos < 0)
                {
                    // No extension at all: keep the whole name and let the switch below report it.
                    languageName = nameWithExtension;
                    extension    = string.Empty;
                }
                else
                {
                    languageName = nameWithExtension.Substring(0, pos);
                    extension    = nameWithExtension.Substring(pos);
                }
            }

            // A trailing separator or a name that starts with '.' leaves nothing to name the output after.
            if (string.IsNullOrEmpty(languageName))
            {
                Console.Error.WriteLine($"Cannot determine the language name from path '{pathInput}'");
                return;
            }

            // GetDirectoryName returns null for root paths; Path.Combine rejects null.
            string parentPath = Path.Combine(Path.GetDirectoryName(pathInput) ?? string.Empty, languageName);

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(parentPath);

            string fileLog     = Path.Combine(parentPath, $"{languageName}.log");
            string fullData    = Path.Combine(parentPath, $"{languageName}_full.txt");
            string partialData = Path.Combine(parentPath, $"{languageName}_50k.txt");
            string ignoredData = Path.Combine(parentPath, $"{languageName}_ignored.txt");

            var wordFrequencyDictionary = new Dictionary<string, long>();

            // Disposing the writer flushes it, so the log is complete on every exit path.
            using (var logWriter = File.CreateText(fileLog))
            {
                try
                {
                    if (inputType == InputType.Directory)
                    {
                        ProcessFilesInDirectory(new DirectoryInfo(pathInput), wordFrequencyDictionary, logWriter);
                    }
                    else
                    {
                        FileInfo startFileInfo = new FileInfo(pathInput);

                        switch (extension)
                        {
                            case ".xml.gz":
                                using (var stream = startFileInfo.OpenRead())
                                {
                                    try
                                    {
                                        ProcessSubtitleGZ(stream, wordFrequencyDictionary, logWriter);
                                    }
                                    catch (Exception gzEx)
                                    {
                                        // Best effort: record the failure and continue with whatever was read.
                                        LogMessage(logWriter, $"Error: {gzEx.Message}");
                                    }
                                }
                                break;

                            case ".zip":
                                ProcessZipArchive(startFileInfo, wordFrequencyDictionary, logWriter);
                                break;

                            case ".tar.gz":
                                ProcessTarGzArchive(startFileInfo, wordFrequencyDictionary, logWriter);
                                break;

                            default:
                                // Covers ".rar" and ".tar" (no handler here) as well as any other extension.
                                LogMessage(logWriter, $"Error: unsupported archive type '{extension}'");
                                break;
                        }
                    }

                    // The detector is optional: if it cannot be set up for this language, IsValidWord
                    // receives null, but the failure is logged rather than swallowed.
                    LanguageDetector detector = null;
                    try
                    {
                        var languageDetector = new LanguageDetector();
                        languageDetector.AddLanguages(languageName);

                        detector = languageDetector;
                    }
                    catch (Exception detectorEx)
                    {
                        LogMessage(logWriter, $"Error: language detector unavailable for '{languageName}': {detectorEx.Message}");
                    }

                    var validWords   = new List<KeyValuePair<string, long>>();
                    var ignoredWords = new List<KeyValuePair<string, long>>();

                    foreach (KeyValuePair<string, long> kvp in wordFrequencyDictionary)
                    {
                        if (IsValidWord(kvp.Key, detector, languageName))
                        {
                            validWords.Add(kvp);
                        }
                        else
                        {
                            ignoredWords.Add(kvp);
                        }
                    }

                    // Highest frequency first.
                    validWords.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));
                    ignoredWords.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));

                    LogWordlistToFile(validWords, ignoredWords, fullData, partialData, ignoredData);
                }
                catch (Exception ex)
                {
                    Console.Error.WriteLine(ex.Message);
                    LogMessage(logWriter, $"Error: {ex.Message}");
                }
            }
        }