/// <summary>
/// Asserts that every sample in <paramref name="texts"/> is detected as <paramref name="lang"/>,
/// first with all languages loaded and then, when <paramref name="pairs"/> is supplied,
/// once per pair with only that pair plus <paramref name="lang"/> loaded.
/// </summary>
/// <param name="lang">Language code every sample is expected to be detected as.</param>
/// <param name="texts">Sample texts to run through the detector.</param>
/// <param name="pairs">Optional sets of competing language codes; skipped when null.</param>
private void Test(string lang, string[] texts, string[][] pairs = null)
{
    // Fixed seed so repeated runs give the same detection result.
    var allLanguages = new LanguageDetector { RandomSeed = 1 };
    allLanguages.AddAllLanguages();

    foreach (string sample in texts)
    {
        Assert.AreEqual(lang, allLanguages.Detect(sample));
    }

    if (pairs == null)
    {
        return;
    }

    foreach (string[] competitors in pairs)
    {
        // A fresh detector per pair: only the competitors and the expected language are loaded.
        var restricted = new LanguageDetector { RandomSeed = 1 };
        restricted.AddLanguages(competitors);
        restricted.AddLanguages(lang);

        foreach (string sample in texts)
        {
            Assert.AreEqual(lang, restricted.Detect(sample));
        }
    }
}
/// <summary>
/// Handles an incoming message: reloads the room lookup, picks a culture based on the
/// detected language of the message text, acquires the "graph" and "office" access tokens,
/// and prompts the user for a subject.
/// </summary>
/// <param name="context">Dialog context handed to the subject prompt.</param>
/// <param name="item">Awaitable that yields the incoming message activity.</param>
public virtual async Task MessageReceivedAsync(IDialogContext context, IAwaitable<IMessageActivity> item)
{
    // Rebuild the address -> name lookup from the room service.
    _roomsDictionary = new Dictionary<string, string>();
    foreach (var room in _roomService.GetRooms())
    {
        _roomsDictionary.Add(room.Address, room.Name);
    }

    var message = await item;

    // Only the two configured languages are candidates for detection.
    var detector = new LanguageDetector();
    var defaultLanguage = ConfigurationManager.AppSettings["BotDefaultLanguage"];
    var localLanguage = ConfigurationManager.AppSettings["BotLocalLanguage"];
    detector.AddLanguages(defaultLanguage, localLanguage);

    // Known issue: Detect(message.Text) returns null when the text is in Japanese.
    // A null result does not equal a configured default language, so the local
    // culture is selected in that case.
    var detectedLanguage = detector.Detect(message.Text);
    if (Equals(defaultLanguage, detectedLanguage))
    {
        _detectedCulture = ConfigurationManager.AppSettings["BotDefaultCulture"];
    }
    else
    {
        _detectedCulture = ConfigurationManager.AppSettings["BotLocalCulture"];
    }

    SetCulture(_detectedCulture);

    accessToken = await GetAccessToken("graph");
    accessToken_office = await GetAccessToken("office");

    PromptDialog.Text(context, SubjectMessageReceivedAsync, Properties.Resources.Text_PleaseEnterSubject);
}
/// <summary>
/// Creates a detector and loads the ten non-English language profiles listed below.
/// </summary>
/// <returns>The loaded detector.</returns>
//[Benchmark]
public LanguageDetector AllLoad()
{
    string[] languageCodes =
    {
        "spa", "fra", "deu", "jpn", "por",
        "ukr", "zho", "ita", "rus", "kor",
    };

    var detector = new LanguageDetector();
    detector.AddLanguages(languageCodes);
    return detector;
}
/// <summary>
/// Creates a detector with only the "eng" language profile loaded.
/// </summary>
/// <returns>The loaded detector.</returns>
//[Benchmark]
public LanguageDetector EnglishLoad()
{
    LanguageDetector englishOnly = new LanguageDetector();
    englishOnly.AddLanguages("eng");
    return englishOnly;
}
/// <summary>
/// Prepares the two shared detectors: one loaded with "eng" only and one loaded
/// with ten other languages.
/// </summary>
public LanguageDetectionTests()
{
    EnglishDetector = CreateLoadedDetector("eng");
    AllDetector = CreateLoadedDetector("spa", "fra", "deu", "jpn", "por", "ukr", "zho", "ita", "rus", "kor");
}

/// <summary>
/// Builds a new detector and loads the given language codes into it.
/// </summary>
private static LanguageDetector CreateLoadedDetector(params string[] languageCodes)
{
    var detector = new LanguageDetector();
    detector.AddLanguages(languageCodes);
    return detector;
}
/// <summary>
/// Verifies that each entry of <paramref name="texts"/> is detected as <paramref name="lang"/>.
/// The check runs against a detector with all languages loaded and, if
/// <paramref name="pairs"/> is given, against one detector per pair that knows only
/// the pair's languages plus <paramref name="lang"/>.
/// </summary>
/// <param name="lang">Expected detection result for every text.</param>
/// <param name="texts">Texts to detect.</param>
/// <param name="pairs">Optional language-code sets to compete against; null skips this phase.</param>
private void Test(string lang, string[] texts, string[][] pairs = null)
{
    LanguageDetector detector = new LanguageDetector();
    detector.RandomSeed = 1; // deterministic detection
    detector.AddAllLanguages();

    for (int textIndex = 0; textIndex < texts.Length; textIndex++)
    {
        Assert.AreEqual(lang, detector.Detect(texts[textIndex]));
    }

    if (pairs != null)
    {
        for (int pairIndex = 0; pairIndex < pairs.Length; pairIndex++)
        {
            // Start over with a detector limited to this pair and the expected language.
            detector = new LanguageDetector();
            detector.RandomSeed = 1;
            detector.AddLanguages(pairs[pairIndex]);
            detector.AddLanguages(lang);

            for (int textIndex = 0; textIndex < texts.Length; textIndex++)
            {
                Assert.AreEqual(lang, detector.Detect(texts[textIndex]));
            }
        }
    }
}
/// <summary>
/// Entry point. Builds a word-frequency dictionary from a directory or an archive,
/// splits the words into valid and ignored lists via <c>IsValidWord</c>, sorts both by
/// descending frequency and writes them out through <c>LogWordlistToFile</c>.
/// </summary>
/// <param name="args">
/// <c>args[0]</c>: input type, parsed case-insensitively as an <c>InputType</c> value.
/// <c>args[1]</c>: path to the input directory or archive. The language name is taken
/// from the directory name, or from the archive file name up to its first dot.
/// </param>
/// <remarks>
/// Output files (<c>.log</c>, <c>_full.txt</c>, <c>_50k.txt</c>, <c>_ignored.txt</c>) are placed in a
/// folder named after the language, next to the input path.
/// </remarks>
static void Main(string[] args)
{
    if (args == null || args.Length == 0)
    {
        Console.Error.WriteLine("you need to pass directly path to start the process");
        return;
    }

    // Enum.TryParse also accepts numeric strings that map to no declared member, so the
    // result is checked with Enum.IsDefined; Unknown is the "not set" value and is rejected too.
    InputType inputType = InputType.Unknown;
    bool typeRecognized = Enum.TryParse(args[0], true, out inputType)
        && Enum.IsDefined(typeof(InputType), inputType)
        && inputType != InputType.Unknown;

    // The path argument is mandatory; previously a missing args[1] crashed with an index error.
    if (!typeRecognized || args.Length < 2)
    {
        Console.Error.WriteLine("Expected input in one of the following format");
        Console.Error.WriteLine(@"FrequencyListBuilder directory c:\MyContentDir\en");
        Console.Error.WriteLine(@"FrequencyListBuilder archive c:\MyContentDir\en.tar.gz");
        return;
    }

    string pathInput = args[1];
    string nameWithExtension = Path.GetFileName(pathInput);
    string languageName;
    string extension = null;

    if (inputType == InputType.Directory)
    {
        languageName = nameWithExtension;
    }
    else
    {
        // Split at the FIRST dot so compound extensions such as ".tar.gz" stay intact.
        int pos = nameWithExtension.IndexOf('.');
        if (pos <= 0)
        {
            Console.Error.WriteLine($"Cannot derive a language name and extension from '{nameWithExtension}'; expected a file name such as en.tar.gz");
            return;
        }

        languageName = nameWithExtension.Substring(0, pos);
        extension = nameWithExtension.Substring(pos);
    }

    // GetDirectoryName returns null for a root path; fall back to a relative output folder.
    string parentPath = Path.Combine(Path.GetDirectoryName(pathInput) ?? string.Empty, languageName);

    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(parentPath);

    string fileLog = Path.Combine(parentPath, $"{languageName}.log");
    string fullData = Path.Combine(parentPath, $"{languageName}_full.txt");
    string partialData = Path.Combine(parentPath, $"{languageName}_50k.txt");
    string ignoredData = Path.Combine(parentPath, $"{languageName}_ignored.txt");

    Dictionary<string, long> wordFrequencyDictionary = new Dictionary<string, long>();

    // Disposing the writer flushes it, on both the success and the failure path.
    using (var logWriter = File.CreateText(fileLog))
    {
        try
        {
            if (inputType == InputType.Directory)
            {
                DirectoryInfo startDir = new DirectoryInfo(pathInput);
                ProcessFilesInDirectory(startDir, wordFrequencyDictionary, logWriter);
            }
            else
            {
                FileInfo startFileInfo = new FileInfo(pathInput);
                switch (extension)
                {
                    case ".xml.gz":
                        using (var stream = startFileInfo.OpenRead())
                        {
                            try
                            {
                                ProcessSubtitleGZ(stream, wordFrequencyDictionary, logWriter);
                            }
                            catch (Exception ex)
                            {
                                // Still best-effort (processing continues), but no longer silent.
                                LogMessage(logWriter, $"Error: failed to process '{startFileInfo.FullName}': {ex.Message}");
                            }
                        }
                        break;

                    case ".zip":
                        ProcessZipArchive(startFileInfo, wordFrequencyDictionary, logWriter);
                        break;

                    case ".tar.gz":
                        ProcessTarGzArchive(startFileInfo, wordFrequencyDictionary, logWriter);
                        break;

                    case ".rar":
                    case ".tar":
                    default:
                        // No handler for these formats; record it instead of silently producing empty lists.
                        LogMessage(logWriter, $"Error: unsupported archive extension '{extension}', nothing was processed");
                        break;
                }
            }

            // The detector is optional: if the language cannot be loaded, IsValidWord receives null.
            LanguageDetector detector = null;
            try
            {
                var languageDetector = new LanguageDetector();
                languageDetector.AddLanguages(languageName);
                detector = languageDetector;
            }
            catch (Exception ex)
            {
                LogMessage(logWriter, $"Error: could not load language detector for '{languageName}': {ex.Message}");
            }

            List<KeyValuePair<string, long>> validWords = new List<KeyValuePair<string, long>>();
            List<KeyValuePair<string, long>> ignoredWords = new List<KeyValuePair<string, long>>();

            foreach (KeyValuePair<string, long> kvp in wordFrequencyDictionary)
            {
                if (IsValidWord(kvp.Key, detector, languageName))
                {
                    validWords.Add(kvp);
                }
                else
                {
                    ignoredWords.Add(kvp);
                }
            }

            // Most frequent words first.
            validWords.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));
            ignoredWords.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));

            LogWordlistToFile(validWords, ignoredWords, fullData, partialData, ignoredData);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine(ex.Message);
            LogMessage(logWriter, $"Error: {ex.Message}");
        }
    }
}