/// <summary>
/// Fetches the next article from the news article provider, analyses its content with
/// both local analyzers and with the translate service client, and prints the article
/// together with all detection results to the console.
/// </summary>
/// <param name="basePath">Value passed to the constructors of both analyzers.</param>
public void Run(string basePath)
{
    const string separator = "-----------------------------------------------";

    // NOTE(review): this blocks the calling thread on an asynchronous call.
    // Reading Result waits for completion, so no explicit Wait() is needed first.
    var article = _newsArticleProvider.GetNextArticleAsync().Result;

    var firstAnalyzer = new Alg1Analizer(basePath);
    var secondAnalyzer = new Alg2Analizer(basePath);

    var firstAnalysis = firstAnalyzer.Analize(article.Content);
    var secondAnalysis = secondAnalyzer.Analize(article.Content);
    var serviceDetectedLanguage = _translateServiceClient.DetectLanguage(article.Content);

    Console.WriteLine("Article content:");
    Console.WriteLine(article.Content);
    Console.WriteLine(separator);

    Console.WriteLine("Actual lng: {0}", article.ActualLanguage);

    Console.WriteLine("Alg1 analysis:");
    PrintFullAnalysis(firstAnalysis);

    Console.WriteLine("Alg2 analysis:");
    PrintFullAnalysis(secondAnalysis);

    Console.WriteLine("Google lng: {0}", serviceDetectedLanguage);
    Console.WriteLine(separator);
}
/// <summary>
/// Compares the effectiveness of algorithms 1 and 2 and of the translate service
/// for all languages present in <c>pathsToArticles</c>.
/// </summary>
/// <remarks>
/// For every language the article URL list is read from the file registered in
/// <c>pathsToArticles</c>; each article is truncated to the first tenth of its content
/// and passed to all three detectors. The number of correct detections per language is
/// written to <c>../../comparisons/alg_comparison_all_lang.csv</c>, overwriting any
/// previous file. The CSV columns are fixed (EN, DE, FR, ES, PT, IT), so a missing
/// entry for one of those languages results in a <see cref="KeyNotFoundException"/>.
/// </remarks>
public void CreateComparisonOfAlgorithmEffectivenessForAllLanguages()
{
    Dictionary<Language, int> alg1SuccessfulDetection = new Dictionary<Language, int>();
    Dictionary<Language, int> alg2SuccessfulDetection = new Dictionary<Language, int>();
    Dictionary<Language, int> googleSuccessfulDetection = new Dictionary<Language, int>();

    Alg1Analizer alg1 = new Alg1Analizer(languageDictionaries);
    Alg2Analizer alg2 = new Alg2Analizer(languageDictionaries);
    TranslateServiceClient translateServiceClient = new TranslateServiceClient();

    foreach (Language language in pathsToArticles.Keys)
    {
        int alg1SuccessfulDetectionCount = 0;
        int alg2SuccessfulDetectionCount = 0;
        int googleSuccessfulDetectionCount = 0;

        string path = pathsToArticles[language];
        string[] lines = File.ReadAllLines(path);

        foreach (string url in lines)
        {
            // Only the first tenth of the article is analysed.
            string content = urlsToArticles[url].Content;
            content = content.Substring(0, content.Length / 10);

            // Algorithm 1
            Analysis analysis = alg1.Analize(content);
            if (analysis.GetDiscoveredLanguage().Equals(language))
            {
                alg1SuccessfulDetectionCount++;
            }

            // Algorithm 2
            Analysis analysis2 = alg2.Analize(content);
            if (analysis2.GetDiscoveredLanguage().Equals(language))
            {
                alg2SuccessfulDetectionCount++;
            }

            // Translate service
            Console.WriteLine("Google: {0}", url);
            Language googleLanguage = translateServiceClient.DetectLanguage(content);
            if (googleLanguage.Equals(language))
            {
                googleSuccessfulDetectionCount++;
            }
        }

        alg1SuccessfulDetection.Add(language, alg1SuccessfulDetectionCount);
        alg2SuccessfulDetection.Add(language, alg2SuccessfulDetectionCount);
        googleSuccessfulDetection.Add(language, googleSuccessfulDetectionCount);
    }

    string csvPath = "../../comparisons/alg_comparison_all_lang.csv";

    // StreamWriter with append == false creates the file or truncates an existing one,
    // so no separate File.Exists / File.Create step is required.
    using (TextWriter writer = new StreamWriter(csvPath, false, Encoding.UTF8))
    {
        writer.WriteLine("Porównanie algorytmów 1 i 2 - wszystkie języki;;;;;;;");
        writer.WriteLine(";EN;DE;FR;ES;PT;IT");
        writer.WriteLine(FormatAllLanguagesCsvRow("Alg. 1", alg1SuccessfulDetection));
        writer.WriteLine(FormatAllLanguagesCsvRow("Alg. 2", alg2SuccessfulDetection));
        writer.WriteLine(FormatAllLanguagesCsvRow("Google API", googleSuccessfulDetection));
    }
}

/// <summary>
/// Builds one semicolon-separated CSV row: the label followed by the detection counts
/// in the fixed column order EN, DE, FR, ES, PT, IT.
/// </summary>
/// <param name="label">Text placed in the first column.</param>
/// <param name="detectionCounts">Correct-detection count per language.</param>
/// <returns>The formatted row without a trailing line break.</returns>
private static string FormatAllLanguagesCsvRow(string label, Dictionary<Language, int> detectionCounts)
{
    Language[] columns =
    {
        Language.English,
        Language.German,
        Language.French,
        Language.Spanish,
        Language.Portuguese,
        Language.Italian
    };

    StringBuilder row = new StringBuilder(label);
    foreach (Language column in columns)
    {
        row.Append(';').Append(detectionCounts[column]);
    }

    return row.ToString();
}
/// <summary>
/// Compares algorithms 1 and 2 for the given language depending on the number of
/// tokens of each article that is analysed.
/// </summary>
/// <remarks>
/// Token counts from 10 to 300 in steps of 10 are evaluated. For each token count every
/// article listed in the language's URL file is analysed by both algorithms, and the
/// number of correct detections is written to
/// <c>../../comparisons/article_length_comparison_&lt;language&gt;.csv</c>, overwriting any
/// previous file.
/// </remarks>
/// <param name="analyzedLanguage">Language whose articles are analysed and which counts as the correct answer.</param>
public void CreateComparisonOfAlgorithmEffectivenessOnTokenNumberInArticle(Language analyzedLanguage)
{
    string path = pathsToArticles[analyzedLanguage];

    // Article lengths (number of tokens) for which the analysis is performed.
    List<int> numberOfTokensToAnalyze = new List<int>();
    for (int i = 10; i <= 300; i += 10)
    {
        numberOfTokensToAnalyze.Add(i);
    }

    // Article length (number of tokens) -> number of correct language detections.
    Dictionary<int, int> alg1SuccessfulDetection = new Dictionary<int, int>();
    Dictionary<int, int> alg2SuccessfulDetection = new Dictionary<int, int>();

    Alg1Analizer alg1 = new Alg1Analizer(languageDictionaries);
    Alg2Analizer alg2 = new Alg2Analizer(languageDictionaries);

    // The URL list does not depend on the token count, so it is read once up front
    // instead of once per analysed length.
    string[] lines = File.ReadAllLines(path);

    // Analyse for the different article lengths.
    foreach (int tokensNumber in numberOfTokensToAnalyze)
    {
        int alg1SuccessfulDetectionCount = 0;
        int alg2SuccessfulDetectionCount = 0;

        foreach (string url in lines)
        {
            string content = urlsToArticles[url].Content;

            // Algorithm 1
            Analysis analysis = alg1.Analize(content, tokensNumber);
            if (analysis.GetDiscoveredLanguage().Equals(analyzedLanguage))
            {
                alg1SuccessfulDetectionCount++;
            }

            // Algorithm 2
            Analysis analysis2 = alg2.Analize(content, tokensNumber);
            if (analysis2.GetDiscoveredLanguage().Equals(analyzedLanguage))
            {
                alg2SuccessfulDetectionCount++;
            }
        }

        alg1SuccessfulDetection.Add(tokensNumber, alg1SuccessfulDetectionCount);
        alg2SuccessfulDetection.Add(tokensNumber, alg2SuccessfulDetectionCount);
    }

    string csvPath = "../../comparisons/article_length_comparison_" + analyzedLanguage.ToString() + ".csv";

    // StreamWriter with append == false creates the file or truncates an existing one,
    // so no separate File.Exists / File.Create step is required.
    using (TextWriter writer = new StreamWriter(csvPath, false, Encoding.UTF8))
    {
        writer.WriteLine("Porównanie algorytmów 1 i 2 dla różnych długości artykułów (liczby tokenów) " + " dla języka: {0}", analyzedLanguage);
        writer.WriteLine(";Alg. 1; Alg. 2");

        foreach (int i in numberOfTokensToAnalyze)
        {
            writer.WriteLine(i + ";" + alg1SuccessfulDetection[i] + ";" + alg2SuccessfulDetection[i]);
        }
    }
}
/// <summary>
/// Compares algorithms 1 and 2 for the given language depending on the number of
/// words in the language dictionaries.
/// </summary>
/// <remarks>
/// Dictionary sizes from 20 to 1000 in steps of 20 are evaluated. For each size a fresh
/// set of dictionaries is created through <c>LanguageDictionaryFactory</c>, every article
/// listed in the language's URL file is analysed by both algorithms, and the number of
/// correct detections is written to
/// <c>../../comparisons/dictionary_length_comparison_&lt;language&gt;.csv</c>, overwriting
/// any previous file.
/// </remarks>
/// <param name="analyzedLanguage">Language whose articles are analysed and which counts as the correct answer.</param>
public void CreateComparisonOfAlgorithmEffectivenessOnDictionaryLength(Language analyzedLanguage)
{
    string path = pathsToArticles[analyzedLanguage];

    // Dictionary sizes (number of words) for which the analysis is performed.
    List<int> dictionariesSizeToAnalyze = new List<int>();
    for (int i = 20; i <= 1000; i += 20)
    {
        dictionariesSizeToAnalyze.Add(i);
    }

    // Dictionary size -> number of correct language detections.
    Dictionary<int, int> alg1SuccessfulDetection = new Dictionary<int, int>();
    Dictionary<int, int> alg2SuccessfulDetection = new Dictionary<int, int>();

    LanguageDictionaryFactory factory = new LanguageDictionaryFactory();

    // The URL list does not depend on the dictionary size, so it is read once up front
    // instead of once per analysed size.
    string[] lines = File.ReadAllLines(path);

    foreach (int dictionarySize in dictionariesSizeToAnalyze)
    {
        IEnumerable<LanguageDictionary> dictionaries = factory.Create(pathsToDictionaries, dictionarySize);
        Alg1Analizer alg1 = new Alg1Analizer(dictionaries);
        Alg2Analizer alg2 = new Alg2Analizer(dictionaries);

        int alg1SuccessfulDetectionCount = 0;
        int alg2SuccessfulDetectionCount = 0;

        foreach (string url in lines)
        {
            string content = urlsToArticles[url].Content;

            // Algorithm 1
            Analysis analysis = alg1.Analize(content);
            if (analysis.GetDiscoveredLanguage().Equals(analyzedLanguage))
            {
                alg1SuccessfulDetectionCount++;
            }

            // Algorithm 2
            Analysis analysis2 = alg2.Analize(content);
            if (analysis2.GetDiscoveredLanguage().Equals(analyzedLanguage))
            {
                alg2SuccessfulDetectionCount++;
            }
        }

        alg1SuccessfulDetection.Add(dictionarySize, alg1SuccessfulDetectionCount);
        alg2SuccessfulDetection.Add(dictionarySize, alg2SuccessfulDetectionCount);
    }

    string csvPath = "../../comparisons/dictionary_length_comparison_" + analyzedLanguage.ToString() + ".csv";

    // The previous existence check tested and created the article list path instead of
    // csvPath. StreamWriter with append == false already creates or truncates the CSV
    // file, so the check is dropped altogether.
    using (TextWriter writer = new StreamWriter(csvPath, false, Encoding.UTF8))
    {
        writer.WriteLine("Porównanie algorytmów 1 i 2 dla różnych długości słownika dla języka: " + analyzedLanguage.ToString() + "");
        writer.WriteLine(";Alg. 1; Alg. 2");

        foreach (int dictionarySize in dictionariesSizeToAnalyze)
        {
            writer.WriteLine(dictionarySize + ";" + alg1SuccessfulDetection[dictionarySize] + ";" + alg2SuccessfulDetection[dictionarySize]);
        }
    }
}