Exemplo n.º 1
0
        /// <summary>
        /// Fetches the next article from the news article provider, analyses its content with both
        /// analyzers and the translate service client, and prints the article and all results to the console.
        /// </summary>
        /// <param name="basePath">Value passed to the constructors of both analyzers.</param>
        public void Run(string basePath)
        {
            const string separator = "-----------------------------------------------";

            // Block synchronously until the provider has produced the article.
            var pendingArticle = _newsArticleProvider.GetNextArticleAsync();
            pendingArticle.Wait();
            var article = pendingArticle.Result;

            var firstAnalyzer  = new Alg1Analizer(basePath);
            var secondAnalyzer = new Alg2Analizer(basePath);

            var firstResult    = firstAnalyzer.Analize(article.Content);
            var secondResult   = secondAnalyzer.Analize(article.Content);
            var googleLanguage = _translateServiceClient.DetectLanguage(article.Content);

            // Article text first, then the expected language followed by each detector's verdict.
            Console.WriteLine("Article content:");
            Console.WriteLine(article.Content);
            Console.WriteLine(separator);

            Console.WriteLine($"Actual lng: {article.ActualLanguage}");

            Console.WriteLine("Alg1 analysis:");
            PrintFullAnalysis(firstResult);

            Console.WriteLine("Alg2 analysis:");
            PrintFullAnalysis(secondResult);

            Console.WriteLine($"Google lng: {googleLanguage}");
            Console.WriteLine(separator);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Compares how often algorithm 1, algorithm 2 and the Google API detect the correct language,
        /// for every language present in <c>pathsToArticles</c>.
        /// </summary>
        /// <remarks>
        /// Each line of a language's article list is used as a key into <c>urlsToArticles</c>; only the
        /// first tenth of the article content is analysed. The per-language success counts are written to
        /// <c>../../comparisons/alg_comparison_all_lang.csv</c>, which is created or overwritten.
        /// The report looks up the counts for English, German, French, Spanish, Portuguese and Italian,
        /// so a <see cref="KeyNotFoundException"/> is thrown if any of them is missing from <c>pathsToArticles</c>.
        /// </remarks>
        public void CreateComparisonOfAlgorithmEffectivenessForAllLanguages()
        {
            Dictionary <Language, int> alg1SuccessfulDetection   = new Dictionary <Language, int>();
            Dictionary <Language, int> alg2SuccessfulDetection   = new Dictionary <Language, int>();
            Dictionary <Language, int> googleSuccessfulDetection = new Dictionary <Language, int>();
            Alg1Analizer           alg1 = new Alg1Analizer(languageDictionaries);
            Alg2Analizer           alg2 = new Alg2Analizer(languageDictionaries);
            TranslateServiceClient translateServiceClient = new TranslateServiceClient();

            foreach (Language language in pathsToArticles.Keys)
            {
                int alg1SuccessfulDetectionCount   = 0;
                int alg2SuccessfulDetectionCount   = 0;
                int googleSuccessfulDetectionCount = 0;

                string   path  = pathsToArticles[language];
                string[] lines = System.IO.File.ReadAllLines(path);
                foreach (string url in lines)
                {
                    // Only the first tenth of the article is handed to the detectors.
                    string content = urlsToArticles[url].Content;
                    content = content.Substring(0, content.Length / 10);

                    // Algorithm 1
                    Analysis analysis = alg1.Analize(content);
                    if (analysis.GetDiscoveredLanguage().Equals(language))
                    {
                        alg1SuccessfulDetectionCount++;
                    }

                    // Algorithm 2
                    Analysis analysis2 = alg2.Analize(content);
                    if (analysis2.GetDiscoveredLanguage().Equals(language))
                    {
                        alg2SuccessfulDetectionCount++;
                    }

                    Console.WriteLine("Google: {0}", url);

                    // Google API
                    Language googleLanguage = translateServiceClient.DetectLanguage(content);
                    if (googleLanguage.Equals(language))
                    {
                        googleSuccessfulDetectionCount++;
                    }
                }
                alg1SuccessfulDetection.Add(language, alg1SuccessfulDetectionCount);
                alg2SuccessfulDetection.Add(language, alg2SuccessfulDetectionCount);
                googleSuccessfulDetection.Add(language, googleSuccessfulDetectionCount);
            }

            string csvPath = "../../comparisons/alg_comparison_all_lang.csv";

            // StreamWriter with append == false creates the file when it is missing and truncates it
            // otherwise, so no separate File.Exists/File.Create step is needed.
            using (TextWriter writer = new StreamWriter(csvPath, false, Encoding.UTF8)) {
                writer.WriteLine("Porównanie algorytmów 1 i 2 - wszystkie języki;;;;;;;");
                writer.WriteLine(";EN;DE;FR;ES;PT;IT");
                writer.WriteLine(FormatLanguageDetectionRow("Alg. 1", alg1SuccessfulDetection));
                writer.WriteLine(FormatLanguageDetectionRow("Alg. 2", alg2SuccessfulDetection));
                writer.WriteLine(FormatLanguageDetectionRow("Google API", googleSuccessfulDetection));
            }
        }

        // Builds one semicolon-separated CSV row: the label followed by the detection counts in the
        // column order of the report header (EN;DE;FR;ES;PT;IT).
        private static string FormatLanguageDetectionRow(string label, Dictionary <Language, int> detections)
        {
            Language[] reportedLanguages =
            {
                Language.English,
                Language.German,
                Language.French,
                Language.Spanish,
                Language.Portuguese,
                Language.Italian
            };

            StringBuilder row = new StringBuilder(label);
            foreach (Language reportedLanguage in reportedLanguages)
            {
                row.Append(';').Append(detections[reportedLanguage]);
            }
            return row.ToString();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Compares algorithms 1 and 2 for the given language depending on the number of tokens
        /// passed to the analyzers.
        /// </summary>
        /// <remarks>
        /// Token counts from 10 to 300 in steps of 10 are evaluated. The number of correct detections for
        /// each token count is written to
        /// <c>../../comparisons/article_length_comparison_&lt;language&gt;.csv</c>, which is created or overwritten.
        /// </remarks>
        /// <param name="analyzedLanguage">
        /// Language whose article list is read from <c>pathsToArticles</c> and which counts as the correct answer.
        /// </param>
        public void CreateComparisonOfAlgorithmEffectivenessOnTokenNumberInArticle(Language analyzedLanguage)
        {
            string path = pathsToArticles[analyzedLanguage];

            // Article lengths (number of tokens) for which the analysis is run.
            List <int> numberOfTokensToAnalyze = new List <int>();

            for (int i = 10; i <= 300; i += 10)
            {
                numberOfTokensToAnalyze.Add(i);
            }

            // Article length (number of tokens) -> number of correct language detections.
            Dictionary <int, int> alg1SuccessfulDetection = new Dictionary <int, int>();
            Dictionary <int, int> alg2SuccessfulDetection = new Dictionary <int, int>();

            Alg1Analizer alg1 = new Alg1Analizer(languageDictionaries);
            Alg2Analizer alg2 = new Alg2Analizer(languageDictionaries);

            // The URL list is the same for every token count, so it is read once instead of per iteration.
            string[] lines = System.IO.File.ReadAllLines(path);

            // Run the analysis for each article length.
            foreach (int tokensNumber in numberOfTokensToAnalyze)
            {
                int alg1SuccessfulDetectionCount = 0;
                int alg2SuccessfulDetectionCount = 0;

                foreach (string url in lines)
                {
                    string content = urlsToArticles[url].Content;

                    // Algorithm 1
                    Analysis analysis = alg1.Analize(content, tokensNumber);
                    if (analysis.GetDiscoveredLanguage().Equals(analyzedLanguage))
                    {
                        alg1SuccessfulDetectionCount++;
                    }

                    // Algorithm 2
                    Analysis analysis2 = alg2.Analize(content, tokensNumber);
                    if (analysis2.GetDiscoveredLanguage().Equals(analyzedLanguage))
                    {
                        alg2SuccessfulDetectionCount++;
                    }
                }
                alg1SuccessfulDetection.Add(tokensNumber, alg1SuccessfulDetectionCount);
                alg2SuccessfulDetection.Add(tokensNumber, alg2SuccessfulDetectionCount);
            }

            string csvPath = "../../comparisons/article_length_comparison_" + analyzedLanguage.ToString() + ".csv";

            // StreamWriter with append == false creates the file when it is missing and truncates it
            // otherwise, so no separate File.Exists/File.Create step is needed.
            using (TextWriter writer = new StreamWriter(csvPath, false, Encoding.UTF8)) {
                writer.WriteLine("Porównanie algorytmów 1 i 2 dla różnych długości artykułów (liczby tokenów) " +
                                 " dla języka: {0}", analyzedLanguage);
                writer.WriteLine(";Alg. 1; Alg. 2");
                foreach (int tokensNumber in numberOfTokensToAnalyze)
                {
                    writer.WriteLine(tokensNumber + ";" + alg1SuccessfulDetection[tokensNumber] + ";" +
                                     alg2SuccessfulDetection[tokensNumber]);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Compares algorithms 1 and 2 for the given language depending on the number of words
        /// in the language dictionaries.
        /// </summary>
        /// <remarks>
        /// Dictionary sizes from 20 to 1000 in steps of 20 are evaluated; for each size a fresh set of
        /// dictionaries is built from <c>pathsToDictionaries</c>. The number of correct detections for each
        /// size is written to <c>../../comparisons/dictionary_length_comparison_&lt;language&gt;.csv</c>,
        /// which is created or overwritten.
        /// </remarks>
        /// <param name="analyzedLanguage">
        /// Language whose article list is read from <c>pathsToArticles</c> and which counts as the correct answer.
        /// </param>
        public void CreateComparisonOfAlgorithmEffectivenessOnDictionaryLength(Language analyzedLanguage)
        {
            string path = pathsToArticles[analyzedLanguage];

            // Dictionary sizes (number of words) for which the analysis is run.
            List <int> dictionariesSizeToAnalyze = new List <int>();

            for (int i = 20; i <= 1000; i += 20)
            {
                dictionariesSizeToAnalyze.Add(i);
            }

            // Dictionary size -> number of correct language detections.
            Dictionary <int, int> alg1SuccessfulDetection = new Dictionary <int, int>();
            Dictionary <int, int> alg2SuccessfulDetection = new Dictionary <int, int>();

            LanguageDictionaryFactory factory = new LanguageDictionaryFactory();

            // The URL list is the same for every dictionary size, so it is read once instead of per iteration.
            string[] lines = System.IO.File.ReadAllLines(path);

            foreach (int dictionarySize in dictionariesSizeToAnalyze)
            {
                IEnumerable <LanguageDictionary> dictionaries = factory.Create(pathsToDictionaries, dictionarySize);
                Alg1Analizer alg1 = new Alg1Analizer(dictionaries);
                Alg2Analizer alg2 = new Alg2Analizer(dictionaries);
                int          alg1SuccessfulDetectionCount = 0;
                int          alg2SuccessfulDetectionCount = 0;

                foreach (string url in lines)
                {
                    string content = urlsToArticles[url].Content;

                    // Algorithm 1
                    Analysis analysis = alg1.Analize(content);
                    if (analysis.GetDiscoveredLanguage().Equals(analyzedLanguage))
                    {
                        alg1SuccessfulDetectionCount++;
                    }

                    // Algorithm 2
                    Analysis analysis2 = alg2.Analize(content);
                    if (analysis2.GetDiscoveredLanguage().Equals(analyzedLanguage))
                    {
                        alg2SuccessfulDetectionCount++;
                    }
                }
                alg1SuccessfulDetection.Add(dictionarySize, alg1SuccessfulDetectionCount);
                alg2SuccessfulDetection.Add(dictionarySize, alg2SuccessfulDetectionCount);
            }

            string csvPath = "../../comparisons/dictionary_length_comparison_" + analyzedLanguage.ToString() + ".csv";

            // The previous pre-create step tested and created `path` (the input article list) instead of
            // `csvPath`. It is dropped entirely: StreamWriter with append == false creates the CSV file when
            // it is missing and truncates it otherwise.
            using (TextWriter writer = new StreamWriter(csvPath, false, Encoding.UTF8)) {
                writer.WriteLine("Porównanie algorytmów 1 i 2 dla różnych długości słownika dla języka: " +
                                 analyzedLanguage.ToString());
                writer.WriteLine(";Alg. 1; Alg. 2");
                foreach (int dictionarySize in dictionariesSizeToAnalyze)
                {
                    writer.WriteLine(dictionarySize + ";" + alg1SuccessfulDetection[dictionarySize] + ";" +
                                     alg2SuccessfulDetection[dictionarySize]);
                }
            }
        }