Example #1
0
        /// <summary>
        /// Loads the language for <paramref name="languageCode"/> through a
        /// <c>LanguageFactory</c> and prints a short summary of what was loaded
        /// (language code, lexicon size, letter count) to the console.
        /// </summary>
        /// <param name="languageCode">Code handed to <c>LanguageFactory.GetLanguage</c>.</param>
        /// <returns>The language object returned by the factory.</returns>
        static LanguageInfo loadLanguage(string languageCode)
        {
            // The factory is only needed for this single lookup, so it is built inline.
            var loaded = new LanguageFactory(new LettersFileReader(), new LexiconFileReader())
                .GetLanguage(languageCode);

            // Summary for the operator; a trailing blank line separates it from later output.
            Console.WriteLine("Loaded language: {0}", loaded.Code);
            Console.WriteLine("  Lexicon: {0} words loaded", loaded.Lexicon.Count);
            Console.WriteLine("  Letters: {0} letters loaded", loaded.Letters.Count);
            Console.WriteLine();

            return loaded;
        }
        /// <summary>
        /// Converts a <c>LanguageDto</c> into its entity form by delegating to
        /// <c>LanguageFactory.BuildEntity</c>.
        /// </summary>
        /// <param name="dto">The DTO to convert.</param>
        /// <returns>The entity produced by the factory.</returns>
        protected ILanguage ConvertFromDto(LanguageDto dto)
        {
            return LanguageFactory.BuildEntity(dto);
        }
Example #3
0
 /// <summary>
 /// Creates the language object for the value currently held in <c>Context.Language</c>.
 /// </summary>
 /// <returns>The result of <c>LanguageFactory.Create(Context.Language)</c>.</returns>
 public static Language GetActive()
 {
     var activeLanguage = LanguageFactory.Create(Context.Language);

     return activeLanguage;
 }
        /// <summary>
        /// Tokenizes the text file at <paramref name="path"/> (read as windows-1254), reduces each
        /// token to its longest analysed stem when the analyser returns one, and records term
        /// frequencies.
        /// </summary>
        /// <remarks>
        /// For every accepted term (longer than two characters and not a stop word) this method:
        /// increments its count in <c>corpus</c> (assigning a new <c>wordIndex</c> entry the first
        /// time it is seen), increments its count in the per-document dictionary, and keeps the
        /// two running maxima of the per-document counts. After the file is consumed it appends
        /// the maxima to <c>DocumentMax</c>, <paramref name="classIndex"/> to
        /// <c>DocumentClassIndex</c>, and the per-document counts to <c>DocumentWordFreq</c>.
        /// </remarks>
        /// <param name="path">Path of the document to read.</param>
        /// <param name="classIndex">Class label index stored alongside the document.</param>
        public void tokenize(string path, int classIndex)
        {
            Dictionary<string, int> document = new Dictionary<string, int>();
            int docMax  = Int32.MinValue;
            int docMax2 = Int32.MinValue;

            // The reader is disposed deterministically so the file handle is released even
            // when analysis throws part-way through the document.
            using (StreamReader reader = new StreamReader(path, Encoding.GetEncoding("windows-1254")))
            {
                Language tr = LanguageFactory.Create(LanguageType.Turkish);
                string   line;

                while ((line = reader.ReadLine()) != null)
                {
                    // Split the line into raw pieces according to the "magic" regex.
                    string[] rawWords = Regex.Split(line, @"((\)('|’)\w+)|\W('|’)|('|’)\W|^('|’)|$('|’)|\d+('|’)\w+|\d+\w+|\d+[^a-zA-Z ]+\w+|\w+\d+|\d+|(\)|\())|[^\w('|’)]", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

                    foreach (string w in rawWords)
                    {
                        // Empty strings and some unwanted pieces (e.g. numbers) are discarded.
                        if (w == "" || !Regex.IsMatch(w, @"\D\w", RegexOptions.Compiled))
                        {
                            continue;
                        }

                        // More magic regex: keep the part in front of an apostrophe suffix.
                        string[] token = Regex.Split(w, @"(\W*)('|’)(\w+|\W+)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
                        // NOTE(review): ToLower() uses the current culture; Turkish casing of
                        // 'I' depends on it. Confirm the intended culture before changing this.
                        token[0] = token[0].ToLower();
                        string word = token[0];

                        // Morphological analysis. The authors found that a stem further from the
                        // bare root tends to be more relevant to the text, so the longest stem
                        // wins (the first one on ties).
                        string       longestStem = "";
                        IList <Word> solutions   = tr.Analyze(word);
                        foreach (var solution in solutions)
                        {
                            string stem = solution.GetStem().GetSurface(); // stemming
                            if (stem.Length > longestStem.Length)
                            {
                                longestStem = stem;
                            }
                        }

                        // Use the stem when analysis produced one, otherwise the raw token.
                        string term = longestStem != "" ? longestStem : word;

                        // Drop very short terms and stop words.
                        if (term.Length <= 2 || stopWords.Contains(term))
                        {
                            continue;
                        }

                        // Corpus-wide frequency; a term seen for the first time gets the next index.
                        if (corpus.ContainsKey(term))
                        {
                            corpus[term]++;
                        }
                        else
                        {
                            corpus.Add(term, 1);
                            wordIndex.Add(term, Index);
                            Index++;
                        }

                        // Per-document frequency (count stays 0 when the term is not present yet).
                        int count;
                        document.TryGetValue(term, out count);
                        count++;
                        document[term] = count;

                        // Track the two largest per-document counts. This now also runs for
                        // terms that fell back to the raw token; previously that branch was
                        // nested where it could never execute.
                        if (count > docMax)
                        {
                            docMax2 = docMax;
                            docMax  = count;
                        }
                        else if (count > docMax2)
                        {
                            docMax2 = count;
                        }
                    }
                }
            }

            DocumentMax.Add(new int[] { docMax, docMax2 });
            DocumentClassIndex.Add(classIndex);
            DocumentWordFreq.Add(document);
        }
Example #5
0
        /// <summary>
        /// Creates nine language entities through <paramref name="languageFactory"/>, inserts them
        /// one by one through <paramref name="languageRepository"/>, and returns the Turkish and
        /// English entities with their <c>Id</c> set to the value returned by the insert.
        /// </summary>
        /// <param name="languageRepository">Repository used to insert each language.</param>
        /// <param name="languageFactory">Factory used to build each language entity.</param>
        /// <returns>A tuple of (Turkish, English), in that order.</returns>
        private static (Language, Language) InsertLanguages(ILanguageRepository languageRepository, LanguageFactory languageFactory)
        {
            var chinese    = languageFactory.CreateEntity("zh", "zho", "Simplified Chinese", "简化字");
            var spanish    = languageFactory.CreateEntity("es", "spa", "Spanish", "Español");
            var english    = languageFactory.CreateEntity("en", "eng", "English", "English");
            var hindi      = languageFactory.CreateEntity("hi", "hin", "Hindi", "हिन्दी");
            var arabic     = languageFactory.CreateEntity("ar", "ara", "Arabic", "العربية");
            // ISO 639-1 assigns "pt" to Portuguese; the previous "po" is not a valid code.
            var portuguese = languageFactory.CreateEntity("pt", "por", "Portuguese", "Português");
            var russian    = languageFactory.CreateEntity("ru", "rus", "Russian", "русский");
            var japanese   = languageFactory.CreateEntity("ja", "jpn", "Japanese", "日本語");
            var turkish    = languageFactory.CreateEntity("tr", "tur", "Turkish", "Türkçe");

            // Inserts are performed sequentially and block on each task, so the insertion
            // order is fixed. Only the ids of the two returned entities are captured.
            languageRepository.Insert(0, chinese).Wait();
            languageRepository.Insert(0, spanish).Wait();
            var englishId = languageRepository.Insert(0, english).Result;

            english.Id = englishId;
            languageRepository.Insert(0, hindi).Wait();
            languageRepository.Insert(0, arabic).Wait();
            languageRepository.Insert(0, portuguese).Wait();
            languageRepository.Insert(0, russian).Wait();
            languageRepository.Insert(0, japanese).Wait();
            var turkishId = languageRepository.Insert(0, turkish).Result;

            turkish.Id = turkishId;

            return (turkish, english);
        }
Example #6
0
 /// <summary>
 /// Creates the controller and instantiates the language and dashboard factories it keeps
 /// in <c>_LanguageFactory</c> and <c>_dashBoardFactory</c>.
 /// </summary>
 public HomeController()
 {
     _LanguageFactory = new LanguageFactory();
     _dashBoardFactory = new DashBoardFactory();
 }