/// <summary>
/// Builds a <c>LanguageFactory</c> backed by file readers, asks it for the language
/// identified by <paramref name="languageCode"/>, and prints a short load summary
/// (language code, lexicon count, letter count, then a blank line) to the console.
/// </summary>
/// <param name="languageCode">Code passed straight through to <c>LanguageFactory.GetLanguage</c>.</param>
/// <returns>The object returned by the factory for the requested code.</returns>
static LanguageInfo loadLanguage(string languageCode)
{
    // Readers are created in the same order the factory constructor receives them.
    var lettersReader = new LettersFileReader();
    var lexiconReader = new LexiconFileReader();
    var factory = new LanguageFactory(lettersReader, lexiconReader);

    var loaded = factory.GetLanguage(languageCode);

    // Report what was loaded.
    Console.WriteLine("Loaded language: {0}", loaded.Code);
    Console.WriteLine(" Lexicon: {0} words loaded", loaded.Lexicon.Count);
    Console.WriteLine(" Letters: {0} letters loaded", loaded.Letters.Count);
    Console.WriteLine();

    return loaded;
}
/// <summary>
/// Converts a language DTO into its entity form by delegating to
/// <c>LanguageFactory.BuildEntity</c>.
/// </summary>
/// <param name="dto">The DTO handed to the factory unchanged.</param>
/// <returns>Whatever <c>LanguageFactory.BuildEntity</c> produces for <paramref name="dto"/>.</returns>
protected ILanguage ConvertFromDto(LanguageDto dto)
{
    return LanguageFactory.BuildEntity(dto);
}
/// <summary>
/// Returns the language created by <c>LanguageFactory.Create</c> for the value
/// currently held in <c>Context.Language</c>.
/// </summary>
/// <returns>The factory result for the current context language.</returns>
public static Language GetActive()
{
    var contextLanguage = Context.Language;
    return LanguageFactory.Create(contextLanguage);
}
/// <summary>
/// Reads the text file at <paramref name="path"/> (decoded as windows-1254), splits each
/// line into raw words, reduces every word to a term, and updates the term statistics:
/// the shared <c>corpus</c> counts, the <c>wordIndex</c> assignment for first-seen terms,
/// and a per-document frequency table. When the file has been consumed, one entry is
/// appended to each of <c>DocumentMax</c>, <c>DocumentClassIndex</c> and
/// <c>DocumentWordFreq</c>.
/// </summary>
/// <remarks>
/// The term for a word is the longest stem surface returned by the Turkish analyzer, or
/// the lower-cased token itself when the analyzer yields no non-empty stem. Terms of
/// length two or less and terms contained in <c>stopWords</c> are ignored.
/// </remarks>
/// <param name="path">Path of the document to read.</param>
/// <param name="classIndex">Value appended to <c>DocumentClassIndex</c> for this document.</param>
public void tokenize(string path, int classIndex)
{
    // Running highest and second-highest per-term counts observed while reading this document.
    int docMax = Int32.MinValue;
    int docMax2 = Int32.MinValue;
    Dictionary<string, int> document = new Dictionary<string, int>();

    // The reader is disposed on every exit path so the file handle is not leaked.
    using (StreamReader r = new StreamReader(path, Encoding.GetEncoding("windows-1254")))
    {
        Language tr = LanguageFactory.Create(LanguageType.Turkish);

        // Tokenizer section.
        string line;
        while ((line = r.ReadLine()) != null)
        {
            // Splits the text into pieces according to the "magic" regex.
            string[] rawWords = Regex.Split(line, @"((\)('|’)\w+)|\W('|’)|('|’)\W|^('|’)|$('|’)|\d+('|’)\w+|\d+\w+|\d+[^a-zA-Z ]+\w+|\w+\d+|\d+|(\)|\())|[^\w('|’)]", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

            foreach (string w in rawWords)
            {
                // Empty strings and some unwanted cases (for example numbers) are discarded.
                if (w == "" || !Regex.IsMatch(w, @"\D\w", RegexOptions.Compiled))
                {
                    continue;
                }

                // More magic regex: only the part before an apostrophe suffix (element 0) is used.
                string[] token = Regex.Split(w, @"(\W*)('|’)(\w+|\W+)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
                token[0] = token[0].ToLower();
                string surface = token[0];

                // Morphological analysis. The authors observed that stems farther from the
                // root tend to be more relevant to the text, so the longest stem is kept.
                string longestStem = "";
                int longestStemLength = Int32.MinValue;
                IList<Word> solutions = tr.Analyze(surface);
                foreach (var solution in solutions)
                {
                    string stem = solution.GetStem().GetSurface(); // stemming
                    if (stem.Length > longestStemLength)
                    {
                        longestStemLength = stem.Length;
                        longestStem = stem;
                    }
                }

                // Fall back to the raw token when the analyzer produced no usable stem.
                string term = longestStem != "" ? longestStem : surface;

                // Filter out very short terms and stop words.
                if (term.Length <= 2 || stopWords.Contains(term))
                {
                    continue;
                }

                if (corpus.ContainsKey(term))
                {
                    corpus[term]++;
                }
                else
                {
                    // First sighting anywhere: register the term and give it the next index.
                    corpus.Add(term, 1);
                    wordIndex.Add(term, Index);
                    Index++;
                }

                int count;
                document.TryGetValue(term, out count); // count stays 0 when the term is new
                count++;
                document[term] = count;

                // Previously the fallback-token case could never reach this update because it
                // was nested under a condition that required a non-empty stem; both kinds of
                // term now feed the document maxima.
                if (count > docMax)
                {
                    docMax2 = docMax;
                    docMax = count;
                }
                else if (count > docMax2)
                {
                    docMax2 = count;
                }
            }
        }
    }

    DocumentMax.Add(new int[] { docMax, docMax2 });
    DocumentClassIndex.Add(classIndex);
    DocumentWordFreq.Add(document);
}
/// <summary>
/// Creates nine language entities through <paramref name="languageFactory"/> and inserts
/// them into <paramref name="languageRepository"/> one at a time, in a fixed order.
/// The ids returned by the repository for English and Turkish are written back onto
/// those two entities, which are then returned.
/// </summary>
/// <remarks>
/// Each insert is awaited synchronously (<c>Wait()</c> / <c>Result</c>) so the inserts
/// happen strictly in sequence; failures therefore surface as <c>AggregateException</c>.
/// </remarks>
/// <param name="languageRepository">Repository receiving the inserts.</param>
/// <param name="languageFactory">Factory used to build each entity.</param>
/// <returns>A tuple of (Turkish, English), each with its <c>Id</c> populated.</returns>
private static (Language, Language) InsertLanguages(ILanguageRepository languageRepository, LanguageFactory languageFactory)
{
    var chinese = languageFactory.CreateEntity("zh", "zho", "Simplified Chinese", "简化字");
    var spanish = languageFactory.CreateEntity("es", "spa", "Spanish", "Español");
    var english = languageFactory.CreateEntity("en", "eng", "English", "English");
    var hindi = languageFactory.CreateEntity("hi", "hin", "Hindi", "हिन्दी");
    var arabic = languageFactory.CreateEntity("ar", "ara", "Arabic", "العربية");
    // ISO 639-1 code for Portuguese is "pt" (the previous value "po" is not an assigned code).
    var portuguese = languageFactory.CreateEntity("pt", "por", "Portuguese", "Português");
    var russian = languageFactory.CreateEntity("ru", "rus", "Russian", "русский");
    var japanese = languageFactory.CreateEntity("ja", "jpn", "Japanese", "日本語");
    var turkish = languageFactory.CreateEntity("tr", "tur", "Turkish", "Türkçe");

    // NOTE(review): the meaning of the leading 0 argument is not visible here — confirm against ILanguageRepository.Insert.
    languageRepository.Insert(0, chinese).Wait();
    languageRepository.Insert(0, spanish).Wait();

    var englishId = languageRepository.Insert(0, english).Result;
    english.Id = englishId;

    languageRepository.Insert(0, hindi).Wait();
    languageRepository.Insert(0, arabic).Wait();
    languageRepository.Insert(0, portuguese).Wait();
    languageRepository.Insert(0, russian).Wait();
    languageRepository.Insert(0, japanese).Wait();

    var turkishId = languageRepository.Insert(0, turkish).Result;
    turkish.Id = turkishId;

    return (turkish, english);
}
/// <summary>
/// Creates the controller and gives it freshly constructed
/// <c>LanguageFactory</c> and <c>DashBoardFactory</c> instances.
/// </summary>
public HomeController()
{
    // Language factory first, then the dashboard factory.
    _LanguageFactory = new LanguageFactory();
    _dashBoardFactory = new DashBoardFactory();
}