/// <summary>
/// Prompts on the console for a line of keywords and returns the stems of the
/// entered words, with stop words removed.
/// </summary>
/// <returns>
/// The stem surface of every token that the Turkish analyzer could analyze and
/// that <c>IsStopWord</c> did not reject, in input order. Empty when nothing
/// usable was entered.
/// </returns>
private List<string> GetKeywords()
{
    Console.WriteLine("Keywords:");

    // Console.ReadLine returns null when the input stream is exhausted; treat
    // that as an empty query instead of failing on ToLower().
    string keywords = Console.ReadLine() ?? string.Empty;
    keywords = keywords.ToLower() + " ";

    IList<string> tokens = StandartSplitter.Split(keywords);

    // Swap the first "dokuz" token for "eylül".
    // NOTE(review): presumably this special-cases the name "Dokuz Eylül" - confirm.
    // The swap must only happen when "dokuz" is really present: the previous code
    // fell back to index 0 and overwrote the first keyword of every other query
    // (and threw when the token list was empty).
    int ind = tokens.IndexOf("dokuz");
    if (ind >= 0)
    {
        tokens[ind] = "eylül";
    }

    Language tr = LanguageFactory.Create(LanguageType.Turkish);
    List<string> stems = new List<string>();
    foreach (string token in tokens)
    {
        IList<Word> solutions = tr.Analyze(token);
        if (solutions.Count > 0)
        {
            // The last analysis result is the one used for the stem.
            string stem = solutions[solutions.Count - 1].GetStem().GetSurface();
            if (!IsStopWord(stem))
            {
                stems.Add(stem);
            }
        }
    }

    return stems;
}
/// <summary>
/// Reads every page file of every entry in <c>uniNames</c>, stems its words and
/// records the stem positions in <c>vocabulary</c> (the inverted index).
/// Also updates <c>pageCount</c>, <c>pageIndexes</c> and <c>uniPageCounts</c>.
/// </summary>
/// <exception cref="FormatException">
/// A file name does not start with an integer followed by '.' (thrown by
/// <see cref="Convert.ToInt32(string)"/>).
/// </exception>
private void Create()
{
    // Folder that contains one sub-folder per entry of uniNames.
    const string pagesRoot = @"C:\Users\Gokce\Desktop\";

    // The analyzer does not depend on the page being read, so it is created
    // once instead of once per file.
    Language tr = LanguageFactory.Create(LanguageType.Turkish);

    for (int k = 0; k < uniNames.Length; k++)
    {
        int uniPageCounter = 0;

        foreach (string filePath in Directory.GetFiles(pagesRoot + uniNames[k]))
        {
            // The page index is the part of the file name before the first '.'.
            int pageIndex = Convert.ToInt32(Path.GetFileName(filePath).Split('.')[0]);
            pageCount++;
            uniPageCounter++;
            pageIndexes[k].Add(pageIndex);

            // Read the page and convert it to lower case.
            byte[] byteArray = File.ReadAllBytes(filePath);
            string page = Encoding.UTF8.GetString(byteArray);
            page = page.ToLower();

            // Split the page into words.
            IList<string> tokens = StandartSplitter.Split(page);

            // Reduce the words to their stems, dropping stop words.
            List<string> stems = new List<string>();
            foreach (string token in tokens)
            {
                IList<Word> solutions = tr.Analyze(token);
                if (solutions.Count > 0)
                {
                    // The last analysis result is the one used for the stem.
                    string stem = solutions[solutions.Count - 1].GetStem().GetSurface();
                    if (!IsStopWord(stem))
                    {
                        stems.Add(stem);
                    }
                }
            }

            // Build the vocabulary / inverted index for this page.
            AddPageToVocabulary(k, pageIndex, stems);
        }

        uniPageCounts.Add(uniPageCounter);
    }
}

/// <summary>
/// Records, for one page, the positions of every distinct stem in <c>vocabulary</c>.
/// </summary>
/// <param name="uniIndex">Index into <c>uniNames</c> of the entry the page belongs to.</param>
/// <param name="pageIndex">Numeric index of the page, taken from its file name.</param>
/// <param name="stems">Stems of the page in document order.</param>
private void AddPageToVocabulary(int uniIndex, int pageIndex, List<string> stems)
{
    // One pass over the stems: collect the positions of each distinct stem and
    // remember the order in which the stems first appear, so that new vocabulary
    // items are appended in the same order as before.
    List<string> distinctStems = new List<string>();
    Dictionary<string, List<int>> positionsByStem = new Dictionary<string, List<int>>();
    for (int i = 0; i < stems.Count; i++)
    {
        List<int> positions;
        if (!positionsByStem.TryGetValue(stems[i], out positions))
        {
            positions = new List<int>();
            positionsByStem.Add(stems[i], positions);
            distinctStems.Add(stems[i]);
        }
        positions.Add(i);
    }

    foreach (string stem in distinctStems)
    {
        List<int> positions = positionsByStem[stem];
        bool found = false;

        // Every matching item is updated (no early exit), as in the original loop.
        foreach (VocabItem item in vocabulary)
        {
            if (item.word.Equals(stem))
            {
                found = true;
                // Each item gets its own copy so that no two items share a list.
                item.AddIndexes(uniNames[uniIndex], pageIndex, new List<int>(positions));
            }
        }

        if (!found)
        {
            VocabItem newItem = new VocabItem(stem, uniNames[uniIndex]);
            newItem.AddIndexes(uniNames[uniIndex], pageIndex, positions);
            vocabulary.Add(newItem);
        }
    }
}