Esempio n. 1
0
        /// <summary>
        /// Prompts on the console for a line of keywords and returns the stems of
        /// the tokens that are not stop words, in input order.
        /// </summary>
        /// <returns>
        /// The stem surface of every token for which the Turkish analyzer produced
        /// at least one solution and which <c>IsStopWord</c> does not reject.
        /// Empty when no input line is available.
        /// </returns>
        private List <string> GetKeywords()
        {
            Console.WriteLine("Keywords:");
            string keywords = Console.ReadLine();

            List <string> stems = new List <string>();

            // Console.ReadLine returns null once the input stream is exhausted.
            if (keywords == null)
            {
                return(stems);
            }

            keywords = keywords.ToLower() + " ";

            IList <string> tokens = StandartSplitter.Split(keywords);
            Language       tr     = LanguageFactory.Create(LanguageType.Turkish);

            // Substitute "eylül" for the first "dokuz" token, but only when one is
            // actually present; otherwise the first keyword would be overwritten
            // (or an empty token list would be indexed out of range).
            // NOTE(review): the reason for this substitution is not visible here.
            int ind = tokens.IndexOf("dokuz");
            if (ind >= 0)
            {
                tokens[ind] = "eylül";
            }

            foreach (string token in tokens)
            {
                IList <Word> solutions = tr.Analyze(token);
                if (solutions.Count == 0)
                {
                    continue;
                }

                // The last analysis result is the one whose stem is used.
                string stem = solutions[solutions.Count - 1].GetStem().GetSurface();
                if (!IsStopWord(stem))
                {
                    stems.Add(stem);
                }
            }
            return(stems);
        }
Esempio n. 2
0
        /// <summary>
        /// Reads every page file of every university in <c>uniNames</c>, stems its
        /// words, and records the stem positions in <c>vocabulary</c>.
        /// Also updates <c>pageCount</c>, <c>pageIndexes</c> and <c>uniPageCounts</c>.
        /// </summary>
        private void Create()
        {
            // The analyzer does not depend on the page, so it is created once
            // instead of once per file.
            Language tr = LanguageFactory.Create(LanguageType.Turkish);

            for (int k = 0; k < uniNames.Length; k++)
            {
                int uniPageCounter = 0;

                // NOTE(review): the base directory is hard-coded to one user's desktop.
                foreach (string filePath in Directory.GetFiles(@"C:\Users\Gokce\Desktop\" + uniNames[k]))
                {
                    // The file name up to its first '.' is parsed as the page index.
                    int pageIndex = Convert.ToInt32(Path.GetFileName(filePath).Split('.')[0]);
                    pageCount++;
                    uniPageCounter++;
                    pageIndexes[k].Add(pageIndex);

                    // Read the page as UTF-8 and convert it to lower case.
                    byte[] byteArray = File.ReadAllBytes(filePath);
                    string page      = Encoding.UTF8.GetString(byteArray);
                    page = page.ToLower();

                    // Split the page into words.
                    IList <string> tokens = StandartSplitter.Split(page);

                    // Reduce the words to their stems, dropping stop words.
                    List <string> stems = new List <string>();
                    foreach (string token in tokens)
                    {
                        IList <Word> solutions = tr.Analyze(token);
                        if (solutions.Count == 0)
                        {
                            continue;
                        }

                        // The last analysis result is the one whose stem is used.
                        string stem = solutions[solutions.Count - 1].GetStem().GetSurface();
                        if (!IsStopWord(stem))
                        {
                            stems.Add(stem);
                        }
                    }

                    // Update the vocabulary (inverted index): one AddIndexes call
                    // per distinct stem, in order of first occurrence on the page.
                    foreach (KeyValuePair <string, List <int> > group in GroupStemPositions(stems))
                    {
                        bool found = false;

                        foreach (VocabItem item in vocabulary)
                        {
                            if (item.word.Equals(group.Key))
                            {
                                found = true;
                                item.AddIndexes(uniNames[k], pageIndex, group.Value);
                                break;
                            }
                        }
                        if (!found)
                        {
                            VocabItem myVocabItem = new VocabItem(group.Key, uniNames[k]);
                            myVocabItem.AddIndexes(uniNames[k], pageIndex, group.Value);
                            vocabulary.Add(myVocabItem);
                        }
                    }
                }
                uniPageCounts.Add(uniPageCounter);
            }
        }

        /// <summary>
        /// Groups the positions of <paramref name="stems"/> by stem in a single pass.
        /// </summary>
        /// <param name="stems">Stems of one page, in page order.</param>
        /// <returns>
        /// One entry per distinct stem (ordinal comparison), ordered by first
        /// occurrence, whose value lists every position of that stem in ascending order.
        /// </returns>
        private static List <KeyValuePair <string, List <int> > > GroupStemPositions(IList <string> stems)
        {
            List <KeyValuePair <string, List <int> > > groups = new List <KeyValuePair <string, List <int> > >();
            Dictionary <string, List <int> >           byStem = new Dictionary <string, List <int> >(StringComparer.Ordinal);

            for (int i = 0; i < stems.Count; i++)
            {
                List <int> positions;
                if (!byStem.TryGetValue(stems[i], out positions))
                {
                    positions = new List <int>();
                    byStem.Add(stems[i], positions);
                    groups.Add(new KeyValuePair <string, List <int> >(stems[i], positions));
                }
                positions.Add(i);
            }
            return(groups);
        }