예제 #1
0
        /// <summary>
        /// Builds an inverted index (word to ascending list of document numbers) over the
        /// stemmed documents and writes it as JSON to <c>{StemingForder}invertlist.txt</c>.
        /// </summary>
        /// <remarks>
        /// The number of documents is taken from the number of non-blank lines in
        /// <c>{ParentForderPath}/{uri.Host}/index.txt</c>. Document <c>i</c> is read from
        /// <c>{StemingForder}{i}.txt</c> and tokenized by splitting on single spaces.
        /// </remarks>
        public void Invert()
        {
            var index = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

            // Only the number of index entries is needed here. Blank lines (for example the
            // one produced by a trailing newline) are not counted as documents.
            var documentCount = index.Split("\n").Count(line => !string.IsNullOrWhiteSpace(line));

            var dict = new Dictionary<string, IList<int>>();

            for (int i = 0; i < documentCount; i++)
            {
                var words = fileProvider.GetTextFromFile($"{StemingForder}{i}.txt").Split(' ');
                foreach (var word in words)
                {
                    if (!dict.TryGetValue(word, out var postings))
                    {
                        dict.Add(word, new List<int> { i });
                    }
                    else if (postings[postings.Count - 1] != i)
                    {
                        // Documents are visited in ascending order, so a repeated word in the
                        // same document can only collide with the last posting. Checking it
                        // keeps each list duplicate-free without a second pass that would
                        // have to modify the dictionary while enumerating it.
                        postings.Add(i);
                    }
                }
            }

            var invertDict = JsonConvert.SerializeObject(dict);

            fileProvider.WriteTextToFile($"{StemingForder}invertlist.txt", invertDict);
        }
예제 #2
0
 /// <summary>
 /// Computes the relative frequency of every token in each of the <c>docNum</c> stemmed
 /// documents and writes one <c>"word : frequency"</c> line per distinct token to
 /// <c>{tfForder}{i}.txt</c>.
 /// </summary>
 /// <remarks>
 /// Document <c>i</c> is read from <c>{StemingForder}{i}.txt</c> and tokenized by splitting
 /// on single spaces. Frequencies are rounded to five decimals.
 /// </remarks>
 public void TF()
 {
     for (int i = 0; i < docNum; i++)
     {
         // Read the document once; the tokens are the only thing needed from it.
         var words  = fileProvider.GetTextFromFile($"{StemingForder}{i}.txt").Split(' ');
         var counts = new Dictionary<string, int>();

         foreach (var word in words)
         {
             // TryGetValue leaves 'seen' at 0 for a new word, so one lookup covers both cases.
             counts.TryGetValue(word, out var seen);
             counts[word] = seen + 1;
         }

         // Every token increments exactly one counter, so the counters sum to the token count.
         double total = words.Length;

         // NOTE(review): ToString("0.00000") formats with the current culture, as before;
         // confirm that whatever reads these files expects that.
         var lines = counts.Select(entry =>
         {
             var frequency = Math.Round(entry.Value / total, 5).ToString("0.00000");
             return $"{entry.Key} : {frequency}";
         });

         fileProvider.WriteTextToFile($"{tfForder}{i}.txt", string.Join('\n', lines));
     }
 }
예제 #3
0
        /// <summary>
        /// Reads the text file at <paramref name="path"/>, stems its words with
        /// <c>Porter</c>, and writes them space-separated to
        /// <c>{ParentForderPath}/{uri.Host}/stemming/</c> under the file name taken from
        /// the end of <paramref name="path"/>.
        /// </summary>
        /// <param name="path">
        /// Path of the source file. It must end in <c>.txt</c>; the digits immediately
        /// before the extension (if any) plus <c>.txt</c> become the output file name.
        /// </param>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="path"/> does not end in <c>.txt</c>.
        /// </exception>
        private void StemmingFile(string path)
        {
            // The dot is escaped and the pattern is anchored at the end of the path so that a
            // directory name that merely contains "txt" can never be picked up as the file name.
            var filename = Regex.Match(path, @"\d*\.txt$");
            if (!filename.Success)
            {
                throw new System.ArgumentException($"Path must end with '.txt': {path}", nameof(path));
            }

            var text   = fileProvider.GetTextFromFile(path);
            var words  = new List<string>();
            var porter = new Porter();

            foreach (Match word in Regex.Matches(text, @"([\w]{1,})"))
            {
                // Words of four characters or fewer are kept as-is; only longer ones are stemmed.
                var stemmed = word.Value.Length > 4
                    ? porter.Stemm(word.Value)
                    : word.Value;

                words.Add(stemmed);
            }

            fileProvider.WriteTextToFile($"{ParentForderPath}/{uri.Host}/stemming/{filename.Value}", string.Join(' ', words));
        }