/// <summary>
/// Builds an inverted index (word -> numbers of the documents that contain it) from the
/// stemmed documents and writes it, serialized as JSON, to
/// <c>{StemingForder}invertlist.txt</c>.
/// </summary>
/// <remarks>
/// The number of documents is the number of non-blank lines in
/// <c>{ParentForderPath}/{uri.Host}/index.txt</c>. Document <c>i</c> is read from
/// <c>{StemingForder}{i}.txt</c> and split on single spaces.
/// </remarks>
public void Invert()
{
    var index = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

    // Only the number of index entries is used. Blank lines (for example the one produced
    // by a trailing newline) are skipped rather than failing on a missing field.
    var documentCount = index.Split("\n").Count(line => !string.IsNullOrWhiteSpace(line));

    var dict = new Dictionary<string, IList<int>>();
    for (int i = 0; i < documentCount; i++)
    {
        var words = fileProvider.GetTextFromFile($"{StemingForder}{i}.txt").Split(' ');
        foreach (var word in words)
        {
            IList<int> postings;
            if (!dict.TryGetValue(word, out postings))
            {
                dict.Add(word, new List<int> { i });
            }
            else if (postings[postings.Count - 1] != i)
            {
                // Documents are visited in ascending order, so a repeated word within the
                // current document can only duplicate the last posting. Checking it here
                // keeps every list distinct without rewriting the dictionary while it is
                // being enumerated.
                postings.Add(i);
            }
        }
    }

    var invertDict = JsonConvert.SerializeObject(dict);
    fileProvider.WriteTextToFile($"{StemingForder}invertlist.txt", invertDict);
}
/// <summary>
/// Computes the term frequency of every word in each stemmed document and writes the
/// result to <c>{tfForder}{i}.txt</c>, one <c>word : frequency</c> entry per line.
/// </summary>
/// <remarks>
/// Documents <c>0 .. docNum - 1</c> are read from <c>{StemingForder}{i}.txt</c> and split
/// on single spaces. A word's frequency is its occurrence count divided by the total
/// number of tokens in the document, rounded to five decimals.
/// </remarks>
public void TF()
{
    for (int i = 0; i < docNum; i++)
    {
        // Read each document once; the token array drives both the counts and the total.
        var words = fileProvider.GetTextFromFile($"{StemingForder}{i}.txt").Split(' ');

        var counts = new Dictionary<string, int>();
        foreach (var word in words)
        {
            int seen;
            counts.TryGetValue(word, out seen); // seen stays 0 for a word not counted yet
            counts[word] = seen + 1;
        }

        // Equals the sum of all counts. Split always yields at least one token, so the
        // divisor below is never zero.
        var allWords = words.Length;

        // NOTE(review): ToString("0.00000") formats with the current culture, so the decimal
        // separator depends on the machine — confirm whatever reads these files expects that.
        var result = string.Join('\n', counts.Select(x => $"{x.Key} : {Math.Round((double)x.Value / allWords,5).ToString("0.00000")}"));
        fileProvider.WriteTextToFile($"{tfForder}{i}.txt", result);
    }
}
/// <summary>
/// Reads the text file at <paramref name="path"/>, extracts its words, stems the longer
/// ones and writes the space-separated result to
/// <c>{ParentForderPath}/{uri.Host}/stemming/</c> under the source file's
/// <c>&lt;digits&gt;.txt</c> name.
/// </summary>
/// <param name="path">Path of the source text file; expected to end in <c>&lt;digits&gt;.txt</c>.</param>
private void StemmingFile(string path)
{
    var text = fileProvider.GetTextFromFile(path);
    var porter = new Porter();
    var words = new List<string>();

    foreach (Match word in Regex.Matches(text, @"([\w]{1,})"))
    {
        // Words of four characters or fewer are kept verbatim; longer ones go through
        // Porter.Stemm.
        string stemmed = word.Value;
        if (stemmed.Length > 4)
        {
            stemmed = porter.Stemm(stemmed);
        }

        words.Add(stemmed);
    }

    // The dot is escaped and the pattern anchored to the end of the path so that the match
    // is the trailing "<digits>.txt" file name, never an earlier segment that merely
    // contains "txt" (an unescaped '.' would match any character).
    var filename = Regex.Match(path, @"(\d*\.txt)$");
    fileProvider.WriteTextToFile($"{ParentForderPath}/{uri.Host}/stemming/{filename.Value}", string.Join(' ', words));
}