/// <summary>
/// Computes the term frequency (TF) of every word in every document: the number of
/// occurrences of the word divided by the total number of words in that document.
/// </summary>
/// <remarks>
/// For each document index <c>i</c> in <c>[0, docNum)</c> the text is read from
/// <c>{StemingForder}{i}.txt</c>, split on spaces, and one line per distinct word in the form
/// <c>word : 0.00000</c> is written to <c>{tfForder}{i}.txt</c>.
/// </remarks>
public void TF()
{
    for (int i = 0; i < docNum; i++)
    {
        // Leading, trailing or repeated spaces would otherwise yield empty tokens that are
        // counted as words, inflating the denominator and producing an empty-key line.
        var words = fileProvider.GetTextFromFile($"{StemingForder}{i}.txt")
            .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // Occurrence count per distinct word.
        var counts = new Dictionary<string, int>();
        foreach (var word in words)
        {
            // TryGetValue leaves 'seen' at 0 for a word that has not been counted yet.
            counts.TryGetValue(word, out var seen);
            counts[word] = seen + 1;
        }

        var totalWords = words.Length;

        // NOTE(review): ToString("0.00000") formats with the current thread culture, so the
        // decimal separator depends on the machine; readers of these files presumably parse
        // with the same culture - confirm before changing the format.
        var lines = counts.Select(entry =>
            $"{entry.Key} : {Math.Round((double) entry.Value / totalWords, 5).ToString("0.00000")}");

        fileProvider.WriteTextToFile($"{tfForder}{i}.txt", string.Join('\n', lines));
    }
}
/// <summary>
/// Runs <c>StemmingFile</c> for every file listed in the site's index file.
/// </summary>
/// <remarks>
/// Reads <c>{ParentForderPath}/{uri.Host}/index.txt</c> and takes the third space-separated
/// field of each non-blank line as the file path. Blank lines (for example the one produced by
/// a trailing newline) are skipped, and a trailing carriage return is removed so that files
/// with CRLF line endings yield clean paths.
/// </remarks>
public void StartStemming()
{
    var index = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

    // Paths to the files: third field of every index line.
    var files = index.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(line => line.TrimEnd('\r'))
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .Select(line => line.Split(" ")[2])
        .ToList();

    // Bring every listed file to its base form.
    foreach (var file in files)
    {
        StemmingFile(file);
    }
}
/// <summary>
/// Creates a search instance for a site and loads its per-document word weights and its
/// inverted index from disk.
/// </summary>
/// <param name="domain">URI of the site; its host part selects the data folder.</param>
/// <exception cref="InvalidOperationException">
/// The inverted index file deserialized to <c>null</c> (for example, it was empty).
/// </exception>
/// <remarks>
/// For each document index in <c>[0, CountDocument)</c> the file <c>{tfIdfForder}{i}.txt</c>
/// is parsed as lines of <c>word : value</c> into <c>wordMatrix</c>. The inverted index is
/// read from <c>{StemingForder}invertlist.txt</c> only when <c>invertDict</c> is still null.
/// </remarks>
public Search(string domain)
{
    uri = new Uri(domain);
    fileProvider = new FileProvider();
    StemingForder = $"{ParentForderPath}/{uri.Host}/tokenlemma/";
    tfIdfForder = $"{ParentForderPath}/{uri.Host}/tfidf/";

    wordMatrix = new Dictionary<int, Dictionary<string, double>>();
    for (int i = 0; i < CountDocument; i++)
    {
        var text = fileProvider.GetTextFromFile($"{tfIdfForder}{i}.txt");

        // Blank lines (e.g. from a trailing newline) have no ':' part and would make the
        // value lookup throw, so they are skipped.
        // NOTE(review): double.Parse uses the current thread culture; this presumably has to
        // match the culture the files were written with - confirm.
        var weights = text.Split('\n')
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Select(line => line.Split(":"))
            .ToDictionary(parts => parts[0].Trim(), parts => double.Parse(parts[1]));

        wordMatrix.Add(i, weights);
    }

    if (invertDict == null)
    {
        var invertListPath = $"{StemingForder}invertlist.txt";
        invertDict = JsonConvert.DeserializeObject<Dictionary<string, List<int>>>(
            fileProvider.GetTextFromFile(invertListPath));

        // Fail with a clear message instead of a NullReferenceException on the re-keying below.
        if (invertDict == null)
        {
            throw new InvalidOperationException(
                $"Inverted index file '{invertListPath}' did not contain a JSON object.");
        }
    }

    // Normalize the keys so lookups are not affected by surrounding whitespace.
    invertDict = invertDict.ToDictionary(pair => pair.Key.Trim(), pair => pair.Value);
}
/// <summary>
/// Creates a TF / TF-IDF calculator for a site, prepares its output folders and determines
/// the number of documents from the site's index file.
/// </summary>
/// <param name="domain">URI of the site; its host part selects the data folder.</param>
/// <param name="parentForderPath">Root folder under which the per-host data folders live.</param>
/// <remarks>
/// <c>docNum</c> is set to the number of non-blank lines of
/// <c>{ParentForderPath}/{uri.Host}/index.txt</c>; the third space-separated field of each
/// such line is still extracted, so a line with fewer than three fields throws as before.
/// </remarks>
public TfIdf(string domain, string parentForderPath)
{
    ParentForderPath = parentForderPath;
    uri = new Uri(domain);
    fileProvider = new FileProvider();
    StemingForder = $"{ParentForderPath}/{uri.Host}/stemming/";
    tfForder = $"{ParentForderPath}/{uri.Host}/tf/";
    tfIdfForder = $"{ParentForderPath}/{uri.Host}/tf-idf/";

    // Directory.CreateDirectory does nothing when the directory already exists,
    // so a preceding Directory.Exists check is unnecessary.
    Directory.CreateDirectory(tfForder);
    Directory.CreateDirectory(tfIdfForder);

    var index = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

    // Skip blank lines (e.g. a trailing newline) and strip the '\r' left by CRLF endings;
    // a blank line would otherwise throw when its third field is read.
    var files = index.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(line => line.TrimEnd('\r'))
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .Select(line => line.Split(" ")[2])
        .ToList();

    docNum = files.Count;
}