示例#1
0
 /// <summary>
 /// Term frequency: the ratio of the number of occurrences of a word to the
 /// total number of words in the document.
 /// </summary>
 /// <remarks>
 /// For every document index from 0 to <c>docNum - 1</c> the text of
 /// "{StemingForder}{i}.txt" is split on single spaces, each distinct token is
 /// counted, and one "token : frequency" line per token is written to
 /// "{tfForder}{i}.txt". Frequencies are rounded to five decimals and formatted
 /// with the "0.00000" pattern using the current culture.
 /// </remarks>
 public void TF()
 {
     for (int docIndex = 0; docIndex < docNum; docIndex++)
     {
         var content = fileProvider.GetTextFromFile($"{StemingForder}{docIndex}.txt");
         var tokens  = content.Split(' ');

         // Occurrence count per distinct token, in order of first appearance.
         var counts = new Dictionary <string, int>();
         foreach (var token in tokens)
         {
             counts.TryGetValue(token, out var seen);
             counts[token] = seen + 1;
         }

         // The denominator is the number of split tokens, empty ones included.
         var total = tokens.Length;
         var lines = new List <string>(counts.Count);
         foreach (var entry in counts)
         {
             var frequency = Math.Round((double) entry.Value / total, 5);
             lines.Add($"{entry.Key} : {frequency.ToString("0.00000")}");
         }

         fileProvider.WriteTextToFile($"{tfForder}{docIndex}.txt", string.Join('\n', lines));
     }
 }
示例#2
0
        /// <summary>
        /// Runs <c>StemmingFile</c> on every file listed in the host's index.txt.
        /// </summary>
        /// <remarks>
        /// Each index line is split on single spaces and its third field is used
        /// as the file path. Blank lines (for example the one produced by a
        /// trailing newline) are skipped and a trailing carriage return is
        /// stripped, so an index saved with CRLF line endings or a final newline
        /// no longer fails. A non-blank line with fewer than three fields still
        /// throws <see cref="IndexOutOfRangeException"/>.
        /// </remarks>
        public void StartStemming()
        {
            var index = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

            // File paths: the third space-separated field of every non-blank line.
            var files = index.Split("\n")
                             .Select(line => line.TrimEnd('\r'))
                             .Where(line => !string.IsNullOrWhiteSpace(line))
                             .Select(line => line.Split(" ")[2])
                             .ToList();

            // Reduce each file to its base (stemmed) form.
            foreach (var file in files)
            {
                StemmingFile(file);
            }
        }
示例#3
0
    /// <summary>
    /// Creates a searcher for the site identified by <paramref name="domain"/>:
    /// loads the per-document term weights from the host's tfidf folder into
    /// <c>wordMatrix</c> and makes sure the inverted index is loaded.
    /// </summary>
    /// <param name="domain">
    /// URI string of the site; it is parsed with <see cref="Uri"/> and only its
    /// host is used to build the data folder paths.
    /// </param>
    /// <exception cref="FormatException">
    /// A non-blank weight line has no ':' separator or its value is not a number.
    /// </exception>
    public Search(string domain)
    {
        uri           = new Uri(domain);
        fileProvider  = new FileProvider();
        StemingForder = $"{ParentForderPath}/{uri.Host}/tokenlemma/";
        tfIdfForder   = $"{ParentForderPath}/{uri.Host}/tfidf/";
        wordMatrix    = new Dictionary <int, Dictionary <string, double> >();

        for (int i = 0; i < CountDocument; i++)
        {
            var text    = fileProvider.GetTextFromFile($"{tfIdfForder}{i}.txt");
            var weights = new Dictionary <string, double>();

            foreach (var line in text.Split('\n'))
            {
                // A trailing newline yields an empty last element; skip it instead of failing.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                // Split on the LAST colon so that terms which themselves contain ':'
                // keep their full text and the numeric part is always the value.
                var separator = line.LastIndexOf(':');
                if (separator < 0)
                {
                    throw new FormatException($"Line '{line}' of {tfIdfForder}{i}.txt has no ':' separator.");
                }

                var term  = line.Substring(0, separator).Trim();
                var value = double.Parse(line.Substring(separator + 1));
                weights.Add(term, value);
            }

            wordMatrix.Add(i, weights);
        }

        // Load the inverted index only when it has not been set yet.
        if (invertDict == null)
        {
            invertDict = JsonConvert.DeserializeObject <Dictionary <string, List <int> > >(fileProvider.GetTextFromFile($"{StemingForder}invertlist.txt"));
        }

        // Normalize keys so lookups are not affected by surrounding whitespace.
        invertDict = invertDict.ToDictionary(x => x.Key.Trim(), y => y.Value);
    }
示例#4
0
        /// <summary>
        /// Prepares TF / TF-IDF processing for one site: resolves the input and
        /// output folders under the host's data directory, creates the output
        /// folders, and sets <c>docNum</c> to the number of entries in index.txt.
        /// </summary>
        /// <param name="domain">
        /// URI string of the site; it is parsed with <see cref="Uri"/> and only its
        /// host is used to build the folder paths.
        /// </param>
        /// <param name="parentForderPath">Root folder that contains the per-host data directories.</param>
        /// <remarks>
        /// Blank index lines (such as the one produced by a trailing newline) are
        /// ignored and a trailing carriage return is stripped, so they neither
        /// throw nor inflate the document count. A non-blank line with fewer than
        /// three space-separated fields still throws
        /// <see cref="IndexOutOfRangeException"/>.
        /// </remarks>
        public TfIdf(string domain, string parentForderPath)
        {
            ParentForderPath = parentForderPath;
            uri           = new Uri(domain);
            fileProvider  = new FileProvider();
            StemingForder = $"{ParentForderPath}/{uri.Host}/stemming/";
            tfForder      = $"{ParentForderPath}/{uri.Host}/tf/";
            tfIdfForder   = $"{ParentForderPath}/{uri.Host}/tf-idf/";

            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(tfForder);
            Directory.CreateDirectory(tfIdfForder);

            var index = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

            // One document per non-blank index line; the third field is the file path.
            var files = index.Split("\n")
                             .Select(line => line.TrimEnd('\r'))
                             .Where(line => !string.IsNullOrWhiteSpace(line))
                             .Select(line => line.Split(" ")[2])
                             .ToList();

            docNum = files.Count;
        }