示例#1
0
        /// <summary>
        /// Builds an inverted index that maps each word to the numbers of the stemmed
        /// documents containing it, and writes the index as JSON to
        /// <c>invertlist.txt</c> inside the stemming folder.
        /// </summary>
        /// <remarks>
        /// The number of documents is the number of lines in the host's <c>index.txt</c>;
        /// document <c>i</c> is read from <c>{StemingForder}{i}.txt</c> and split on spaces.
        /// </remarks>
        public void Invert()
        {
            var dict  = new Dictionary <string, IList <int> >();
            var index = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

            // Only the entry count is used below; the third space-separated field is still
            // extracted so that a malformed index line is rejected here rather than later.
            var docCount = index.Split("\n").Select(x => x.Split(" ")).Select(x => x[2]).ToList().Count;

            for (int i = 0; i < docCount; i++)
            {
                var words = fileProvider.GetTextFromFile($"{StemingForder}{i}.txt").Split(' ');
                foreach (var word in words)
                {
                    if (!dict.TryGetValue(word, out var postings))
                    {
                        dict.Add(word, new List <int> {
                            i
                        });
                    }
                    else if (postings[postings.Count - 1] != i)
                    {
                        // Documents are visited in increasing order, so document i can only
                        // already be present as the last entry. Checking it here keeps every
                        // list duplicate-free without rewriting the dictionary while
                        // enumerating it afterwards.
                        postings.Add(i);
                    }
                }
            }

            var invertDict = JsonConvert.SerializeObject(dict);

            fileProvider.WriteTextToFile($"{StemingForder}invertlist.txt", invertDict);
        }
示例#2
0
        /// <summary>
        /// Reads the host's <c>index.txt</c>, takes the third space-separated field of
        /// every line and passes each of those values to <c>StemmingFile</c> in order.
        /// </summary>
        public void StartStemming()
        {
            var indexText = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

            // Materialize the whole list first so a malformed line fails before any file is processed.
            var targets = indexText.Split("\n")
                                   .Select(line => line.Split(" ")[2])
                                   .ToArray();

            for (int position = 0; position < targets.Length; position++)
            {
                StemmingFile(targets[position]);
            }
        }
示例#3
0
        /// <summary>
        /// Sets up the working folders for the given domain, creates the <c>tf</c> and
        /// <c>tf-idf</c> folders when they do not exist yet, and stores the number of
        /// lines of the host's <c>index.txt</c> in <c>docNum</c>.
        /// </summary>
        /// <param name="domain">URI string parsed with <see cref="Uri"/>; its host names the data folder.</param>
        public TfIdf(string domain)
        {
            uri          = new Uri(domain);
            fileProvider = new FileProvider();

            StemingForder = $"{ParentForderPath}/{uri.Host}/stemming/";
            tfForder      = $"{ParentForderPath}/{uri.Host}/tf/";
            tfIdfForder   = $"{ParentForderPath}/{uri.Host}/tf-idf/";

            // Create the two output folders (tf first, then tf-idf) if they are missing.
            foreach (var outputFolder in new[] { tfForder, tfIdfForder })
            {
                if (!Directory.Exists(outputFolder))
                {
                    Directory.CreateDirectory(outputFolder);
                }
            }

            var indexText = fileProvider.GetTextFromFile($"{ParentForderPath}/{uri.Host}/index.txt");

            // One index line per document; the third field is extracted so short lines still fail here.
            var fileNames = indexText.Split("\n")
                                     .Select(line => line.Split(" ")[2])
                                     .ToList();

            docNum = fileNames.Count;
        }
示例#4
0
        /// <summary>
        /// Prepares a search over the given domain: loads the per-document
        /// <c>word : weight</c> files from the tf-idf folder into <c>wordMatrix</c> and
        /// loads the inverted index from <c>invertlist.txt</c> if it is not loaded yet.
        /// </summary>
        /// <param name="domain">URI string parsed with <see cref="Uri"/>; its host names the data folder.</param>
        public Search(string domain)
        {
            uri          = new Uri(domain);
            fileProvider = new FileProvider();

            StemingForder = $"{ParentForderPath}/{uri.Host}/stemming/";
            tfIdfForder   = $"{ParentForderPath}/{uri.Host}/tf-idf/";
            wordMatrix    = new Dictionary <int, Dictionary <string, double> >();

            for (int docId = 0; docId < CountDocument; docId++)
            {
                var lines   = fileProvider.GetTextFromFile($"{tfIdfForder}{docId}.txt").Split('\n');
                var weights = new Dictionary <string, double>();

                // Each line is "<word>:<number>"; the word is trimmed, the number is parsed as-is.
                foreach (var line in lines)
                {
                    var parts  = line.Split(":");
                    var term   = parts[0].Trim();
                    var weight = double.Parse(parts[1]);
                    weights.Add(term, weight);
                }

                wordMatrix.Add(docId, weights);
            }

            // Deserialize the inverted index only when nothing has populated it yet.
            if (invertDict == null)
            {
                var invertJson = fileProvider.GetTextFromFile($"{StemingForder}invertlist.txt");
                invertDict = JsonConvert.DeserializeObject <Dictionary <string, List <int> > >(invertJson);
            }

            // Re-key the index with trimmed words.
            invertDict = invertDict.ToDictionary(entry => entry.Key.Trim(), entry => entry.Value);
        }
示例#5
0
 /// <summary>
 /// Computes the term frequency of every word in each stemmed document
 /// (<c>{StemingForder}{i}.txt</c> for <c>i</c> in <c>[0, docNum)</c>) and writes one
 /// <c>word : frequency</c> line per distinct word to <c>{tfForder}{i}.txt</c>.
 /// </summary>
 /// <remarks>
 /// The frequency is the word's occurrence count divided by the number of
 /// space-separated tokens in the document, rounded to five decimals.
 /// </remarks>
 public void TF()
 {
     for (int i = 0; i < docNum; i++)
     {
         var dict  = new Dictionary <string, int>();

         // Read and tokenize the document a single time.
         var words = fileProvider.GetTextFromFile($"{StemingForder}{i}.txt").Split(' ');
         foreach (var word in words)
         {
             // For an unseen word TryGetValue leaves count at 0, so one lookup covers both cases.
             dict.TryGetValue(word, out var count);
             dict[word] = count + 1;
         }

         // Every token was counted exactly once, so the total equals the token count.
         // Split always yields at least one element, so this is never zero.
         var allWords = words.Length;
         var result   = string.Join('\n', dict.Select(x => $"{x.Key} : {Math.Round((double)x.Value / allWords,5).ToString("0.00000")}"));
         fileProvider.WriteTextToFile($"{tfForder}{i}.txt", result);
     }
 }