// Integration test for the synonym index: saves six product synonyms over two
// prices, rebuilds the index from scratch, runs a matching task for the five
// distinct names and checks the matches; then appends a sixth synonym to the
// existing index and repeats the check with six names.
public void DoMatchTest()
        {
            // Start from an empty index directory.
            if (Directory.Exists(_handler.IdxDir))
            {
                Directory.Delete(_handler.IdxDir, true);
            }

            // Two prices that belong to different suppliers, plus any product.
            Price   price1  = session.Query <Price>().FirstOrDefault();
            Price   price2  = session.Query <Price>().FirstOrDefault(p => p.Id != price1.Id && p.Supplier.Id != price1.Supplier.Id);
            Product product = Product.Queryable.FirstOrDefault();

            // The timestamp is embedded in every name so that each run works with fresh names.
            DateTime now = DateTime.Now;
            const string nameFormat = "Тестовое наименование {0} ({1})";

            IList <string> names = new List <string>();
            for (int i = 1; i <= 5; i++)
            {
                names.Add(String.Format(nameFormat, i, now));
            }

            // names[0..2] belong to price1, names[3] to price2, and names[4] to both
            // (price2 is saved first, which is the order the assertions below expect).
            Price[] synonymPrices      = { price1, price1, price1, price2, price2, price1 };
            int[]   synonymNameIndexes = { 0, 1, 2, 3, 4, 4 };

            IList <ProductSynonym> synonyms = new List <ProductSynonym>();
            for (int i = 0; i < synonymPrices.Length; i++)
            {
                synonyms.Add(new ProductSynonym()
                {
                    Product = product, Junk = false, Price = synonymPrices[i], Synonym = names[synonymNameIndexes[i]]
                });
            }

            using (new TransactionScope()) {
                foreach (var synonymProduct in synonyms)
                {
                    synonymProduct.Save();
                }
            }

            // Remove the index directory again right before the rebuild.
            if (Directory.Exists(_handler.IdxDir))
            {
                Directory.Delete(_handler.IdxDir, true);
            }

            _handler.DoIndex(synonyms, false, true);

            // The index directory must now exist and contain non-empty data.
            Assert.That(Directory.Exists(_handler.IdxDir), Is.True);
            var files = Directory.GetFiles(_handler.IdxDir, "*.*");

            Assert.That(files.Length, Is.GreaterThan(0));
            long size = 0;

            foreach (var file in files)
            {
                size += new FileInfo(file).Length;
            }
            Assert.That(size, Is.GreaterThan(0));

            long taskId = _handler.AddTask(names, 0);

            Assert.That(_handler.GetTask(taskId), Is.Not.Null);
            // NOTE(review): this wait has no upper bound; a task stuck in Running hangs the test.
            while (_handler.GetTask(taskId).State == TaskState.Running)
            {
                Thread.Sleep(1000);
            }
            var matches = _handler.GetTask(taskId).Matches;
            var rate    = _handler.GetTask(taskId).Rate;
            var str_res = IndexerHandler.TransformToSynonymBox(matches);

            Assert.That(rate, Is.EqualTo(100));
            Assert.That(matches.Count, Is.EqualTo(5));
            for (int i = 1; i <= 5; i++)
            {
                Assert.That(matches.ContainsKey(String.Format(nameFormat, i, now.ToString()).ToUpper()));
            }

            // Expected supplier of the first summary entry for names[0..5].
            Price[] firstEntryPrices = { price1, price1, price1, price2, price2, price1 };

            for (int i = 0; i < 5; i++)
            {
                var summary = matches[names[i].Trim().ToUpper()].Summary();
                Assert.That(summary.Count, Is.EqualTo(i == 4 ? 2 : 1));
                Assert.That(summary[0].FirmCode, Is.EqualTo(firstEntryPrices[i].Supplier.Id));
            }
            Assert.That(matches[names[4].Trim().ToUpper()].Summary()[1].FirmCode, Is.EqualTo(price1.Supplier.Id));

            // The same data seen through the synonym-box transformation.
            Assert.That(str_res.SynonymBox.SelectMany(s => s.SynonymList).Count(), Is.EqualTo(6));

            var sharedBox = str_res.SynonymBox[4];
            Assert.IsTrue(sharedBox.SynonymList.Count == 2);
            Assert.IsTrue(sharedBox.SynonymList[0].FirmCode == price2.Supplier.Id);
            // NOTE(review): Name comes from price2's supplier but FullName from price1's — confirm this is intended.
            Assert.IsTrue(sharedBox.SynonymList[0].FirmName == price2.Supplier.Name + " (" + price1.Supplier.FullName + ")");
            Assert.IsTrue(sharedBox.SynonymList[0].ProductId == product.Id);
            Assert.IsTrue(sharedBox.SynonymList[0].Junk == false);
            Assert.IsTrue(sharedBox.SynonymList[1].FirmCode == price1.Supplier.Id);
            Assert.IsTrue(sharedBox.SynonymList[1].FirmName == price1.Supplier.Name + " (" + price1.Supplier.FullName + ")");
            Assert.IsTrue(sharedBox.SynonymList[1].ProductId == product.Id);
            Assert.IsTrue(sharedBox.SynonymList[1].Junk == false);
            Assert.IsTrue(sharedBox.OriginalName == names[4]);

            // Second phase: add one more synonym and index it on top of the existing index.
            names.Add(String.Format(nameFormat, 6, now));
            synonyms.Clear();
            synonyms.Add(new ProductSynonym()
            {
                Product = product, Junk = false, Price = price1, Synonym = names[5]
            });
            using (new TransactionScope()) {
                synonyms[0].Save();
            }
            _handler.DoIndex(synonyms, true, false);

            taskId = _handler.AddTask(names, 0);
            Thread.Sleep(1000);
            Assert.That(_handler.GetTask(taskId), Is.Not.Null);
            // Poll at most ten times, ten seconds apart, until the task leaves the Running state.
            for (int i = 0; i < 10; i++)
            {
                if (_handler.GetTask(taskId).State != TaskState.Running)
                {
                    break;
                }
                Thread.Sleep(10000);
            }
            matches = _handler.GetTask(taskId).Matches;
            // Re-read the rate from the second task; previously the value captured
            // from the first task was asserted again, so this check could never fail.
            rate = _handler.GetTask(taskId).Rate;

            Assert.That(rate, Is.EqualTo(100));
            Assert.That(matches.Count, Is.EqualTo(6));
            for (int i = 1; i <= 6; i++)
            {
                Assert.That(matches.ContainsKey(String.Format(nameFormat, i, now.ToString()).ToUpper()));
            }

            for (int i = 0; i < 6; i++)
            {
                var summary = matches[names[i].Trim().ToUpper()].Summary();
                Assert.That(summary.Count, Is.EqualTo(i == 4 ? 2 : 1));
                Assert.That(summary[0].FirmCode, Is.EqualTo(firstEntryPrices[i].Supplier.Id));
            }
            Assert.That(matches[names[4].Trim().ToUpper()].Summary()[1].FirmCode, Is.EqualTo(price1.Supplier.Id));
        }
Example #2
0
        // Console entry point. A non-empty first line is passed to HtmlWorker as the
        // link to process: its pages are written out, indexed, and the inverted list
        // and TF-IDF data are saved. An empty line (or end of input) switches to query
        // mode: the saved inverted list is loaded and a search query is evaluated
        // against it. In both modes the TF-IDF list is then loaded and relevant
        // documents are computed for a fixed sample term.
        static async Task Main(string[] args)
        {
            Console.WriteLine("Введите ссылку: ");
            var input = Console.ReadLine()?.Trim();

            var filesProvider = new FilesProvider();

            // Console.ReadLine returns null at end of input; treat that like an empty
            // line instead of falling into the indexing branch with a null link.
            if (!string.IsNullOrEmpty(input))
            {
                var indexHandler = new IndexerHandler();
                var worker       = new HtmlWorker(input);

                await filesProvider.WriteAllData(worker.GetPages());

                indexHandler.IndexWords(worker.HtmlInfos);

                await filesProvider.WriteInvertedList(indexHandler.IndexedWords);

                // For every indexed word: IDF over the distinct documents that contain
                // it, and a per-document TF keyed by the document identifier.
                var tfIdfInfo = indexHandler.IndexedWords
                                .Select(x => new { x.Key, Value = x.Value.Distinct() })
                                .Select(x =>
                                        new WordFrequencyInfo
                {
                    Word = x.Key,
                    IDF  = Math.Log10(worker.HtmlInfos.Count / (double)x.Value.Count()),
                    TF   = x.Value.ToDictionary(q => q, q =>
                    {
                        // NOTE(review): FirstOrDefault(...)?.Content can yield null, but the
                        // result is dereferenced below without a check — confirm that every
                        // indexed document identifier has a matching HtmlInfos entry.
                        var docContentInfo = worker.HtmlInfos.FirstOrDefault(z => z.Level == q)?.Content;

                        var wordsCount =
                            (double)docContentInfo.WordsInfo.Count(z => z.LemmatizedWord == x.Key);

                        return(wordsCount / docContentInfo.WordsCount);
                    })
                });

                await filesProvider.WriteTfIdf(tfIdfInfo);
            }
            else
            {
                var indexedWords = await filesProvider.ReadInvertedList();

                Console.WriteLine("Введите запрос: ");
                // NOTE(review): ReadLine may return null at end of input; Split below would then throw.
                var searchQuery = Console.ReadLine();

                //searchQuery = "Android OR fb OR Биробиджан";

                var query        = QueryWorker.ParseQuery(searchQuery);
                var searchResult = indexedWords.Where(query.Compile()).ToList();

                var words = searchQuery.Split(' ');

                var result = QueryWorker.GetDocNumbers(words, searchResult);

                Console.WriteLine("Результаты:");
                foreach (var item in result)
                {
                    Console.Write(item + " ");
                }
            }

            var sourceTfIdfList = await filesProvider.ReadTfIdfList();

            var searchQueryTfIdf = filesProvider.GetSearchQueryTfIdf(sourceTfIdfList, new[] { "ГАБДУЛИНА" }).ToList();

            filesProvider.GetRelevantDocs(await filesProvider.ReadTfIdfList(), searchQueryTfIdf);
            Console.WriteLine();
        }