// Integration test of indexing and matching through _handler.
// Phase 1: saves six test synonyms (five distinct names; the fifth name is registered for
// both prices), indexes them, runs a matching task over the five names and checks the
// matches and the synonym box built from them.
// Phase 2: saves a sixth synonym, calls DoIndex again with different flags, runs a second
// task over all six names and checks its matches and rate.
public void DoMatchTest()
{
    // Remove any index directory left over from a previous run.
    if (Directory.Exists(_handler.IdxDir))
    {
        Directory.Delete(_handler.IdxDir, true);
    }

    // Two prices that belong to different suppliers, and any product to attach synonyms to.
    Price price1 = session.Query<Price>().FirstOrDefault();
    Price price2 = session.Query<Price>().FirstOrDefault(p => p.Id != price1.Id && p.Supplier.Id != price1.Supplier.Id);
    Product product = Product.Queryable.FirstOrDefault();

    // The timestamp is embedded in every name so each run produces its own set of names.
    DateTime now = DateTime.Now;
    IList<string> names = new List<string>
    {
        String.Format("Тестовое наименование 1 ({0})", now),
        String.Format("Тестовое наименование 2 ({0})", now),
        String.Format("Тестовое наименование 3 ({0})", now),
        String.Format("Тестовое наименование 4 ({0})", now),
        String.Format("Тестовое наименование 5 ({0})", now)
    };

    // Names 1-3 belong to price1, name 4 to price2, name 5 to price2 and then price1.
    IList<ProductSynonym> synonyms = new List<ProductSynonym>
    {
        new ProductSynonym() { Product = product, Junk = false, Price = price1, Synonym = names[0] },
        new ProductSynonym() { Product = product, Junk = false, Price = price1, Synonym = names[1] },
        new ProductSynonym() { Product = product, Junk = false, Price = price1, Synonym = names[2] },
        new ProductSynonym() { Product = product, Junk = false, Price = price2, Synonym = names[3] },
        new ProductSynonym() { Product = product, Junk = false, Price = price2, Synonym = names[4] },
        new ProductSynonym() { Product = product, Junk = false, Price = price1, Synonym = names[4] }
    };

    using (new TransactionScope())
    {
        foreach (var synonymProduct in synonyms)
        {
            synonymProduct.Save();
        }
    }

    if (Directory.Exists(_handler.IdxDir))
    {
        Directory.Delete(_handler.IdxDir, true);
    }

    // ---- Phase 1: index the saved synonyms and check that index files were written ----
    _handler.DoIndex(synonyms, false, true);
    Assert.That(Directory.Exists(_handler.IdxDir), Is.True);

    var files = Directory.GetFiles(_handler.IdxDir, "*.*");
    Assert.That(files.Length, Is.GreaterThan(0));

    long size = 0;
    foreach (var file in files)
    {
        size += new FileInfo(file).Length;
    }
    Assert.That(size, Is.GreaterThan(0));

    long taskId = _handler.AddTask(names, 0);
    Assert.That(_handler.GetTask(taskId), Is.Not.Null);

    // Poll once a second until the task leaves the Running state (no upper bound here).
    while (_handler.GetTask(taskId).State == TaskState.Running)
    {
        Thread.Sleep(1000);
    }

    var matches = _handler.GetTask(taskId).Matches;
    var rate = _handler.GetTask(taskId).Rate;
    var str_res = IndexerHandler.TransformToSynonymBox(matches);

    Assert.That(rate, Is.EqualTo(100));
    Assert.That(matches.Count, Is.EqualTo(5));

    // The match keys looked up here are the upper-cased names.
    for (int i = 1; i <= 5; i++)
    {
        Assert.That(matches.ContainsKey(String.Format("Тестовое наименование {0} ({1})", i, now.ToString()).ToUpper()));
    }

    // Number of summary entries per name: one each, except name 5 which has two.
    Assert.That(matches[names[0].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[1].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[2].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[3].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[4].Trim().ToUpper()].Summary().Count, Is.EqualTo(2));

    // Supplier of every summary entry.
    Assert.That(matches[names[0].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price1.Supplier.Id));
    Assert.That(matches[names[1].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price1.Supplier.Id));
    Assert.That(matches[names[2].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price1.Supplier.Id));
    Assert.That(matches[names[3].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price2.Supplier.Id));
    Assert.That(matches[names[4].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price2.Supplier.Id));
    Assert.That(matches[names[4].Trim().ToUpper()].Summary()[1].FirmCode, Is.EqualTo(price1.Supplier.Id));

    // The synonym box holds six synonyms in total; its fifth entry carries both suppliers of name 5.
    Assert.That(str_res.SynonymBox.SelectMany(s => s.SynonymList.Select(l => l)).ToList().Count, Is.EqualTo(6));
    Assert.IsTrue(str_res.SynonymBox[4].SynonymList.Count == 2);

    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[0].FirmCode == price2.Supplier.Id);
    // NOTE(review): the expected name of the price2 entry is built with price1.Supplier.FullName;
    // this may be a copy-paste slip - confirm against TransformToSynonymBox and the test data.
    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[0].FirmName == price2.Supplier.Name + " (" + price1.Supplier.FullName + ")");
    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[0].ProductId == product.Id);
    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[0].Junk == false);

    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[1].FirmCode == price1.Supplier.Id);
    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[1].FirmName == price1.Supplier.Name + " (" + price1.Supplier.FullName + ")");
    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[1].ProductId == product.Id);
    Assert.IsTrue(str_res.SynonymBox[4].SynonymList[1].Junk == false);
    Assert.IsTrue(str_res.SynonymBox[4].OriginalName == names[4]);

    // ---- Phase 2: add a sixth synonym, index it and match all six names ----
    names.Add(String.Format("Тестовое наименование 6 ({0})", now));
    synonyms.Clear();
    synonyms.Add(new ProductSynonym() { Product = product, Junk = false, Price = price1, Synonym = names[5] });

    using (new TransactionScope())
    {
        synonyms[0].Save();
    }

    _handler.DoIndex(synonyms, true, false);
    taskId = _handler.AddTask(names, 0);
    Thread.Sleep(1000);
    Assert.That(_handler.GetTask(taskId), Is.Not.Null);

    // Bounded wait: at most 10 polls, 10 seconds apart.
    // NOTE(review): execution continues even if the task is still Running after the last poll.
    for (int i = 0; i < 10; i++)
    {
        if (_handler.GetTask(taskId).State != TaskState.Running)
        {
            break;
        }
        Thread.Sleep(10000);
    }

    matches = _handler.GetTask(taskId).Matches;
    // Re-read the rate from the second task; otherwise the assertion below would only
    // re-check the value captured from the first task.
    rate = _handler.GetTask(taskId).Rate;

    Assert.That(rate, Is.EqualTo(100));
    Assert.That(matches.Count, Is.EqualTo(6));

    for (int i = 1; i <= 6; i++)
    {
        Assert.That(matches.ContainsKey(String.Format("Тестовое наименование {0} ({1})", i, now.ToString()).ToUpper()));
    }

    Assert.That(matches[names[0].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[1].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[2].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[3].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));
    Assert.That(matches[names[4].Trim().ToUpper()].Summary().Count, Is.EqualTo(2));
    Assert.That(matches[names[5].Trim().ToUpper()].Summary().Count, Is.EqualTo(1));

    Assert.That(matches[names[0].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price1.Supplier.Id));
    Assert.That(matches[names[1].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price1.Supplier.Id));
    Assert.That(matches[names[2].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price1.Supplier.Id));
    Assert.That(matches[names[3].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price2.Supplier.Id));
    Assert.That(matches[names[4].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price2.Supplier.Id));
    Assert.That(matches[names[4].Trim().ToUpper()].Summary()[1].FirmCode, Is.EqualTo(price1.Supplier.Id));
    Assert.That(matches[names[5].Trim().ToUpper()].Summary()[0].FirmCode, Is.EqualTo(price1.Supplier.Id));
}
// Console entry point with two modes, selected by the first line the user enters.
//  - Non-empty input: passed to HtmlWorker; the pages it returns are written out, their words
//    are indexed, and the inverted list plus per-word TF/IDF values are written via FilesProvider.
//  - Empty input (or end of input): the stored inverted list is read, a query is read from the
//    console, and the matching document numbers are printed.
// In both modes the stored TF-IDF list is then read and GetRelevantDocs is called for a
// fixed one-word query.
static async Task Main(string[] args)
{
    Console.WriteLine("Введите ссылку: ");
    // Console.ReadLine() returns null when no more input is available, so input may be null here.
    var input = Console.ReadLine()?.Trim();
    var filesProvider = new FilesProvider();

    // IsNullOrEmpty instead of "!= string.Empty": a null input must not reach HtmlWorker.
    if (!string.IsNullOrEmpty(input))
    {
        var indexHandler = new IndexerHandler();
        var worker = new HtmlWorker(input);

        await filesProvider.WriteAllData(worker.GetPages());
        indexHandler.IndexWords(worker.HtmlInfos);
        await filesProvider.WriteInvertedList(indexHandler.IndexedWords);

        var tfIdfInfo = indexHandler.IndexedWords
            // Materialize the distinct values once; they are used for both the count and the TF map.
            .Select(x => new { x.Key, Value = x.Value.Distinct().ToList() })
            .Select(x => new WordFrequencyInfo
            {
                Word = x.Key,
                // IDF = log10(total documents / documents containing the word).
                IDF = Math.Log10(worker.HtmlInfos.Count / (double)x.Value.Count),
                // TF per document = occurrences of the word / total words of that document.
                TF = x.Value.ToDictionary(q => q, q =>
                {
                    var docContentInfo = worker.HtmlInfos.FirstOrDefault(z => z.Level == q)?.Content;
                    if (docContentInfo == null)
                    {
                        // No content for this document: report a zero frequency
                        // instead of failing with a NullReferenceException.
                        return 0d;
                    }

                    var wordsCount = (double)docContentInfo.WordsInfo.Count(z => z.LemmatizedWord == x.Key);
                    return wordsCount / docContentInfo.WordsCount;
                })
            });

        await filesProvider.WriteTfIdf(tfIdfInfo);
    }
    else
    {
        var indexedWords = await filesProvider.ReadInvertedList();

        Console.WriteLine("Введите запрос: ");
        // Fall back to an empty query at end of input so Split below cannot throw on null.
        var searchQuery = Console.ReadLine() ?? string.Empty;
        // Example query: "Android OR fb OR Биробиджан"

        var query = QueryWorker.ParseQuery(searchQuery);
        var searchResult = indexedWords.Where(query.Compile()).ToList();
        var words = searchQuery.Split(' ');
        var result = QueryWorker.GetDocNumbers(words, searchResult);

        Console.WriteLine("Результаты:");
        foreach (var item in result)
        {
            Console.Write(item + " ");
        }
    }

    var sourceTfIdfList = await filesProvider.ReadTfIdfList();
    var searchQueryTfIdf = filesProvider.GetSearchQueryTfIdf(sourceTfIdfList, new[] { "ГАБДУЛИНА" }).ToList();
    // NOTE(review): the TF-IDF list is read a second time here; sourceTfIdfList could probably be
    // reused if GetSearchQueryTfIdf does not modify or consume it - confirm before changing.
    filesProvider.GetRelevantDocs(await filesProvider.ReadTfIdfList(), searchQueryTfIdf);
    Console.WriteLine();
}