/// <summary>
/// Manual exercise of <c>TaskManager</c>: builds a chain of tasks that reads the
/// page count of a fixed OLX listing, prints it, creates one preview-loading task
/// per page (each followed by a task printing the prices it found), prints
/// "Completed", and finally starts the manager.
/// </summary>
private static void TestTaskmanager()
{
    var taskManager = new TaskManager();
    var contextFactory = new BrowsingContextFactory();
    var rootLink = "https://www.olx.ua/nedvizhimost/arenda-kvartir/dolgosrochnaya-arenda-kvartir/dnepr/";

    // First task in the chain: determine how many result pages the listing has.
    var pagesCountTask = new GetPagesCountActivity(rootLink, contextFactory.GetNew()).AsSimpleTask(taskManager);

    // Second task: report the page count produced by the first one.
    var reportPagesCountTask = new SimpleTask(() => System.Console.WriteLine("PagesCount: {0}", pagesCountTask.Result));

    reportPagesCountTask
        .ContinueWith(() =>
        {
            var pageSuffixFormat = "?page={0}";
            var pageTasks = new ITask<IEnumerable<PreviewAdModel>>[pagesCountTask.Result];

            // Slot N holds the task for page N + 1; page 1 is the bare listing link.
            for (int slot = 0; slot < pagesCountTask.Result; slot++)
            {
                int pageNumber = slot + 1;
                string pageLink;
                if (pageNumber == 1)
                {
                    pageLink = rootLink;
                }
                else
                {
                    pageLink = rootLink + string.Format(pageSuffixFormat, pageNumber);
                }

                pageTasks[slot] = new GetPreviewModelsActivity(pageLink, contextFactory).AsSimpleTask(taskManager);
            }

            // Each page task is followed by a task that prints the price of every ad it returned.
            pageTasks.ForEach(pageTask => pageTask.ContinueWith(() =>
            {
                return new SimpleTask(() =>
                {
                    pageTask.Result.ForEach(ad => System.Console.WriteLine(ad.Price));
                });
            }));

            return pageTasks;
        })
        .ContinueWith(() =>
        {
            return new SimpleTask(() => System.Console.WriteLine("Completed"));
        });

    pagesCountTask.ContinueWith(reportPagesCountTask);

    taskManager.AddTask(pagesCountTask);
    taskManager.Start();
}
/// <summary>
/// Loads the page at <c>_link</c> and converts every <c>.offer</c> element on it
/// into a <see cref="PreviewAdModel"/> (ad id, temp id, price, phone numbers).
/// The page is requested up to <c>_maxEfforts</c> times; an attempt that yields
/// no offers is followed by a 3 second pause before the next one.
/// </summary>
/// <returns>
/// The models parsed from the first attempt that returned offers; an empty list
/// when no attempt returned any. Offers that fail to parse are logged and skipped.
/// </returns>
private List<PreviewAdModel> GetPreviewModels()
{
    // NOTE(review): the '.' before "html" is unescaped and therefore matches any character.
    const string tempIdPattern = "ID(.*).html";
    const int retryDelayMs = 3000;

    var res = new List<PreviewAdModel>();

    for (int attempt = 0; attempt < _maxEfforts; attempt++)
    {
        //TODO: Create logging
        Console.WriteLine("Processing page: {0}", _link);

        var brContext = _brContextFactory.GetNew();
        var doc = brContext.OpenAsync(_link).Result;
        var offers = doc.QuerySelectorAll(".offer");

        if (offers.Any())
        {
            // Count every offer, including ones that fail to parse, so the number
            // in the log always matches the offer's position on the page.
            var offerNumber = 0;
            foreach (var offer in offers)
            {
                offerNumber++;
                try
                {
                    // TODO: Create logging
                    Console.WriteLine("\tProcessing offer: {0}", offerNumber);

                    var model = new PreviewAdModel();
                    model.AdId = uint.Parse(offer.Children.First().Attributes["data-id"].Value);
                    model.TempId = Regex.Match(offer.QuerySelector(".detailsLink").Attributes["href"].Value, tempIdPattern).Groups[1].Value;

                    var priceString = offer.QuerySelector(".price").Children.First().InnerHtml;
                    // Strip currency text, spaces and dots before parsing.
                    // NOTE(review): double.Parse uses the current culture here - confirm that is intended.
                    model.Price = double.Parse(Regex.Replace(priceString, "[А-Яа-яA-Za-z$ .]", ""));

                    model.PhoneNumbers.AddRange(GetPhoneNumbers(model.TempId, _brContextFactory.GetNew()));
                    res.Add(model);
                }
                catch (Exception ex)
                {
                    //TODO: Create logging
                    Console.WriteLine(ex.Message);
                    System.Diagnostics.Debug.WriteLine(ex.ToString());
                }
            }

            // The page produced offers; no further attempts are needed.
            break;
        }

        // Pause only when another attempt will follow; sleeping after the last
        // attempt would just delay returning the (empty) result.
        if (attempt + 1 < _maxEfforts)
        {
            Thread.Sleep(retryDelayMs);
        }
    }

    return res;
}
/// <summary>
/// Collects preview models from every page of the listing at <paramref name="link"/>.
/// The page count is obtained via <c>GetPagesCount</c>; page 1 is the link itself and
/// later pages append <c>?page=N</c>. An empty console line is written after each page.
/// </summary>
/// <param name="link">Link of the first listing page.</param>
/// <returns>Preview models of all pages, in page order.</returns>
private static List<PreviewAdModel> GetPreviewModels(string link)
{
    var contextFactory = new BrowsingContextFactory();
    var collected = new List<PreviewAdModel>();
    var pageSuffixFormat = "?page={0}";
    var totalPages = GetPagesCount(link, contextFactory.GetNew());

    for (int page = 1; page <= totalPages; page++)
    {
        string pageLink;
        if (page == 1)
        {
            pageLink = link;
        }
        else
        {
            pageLink = link + string.Format(pageSuffixFormat, page);
        }

        var pageModels = GetPreviewModels(contextFactory, pageLink);
        collected.AddRange(pageModels);
        System.Console.WriteLine();
    }

    return collected;
}
/// <summary>
/// Walks every page of the listing at <paramref name="link"/>, opens each ad linked
/// from it and builds an <see cref="AdModel"/> (link, temp id, phone numbers,
/// private/business flag, price, rooms) for every ad id not processed before.
/// Each listing page is requested up to three times; an attempt that yields no ad
/// links is followed by a 3 second pause before the next one.
/// </summary>
/// <param name="link">Link of the first listing page.</param>
/// <returns>
/// The models of all successfully parsed, non-duplicate ads. Ads that fail to
/// parse are logged via <c>Logger.Error</c> and skipped.
/// </returns>
private static List<AdModel> GetLinks(string link)
{
    const int maxAttempts = 3;
    const int retryDelayMs = 3000;
    const string linkPage = "?page={0}";
    // NOTE(review): the '.' before "html" is unescaped and therefore matches any character.
    const string regexPattern = "ID(.*).html";
    const string isPrivateSelector = "#offerdescription > div.clr.descriptioncontent.marginbott20 > table > tbody > tr:nth-child(1) > td:nth-child(1) > table > tbody > tr > td > strong > a";
    const string roomsSelector = "#offerdescription > div.clr.descriptioncontent.marginbott20 > table > tbody > tr:nth-child(2) > td.col > table > tbody > tr > td > strong";

    var res = new List<AdModel>();
    var brContextFactory = new BrowsingContextFactory();
    Logger.Info(String.Format("Start processing for: {0}", link));

    var brContext = brContextFactory.GetNew();
    // Hash set instead of a list: membership is checked once per ad link.
    var processedIds = new HashSet<string>();

    var pagesCount = GetPagesCount(link, brContextFactory.GetNew());
    Logger.Info(String.Format("Total pages: {0}", pagesCount));

    for (int i = 1; i <= pagesCount; i++)
    {
        // The page link does not change between attempts, so build it once.
        var procLink = i == 1 ? link : link + String.Format(linkPage, i);

        for (int attempt = 0; attempt < maxAttempts; attempt++)
        {
            Logger.Info("Processing page: {0}", i);

            var doc = brContext.OpenAsync(procLink).Result;
            var rawLinks = doc.QuerySelectorAll(".marginright5.link.linkWithHash.detailsLink");

            if (rawLinks.Any())
            {
                // Count every link, including duplicates and failures, so the number
                // in the log always matches the link's position on the page.
                var linkNumber = 0;
                foreach (var rawLink in rawLinks)
                {
                    linkNumber++;
                    try
                    {
                        Logger.Debug(String.Format("\tProcessing link: {0}", linkNumber));

                        var href = rawLink.Attributes["href"].Value;
                        var resLink = new AdModel { Link = href };
                        resLink.TempId = Regex.Match(resLink.Link, regexPattern).Groups[1].Value;

                        if (processedIds.Contains(resLink.TempId))
                        {
                            Logger.Warn("Dublicate");
                            continue;
                        }

                        // Start loading the ad page first, fetch the phone numbers in the
                        // meantime, and only then block on the page.
                        var priceTask = brContextFactory.GetNew().OpenAsync(href);
                        resLink.PhoneNumbers.AddRange(GetPhoneNumbers(resLink.TempId, brContextFactory.GetNew()));
                        var docAd = priceTask.Result;

                        var priceString = docAd.QuerySelector(".pricelabel.tcenter").Children[0].InnerHtml;

                        var isPrivateString = docAd.QuerySelector(isPrivateSelector).InnerHtml;
                        isPrivateString = Regex.Replace(isPrivateString, "[\t\n]", "");
                        resLink.IsPrivate = !isPrivateString.Equals("Бизнес");

                        // Strip currency text, spaces and dots before parsing.
                        // NOTE(review): Double.Parse uses the current culture here - confirm that is intended.
                        string value = Regex.Replace(priceString, "[А-Яа-яA-Za-z$ .]", "");
                        resLink.Price = Double.Parse(value);

                        string rooms = docAd.QuerySelector(roomsSelector).InnerHtml;
                        resLink.Rooms = Int32.Parse(Regex.Replace(rooms, "[А-Яа-яA-Za-z .]", ""));

                        res.Add(resLink);
                        // Mark the id only after the ad parsed successfully, so a failed ad
                        // can still be picked up if its id shows up again.
                        processedIds.Add(resLink.TempId);
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex.ToString());
                    }
                }

                // The page produced links; no further attempts are needed.
                break;
            }

            // Pause only when another attempt will follow.
            if (attempt + 1 < maxAttempts)
            {
                Thread.Sleep(retryDelayMs);
            }
        }

        Console.WriteLine();
    }

    return res;
}