/// <summary>
/// Runs the product parser for <paramref name="path"/>, compares the adverts it collected with
/// the ones stored in <c>storage.xml</c> (located in the current working directory), prints
/// every advert that is not stored yet and writes the merged set back to the storage file.
/// </summary>
/// <param name="conn">Connection settings passed to the parser via <c>SetConnection</c>.</param>
/// <param name="path">Path passed to the parser's <c>Select</c> call.</param>
public static void Parce(IConnectionStringBuilder conn, ICPath path)
{
    // One comparer instance is used for every set operation so that "same advert" means the
    // same thing everywhere in this method.
    // NOTE(review): assumes SameADComparer is stateless - confirm.
    var comparer = new SameADComparer();

    // The list is handed to the parser; it is presumably filled while Select runs.
    List<AD> listAD = new List<AD>();
    IDataParser parser = new ParcerProduct(listAD);
    parser.SetConnection(conn);
    parser.Select(path);

    // Collapse duplicates among the freshly parsed adverts.
    HashSet<AD> newAds = new HashSet<AD>(listAD, comparer);

    // Path.Combine picks the platform's separator instead of a hard-coded backslash.
    ADMapper mapper = new ADMapper(Path.Combine(Directory.GetCurrentDirectory(), "storage.xml"));

    HashSet<AD> storedAds = new HashSet<AD>(comparer);
    foreach (AD stored in mapper.ReadAll())
    {
        storedAds.Add(stored);
    }

    // Keep only the adverts that are not in the store yet.
    newAds.ExceptWith(storedAds);

    // After the subtraction, "the store would change" is exactly "something is left".
    if (newAds.Count == 0)
    {
        return;
    }

    // A balloon notification (ShowBalloon) for the new items used to be raised here; it is disabled.
    foreach (AD ad in newAds)
    {
        Console.WriteLine(ad);
    }

    // Merge with the same comparer that was used for the subtraction above.
    listAD = storedAds.Union(newAds, comparer).ToList();
    mapper.WriteAll(listAD);
}
/// <summary>
/// Builds the full address for <paramref name="cPath"/>, requests it and keeps the parsed
/// document in <c>_doc</c> after passing it through <c>RemoveOffset</c>.
/// </summary>
/// <param name="cPath">Path to load; any occurrence of the connection string inside it is
/// removed before the connection string is prepended.</param>
public override void Select(ICPath cPath)
{
    string baseAddress = connectionStringBuilder.GetCeonnectionString();

    // Strip the base address from the path, then prepend it exactly once.
    string pathWithoutBase = cPath.GetPath().Replace(baseAddress, "");
    string content = getRequest(baseAddress + pathWithoutBase);

    _doc = GetDocument(ref content);
    // Logging of the raw response is currently disabled: SaveToLog(content, "logProduct.txt");
    RemoveOffset(_doc);
}
/// <summary>
/// Lazily yields one <c>AdOlxDirty</c> per <c>tr.wrap</c> row of the page at <c>_currentPath</c>,
/// then keeps following the "next page" link for as long as <c>TryParseNextPage</c> finds one.
/// </summary>
/// <remarks>
/// A missing location yields an empty string and a missing or non-numeric <c>data-id</c> yields
/// <c>0</c>. The title heading, link and price nodes are required: a row without them still
/// raises <see cref="NullReferenceException"/> during enumeration.
/// </remarks>
/// <returns>A deferred sequence; pages are requested only while the sequence is enumerated.</returns>
public IEnumerable<AdOlxDirty> Get()
{
    const string PrimaryHeadingXPath = ".//h3[@class='x-large lheight20 margintop5']";
    const string FallbackHeadingXPath = ".//h3[@class='lheight22 margintop5']";

    // A loop instead of a recursive iterator: items are yielded directly rather than being
    // re-yielded through one nested enumerator per previously visited page.
    while (true)
    {
        Select(_currentPath);

        HtmlNodeCollection rows = _doc?.DocumentNode?.SelectNodes(".//tr[@class='wrap']");
        if (rows == null)
        {
            yield break;
        }

        foreach (HtmlNode row in rows)
        {
            // Optional field: any missing node along the chain results in an empty location.
            string location = row.SelectSingleNode(".//td[@valign='bottom']")
                ?.SelectSingleNode(".//div[@class='space rel']")
                ?.SelectSingleNode(".//small [@class='breadcrumb x-normal']")
                ?.InnerText.Trim() ?? "";

            // Optional field: TryParse leaves id at 0 when the attribute is absent or not a number.
            string rawId = row.SelectSingleNode(".//table")?.Attributes["data-id"]?.Value;
            int.TryParse(rawId, out int id);

            // The heading is looked up under two alternative class lists.
            HtmlNode heading = row.SelectSingleNode(PrimaryHeadingXPath)
                ?? row.SelectSingleNode(FallbackHeadingXPath);
            string title = heading.SelectSingleNode(".//strong").InnerText;
            string href = heading.SelectSingleNode(".//a").Attributes["href"].Value;

            HtmlNode priceNode = row.SelectSingleNode(".//p[@class='price']").SelectSingleNode(".//strong");
            string price = priceNode.InnerText.Replace(" ", "");

            yield return new AdOlxDirty
            {
                Id = id,
                Title = title,
                Href = href,
                PriceWithCurrency = price,
                Location = location
            };
        }

        if (!TryParseNextPage(_doc, out string nextHref))
        {
            yield break;
        }

        // Random pause of 1000-2999 ms between page requests.
        Thread.Sleep(new Random().Next(1000, 3000));

        // Advance the path that the next Select call reads. _initPath is deliberately left
        // untouched so the start path stays available for later enumerations.
        _currentPath = new CPath(nextHref);
    }
}
/// <summary>
/// Builds the full address for <paramref name="cPath"/>, requests it, logs the address together
/// with the response to <c>logMenu.txt</c> and hands the parsed document to <c>state.SelectMenu</c>.
/// </summary>
/// <param name="cPath">Path to load; any occurrence of the connection string inside it is
/// removed before the connection string is prepended.</param>
public override void Select(ICPath cPath)
{
    string baseAddress = connectionStringBuilder.GetCeonnectionString();
    string address = baseAddress + cPath.GetPath().Replace(baseAddress, "");

    string content = getRequest(address);
    HtmlDocument document = GetDocument(ref content);

    // Logged after GetDocument because content is passed by ref and may have been changed by it.
    SaveToLog($"{address}\n{content}", "logMenu.txt");

    state.SelectMenu(document);
}
/// <summary>
/// Loads the page addressed by <paramref name="cPath"/>, logs the response to
/// <c>logProduct.txt</c>, runs <c>RemoveOffset</c> and <c>PrintProduct</c> on the parsed
/// document and then repeats the same for the next page, if <c>TryParseNextPage</c> finds one.
/// </summary>
/// <param name="cPath">Path to load; any occurrence of the connection string inside it is
/// removed before the connection string is prepended.</param>
public override void Select(ICPath cPath)
{
    string baseAddress = connectionStringBuilder.GetCeonnectionString();
    string pageAddress = baseAddress + cPath.GetPath().Replace(baseAddress, "");

    string content = getRequest(pageAddress);
    HtmlDocument page = GetDocument(ref content);
    SaveToLog(content, "logProduct.txt");

    RemoveOffset(page);
    PrintProduct(page);

    if (!TryParseNextPage(page, out string href))
    {
        return;
    }

    // Random pause of 1000-2999 ms before the next page is requested.
    Thread.Sleep(new Random().Next(1000, 3000));
    Select(new CPath(href));
}
/// <summary>
/// Lazily yields one <c>ProductDirty</c> per catalog tile of the page at <c>_currentPath</c>,
/// then keeps following the "next page" link for as long as <c>TryParseNextPage</c> finds one,
/// incrementing <c>_currentPageNum</c> for every page change.
/// </summary>
/// <remarks>
/// Only tiles inside the first matching tile container are read. A tile without a price node
/// is reported with the price <c>"0 грн."</c>; an id that is not a number is reported as
/// <c>"0"</c>. The id, title and link nodes are required: a tile without them still raises
/// <see cref="NullReferenceException"/> during enumeration.
/// </remarks>
/// <returns>A deferred sequence; pages are requested only while the sequence is enumerated.</returns>
public IEnumerable<ProductDirty> Get()
{
    // A loop instead of a recursive iterator: items are yielded directly rather than being
    // re-yielded through one nested enumerator per previously visited page.
    while (true)
    {
        Select(_currentPath);

        HtmlNodeCollection tiles = _doc?.DocumentNode
            ?.SelectNodes(".//div[@class='g-i-tile-l g-i-tile-catalog-hover-left-side clearfix']")
            ?.FirstOrDefault()
            ?.SelectNodes(".//div[@class='g-i-tile g-i-tile-catalog']");
        if (tiles == null)
        {
            yield break;
        }

        foreach (HtmlNode tile in tiles)
        {
            // The id is round-tripped through int, so non-numeric text ends up as "0".
            string rawId = tile.SelectSingleNode(".//div[@class='g-id']").InnerText.Trim();
            int.TryParse(rawId, out int id);

            HtmlNode titleNode = tile.SelectSingleNode(".//div[@class='g-i-tile-i-title clearfix']");
            string href = titleNode.SelectSingleNode(".//a").Attributes["href"].Value;
            string title = titleNode.InnerText.Trim();

            // The category is a fixed value; it is not read from the page.
            string category = "видеокарты";

            string priceWithCurrency = tile.SelectSingleNode(".//div[@class='g-price-uah']")?.InnerText.Trim();

            yield return new ProductDirty
            {
                Id = id.ToString(),
                Title = title,
                Href = href,
                PriceWithCurrency = priceWithCurrency ?? "0 грн.",
                Category = category
            };
        }

        if (!TryParseNextPage(_doc, out string nextHref))
        {
            yield break;
        }

        _currentPageNum++;

        // Random pause of 1000-2999 ms between page requests.
        Thread.Sleep(new Random().Next(1000, 3000));

        // Store the next page without the connection string prefix.
        _currentPath = new CPath(nextHref.Replace(connectionStringBuilder.GetCeonnectionString(), string.Empty));
    }
}
/// <summary>
/// Creates a parser that uses <paramref name="conn"/> as its connection and remembers
/// <paramref name="initPath"/> as the initial path.
/// </summary>
/// <param name="conn">Connection settings, forwarded to <c>SetConnection</c>.</param>
/// <param name="initPath">Initial path, stored in <c>_initPath</c>.</param>
public ParcerProduct(IConnectionStringBuilder conn, ICPath initPath)
{
    this.SetConnection(conn);
    this._initPath = initPath;
}
/// <summary>
/// Resets <c>_currentPath</c> to a fresh <c>CPath</c> built from the initial path and returns
/// an enumerator over <c>Get()</c>.
/// </summary>
/// <returns>An enumerator over the sequence produced by <c>Get()</c>.</returns>
public IEnumerator<AdOlxDirty> GetEnumerator()
{
    // Every enumeration starts again from the initial path.
    string startPath = _initPath.GetPath();
    _currentPath = new CPath(startPath);

    IEnumerable<AdOlxDirty> ads = Get();
    return ads.GetEnumerator();
}
/// <summary>
/// Selects the content addressed by <paramref name="cPath"/>. What "selecting" does is defined
/// entirely by the overriding class; this declaration has no body.
/// </summary>
/// <param name="cPath">Path object identifying what to select.</param>
public abstract void Select(ICPath cPath);
/// <summary>
/// Selects the content addressed by <paramref name="cPath"/>. What "selecting" does is defined
/// entirely by the overriding class; this declaration has no body.
/// </summary>
/// <param name="cPath">Path object identifying what to select.</param>
public abstract void Select(ICPath cPath);
/// <summary>
/// Resets the paging state (<c>_currentPageNum</c> to <c>Constraints.FIRST_PAGE_INDEX</c> and
/// <c>_currentPath</c> to a fresh <c>CPath</c> built from the initial path) and returns an
/// enumerator over <c>Get()</c>.
/// </summary>
/// <returns>An enumerator over the sequence produced by <c>Get()</c>.</returns>
public IEnumerator<ProductDirty> GetEnumerator()
{
    // Every enumeration starts again from the first page.
    _currentPageNum = Constraints.FIRST_PAGE_INDEX;

    string startPath = _initPath.GetPath();
    _currentPath = new CPath(startPath);

    IEnumerable<ProductDirty> products = Get();
    return products.GetEnumerator();
}