/// <summary>
/// Scans every listing page of a single category and stores the result in the file
/// named by <c>FileName</c>.
/// </summary>
/// <param name="currentUrl">Address of the category listing to scan.</param>
private void StartParse(string currentUrl)
{
    // Establishes EndPage; a failed download there raises Abort.
    ParsePagesCount(currentUrl);
    if (Abort)
    {
        return;
    }

    using (_writer = File.Create(FileName))
    {
        if (FileName.Contains("xlsx"))
        {
            InitXlsxDoc();
        }

        // The page counter is handed over by reference, so it stays a plain local.
        var page = 1;
        while (page <= EndPage)
        {
            ParsePageLinks(ref page, currentUrl);
            if (Abort)
            {
                // Leaving here skips both the final workbook write and the success message below.
                return;
            }

            page++;
        }

        if (FileName.Contains("xlsx"))
        {
            _xssf.Write(_writer);
        }
    }

    var done = new LogItem
    {
        Status = "OK",
        Result = $"Все страницы категории {currentUrl} просканированы!"
    };
    OnLogResult?.Invoke(done);
}
/// <summary>
/// Entry point of the parser: creates the web client (through <c>Proxy</c> when one is
/// configured) and begins walking categories from <c>ParseUrl</c>.
/// </summary>
public void Start()
{
    if (Abort)
    {
        return;
    }

    try
    {
        var client = new WebClient();
        if (Proxy != null)
        {
            client.Proxy = new WebProxy(Proxy);
        }

        client.Encoding = Encoding.UTF8;

        // Published only after it has been fully configured.
        _client = client;
    }
    catch (Exception exception)
    {
        var failure = new LogItem { Status = "Error", Result = exception.Message };
        OnLogResult?.Invoke(failure);
        Abort = true;
        return;
    }

    CheckCategory(ParseUrl);
}
// Output stream for which an abort has already been handled. Only its identity is
// compared, hence the loose type. A new stream assigned to _writer re-arms AbortParser.
private object _abortFlushedWriter;

/// <summary>
/// Stops the scan: saves the workbook collected so far (xlsx output only), logs a
/// warning, raises <c>Abort</c> and collapses <c>EndPage</c> to 1 so page loops finish.
/// </summary>
/// <remarks>
/// Safe to call repeatedly for the same output stream. A failed item download calls
/// this method, and the item loop then calls it again when it notices <c>Abort</c>;
/// without the guard the workbook would be written to the same stream twice and the
/// warning would be logged twice.
/// </remarks>
private void AbortParser()
{
    var alreadyHandled = _writer != null && ReferenceEquals(_abortFlushedWriter, _writer);
    if (!alreadyHandled)
    {
        _abortFlushedWriter = _writer;

        if (FileName.Contains("xlsx"))
        {
            _xssf.Write(_writer);
        }

        OnLogResult?.Invoke(new LogItem { Status = "Warning", Result = "Сканирование прервано!" });
    }

    Abort = true;
    EndPage = 1;
}
/// <summary>
/// Downloads one product page, extracts name, availability, price, code and phone data
/// from every <c>x-product-page</c> container and passes them to <c>WrieteResult</c>.
/// </summary>
/// <param name="itemUrl">Address of the product page.</param>
/// <param name="i">Number of the listing page being processed (used for logging only).</param>
/// <param name="j">Running product number on that page; incremented after the item is logged.</param>
/// <remarks>
/// A failed download is logged and aborts the whole scan through <c>AbortParser</c>.
/// HTML character entities in the product name and code (quot, amp, nbsp and the like)
/// are decoded with <see cref="WebUtility.HtmlDecode(string)"/>.
/// </remarks>
private void ParseItem(ref string itemUrl, ref int i, ref int j)
{
    _htmlDocument = new HtmlDocument();
    try
    {
        _url = _client.DownloadString(itemUrl);
    }
    catch (Exception exception)
    {
        OnLogResult?.Invoke(new LogItem { Status = "Error", Result = exception.Message });
        AbortParser();
        return;
    }

    _htmlDocument.LoadHtml(_url);

    var items = _htmlDocument.DocumentNode
        .Descendants("div")
        .Where(node => node.GetAttributeValue("class", "").Equals("x-product-page"))
        .ToArray();

    foreach (var item in items)
    {
        // InnerText is taken as raw markup text here, so entities are decoded explicitly.
        // HtmlDecode passes null through, so a missing node still yields null.
        var itemName = WebUtility.HtmlDecode(
            item.Descendants("h1")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("x-title"))
                ?.InnerText.Trim());

        var itemAvailability = item.Descendants("div")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("x-product-presence"))
            ?.InnerText.Trim();

        var itemPrice = item.Descendants("div")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("x-product-sticky__price"))
            ?.InnerText.Trim();

        // Decoding turns the nbsp entity into U+00A0; the output wants an ordinary space.
        var itemCode = WebUtility.HtmlDecode(
                item.Descendants("div")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("x-product-info__identity-item"))
                    ?.InnerText.Trim())
            ?.Replace('\u00A0', ' ');

        var itemPhone = item.Descendants("span")
            .FirstOrDefault(node => node.GetAttributeValue("class", "")
                .Contains("js-product-ad-conv-action x-pseudo-link"))
            ?.GetAttributeValue("data-pl-phones", "");

        WrieteResult(ref itemName, ref itemAvailability, ref itemPrice, ref itemCode, ref itemPhone);
    }

    OnLogResult?.Invoke(new LogItem
    {
        Status = "OK",
        Result = $"Готова товар № {j++} на странице {i} из {EndPage}"
    });
}
/// <summary>
/// Walks the category tree recursively. A page without category tiles is treated as a
/// leaf and scanned with <c>StartParse</c>; otherwise every tile's link is followed.
/// </summary>
/// <param name="categoryUrl">Absolute address of the category page to inspect.</param>
/// <remarks>
/// A failed download is logged and raises <c>Abort</c>. Tiles that have no title link
/// are skipped: concatenating an empty href would point at the site root and crawl it
/// as if it were a sub-category.
/// </remarks>
private void CheckCategory(string categoryUrl)
{
    if (Abort)
    {
        return;
    }

    _htmlDocument = new HtmlDocument();
    try
    {
        _url = _client.DownloadString(categoryUrl);
    }
    catch (Exception exception)
    {
        OnLogResult?.Invoke(new LogItem { Status = "Error", Result = exception.Message });
        Abort = true;
        return;
    }

    _htmlDocument.LoadHtml(_url);

    var categories = _htmlDocument.DocumentNode
        .Descendants("div")
        .Where(node => node.GetAttributeValue("class", "").Equals("x-category-tile__content"))
        .ToArray();

    if (categories.Length == 0)
    {
        StartParse(categoryUrl);
    }
    else
    {
        foreach (var category in categories)
        {
            // Remaining siblings would return immediately anyway once Abort is set.
            if (Abort)
            {
                break;
            }

            var nextCategoryUrl = category.Descendants("a")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("x-category-tile__title"))
                ?.GetAttributeValue("href", "");

            if (string.IsNullOrEmpty(nextCategoryUrl))
            {
                continue;
            }

            CheckCategory("https://prom.ua" + nextCategoryUrl);
        }
    }

    if (Abort)
    {
        return;
    }

    // Reached at every recursion level, so this message is logged once per visited category page.
    OnLogResult?.Invoke(new LogItem { Status = "OK", Result = "Все категории просканированы!" });
}
/// <summary>
/// Downloads the proxy index page, runs <c>OneCountry</c> for every linked menu entry
/// and finally saves the workbook to <c>FileName</c>.
/// </summary>
/// <remarks>
/// The loop ends early when <c>Stop</c> is set; the workbook is written in either case.
/// </remarks>
public void AllCountries()
{
    OnLogResult?.Invoke(new LogItem { Status = "Ok", Result = "Начинаю сканирование." });

    var indexHtml = _webClient.DownloadString("http://spys.one/proxys");
    var indexPage = new HtmlDocument();
    indexPage.LoadHtml(indexHtml);

    var menuCells = indexPage.DocumentNode
        .Descendants("td")
        .Where(cell => cell.GetAttributeValue("class", "").Contains("menu"))
        .ToArray();

    for (var index = 0; index < menuCells.Length; index++)
    {
        var cell = menuCells[index];

        var link = cell.Descendants("a").FirstOrDefault();
        var href = link?.GetAttributeValue("href", null);
        if (href == null)
        {
            continue;
        }

        OneCountry("http://spys.one" + href);

        // The caption is cut right after the first ')'; without one the cut is empty.
        var caption = cell.InnerText.Trim();
        var closingParen = caption.IndexOf(")", StringComparison.Ordinal);
        var countryLabel = caption.Substring(0, closingParen + 1);
        OnLogResult?.Invoke(new LogItem { Status = "Ok", Result = "Готова страна " + countryLabel });

        if (Stop)
        {
            OnLogResult?.Invoke(new LogItem { Status = "Warning", Result = "Сканирование остановлено." });
            break;
        }
    }

    using (var writer = File.Create(FileName))
    {
        _xssf.Write(writer);
    }

    if (Stop)
    {
        return;
    }

    OnLogResult?.Invoke(new LogItem { Status = "Ok", Result = "Сканирование завершено." });
}
/// <summary>
/// Downloads the first listing page of a category and sets <c>EndPage</c> from its pager.
/// </summary>
/// <param name="currentUrl">Address of the category listing.</param>
/// <remarks>
/// A failed download is logged and raises <c>Abort</c>. <c>EndPage</c> becomes the
/// number shown by the last numeric pager item, or 1 when the page has no pager.
/// Pager text is scraped markup, so it is parsed defensively: a non-numeric item
/// (an arrow, for instance) is skipped instead of throwing <see cref="FormatException"/>.
/// </remarks>
private void ParsePagesCount(string currentUrl)
{
    _htmlDocument = new HtmlDocument();
    try
    {
        _url = _client.DownloadString(currentUrl);
    }
    catch (Exception exception)
    {
        OnLogResult?.Invoke(new LogItem { Status = "Error", Result = exception.Message });
        Abort = true;
        return;
    }

    _htmlDocument.LoadHtml(_url);

    var pagerItems = _htmlDocument.DocumentNode
        .Descendants("a")
        .Where(node => node.GetAttributeValue("class", "").Equals("x-pager__item"))
        .ToArray();

    var endPage = 1;
    var resolved = pagerItems.Length == 0;

    // Walk backwards so the last pager item wins whenever it is a usable number.
    for (var index = pagerItems.Length - 1; index >= 0 && !resolved; index--)
    {
        int pageNumber;
        var parsed = int.TryParse(
            pagerItems[index].InnerText,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out pageNumber);

        if (parsed && pageNumber > 0)
        {
            endPage = pageNumber;
            resolved = true;
        }
    }

    if (!resolved)
    {
        OnLogResult?.Invoke(new LogItem
        {
            Status = "Warning",
            Result = "Не удалось определить количество страниц, будет просканирована только первая."
        });
    }

    EndPage = endPage;
}
/// <summary>
/// Downloads listing page <paramref name="i"/> of a category and runs <c>ParseItem</c>
/// for every product link found on it.
/// </summary>
/// <param name="i">Number of the listing page; appended to the address after a ';'.</param>
/// <param name="currentUrl">Address of the category listing.</param>
/// <remarks>
/// A failed download, or <c>Abort</c> being set before an item is processed, ends the
/// scan through <c>AbortParser</c>.
/// </remarks>
private void ParsePageLinks(ref int i, string currentUrl)
{
    _htmlDocument = new HtmlDocument();

    try
    {
        _url = _client.DownloadString($"{currentUrl};{i}");
    }
    catch (Exception exception)
    {
        var failure = new LogItem { Status = "Error", Result = exception.Message };
        OnLogResult?.Invoke(failure);
        AbortParser();
        return;
    }

    _htmlDocument.LoadHtml(_url);

    // Materialised up front: ParseItem replaces _htmlDocument while the links are walked.
    var productLinks = _htmlDocument.DocumentNode
        .Descendants("a")
        .Where(anchor => anchor.GetAttributeValue("class", "").Equals("x-gallery-tile__name"))
        .ToArray();

    var j = 1;
    for (var index = 0; index < productLinks.Length; index++)
    {
        if (Abort)
        {
            AbortParser();
            return;
        }

        var itemUrl = productLinks[index].GetAttributeValue("href", "");
        ParseItem(ref itemUrl, ref i, ref j);
    }

    var pageDone = new LogItem
    {
        Status = "OK",
        Result = $"Готова страница № {i} из {EndPage}"
    };
    OnLogResult?.Invoke(pageDone);
}
// Commented-out (currently disabled) static state/progress events and their raise helpers:
//public static event StateChanged OnStateChanged;
//public static event ProgressChanged OnProgressChangedAll;
//public static event ProgressChanged OnProgressChangedCurrent;
//public static void RaiseOnProgressChangedCurrent(double result)
//{
//    var handler = OnProgressChangedCurrent;
//    handler?.Invoke(result);
//}
//public static void RaiseOnProgressChangedAll(double result)
//{
//    var handler = OnProgressChangedAll;
//    handler?.Invoke(result);
//}
//public static void RaiseOnStateChanged()
//{
//    var handler = OnStateChanged;
//    handler?.Invoke();
//}

/// <summary>
/// Publishes a log entry that carries only a result text (no status) to the
/// subscribers of <c>OnLogResult</c>; does nothing when there are none.
/// </summary>
/// <param name="result">Text stored in the entry's <c>Result</c>.</param>
public static void RaiseOnResult(string result)
{
    var handler = OnLogResult;
    if (handler == null)
    {
        return;
    }

    handler.Invoke(new LogItem { Result = result });
}
/// <summary>
/// Publishes a log entry with the given status and result text to the subscribers of
/// <c>OnLogResult</c>; does nothing when there are none.
/// </summary>
/// <param name="status">Value stored in the entry's <c>Status</c>.</param>
/// <param name="result">Value stored in the entry's <c>Result</c>.</param>
public void RaiseOnResult(string status, string result)
{
    var handler = OnLogResult;
    if (handler == null)
    {
        return;
    }

    handler.Invoke(new LogItem
    {
        Status = status,
        Result = result
    });
}