/// <summary>
/// Builds an <c>AkeneoProduct</c> from an already-fetched page, registers its URL with
/// <c>ShareSale</c> and hands the product to <c>Akeneo.ProcessProduct</c>.
/// Runs synchronously on the calling thread.
/// </summary>
/// <param name="url">Address the page was fetched from; passed to the scrapper and used in error logs.</param>
/// <param name="HtmlNode">Parsed page markup. When <c>null</c> the method does nothing.</param>
/// <param name="proxyInfo">Proxy entity forwarded to the <c>BaseScrapper</c> constructor.</param>
protected override void ScrapProductFromUrl(string url, HtmlNode HtmlNode, WebScrapperBaseProxyEntity proxyInfo)
{
    if (ReferenceEquals(HtmlNode, null))
    {
        return;
    }

    var scrapper = new BaseScrapper(HtmlNode, proxyInfo, _l, url);
    try
    {
        AkeneoProduct product = scrapper.ScrappingInstance(requestScrappingSite);

        // Validate the product BEFORE touching any of its members. Previously productUrl was
        // read first, so a null product ended up in the generic catch below as a
        // NullReferenceException and this dedicated message could never be logged.
        if (ReferenceEquals(product, null))
        {
            _l.error("Error in creating AkeneoProduct object, you can find more information in application error log");
            return;
        }

        ShareSale.AddLinkToProcessing(product.productUrl);

        // NOTE(review): the original body contained a loop that polled
        // ShareSale.GetLinkSolution(product.productUrl) every 1000 ms and replaced productUrl
        // with the first non-null result, but its guard was a constant false, so it never ran.
        // The dead loop was dropped; behaviour is unchanged. Re-introduce it with a timeout
        // if waiting for the resolved link is actually required.
        Akeneo.ProcessProduct(product);
    }
    catch (Exception e)
    {
        _l.error($"Cannot create product from url {url} : {e.Message} -> {e.StackTrace}.");
    }
}
/// <summary>
/// Creates the resolver: stores the supplied strategy, proxy, base URL and logger, creates the
/// directory/file helper services, and derives the driver resource paths from the current
/// working directory.
/// </summary>
/// <param name="options">Web-driver strategy kept in <c>settings</c>.</param>
/// <param name="_proxy">Proxy entity kept in <c>_ps</c>.</param>
/// <param name="url">Base URL kept in <c>_baseUrl</c>.</param>
/// <param name="_logger">Logger kept in <c>_l</c>.</param>
public ChromeDriverResolver(BaseWebDriverStrategy options, WebScrapperBaseProxyEntity _proxy, string url, BaseLogger _logger)
{
    // Caller-supplied collaborators.
    settings = options;
    _l = _logger;
    _baseUrl = url;

    // Helper services owned by this instance.
    _d = new DirectoriesService();
    _f = new FilesWriter();
    _ps = _proxy;

    // Resource locations, all rooted at the current working directory.
    DriverDependenciesDirectory =
        Directory.GetCurrentDirectory() + "/Scrapper/Resources/scripts/ChromeDriver/js/";
    DriverResourcesDirectory =
        Directory.GetCurrentDirectory() + "/Scrapper/Resources/";

    // Built on top of the resources directory assigned just above.
    DriverBaseExtensionsPath = DriverResourcesDirectory + "selenium/chrome/proxy.plugin/";
}
/// <summary>
/// Probes a proxy and records the outcome in <c>IsProxyAvailable</c>.
/// The probe downloads the NCSI test file through the proxy and then resolves the NCSI DNS
/// name; the proxy is marked available only when both results match the expected values.
/// On success <c>CheckProxiesInvoker()</c> is called before returning.
/// </summary>
/// <param name="proxy">Proxy entity to probe; it is mutated in place.</param>
/// <returns>The same <paramref name="proxy"/> instance with <c>IsProxyAvailable</c> updated.</returns>
protected WebScrapperBaseProxyEntity CheckProxyState(WebScrapperBaseProxyEntity proxy)
{
    const string NcsiTestUrl = "http://www.msftncsi.com/ncsi.txt";
    const string NcsiTestResult = "Microsoft NCSI";
    const string NcsiDns = "dns.msftncsi.com";
    const string NcsiDnsIpAddress = "131.107.255.255";

    try
    {
        using (var client = new WebClient())
        {
            // The WebProxy constructor throws UriFormatException when host/port do not form
            // a valid URI; that case is handled below.
            client.Proxy = new WebProxy(proxy.ProxyUrl, proxy.ProxyPort)
            {
                Credentials = new NetworkCredential(proxy.AuthLogin, proxy.AuthPassword)
            };

            // Step 1: the test file fetched through the proxy must have the exact expected content.
            string probeContent = client.DownloadString(NcsiTestUrl);
            if (probeContent != NcsiTestResult)
            {
                proxy.IsProxyAvailable = false;
                return proxy;
            }

            // Step 2: the NCSI DNS name must resolve, and its first address must be the expected one.
            var addresses = Dns.GetHostEntry(NcsiDns).AddressList;
            if (addresses.Length <= 0 || addresses[0].ToString() != NcsiDnsIpAddress)
            {
                proxy.IsProxyAvailable = false;
                return proxy;
            }

            proxy.IsProxyAvailable = true;
            CheckProxiesInvoker();
            return proxy;
        }
    }
    catch (Exception e) when (e is WebException || e is System.Net.Sockets.SocketException)
    {
        // Network-level failure (HTTP through the proxy, or DNS lookup).
        _l.warn($"Proxy [{proxy.ProxyUrl}:{proxy.ProxyPort}] are not stable, skipping");
        proxy.IsProxyAvailable = false;
        return proxy;
    }
    catch (UriFormatException)
    {
        // A missing or malformed host previously escaped this method and aborted the caller;
        // such an entry is simply unusable, so report it as unavailable instead.
        _l.warn($"Proxy [{proxy.ProxyUrl}:{proxy.ProxyPort}] has a malformed address, skipping");
        proxy.IsProxyAvailable = false;
        return proxy;
    }
}
/// <summary>
/// Rebuilds <c>ProxyesList</c> from the raw lines in <c>RequestProxyes</c>.
/// Each line is split with <c>ProxyFileEntitiesSeparator</c>; the host, port, login and password
/// are taken from the configured field indexes (an index of -1 or lower means "not present").
/// Lines that are null, have too few fields, or carry a non-numeric port are skipped with a
/// warning so that one bad entry no longer discards every entry after it.
/// </summary>
protected void ConvertProxies()
{
    if (ReferenceEquals(ProxyFileEntitiesSeparator, null) || ProxyFileEntitiesSeparator.Equals(String.Empty))
    {
        _l.warn("Proxy service: empty or invalid entities delimiter");
        return;
    }

    try
    {
        ProxyesList = new List<WebScrapperBaseProxyEntity>();

        // Highest configured index + 1 = number of fields a line must have to be readable.
        // Unconfigured (negative) indexes never raise this requirement.
        int minimumFieldCount = Math.Max(
            Math.Max(ProxyFileHostEntityIndex, ProxyFilePortEntityIndex),
            Math.Max(ProxyFileLoginIndex, ProxyFilePasswordIndex)) + 1;

        int entryNumber = 0;
        foreach (string line in RequestProxyes)
        {
            entryNumber++;

            if (ReferenceEquals(line, null))
            {
                _l.warn($"Proxy service: skipping entry #{entryNumber}: line is null");
                continue;
            }

            // Split never returns null and always yields at least one element.
            string[] fields = line.Split(ProxyFileEntitiesSeparator);
            if (fields.Length < minimumFieldCount)
            {
                _l.warn($"Proxy service: skipping entry #{entryNumber}: expected at least {minimumFieldCount} fields but found {fields.Length}");
                continue;
            }

            var entity = new WebScrapperBaseProxyEntity();
            entity.IsProxyAvailable = false;

            if (ProxyFileHostEntityIndex > -1)
            {
                entity.ProxyUrl = fields[ProxyFileHostEntityIndex];
            }

            if (ProxyFilePortEntityIndex > -1)
            {
                // TryParse uses the same culture rules as the int.Parse call it replaces,
                // but reports failure instead of throwing.
                int port;
                if (!int.TryParse(fields[ProxyFilePortEntityIndex], out port))
                {
                    _l.warn($"Proxy service: skipping entry #{entryNumber}: port is not a valid integer");
                    continue;
                }

                entity.ProxyPort = port;
            }

            if (ProxyFileLoginIndex > -1)
            {
                entity.AuthLogin = fields[ProxyFileLoginIndex];
            }

            if (ProxyFilePasswordIndex > -1)
            {
                entity.AuthPassword = fields[ProxyFilePasswordIndex];
            }

            ProxyesList.Add(entity);
        }
    }
    catch (Exception e)
    {
        // Anything unexpected (e.g. RequestProxyes itself being null) is still reported here.
        _l.error($"Proxy service: some errors occured, during converting proxies to the application entities: {e.Message} -> {e.StackTrace}");
    }
}
/// <summary>
/// Extension point that derived classes must implement; it returns nothing, so any result
/// has to be delivered through side effects of the implementation.
/// </summary>
/// <param name="url">NOTE(review): presumably the address of the page being processed — confirm against callers.</param>
/// <param name="HtmlNode">NOTE(review): presumably the parsed markup fetched for <paramref name="url"/>; whether null is allowed is up to each implementation — confirm.</param>
/// <param name="proxyInfo">NOTE(review): presumably the proxy entity associated with the request — confirm against callers.</param>
protected abstract void ScrapProductFromUrl(string url, HtmlNode HtmlNode, WebScrapperBaseProxyEntity proxyInfo);
/// <summary>
/// Builds an <c>AkeneoProduct</c> from an already-fetched page on a newly started thread,
/// fills in a missing category from the configured collections, notifies <c>Shopify</c> when
/// the product is out of stock, and hands the product to <c>Akeneo.ProcessProduct</c>.
/// </summary>
/// <param name="url">Address the page was fetched from; passed to the scrapper and used in error logs.</param>
/// <param name="htmlNode">Parsed page markup. When <c>null</c> the method does nothing.</param>
/// <param name="proxyInfo">Proxy entity forwarded to the <c>BaseScrapper</c> constructor.</param>
protected override void ScrapProductFromUrl(string url, HtmlNode htmlNode, WebScrapperBaseProxyEntity proxyInfo)
{
    if (ReferenceEquals(htmlNode, null))
    {
        return;
    }

    // One dedicated thread is started per call; all failures are logged inside the thread body.
    new Thread(() =>
    {
        try
        {
            var scrapper = new BaseScrapper(htmlNode, proxyInfo, _l, url);
            AkeneoProduct product = scrapper.ScrappingInstance(requestScrappingSite);

            // Validate the product BEFORE touching any of its members. Previously its fields
            // were read first, so a null product surfaced as a NullReferenceException in the
            // generic catch below and this dedicated message could never be logged.
            if (ReferenceEquals(product, null))
            {
                _l.error("Error in creating AkeneoProduct object, you can find more information in application error log");
                return;
            }

            if (ReferenceEquals(product.productCategory, null))
            {
                AssignCategoryFromCollections(product);
            }

            if (!product.isProductInStock)
            {
                Shopify.UpdateProduct(product.productName);
            }

            Akeneo.ProcessProduct(product);
        }
        catch (Exception e)
        {
            _l.error($"Cannot create product from url {url} : {e.Message} -> {e.StackTrace}.");
        }
    }).Start();
}

/// <summary>
/// Derives <c>productCategory</c> from the product name using the configured collections.
/// Does nothing when no collections are configured. Otherwise the collection whose words occur
/// most often in the product name wins; if none matches, the last two words of the name
/// (or the only word, for a single-word name) are used.
/// </summary>
/// <param name="product">Non-null product whose category is currently unset; mutated in place.</param>
private void AssignCategoryFromCollections(AkeneoProduct product)
{
    if (ReferenceEquals(requestScrappingSite.CollectionsProcessor, null)
        || ReferenceEquals(requestScrappingSite.CollectionsProcessor.Collections, null)
        || requestScrappingSite.CollectionsProcessor.Collections.Count <= 0)
    {
        return;
    }

    // First pass: keep collections whose shortened name appears in the product name.
    var candidates = new List<string>();
    foreach (string collection in requestScrappingSite.CollectionsProcessor.Collections)
    {
        // A blank entry would make Remove(-1) throw and abort the whole product.
        if (string.IsNullOrWhiteSpace(collection))
        {
            continue;
        }

        // The final character of the trimmed name is dropped, exactly as before.
        // NOTE(review): presumably this strips a trailing plural/delimiter — confirm.
        string trimmed = collection.Trim();
        string candidate = trimmed.Remove(trimmed.Length - 1);

        // An empty candidate (one-character collection) is contained in every string and
        // would yield an empty category, so it is ignored.
        if (candidate.Length > 0 && product.productName.Contains(candidate))
        {
            candidates.Add(candidate);
        }
    }

    // Second pass: score each candidate by how many of its words occur in the product name;
    // the first candidate with the strictly highest score wins.
    int bestScore = 0;
    foreach (string candidate in candidates)
    {
        int score = 0;
        foreach (string word in candidate.Split(" "))
        {
            if (product.productName.Contains(word))
            {
                score++;
            }
        }

        if (score > bestScore)
        {
            product.productCategory = candidate;
            bestScore = score;
        }
    }

    // Fallback: last two words of the name. Split always returns at least one element, so a
    // single-word name uses that word instead of indexing before the start of the array.
    if (ReferenceEquals(product.productCategory, null))
    {
        string[] words = product.productName.Split(" ");
        product.productCategory = words.Length >= 2
            ? String.Concat(words[words.Length - 2], " ", words[words.Length - 1])
            : words[words.Length - 1];
    }
}
/// <summary>
/// Runs <c>CheckProxyState</c> on the referenced proxy and stores the entity it returns back
/// into the caller's variable.
/// </summary>
/// <param name="proxy">Proxy entity to re-check; replaced with the result of the check.</param>
protected void UpdateProxieAddressState(ref WebScrapperBaseProxyEntity proxy)
    => proxy = CheckProxyState(proxy);