/// <summary>
/// Starts a background thread that resolves the canonical URL of a Zillow entry,
/// downloads the listing page and fills in the entry's status, Zestimate and sold price.
/// </summary>
/// <param name="zillow">Entry to process; its URL, Status, Zestimate and SoldPrice are updated in place.</param>
/// <remarks>
/// Both download loops repeat until <c>isCaptcha</c> rejects the response, so they
/// have no upper bound on the number of attempts.
/// The shared counters are updated with <see cref="Interlocked"/> because this method
/// starts one thread per call and plain <c>--</c>/<c>++</c> can lose updates when
/// several threads finish at the same time.
/// </remarks>
private void ParsInfo(Zillow zillow)
{
    Thread thread = new Thread(() =>
    {
        LinkParser linkParser;
        ReqParametres reqParametres;

        do
        {
            // Fetch the preliminary link.
            reqParametres = new ReqParametres(zillow.URL);
            reqParametres.SetUserAgent(Useragents.GetNewUseragent());
            reqParametres.SetProxy();
            linkParser = new LinkParser(reqParametres.Request);
            SavedCookies = linkParser.Cookies;
        } while (isCaptcha(linkParser.Data));

        string newLink = linkParser.Data.ParsFromTo("<link rel=\"canonical\" href=\"", "\"");

        // Check for an invalid link: the canonical URL points at the generic search page.
        if (newLink.Contains("https://www.zillow.com/homes/for_sale/"))
        {
            zillow.Status = "No such adress";
        }
        else
        {
            zillow.URL = newLink;

            do
            {
                reqParametres = new ReqParametres(zillow.URL);
                reqParametres.SetUserAgent(Useragents.GetNewUseragent());
                reqParametres.SetProxy();
                linkParser = new LinkParser(reqParametres.Request);
            } while (isCaptcha(linkParser.Data));

            // Strip the tooltip wrapper markup from the detected status (two casings are handled).
            zillow.Status = CheckOnStatus(linkParser.Data.ToLower())
                .Replace("<span tabindex=\"0\" role=\"button\"><span class=\"zsg-tooltip-launch_keyword\">", "")
                .Replace("<Span Tabindex=\"0\" Role=\"Button\"><Span Class=\"Zsg-Tooltip-Launch_Keyword\">", "");

            if (zillow.Status.Equals("Undefined"))
            {
                // Dump the page so the unrecognised status can be inspected.
                DebugBox.WriteLine(linkParser.Data);
            }
            else
            {
                // Zestimate is only set when the pattern matched at least once.
                List<string> rawZestimate = linkParser.Data.ParsRegex("Zestimate<sup>®</sup></span></span>(.*?)\\$([0-9,./a-zA-Z]+)<", 2);
                if (rawZestimate.Count != 0)
                {
                    zillow.Zestimate = "$" + rawZestimate[0];
                }

                zillow.SoldPrice = CheckPrice(linkParser.Data);
            }

            SavedCookies = linkParser.Cookies;
        }

        // Atomic bookkeeping: report the value this thread obtained, not a value
        // another thread may have changed in the meantime.
        Interlocked.Decrement(ref threadCount);
        var processed = Interlocked.Increment(ref progress);

        DebugBox.WriteLine($"Обработано ссылок: {processed} из {results.Count}.");
        double val = 100.0f / results.Count * processed;
        WorkProgress.SetValue(val);
    });

    thread.IsBackground = true;
    thread.Start();
}
/// <summary>
/// Walks up to <c>MaxPageNumber</c> pages of the GoDaddy auctions search and appends
/// every extracted entry to <c>Sites</c>.
/// </summary>
/// <remarks>
/// Each page is re-requested until the response contains the <c>onclick="_s_baid</c> marker.
/// The <c>baid</c> value read from a page is sent with the next request; paging stops early
/// when it is null or empty. Any exception costs a five second pause, is written to the
/// console, and the loop moves on to the next iteration.
/// </remarks>
public override void CrawlData()
{
    const string searchUrl = "https://ru.auctions.godaddy.com/trpSearchResults.aspx";
    const string userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.38 Safari/537.36";

    string baid = "-1";

    for (int page = 0; page < MaxPageNumber; page++)
    {
        try
        {
            // The form body only depends on the current paging token.
            string postBody = $"t=5&action=search&hidAdvSearch=ddlAdvKeyword:3|txtKeyword:|ddlCharacters:0|txtCharacters:|txtMinTraffic:|txtMaxTraffic:|txtMinDomainAge:|txtMaxDomainAge:|txtMinPrice:|txtMaxPrice:|ddlCategories:0|chkAddBuyNow:false|chkAddFeatured:false|chkAddDash:true|chkAddDigit:true|chkAddWeb:false|chkAddAppr:false|chkAddInv:false|chkAddReseller:false|ddlPattern1:|ddlPattern2:|ddlPattern3:|ddlPattern4:|chkSaleOffer:false|chkSalePublic:false|chkSaleExpired:false|chkSaleCloseouts:false|chkSaleUsed:false|chkSaleBuyNow:false|chkSaleDC:false|chkAddOnSale:false|ddlAdvBids:0|txtBids:|txtAuctionID:|ddlDateOffset:|ddlSort:auctionendtimeD&rtr=7&baid={baid}&searchDir=1";

            LinkParser response;
            while (true)
            {
                ReqParametres request = new ReqParametres(searchUrl, HttpMethod.Post, postBody);
                request.SetUserAgent(userAgent);
                request.RowRequest.ContentType = "application/x-www-form-urlencoded";
                response = new LinkParser(request.Request);

                // Retry until the page carries the paging marker.
                if (response.Data.Contains("onclick=\"_s_baid"))
                {
                    break;
                }
            }

            var foundSites = response.Data
                .ParsRegex("alt=\"\" />(.*?)<", 1)
                .Select(link => new Site { Link = link });
            Sites.AddRange(foundSites);

            baid = response.Data.ParsFromTo("onclick=\"_s_baid1=", ";");
            if (string.IsNullOrEmpty(baid))
            {
                // No token for a following page.
                break;
            }
        }
        catch (Exception e)
        {
            Thread.Sleep(5000);
            Console.WriteLine(e.Message + "\n");
        }
    }
}
/// <summary>
/// Downloads the product detail page for a profile and fills in its description,
/// image, model data, substitutes, original numbers, additional information and prices.
/// </summary>
/// <param name="profile">Profile to populate; its Number and TovarCode are sent in the request body.</param>
/// <remarks>
/// The additional-information matches are combined two at a time into one line
/// ("first second"). A trailing unpaired match is kept on its own line; previously an
/// odd number of matches raised <see cref="ArgumentOutOfRangeException"/> and prevented
/// the prices from being set.
/// </remarks>
private void SetParametres(IntercarsProfile profile)
{
    ReqParametres req = new ReqParametres(
        "https://ic-ua.intercars.eu/dynamic/uni/ws_towDetail.php?wit=ICKATALOGWEB&p=F",
        HttpMethod.POST,
        $"artnr={profile.Number}&witryna=ICKATALOGWEB&towkod={profile.TovarCode}&lang=RU&nb=N&kraj=UA&typ=&wsk=");
    req.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.34 Safari/537.36");
    SetCookies(req);
    req.RowRequest.ContentType = "application/x-www-form-urlencoded";

    LinkParser link = new LinkParser(req.Request);

    profile.Description = link.Data.ParsFromTo("<title>", "<");

    profile.Image = link.Data.ParsFromTo("<img src=\"", "\"");
    if (profile.Image != "")
    {
        // The page gives a site-relative path; prefix the host.
        profile.Image = "https://ic-ua.intercars.eu" + profile.Image;
    }

    SetModel(profile, link.Data);
    SetZaminniki(profile, link.Data);
    SetOriginalNumbers(profile);

    List<string> additionalInformation = link.Data.ParsRegex("dKartaNazClass(.*?)>(.*?)<", 2);
    if (additionalInformation.Count > 0)
    {
        // The property is left untouched when nothing matched, as before.
        List<string> lines = new List<string>();
        for (int i = 0; i < additionalInformation.Count; i += 2)
        {
            bool hasPartner = i + 1 < additionalInformation.Count;
            lines.Add(hasPartner
                ? additionalInformation[i] + " " + additionalInformation[i + 1]
                : additionalInformation[i]);
        }

        profile.AdditionalInformation = string.Join("\n", lines);
    }

    SetPrices(profile, link.Data);
}
/// <summary>
/// Requests Google's search-by-image page for <c>_imageUrl</c> and returns the absolute
/// URL of the link found after the <c>role="heading"</c> marker.
/// </summary>
/// <returns>"https://www.google.com" followed by the extracted href with every "amp;" removed.</returns>
/// <remarks>
/// After every failed attempt (request error, marker not found, or an exception) the first
/// proxy is removed via <c>Proxies.DeleteFirstProxy()</c> and the request is repeated.
/// There is no attempt limit. Retries run in a loop rather than by recursion so that a
/// long run of failures cannot exhaust the call stack.
/// NOTE(review): <c>_imageUrl</c> is inserted into the query string without escaping —
/// confirm callers pass an already encoded value.
/// </remarks>
private string UploadImageToGoogle()
{
    while (true)
    {
        try
        {
            Console.Write("Trying connection to new proxy: ");

            ReqParametres req = new ReqParametres($"https://www.google.com/searchbyimage?image_url={_imageUrl}");
            req.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.53 Safari/537.36");
            // No proxy is configured on the request here; the proxy list is still rotated on failure.

            LinkParser link = new LinkParser(req.Request);
            if (!link.IsError)
            {
                string resultPath = link.Data.ParsFromTo("role=\"heading\"><a href=\"", "\"");
                if (!string.IsNullOrEmpty(resultPath))
                {
                    Console.WriteLine("Succes!");
                    return "https://www.google.com" + resultPath.Replace("amp;", "");
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }

        // Reached only when the attempt above did not return a result.
        Proxies.DeleteFirstProxy();
    }
}
/// <summary>
/// Searches the catalogue for the profile's number and stores the first accepted
/// product code in <c>profile.TovarCode</c>, or an empty string when none is found.
/// </summary>
/// <param name="profile">Profile whose Number is searched and whose TovarCode is assigned.</param>
private void SetTovarCode(IntercarsProfile profile)
{
    string searchUrl = $"https://ic-ua.intercars.eu/dynamic/uni/ws_towary.php?wit=ICKATALOGWEB&pro=&kraj=UA&oesearch={profile.Number}&ofe=";
    string postBody = $"oesearch={profile.Number}";

    ReqParametres request = new ReqParametres(searchUrl, HttpMethod.POST, postBody);
    request.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.34 Safari/537.36");
    SetCookies(request);
    request.RowRequest.ContentType = "application/x-www-form-urlencoded";

    LinkParser response = new LinkParser(request.Request);

    // Line breaks are removed before matching.
    string flattened = response.Data.Replace("\n", "").Replace("\r", "");
    List<string> calls = flattened.ParsRegex("Daj_Katalog_Detail_Clob(.*?);", 1);

    List<string> distinctCodes = new List<string>();
    foreach (string call in calls)
    {
        // The code is taken from the quoted text of the last comma-separated piece.
        string[] pieces = call.Split(',');
        string candidate = pieces[pieces.Length - 1].ParsFromTo("'", "'");

        if (distinctCodes.Contains(candidate))
        {
            continue;
        }

        if (!Regex.IsMatch(candidate, "[A-Z0-9]+"))
        {
            continue;
        }

        distinctCodes.Add(candidate);
    }

    profile.TovarCode = distinctCodes.Count > 0 ? distinctCodes[0] : "";
}
/// <summary>
/// Requests the "numoe" data for the profile's <c>NomerOe</c> and stores the matched
/// numbers in <c>profile.OriginalNumbers</c>, separated by ";\n".
/// </summary>
/// <param name="profile">Profile to update. Nothing is requested when its NomerOe is an empty string.</param>
/// <remarks>OriginalNumbers is left unchanged when the response yields no matches.</remarks>
private void SetOriginalNumbers(IntercarsProfile profile)
{
    if (profile.NomerOe == "")
    {
        return;
    }

    // The same parameters go into the query string and into the POST body.
    string parameters = $"call=numoe&art={profile.NomerOe}&wit=ICKATALOGWEB";

    ReqParametres request = new ReqParametres(
        "https://ic-ua.intercars.eu/dynamic/ickatalogweb/ws_getsoap.php?" + parameters,
        HttpMethod.POST,
        parameters);
    request.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.34 Safari/537.36");
    SetCookies(request);
    request.RowRequest.ContentType = "application/x-www-form-urlencoded";

    LinkParser response = new LinkParser(request.Request);
    List<string> found = response.Data.ParsRegex("> ([0-9]+)", 1);

    if (found.Count > 0)
    {
        profile.OriginalNumbers = string.Join(";\n", found);
    }
}
/// <summary>
/// Reads the group parameter from the detail page, stores it in <c>profile.Gru</c> and,
/// when it is not empty, requests the substitutes list and stores the matches in
/// <c>profile.Zaminniki</c>, separated by ";\n".
/// </summary>
/// <param name="profile">Profile to update; its TovarCode is sent in the request.</param>
/// <param name="data">Detail page text from which the group parameter is extracted.</param>
/// <remarks>Zaminniki is left unchanged when the response yields no matches.</remarks>
private void SetZaminniki(IntercarsProfile profile, string data)
{
    string group = data.ParsFromTo("&sta=T&fir=UJ8&gru=", "\"");
    profile.Gru = group;

    if (group == "")
    {
        return;
    }

    string url = $"https://ic-ua.intercars.eu/dynamic/ickatalogweb/ws_zamienniki.php?popup=T&firgru={group}&towkod={profile.TovarCode}&zakres=all";

    ReqParametres request = new ReqParametres(url);
    request.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.34 Safari/537.36");
    SetCookies(request);
    request.RowRequest.ContentType = "application/x-www-form-urlencoded";

    LinkParser response = new LinkParser(request.Request);
    List<string> substitutes = response.Data.ParsRegex(" <b>(.*?)<", 1);

    if (substitutes.Count > 0)
    {
        profile.Zaminniki = string.Join(";\n", substitutes);
    }
}
/// <summary>
/// Reads the OE number from the detail page, stores it in <c>profile.NomerOe</c> and,
/// when it is not empty, requests the "stos" data to fill <c>profile.Mark</c> and
/// <c>profile.Model</c> (models separated by ";\n").
/// </summary>
/// <param name="profile">Profile to update.</param>
/// <param name="data">Detail page text from which the OE number is extracted.</param>
/// <remarks>Model is left unchanged when the response yields no model matches.</remarks>
private void SetModel(IntercarsProfile profile, string data)
{
    string oeNumber = data.ParsFromTo("daj_numeryOE('", "'");
    profile.NomerOe = oeNumber;

    if (oeNumber == "")
    {
        return;
    }

    // The same parameters go into the query string and into the POST body.
    string parameters = $"call=stos&art={oeNumber}&wit=ICKATALOGWEB";

    ReqParametres request = new ReqParametres(
        "https://ic-ua.intercars.eu/dynamic/ickatalogweb/ws_getsoap.php?" + parameters,
        HttpMethod.POST,
        parameters);
    request.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.34 Safari/537.36");
    SetCookies(request);
    request.RowRequest.ContentType = "application/x-www-form-urlencoded";

    LinkParser response = new LinkParser(request.Request);

    profile.Mark = response.Data.ParsFromTo("class=\"dZB1\" ><u>", "<");

    // Line breaks are removed before the model pattern is applied.
    string flattened = response.Data.Replace("\n", "").Replace("\r", "");
    List<string> modelNames = flattened.ParsRegex("class=\"dZB2\"><u>(.*?)<", 1);

    if (modelNames.Count > 0)
    {
        profile.Model = string.Join(";\n", modelNames);
    }
}
/// <summary>
/// Requests the Zillow home page, sending <c>SavedCookies</c> when present, and stores
/// the cookies of each response back into <c>SavedCookies</c>.
/// </summary>
/// <remarks>
/// The request is repeated, with a fresh user agent each time, for as long as
/// <c>isCaptcha</c> reports true for the response. No proxy is configured on this request.
/// </remarks>
private void CookieSet()
{
    while (true)
    {
        ReqParametres request = new ReqParametres("https://www.zillow.com/");
        request.SetUserAgent(Useragents.GetNewUseragent());

        if (SavedCookies != null)
        {
            request.SetCookie(SavedCookies);
        }

        LinkParser response = new LinkParser(request.Request);
        SavedCookies = response.Cookies;

        if (!isCaptcha(response.Data))
        {
            return;
        }
    }
}
/// <summary>
/// Starts a background thread that tries the 1000 consecutive "khkod" cookie values
/// beginning at <paramref name="count"/> and stores the first one whose response
/// contains a substitutes match in <c>Khcode</c>.
/// </summary>
/// <param name="count">First numeric value to try; values up to count + 999 are probed.</param>
/// <remarks>
/// <c>_count</c> is incremented once per iteration, before the check that stops the loop
/// when <c>Khcode</c> is already set.
/// NOTE(review): <c>_count</c> and <c>Khcode</c> are updated here without synchronization;
/// confirm whether several of these threads run at the same time.
/// </remarks>
private void SetKhcodeProcess(int count)
{
    Thread worker = new Thread(() =>
    {
        Uri site = new Uri("https://ic-ua.intercars.eu/");

        for (int candidate = count; candidate < count + 1000; candidate++)
        {
            _count++;

            if (Khcode != "")
            {
                // A code has already been stored.
                break;
            }

            ReqParametres request = new ReqParametres("https://ic-ua.intercars.eu/dynamic/ickatalogweb/ws_zamienniki.php?popup=T&firgru=|UJ5|UJ8|UR1|UR3&towkod=BC421D&zakres=all");
            request.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.34 Safari/537.36");

            CookieContainer cookies = new CookieContainer();
            cookies.Add(site, new Cookie("khkod", "U" + candidate));
            cookies.Add(site, new Cookie("lang", "UA"));
            cookies.Add(site, new Cookie("kraj", "UA"));
            cookies.Add(site, new Cookie("PHPSESSID", "elo485mp69q42ejdgjssd23ev4"));
            request.SetCookie(cookies);

            LinkParser response = new LinkParser(request.Request);
            List<string> substitutes = response.Data.ParsRegex(" <b>(.*?)<", 1);

            if (substitutes.Count > 0)
            {
                Khcode = candidate.ToString();
                break;
            }
        }
    })
    {
        IsBackground = true
    };

    worker.Start();
}
/// <summary>
/// Reports whether the current site passes the Google check.
/// </summary>
/// <returns>Always <c>true</c>: the check is disabled and no request is made.</returns>
/// <remarks>
/// The earlier implementation sat behind an unconditional return and never executed.
/// It requested <c>"https://www.google.com/search?q=site:" + _site.Link</c> and reported
/// whether the response contained the text <c>result-stats"&gt;</c>. It was removed
/// because it was unreachable; restore it from version control if the check is to be
/// re-enabled.
/// </remarks>
private bool GoogleCheck()
{
    return true;
}
/// <summary>
/// Requests the substitutes page with the cookie <c>khkod</c> set to "U" followed by
/// <paramref name="code"/> and reports whether the response contains at least one match.
/// </summary>
/// <param name="code">Value appended to "U" to form the khkod cookie.</param>
/// <returns><c>true</c> when the pattern matched at least once in the response; otherwise <c>false</c>.</returns>
private bool IsValidCode(string code)
{
    ReqParametres request = new ReqParametres("https://ic-ua.intercars.eu/dynamic/ickatalogweb/ws_zamienniki.php?popup=T&firgru=|UJ5|UJ8|UR1|UR3&towkod=BC421D&zakres=all");
    request.SetUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.34 Safari/537.36");

    Uri site = new Uri("https://ic-ua.intercars.eu/");
    CookieContainer cookies = new CookieContainer();
    cookies.Add(site, new Cookie("khkod", "U" + code));
    cookies.Add(site, new Cookie("lang", "UA"));
    cookies.Add(site, new Cookie("kraj", "UA"));
    cookies.Add(site, new Cookie("PHPSESSID", "elo485mp69q42ejdgjssd23ev4"));
    request.SetCookie(cookies);

    LinkParser response = new LinkParser(request.Request);
    List<string> substitutes = response.Data.ParsRegex(" <b>(.*?)<", 1);

    return substitutes.Count > 0;
}