/// <summary>
/// Starts a background thread that downloads the page for <paramref name="zillow"/>,
/// follows its canonical link and fills in the entry's status, Zestimate and sold price.
/// </summary>
/// <remarks>
/// When the worker finishes it decrements <c>threadCount</c>, increments <c>progress</c>
/// and reports the new progress through <c>DebugBox</c> and <c>WorkProgress</c>.
/// Both counters are shared between all workers, so they are updated atomically.
/// </remarks>
/// <param name="zillow">
/// Entry to process. Its <c>URL</c>, <c>Status</c>, <c>Zestimate</c> and <c>SoldPrice</c>
/// may be overwritten by the worker thread.
/// </param>
private void ParsInfo(Zillow zillow)
{
    Thread thread = new Thread(() =>
    {
        try
        {
            LinkParser linkParser;
            ReqParametres reqParametres;

            // Parse the preliminary link; retry with a fresh user agent / proxy
            // for as long as the response is detected as a captcha page.
            do
            {
                reqParametres = new ReqParametres(zillow.URL);
                reqParametres.SetUserAgent(Useragents.GetNewUseragent());
                reqParametres.SetProxy();
                linkParser = new LinkParser(reqParametres.Request);
                SavedCookies = linkParser.Cookies;
            } while (isCaptcha(linkParser.Data));

            string newLink = linkParser.Data.ParsFromTo("<link rel=\"canonical\" href=\"", "\"");

            // Check for an invalid link: a canonical URL pointing at the generic
            // for_sale listing is treated as "address not found".
            if (newLink.Contains("https://www.zillow.com/homes/for_sale/"))
            {
                zillow.Status = "No such adress";
            }
            else
            {
                zillow.URL = newLink;

                // Load the canonical page, again retrying while a captcha is returned.
                do
                {
                    reqParametres = new ReqParametres(zillow.URL);
                    reqParametres.SetUserAgent(Useragents.GetNewUseragent());
                    reqParametres.SetProxy();
                    linkParser = new LinkParser(reqParametres.Request);
                } while (isCaptcha(linkParser.Data));

                // Strip the tooltip wrapper markup (both casings) from the detected status.
                zillow.Status = CheckOnStatus(linkParser.Data.ToLower())
                    .Replace("<span tabindex=\"0\" role=\"button\"><span class=\"zsg-tooltip-launch_keyword\">", "")
                    .Replace("<Span Tabindex=\"0\" Role=\"Button\"><Span Class=\"Zsg-Tooltip-Launch_Keyword\">", "");

                if (zillow.Status.Equals("Undefined"))
                {
                    // Status could not be recognised: dump the raw page for inspection.
                    DebugBox.WriteLine(linkParser.Data);
                }
                else
                {
                    // Zestimate set
                    // NOTE(review): the trailing 2 presumably selects the second capture
                    // group (the amount after '$') - confirm against ParsRegex.
                    List<string> rawZestimate = linkParser.Data.ParsRegex("Zestimate<sup>®</sup></span></span>(.*?)\\$([0-9,./a-zA-Z]+)<", 2);
                    if (rawZestimate.Count != 0)
                    {
                        zillow.Zestimate = "$" + rawZestimate[0];
                    }

                    zillow.SoldPrice = CheckPrice(linkParser.Data);
                }

                SavedCookies = linkParser.Cookies;
            }
        }
        finally
        {
            // Many workers finish concurrently; a plain -- / ++ is a non-atomic
            // read-modify-write and can lose updates, which would leave threadCount
            // permanently non-zero or make the reported progress wrong.
            Interlocked.Decrement(ref threadCount);
            var processed = Interlocked.Increment(ref progress);

            // Report using the value returned by the increment so the message and the
            // progress bar agree even if other workers finish in between.
            DebugBox.WriteLine($"Обработано ссылок: {processed} из {results.Count}.");
            double val = 100.0f / results.Count * processed;
            WorkProgress.SetValue(val);
        }
    });

    thread.IsBackground = true;
    thread.Start();
}
/// <summary>
/// Builds a Zillow search URL for every phrase, starts a <c>ParsInfo</c> worker for each
/// resulting entry (at most 200 running at once) and blocks until all workers have finished.
/// </summary>
/// <param name="phraseForSearch">
/// Search phrases; spaces in each phrase are replaced with '-' to form the URL.
/// </param>
public HouseParser(List<string> phraseForSearch)
{
    const int maxWorkers = 200;       // upper bound on simultaneously running workers
    const int throttlePollMs = 500;   // poll interval while waiting for a free slot
    const int completionPollMs = 1000; // poll interval while waiting for the last workers

    DebugBox.WriteLine("Начинаем парсинг данных.");

    foreach (string phrase in phraseForSearch)
    {
        Zillow zillow = new Zillow();
        zillow.URL = "https://www.zillow.com/homes/" + phrase.Replace(" ", "-") + "_rb/";
        results.Add(zillow);
    }

    CookieSet();

    foreach (Zillow result in results)
    {
        // Workers decrement threadCount from their own threads, so read it with
        // Volatile.Read to be sure each poll observes the latest value.
        while (Volatile.Read(ref threadCount) >= maxWorkers)
        {
            Thread.Sleep(throttlePollMs);
        }

        // Reserve the slot before the worker starts; the increment must be atomic
        // because it races with decrements performed by already-running workers.
        Interlocked.Increment(ref threadCount);
        ParsInfo(result);
    }

    // Wait for every outstanding worker to release its slot.
    while (Volatile.Read(ref threadCount) != 0)
    {
        Thread.Sleep(completionPollMs);
    }

    DebugBox.WriteLine("Парсинг завершён!");
}