/// <summary>
/// Downloads the page at <c>Url</c>, parses it with the bot's <c>list</c> sifter and queues
/// follow-up items: a <c>ListItem</c> for the "NextPageUrl" capture (when one is present) and
/// a <c>ProductItem</c> for every "ProductUrl" capture.
/// </summary>
/// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when <c>HR.Get</c> reports failure for <c>Url</c>.
/// </exception>
override public void PROCESSOR(BotCycle bc)
{
    CustomBot bot = (CustomBot)bc.Bot;

    bool fetched = bot.HR.Get(Url);
    if (!fetched)
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
    }

    DataSifter.Capture capture = bot.list.Parse(bot.HR.HtmlResult);

    // Pagination link: only queued when the sifter captured one.
    string nextPage = capture.ValueOf("NextPageUrl");
    if (nextPage != null)
    {
        string nextPageAbsolute = Spider.GetAbsoluteUrl(nextPage, bot.HR.ResponseUrl);
        bot.BotCycle.Add(new ListItem(nextPageAbsolute));
    }

    // Product links are resolved against the response URL before being queued.
    string[] productUrls = Spider.GetAbsoluteUrls(capture.ValuesOf("ProductUrl"), bot.HR.ResponseUrl, bot.HR.HtmlResult);
    for (int i = 0; i < productUrls.Length; i++)
    {
        bot.BotCycle.Add(new ProductItem(productUrls[i]));
    }
}
/// <summary>
/// Downloads the product page at <c>Url</c>, extracts the product fields with the bot's
/// <c>product</c> sifter and hands the resulting <c>Product</c> to
/// <c>CrawlerApi.SaveProductAsJson</c>.
/// </summary>
/// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be fetched, and with
/// <c>ERROR</c> when <c>SaveProductAsJson</c> returns false.
/// </exception>
override public void PROCESSOR(BotCycle bc)
{
    CustomBot cb = (CustomBot)bc.Bot;
    if (!cb.HR.Get(Url))
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
    }

    DataSifter.Capture gc = cb.product.Parse(cb.HR.HtmlResult);

    Fhr.CrawlerHost.Product product = new Fhr.CrawlerHost.Product(
        id: gc.ValueOf("Id"),
        url: Url,
        name: gc.ValueOf("Name"),
        sku: gc.ValueOf("Sku"),
        price: gc.ValueOf("Price"),
        category_branch: gc.ValuesOf("Category"),
        // Relative image links must be resolved against the URL the page was actually
        // served from (it differs from Url after a redirect), which is also what the
        // other processors using cb.HR.Get do with cb.HR.ResponseUrl.
        image_urls: Spider.GetAbsoluteUrls(gc.ValuesOf("ImageUrl"), cb.HR.ResponseUrl, cb.HR.HtmlResult),
        // Any non-null "Stock" capture counts as in stock; its text is not inspected.
        stock: gc.ValueOf("Stock") != null
            ? (decimal)Fhr.CrawlerHost.Product.StockValue.IN_STOCK
            : (decimal)Fhr.CrawlerHost.Product.StockValue.NOT_IN_STOCK,
        description: gc.ValueOf("Description")
    );

    if (!Cliver.Fhr.CrawlerHost.CrawlerApi.SaveProductAsJson(product))
    {
        throw new ProcessorException(ProcessorExceptionType.ERROR, "Product was not saved.");
    }
}
/// <summary>
/// Downloads the company page at <c>Url</c>, reads its zip code, then searches
/// yellowpages.com by company name and zip code to find an e-mail address. One output row
/// is written through <c>FileWriter</c> whether or not a matching listing was found.
/// </summary>
/// <remarks>
/// A search hit is accepted when its zip-code digits equal the company's and its stripped
/// name matches the first (at most 10) characters of the stripped company name,
/// case-insensitively. Only the first accepted hit is followed.
/// </remarks>
/// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when the company page or the matched listing page
/// cannot be fetched, or when the search request fails for any reason other than an
/// HTTP 404 response.
/// </exception>
override public void PROCESSOR(BotCycle bc)
{
    CustomBot cb = (CustomBot)bc.Bot;
    string name = FieldPreparation.Html.GetCsvField(Name);

    if (!cb.HR.GetPage(Url))
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
    }

    DataSifter.Capture c = product.Parse(cb.HR.HtmlResult);

    // ValueOf can return null (the loop below checks for it); Regex.Replace throws
    // ArgumentNullException on a null input. A missing zip code is therefore handled
    // like one that contains no digits.
    string raw_zip_code = c.ValueOf("ZipCode");
    string zip_code = raw_zip_code == null
        ? ""
        : Regex.Replace(raw_zip_code, @"[^\d]", "", RegexOptions.Singleline);

    // The name is free text: escape it so characters such as '&', '#' or '+' cannot
    // truncate or alter the query string. zip_code is digits only and needs no escaping.
    string url2 = "http://www.yellowpages.com/search?search_terms=" + System.Uri.EscapeDataString(name ?? "")
        + "&geo_location_terms=" + zip_code;

    string email = null;
    string url3 = url2; // reported as "Url2"; stays the search URL when nothing matches

    if (cb.HR.GetPage(url2))
    {
        DataSifter.Capture c2 = yp.Parse(cb.HR.HtmlResult);

        // Pattern = escaped, trimmed prefix (max 10 chars) of the stripped company name.
        string regex_name = get_stripped_name(name);
        regex_name = Regex.Escape((regex_name.Length > 10 ? regex_name.Substring(0, 10) : regex_name).Trim());

        foreach (DataSifter.Capture cc in c2["Company"])
        {
            if (cc.ValueOf("ZipCode") != null
                && Regex.Replace(cc.ValueOf("ZipCode"), @"[^\d]", "", RegexOptions.Singleline) == zip_code
                && Regex.IsMatch(get_stripped_name(cc.ValueOf("Name")), regex_name, RegexOptions.IgnoreCase)
                )
            {
                url3 = Spider.GetAbsoluteUrl(cc.ValueOf("Url"), url2);
                if (!cb.HR.GetPage(url3))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url3);
                }

                DataSifter.Capture c3 = yp2.Parse(cb.HR.HtmlResult);
                email = c3.ValueOf("Email");
                break;
            }
        }
    }
    else if (cb.HR.HWResponse == null || cb.HR.HWResponse.StatusCode != HttpStatusCode.NotFound)
    {
        // A 404 from the search is tolerated (the row is written without an e-mail).
        // Every other failure, including having no response object at all, is raised
        // as RESTORE_AS_NEW instead of surfacing as a NullReferenceException.
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url2);
    }

    FileWriter.This.PrepareAndWriteHtmlLineWithHeader(
        "Name", Name,
        "City", City,
        "ZipCode", zip_code,
        "State", State,
        "Phone", Phone,
        "Email", email,
        "Url", Url,
        "Url2", url3
        );
}
/// <summary>
/// Downloads the page at <c>Url</c>, parses it with the bot's <c>category</c> sifter and
/// queues a <c>CategoryItem</c> for every "CategoryUrl" capture.
/// </summary>
/// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when <c>HR.Get</c> reports failure for <c>Url</c>.
/// </exception>
override public void PROCESSOR(BotCycle bc)
{
    CustomBot bot = (CustomBot)bc.Bot;

    bool fetched = bot.HR.Get(Url);
    if (!fetched)
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
    }

    DataSifter.Capture capture = bot.category.Parse(bot.HR.HtmlResult);

    // Captured links are resolved against the response URL before being queued.
    string[] categoryUrls = Spider.GetAbsoluteUrls(capture.ValuesOf("CategoryUrl"), bot.HR.ResponseUrl, bot.HR.HtmlResult);
    for (int i = 0; i < categoryUrls.Length; i++)
    {
        bot.BotCycle.Add(new CategoryItem(categoryUrls[i]));
    }
}
/// <summary>
/// Builds the rent.com page address for <c>State</c> (each whitespace character replaced by
/// a hyphen), downloads it, parses it with the <c>cities</c> sifter and adds a
/// <c>SearchItem</c> to <paramref name="bc"/> for every "Url" capture.
/// </summary>
/// <param name="bc">
/// Bot cycle that receives the new items; its <c>Bot</c> is cast to <c>CustomBot</c>.
/// </param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when the state page could not be fetched.
/// </exception>
override public void PROCESSOR(BotCycle bc)
{
    CustomBot bot = (CustomBot)bc.Bot;

    string statePath = Regex.Replace(State, @"\s", "-");
    string url = "http://www.rent.com/" + statePath;

    bool fetched = bot.HR.GetPage(url);
    if (!fetched)
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url);
    }

    DataSifter.Capture capture = cities.Parse(bot.HR.HtmlResult);

    // Each captured value is appended directly to the site root, so the captures are
    // presumably site-relative paths starting with '/' — verify against the sifter.
    foreach (string cityPath in capture.ValuesOf("Url"))
    {
        bc.Add(new SearchItem("http://www.rent.com" + cityPath));
    }
}
/// <summary>
/// Downloads a search-result page, parses it with the <c>search</c> sifter and queues a
/// <c>SearchNextPageItem</c> for the "NextPageUrl" capture (when present) plus one
/// <c>CompanyItem</c> per "Product" capture.
/// </summary>
/// <param name="url">Address of the search-result page; also the base for company links.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when <c>HR.GetPage</c> reports failure.
/// </exception>
void search_processor(string url)
{
    bool fetched = HR.GetPage(url);
    if (!fetched)
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url);
    }

    DataSifter.Capture page = search.Parse(HR.HtmlResult);

    // NOTE(review): unlike the company links below, the next-page URL is passed on exactly
    // as captured, without being resolved against url — confirm SearchNextPageItem expects that.
    string nextPageUrl = page.ValueOf("NextPageUrl");
    if (nextPageUrl != null)
    {
        BotCycle.Add(new SearchNextPageItem(nextPageUrl));
    }

    foreach (DataSifter.Capture hit in page["Product"])
    {
        string companyUrl = Spider.GetAbsoluteUrl(hit.ValueOf("Url"), url);
        string companyName = hit.ValueOf("Name");
        string city = hit.ValueOf("City");
        string state = hit.ValueOf("State");
        string phone = hit.ValueOf("Phone");

        BotCycle.Add(new CompanyItem(companyUrl, companyName, city, state, phone));
    }
}
/// <summary>
/// Downloads a search-result page, parses it with the <c>yp</c> sifter and adds a
/// <c>SearchNextPageItem</c> for the "NextPageUrl" capture (when present) plus one
/// <c>CompanyItem</c> per "Company/Url" capture. All links are made absolute against
/// <paramref name="url"/> first.
/// </summary>
/// <param name="url">Address of the search-result page to fetch.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when <c>HR.GetPage</c> reports failure.
/// </exception>
void search_processor(string url)
{
    bool fetched = HR.GetPage(url);
    if (!fetched)
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url);
    }

    DataSifter.Capture page = yp.Parse(HR.HtmlResult);

    string nextPageUrl = page.ValueOf("NextPageUrl");
    if (nextPageUrl != null)
    {
        string nextPageAbsolute = Cliver.BotWeb.Spider.GetAbsoluteUrl(nextPageUrl, url);
        Add(new SearchNextPageItem(nextPageAbsolute));
    }

    foreach (string companyUrl in Cliver.BotWeb.Spider.GetAbsoluteUrls(page.ValuesOf("Company/Url"), url, HR.HtmlResult))
    {
        Add(new CompanyItem(companyUrl));
    }
}
/// <summary>
/// Downloads the page at <c>Url</c>, parses it with the <c>yp2</c> sifter and writes one
/// output row (Name, City, ZipCode, State, Phone, Email, Site, Url) through
/// <c>FileWriter</c>.
/// </summary>
/// <param name="bc">Bot cycle, cast to <c>CustomBotCycle</c> to reach its <c>HR</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when <c>HR.GetPage</c> reports failure for <c>Url</c>.
/// </exception>
override public void __Processor(BotCycle bc)
{
    CustomBotCycle cycle = (CustomBotCycle)bc;

    bool fetched = cycle.HR.GetPage(Url);
    if (!fetched)
    {
        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
    }

    DataSifter.Capture details = yp2.Parse(cycle.HR.HtmlResult);

    // Values are read in the same order as the output columns.
    string name = details.ValueOf("Name");
    string city = details.ValueOf("City");
    string zipCode = details.ValueOf("ZipCode");
    string state = details.ValueOf("State");
    string phone = details.ValueOf("Phone");
    string email = details.ValueOf("Email");
    string site = details.ValueOf("Site");

    FileWriter.This.PrepareAndWriteHtmlLineWithHeader(
        "Name", name,
        "City", city,
        "ZipCode", zipCode,
        "State", state,
        "Phone", phone,
        "Email", email,
        "Site", site,
        "Url", Url
        );
}