/// <summary>
/// Fetches the page at <c>Url</c>, parses it with the bot's <c>list</c> sifter and queues the
/// follow-up work: one <c>ListItem</c> for the "NextPageUrl" capture (when there is one) and
/// one <c>ProductItem</c> for every "ProductUrl" capture.
/// </summary>
/// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be fetched.
/// </exception>
override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                if (!bot.HR.Get(Url))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture capture = bot.list.Parse(bot.HR.HtmlResult);

                // Pagination: queue the next list page, if the page links to one.
                string nextPage = capture.ValueOf("NextPageUrl");
                if (nextPage != null)
                {
                    string absoluteNextPage = Spider.GetAbsoluteUrl(nextPage, bot.HR.ResponseUrl);
                    bot.BotCycle.Add(new ListItem(absoluteNextPage));
                }

                // Queue every product linked from this list page.
                foreach (string productUrl in Spider.GetAbsoluteUrls(capture.ValuesOf("ProductUrl"), bot.HR.ResponseUrl, bot.HR.HtmlResult))
                {
                    bot.BotCycle.Add(new ProductItem(productUrl));
                }
            }
示例#2
0
            /// <summary>
            /// Requests <c>Url</c> and, when this item's <c>Download</c> flag is set, continues
            /// link extraction one level deeper. When <c>Download</c> is not set the global
            /// <c>MaxDownloadedFileLength</c> setting is forced to 0 for the duration of the request.
            /// </summary>
            /// <remarks>
            /// On a failed request: an unacceptable content type is ignored, a 404 response is
            /// reported through <c>FileWriter</c> together with the parent link URL, and anything
            /// else raises a <see cref="ProcessorException"/>.
            /// </remarks>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the request failed for a reason other than an
            /// unacceptable content type or a 404.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot cb = (CustomBot)bc.Bot;
                int saved_max_length = Bot.Properties.Web.Default.MaxDownloadedFileLength;
                bool rc;

                try
                {
                    if (!Download)
                    {
                        Bot.Properties.Web.Default.MaxDownloadedFileLength = 0;
                    }
                    rc = cb.hr.GetPage(Url);
                }
                finally
                {
                    // The setting is global: restore it even if GetPage throws, otherwise every
                    // later request would keep running with the temporary value of 0.
                    Bot.Properties.Web.Default.MaxDownloadedFileLength = saved_max_length;
                }

                if (!rc)
                {
                    if (cb.hr.Status == WebRoutineStatus.UNACCEPTABLE_CONTENT_TYPE)
                    {
                        return;
                    }

                    // NOTE(review): HWResponse is presumably null when no HTTP response was received
                    // at all (e.g. connection failure) - confirm. Such a failure is reported as a
                    // ProcessorException below instead of a NullReferenceException.
                    if (cb.hr.HWResponse != null && cb.hr.HWResponse.StatusCode == System.Net.HttpStatusCode.NotFound)
                    {
                        // Broken link: record where it was found and what it points to.
                        FileWriter.This.WriteLine(ParentLink.Url, Url);
                        return;
                    }

                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                if (Download)
                {
                    cb.get_links(Depth + 1);
                }
            }
            /// <summary>
            /// Fetches the product page at <c>Url</c>, parses it with the bot's <c>product</c>
            /// sifter, builds a <c>Fhr.CrawlerHost.Product</c> from the captures and saves it
            /// through <c>CrawlerApi.SaveProductAsJson</c>.
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be fetched, or with
            /// <c>ERROR</c> when the product could not be saved.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot cb = (CustomBot)bc.Bot;

                if (!cb.HR.Get(Url))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture gc = cb.product.Parse(cb.HR.HtmlResult);

                Fhr.CrawlerHost.Product product = new Fhr.CrawlerHost.Product(
                    id: gc.ValueOf("Id"),
                    url: Url,
                    name: gc.ValueOf("Name"),
                    sku: gc.ValueOf("Sku"),
                    price: gc.ValueOf("Price"),
                    category_branch: gc.ValuesOf("Category"),
                    // Relative image URLs are resolved against the URL the response actually came
                    // from, not the requested one, so they stay correct after a redirect.
                    image_urls: Spider.GetAbsoluteUrls(gc.ValuesOf("ImageUrl"), cb.HR.ResponseUrl, cb.HR.HtmlResult),
                    // Any "Stock" capture at all counts as in stock; its text is not inspected.
                    stock: gc.ValueOf("Stock") != null ? (decimal)Fhr.CrawlerHost.Product.StockValue.IN_STOCK : (decimal)Fhr.CrawlerHost.Product.StockValue.NOT_IN_STOCK,
                    description: gc.ValueOf("Description")
                    );
                if (!Cliver.Fhr.CrawlerHost.CrawlerApi.SaveProductAsJson(product))
                {
                    throw new ProcessorException(ProcessorExceptionType.ERROR, "Product was not saved.");
                }
            }
示例#4
0
            /// <summary>
            /// Fetches the company page at <c>Url</c>, extracts its zip code, then searches
            /// yellowpages.com for a listing with the same zip code and a matching name prefix in
            /// order to pick up an e-mail address. One output line is written for the item whether
            /// or not a matching listing was found.
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the company page or the matched listing page
            /// could not be fetched, or when the search page failed with anything other than a 404.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot cb = (CustomBot)bc.Bot;

                string name = FieldPreparation.Html.GetCsvField(Name);

                if (!cb.HR.GetPage(Url))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture c = product.Parse(cb.HR.HtmlResult);

                // ValueOf can evidently return null (the company loop below guards against it) and
                // Regex.Replace throws ArgumentNullException on null input, so a page without a zip
                // code is treated as having an empty one instead of crashing the item.
                string zip_code = Regex.Replace(c.ValueOf("ZipCode") ?? "", @"[^\d]", "", RegexOptions.Singleline);

                // The name is free text: escape it so characters such as '&', '#' or '+' cannot
                // break the query string. zip_code contains digits only and needs no escaping.
                string url2  = "http://www.yellowpages.com/search?search_terms=" + System.Uri.EscapeDataString(name ?? "") + "&geo_location_terms=" + zip_code;
                string email = null;
                string url3  = url2; // reported as "Url2"; replaced by the listing URL on a match

                if (cb.HR.GetPage(url2))
                {
                    DataSifter.Capture c2 = yp.Parse(cb.HR.HtmlResult);

                    // Match on (at most) the first 10 characters of the stripped name.
                    string regex_name = get_stripped_name(name);
                    regex_name = Regex.Escape((regex_name.Length > 10 ? regex_name.Substring(0, 10) : regex_name).Trim());

                    foreach (DataSifter.Capture cc in c2["Company"])
                    {
                        string candidate_zip = cc.ValueOf("ZipCode");
                        if (candidate_zip == null)
                        {
                            continue;
                        }
                        if (Regex.Replace(candidate_zip, @"[^\d]", "", RegexOptions.Singleline) != zip_code)
                        {
                            continue;
                        }
                        if (!Regex.IsMatch(get_stripped_name(cc.ValueOf("Name")), regex_name, RegexOptions.IgnoreCase))
                        {
                            continue;
                        }

                        // First listing with the same zip code and a matching name wins.
                        url3 = Spider.GetAbsoluteUrl(cc.ValueOf("Url"), url2);
                        if (!cb.HR.GetPage(url3))
                        {
                            throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url3);
                        }

                        DataSifter.Capture c3 = yp2.Parse(cb.HR.HtmlResult);
                        email = c3.ValueOf("Email");
                        break;
                    }
                }
                else if (cb.HR.HWResponse.StatusCode != HttpStatusCode.NotFound)
                {
                    // A 404 from the search is tolerated (the line is written without an e-mail);
                    // any other failure makes the item be retried.
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url2);
                }

                FileWriter.This.PrepareAndWriteHtmlLineWithHeader(
                    "Name", Name,
                    "City", City,
                    "ZipCode", zip_code,
                    "State", State,
                    "Phone", Phone,
                    "Email", email,
                    "Url", Url,
                    "Url2", url3
                    );
            }
示例#5
0
            /// <summary>
            /// Requests the site page at <c>Url</c> and, on success, starts link extraction by
            /// calling the bot's <c>get_links</c> with 1.
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be fetched.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                bool fetched = bot.hr.GetPage(Url);
                if (fetched)
                {
                    bot.get_links(1);
                    return;
                }

                throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get site: " + Url);
            }
            /// <summary>
            /// Fetches the page at <c>Url</c>, parses it with the bot's <c>category</c> sifter and
            /// queues one <c>CategoryItem</c> per "CategoryUrl" capture.
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be fetched.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                if (!bot.HR.Get(Url))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture parsed = bot.category.Parse(bot.HR.HtmlResult);

                // Captured links are made absolute relative to the URL the response came from.
                foreach (string categoryUrl in Spider.GetAbsoluteUrls(parsed.ValuesOf("CategoryUrl"), bot.HR.ResponseUrl, bot.HR.HtmlResult))
                {
                    bot.BotCycle.Add(new CategoryItem(categoryUrl));
                }
            }
示例#7
0
            /// <summary>
            /// Fetches the rent.com page for <c>State</c> (whitespace characters replaced by
            /// dashes), parses it with the <c>cities</c> sifter and queues one <c>SearchItem</c>
            /// per "Url" capture, each prefixed with the rent.com host.
            /// </summary>
            /// <param name="bc">Bot cycle that supplies the bot and receives the new items.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the state page could not be fetched.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                string state_slug = Regex.Replace(State, @"\s", "-");
                string state_url  = "http://www.rent.com/" + state_slug;

                if (!bot.HR.GetPage(state_url))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + state_url);
                }

                DataSifter.Capture parsed = cities.Parse(bot.HR.HtmlResult);

                // The captured values are appended directly to the host name.
                string[] city_paths = parsed.ValuesOf("Url");
                foreach (string city_path in city_paths)
                {
                    bc.Add(new SearchItem("http://www.rent.com" + city_path));
                }
            }
示例#8
0
            /// <summary>
            /// Hands this item's <c>Url</c> to the bot's <c>search_processor</c>.
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            override public void PROCESSOR(BotCycle bc)
            {
                // All the work happens in the bot's search routine.
                ((CustomBot)bc.Bot).search_processor(Url);
            }