/// <summary>
/// Downloads the page at <c>Url</c>, parses it with the bot's <c>list</c> parser and
/// adds follow-up items to the bot cycle: one <c>ListItem</c> when a "NextPageUrl"
/// was captured, and one <c>ProductItem</c> per captured "ProductUrl".
/// </summary>
/// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be downloaded.
/// </exception>
override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                bool fetched = bot.HR.Get(Url);
                if (!fetched)
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture capture = bot.list.Parse(bot.HR.HtmlResult);

                // The next-page link is optional: nothing is queued when it was not captured.
                string nextPageUrl = capture.ValueOf("NextPageUrl");
                if (nextPageUrl != null)
                {
                    string absoluteNextPageUrl = Spider.GetAbsoluteUrl(nextPageUrl, bot.HR.ResponseUrl);
                    bot.BotCycle.Add(new ListItem(absoluteNextPageUrl));
                }

                // Product links are resolved against the URL the response actually came from.
                string[] productUrls = Spider.GetAbsoluteUrls(capture.ValuesOf("ProductUrl"), bot.HR.ResponseUrl, bot.HR.HtmlResult);
                for (int i = 0; i < productUrls.Length; i++)
                {
                    bot.BotCycle.Add(new ProductItem(productUrls[i]));
                }
            }
            /// <summary>
            /// Downloads the page at <c>Url</c>, parses it with the bot's <c>product</c> parser,
            /// builds a <c>Fhr.CrawlerHost.Product</c> from the captured fields and saves it
            /// through <c>CrawlerApi.SaveProductAsJson</c>.
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be downloaded, or with
            /// <c>ERROR</c> when <c>SaveProductAsJson</c> returns false.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                bool fetched = bot.HR.Get(Url);
                if (!fetched)
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture capture = bot.product.Parse(bot.HR.HtmlResult);

                // Captured fields are read in the same order the constructor arguments are listed.
                string id = capture.ValueOf("Id");
                string name = capture.ValueOf("Name");
                string sku = capture.ValueOf("Sku");
                string price = capture.ValueOf("Price");
                string[] categoryBranch = capture.ValuesOf("Category");
                string[] imageUrls = Spider.GetAbsoluteUrls(capture.ValuesOf("ImageUrl"), Url, bot.HR.HtmlResult);

                // Any captured "Stock" value counts as in stock; a missing capture counts as not in stock.
                decimal stock;
                if (capture.ValueOf("Stock") != null)
                {
                    stock = (decimal)Fhr.CrawlerHost.Product.StockValue.IN_STOCK;
                }
                else
                {
                    stock = (decimal)Fhr.CrawlerHost.Product.StockValue.NOT_IN_STOCK;
                }

                string description = capture.ValueOf("Description");

                Fhr.CrawlerHost.Product product = new Fhr.CrawlerHost.Product(
                    id: id,
                    url: Url,
                    name: name,
                    sku: sku,
                    price: price,
                    category_branch: categoryBranch,
                    image_urls: imageUrls,
                    stock: stock,
                    description: description
                    );

                bool saved = Cliver.Fhr.CrawlerHost.CrawlerApi.SaveProductAsJson(product);
                if (!saved)
                {
                    throw new ProcessorException(ProcessorExceptionType.ERROR, "Product was not saved.");
                }
            }
Beispiel #3
0
            /// <summary>
            /// Downloads the company page at <c>Url</c>, extracts its zip code, then searches
            /// yellowpages.com for the company name within that zip code. For the first search
            /// result whose zip code and (stripped, truncated) name match, the result's detail
            /// page is downloaded and its "Email" capture is read. Finally one output line with
            /// the company data is written through <c>FileWriter</c>.
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the company page or a matched detail page
            /// cannot be downloaded, or when the search request fails with a status other than
            /// <c>HttpStatusCode.NotFound</c>.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot cb = (CustomBot)bc.Bot;

                string name = FieldPreparation.Html.GetCsvField(Name);

                if (!cb.HR.GetPage(Url))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture c = product.Parse(cb.HR.HtmlResult);

                // ValueOf returns null when the field was not captured (the loop below guards the
                // same field on search results); Regex.Replace throws ArgumentNullException on a
                // null input, so a missing zip code is treated as an empty one.
                string zip_code = Regex.Replace(c.ValueOf("ZipCode") ?? "", @"[^\d]", "", RegexOptions.Singleline);

                // The name is escaped so that characters such as '&', '#', '+' or '=' cannot break
                // the query string. zip_code contains digits only and needs no escaping.
                string url2 = "http://www.yellowpages.com/search?search_terms=" + System.Uri.EscapeDataString(name ?? "")
                              + "&geo_location_terms=" + zip_code;
                string email = null;

                // Reported as "Url2": the search URL unless a matching company page is found.
                string url3 = url2;

                if (cb.HR.GetPage(url2))
                {
                    DataSifter.Capture c2 = yp.Parse(cb.HR.HtmlResult);

                    // Only the first 10 characters of the stripped name are used as the match pattern.
                    string regex_name = get_stripped_name(name);
                    regex_name = Regex.Escape((regex_name.Length > 10 ? regex_name.Substring(0, 10) : regex_name).Trim());

                    foreach (DataSifter.Capture cc in c2["Company"])
                    {
                        if (cc.ValueOf("ZipCode") != null &&
                            Regex.Replace(cc.ValueOf("ZipCode"), @"[^\d]", "", RegexOptions.Singleline) == zip_code &&
                            Regex.IsMatch(get_stripped_name(cc.ValueOf("Name")), regex_name, RegexOptions.IgnoreCase)
                            )
                        {
                            url3 = Spider.GetAbsoluteUrl(cc.ValueOf("Url"), url2);
                            if (!cb.HR.GetPage(url3))
                            {
                                throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url3);
                            }

                            DataSifter.Capture c3 = yp2.Parse(cb.HR.HtmlResult);
                            email = c3.ValueOf("Email");

                            // Only the first matching company is used.
                            break;
                        }
                    }
                }
                // NOTE(review): assumes HWResponse is non-null whenever GetPage fails - confirm.
                else if (cb.HR.HWResponse.StatusCode != HttpStatusCode.NotFound)
                {
                    // A NotFound search response is tolerated (the line is written without an
                    // email); every other failure is reported.
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url2);
                }

                FileWriter.This.PrepareAndWriteHtmlLineWithHeader(
                    "Name", Name,
                    "City", City,
                    "ZipCode", zip_code,
                    "State", State,
                    "Phone", Phone,
                    "Email", email,
                    "Url", Url,
                    "Url2", url3
                    );
            }
            /// <summary>
            /// Downloads the page at <c>Url</c>, parses it with the bot's <c>category</c> parser
            /// and adds one <c>CategoryItem</c> to the bot cycle per captured "CategoryUrl".
            /// </summary>
            /// <param name="bc">Bot cycle whose <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be downloaded.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                bool fetched = bot.HR.Get(Url);
                if (!fetched)
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture capture = bot.category.Parse(bot.HR.HtmlResult);

                // Category links are resolved against the URL the response actually came from.
                string[] categoryUrls = Spider.GetAbsoluteUrls(capture.ValuesOf("CategoryUrl"), bot.HR.ResponseUrl, bot.HR.HtmlResult);
                for (int i = 0; i < categoryUrls.Length; i++)
                {
                    bot.BotCycle.Add(new CategoryItem(categoryUrls[i]));
                }
            }
Beispiel #5
0
            /// <summary>
            /// Downloads the rent.com page for <c>State</c> (whitespace characters replaced by
            /// '-'), parses it with the <c>cities</c> parser and adds one <c>SearchItem</c> to
            /// <paramref name="bc"/> per captured "Url", prefixed with the site root.
            /// </summary>
            /// <param name="bc">Bot cycle that receives the new items; its <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the state page could not be downloaded.
            /// </exception>
            override public void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                string statePath = Regex.Replace(State, @"\s", "-");
                string stateUrl = "http://www.rent.com/" + statePath;

                bool fetched = bot.HR.GetPage(stateUrl);
                if (!fetched)
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + stateUrl);
                }

                DataSifter.Capture capture = cities.Parse(bot.HR.HtmlResult);

                // Captured links are appended to the host as-is - presumably they are
                // root-relative paths; verify against the parser definition.
                foreach (string cityPath in capture.ValuesOf("Url"))
                {
                    bc.Add(new SearchItem("http://www.rent.com" + cityPath));
                }
            }
Beispiel #6
0
        /// <summary>
        /// Downloads a search result page, parses it with the <c>search</c> parser, queues a
        /// <c>SearchNextPageItem</c> when a "NextPageUrl" was captured and queues a
        /// <c>CompanyItem</c> for every "Product" capture on the page.
        /// </summary>
        /// <param name="url">Address of the search result page to download.</param>
        /// <exception cref="ProcessorException">
        /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be downloaded.
        /// </exception>
        void search_processor(string url)
        {
            bool fetched = HR.GetPage(url);
            if (!fetched)
            {
                throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url);
            }

            DataSifter.Capture page = search.Parse(HR.HtmlResult);

            // The next-page link is passed on exactly as captured.
            string nextPageUrl = page.ValueOf("NextPageUrl");
            if (nextPageUrl != null)
            {
                BotCycle.Add(new SearchNextPageItem(nextPageUrl));
            }

            foreach (DataSifter.Capture entry in page["Product"])
            {
                // The entry link is resolved against the URL of the search page.
                string companyUrl = Spider.GetAbsoluteUrl(entry.ValueOf("Url"), url);
                CompanyItem company = new CompanyItem(
                    companyUrl,
                    entry.ValueOf("Name"),
                    entry.ValueOf("City"),
                    entry.ValueOf("State"),
                    entry.ValueOf("Phone"));
                BotCycle.Add(company);
            }
        }
            /// <summary>
            /// Downloads a search result page, parses it with the <c>yp</c> parser, queues a
            /// <c>SearchNextPageItem</c> when a "NextPageUrl" was captured and queues a
            /// <c>CompanyItem</c> for every captured "Company/Url". All queued URLs are made
            /// absolute relative to <paramref name="url"/>.
            /// </summary>
            /// <param name="url">Address of the search result page to download.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be downloaded.
            /// </exception>
            void search_processor(string url)
            {
                bool fetched = HR.GetPage(url);
                if (!fetched)
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url);
                }

                DataSifter.Capture page = yp.Parse(HR.HtmlResult);

                // The next-page link is optional: nothing is queued when it was not captured.
                string nextPageUrl = page.ValueOf("NextPageUrl");
                if (nextPageUrl != null)
                {
                    string absoluteNextPageUrl = Cliver.BotWeb.Spider.GetAbsoluteUrl(nextPageUrl, url);
                    Add(new SearchNextPageItem(absoluteNextPageUrl));
                }

                var companyUrls = Cliver.BotWeb.Spider.GetAbsoluteUrls(page.ValuesOf("Company/Url"), url, HR.HtmlResult);
                foreach (string companyUrl in companyUrls)
                {
                    Add(new CompanyItem(companyUrl));
                }
            }
                /// <summary>
                /// Downloads the page at <c>Url</c>, parses it with the <c>yp2</c> parser and
                /// writes one output line with the captured company fields plus <c>Url</c>
                /// through <c>FileWriter</c>.
                /// </summary>
                /// <param name="bc">Bot cycle, cast to <c>CustomBotCycle</c> to reach its <c>HR</c> member.</param>
                /// <exception cref="ProcessorException">
                /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be downloaded.
                /// </exception>
                override public void __Processor(BotCycle bc)
                {
                    CustomBotCycle cycle = (CustomBotCycle)bc;

                    bool fetched = cycle.HR.GetPage(Url);
                    if (!fetched)
                    {
                        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                    }

                    DataSifter.Capture company = yp2.Parse(cycle.HR.HtmlResult);

                    // Captured fields are read in the same order they are written.
                    string name = company.ValueOf("Name");
                    string city = company.ValueOf("City");
                    string zipCode = company.ValueOf("ZipCode");
                    string state = company.ValueOf("State");
                    string phone = company.ValueOf("Phone");
                    string email = company.ValueOf("Email");
                    string site = company.ValueOf("Site");

                    FileWriter.This.PrepareAndWriteHtmlLineWithHeader(
                        "Name", name,
                        "City", city,
                        "ZipCode", zipCode,
                        "State", state,
                        "Phone", phone,
                        "Email", email,
                        "Site", site,
                        "Url", Url
                        );
                }