/// <summary>
/// Requests <c>Url</c> through the bot's <c>HR</c> object, parses the returned HTML with the
/// bot's <c>list</c> parser, and queues follow-up items on the bot's cycle: one
/// <c>ListItem</c> for the "NextPageUrl" capture (when present) and one <c>ProductItem</c>
/// per "ProductUrl" capture. Captured URLs are made absolute against the response URL.
/// </summary>
/// <param name="bc">Cycle being processed; its <c>Bot</c> is cast to <c>CustomBot</c>.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when the request for <c>Url</c> reports failure.
/// </exception>
public override void PROCESSOR(BotCycle bc)
            {
                CustomBot bot = (CustomBot)bc.Bot;

                bool fetched = bot.HR.Get(Url);
                if (!fetched)
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture listing = bot.list.Parse(bot.HR.HtmlResult);

                // The next listing page is queued before the products of the current page.
                string nextPage = listing.ValueOf("NextPageUrl");
                if (nextPage != null)
                {
                    bot.BotCycle.Add(new ListItem(Spider.GetAbsoluteUrl(nextPage, bot.HR.ResponseUrl)));
                }

                string[] productLinks = Spider.GetAbsoluteUrls(listing.ValuesOf("ProductUrl"), bot.HR.ResponseUrl, bot.HR.HtmlResult);
                for (int i = 0; i < productLinks.Length; i++)
                {
                    bot.BotCycle.Add(new ProductItem(productLinks[i]));
                }
            }
Example #2
0
            /// <summary>
            /// Fetches the product page at <c>Url</c>, extracts the digits of its "ZipCode"
            /// capture, and searches yellowpages.com for the company by name and zip code.
            /// The first search result whose zip-code digits are equal and whose stripped name
            /// matches the first (up to) 10 characters of the stripped company name is followed,
            /// and the "Email" capture of that page is taken. One output line is written in every
            /// case; "Url2" is the matched company page, or the search URL when nothing matched.
            /// </summary>
            /// <param name="bc">Cycle being processed; its <c>Bot</c> is cast to <c>CustomBot</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the product page or the matched company page
            /// cannot be fetched, or when the search request fails with a status other than 404.
            /// </exception>
            public override void PROCESSOR(BotCycle bc)
            {
                CustomBot cb = (CustomBot)bc.Bot;

                string name = FieldPreparation.Html.GetCsvField(Name);

                if (!cb.HR.GetPage(Url))
                {
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                }

                DataSifter.Capture c = product.Parse(cb.HR.HtmlResult);

                // ValueOf can yield null (the result loop below guards against exactly that), and
                // Regex.Replace throws ArgumentNullException on null input, so fall back to "".
                string zip_code = Regex.Replace(c.ValueOf("ZipCode") ?? "", @"[^\d]", "", RegexOptions.Singleline);

                // Escape the search term: a raw '&', '#', '+' or '=' in a company name would
                // otherwise corrupt or truncate the query string. zip_code holds digits only.
                string url2 = "http://www.yellowpages.com/search?search_terms=" + System.Uri.EscapeDataString(name ?? "")
                    + "&geo_location_terms=" + zip_code;
                string email = null;
                string url3  = url2;

                if (cb.HR.GetPage(url2))
                {
                    DataSifter.Capture c2 = yp.Parse(cb.HR.HtmlResult);

                    // Compare on a short, regex-escaped prefix of the stripped name.
                    string regex_name = get_stripped_name(name);
                    regex_name = Regex.Escape((regex_name.Length > 10 ? regex_name.Substring(0, 10) : regex_name).Trim());

                    foreach (DataSifter.Capture cc in c2["Company"])
                    {
                        string candidate_zip = cc.ValueOf("ZipCode");
                        if (candidate_zip == null)
                        {
                            continue;
                        }

                        if (Regex.Replace(candidate_zip, @"[^\d]", "", RegexOptions.Singleline) != zip_code)
                        {
                            continue;
                        }

                        // An empty pattern matches every input, which would accept any company
                        // sharing the zip code; require a non-empty name prefix.
                        if (regex_name.Length == 0 ||
                            !Regex.IsMatch(get_stripped_name(cc.ValueOf("Name")), regex_name, RegexOptions.IgnoreCase))
                        {
                            continue;
                        }

                        url3 = Spider.GetAbsoluteUrl(cc.ValueOf("Url"), url2);
                        if (!cb.HR.GetPage(url3))
                        {
                            throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url3);
                        }

                        DataSifter.Capture c3 = yp2.Parse(cb.HR.HtmlResult);
                        email = c3.ValueOf("Email");
                        break; // only the first matching company is used
                    }
                }
                else if (cb.HR.HWResponse.StatusCode != HttpStatusCode.NotFound)
                {
                    // A 404 from the search is tolerated (no email is recorded); anything else is a failure.
                    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url2);
                }

                FileWriter.This.PrepareAndWriteHtmlLineWithHeader(
                    "Name", Name,
                    "City", City,
                    "ZipCode", zip_code,
                    "State", State,
                    "Phone", Phone,
                    "Email", email,
                    "Url", Url,
                    "Url2", url3
                    );
            }
Example #3
0
        /// <summary>
        /// Fetches the search results page at <paramref name="url"/>, parses it with the
        /// <c>search</c> parser, and queues follow-up items: a <c>SearchNextPageItem</c> for the
        /// "NextPageUrl" capture (when present) and a <c>CompanyItem</c> for every "Product"
        /// capture. Company URLs are made absolute against <paramref name="url"/>; the
        /// next-page URL is queued exactly as captured.
        /// </summary>
        /// <param name="url">Address of the search results page to fetch.</param>
        /// <exception cref="ProcessorException">
        /// Thrown with <c>RESTORE_AS_NEW</c> when the page cannot be fetched.
        /// </exception>
        void search_processor(string url)
        {
            bool fetched = HR.GetPage(url);
            if (!fetched)
            {
                throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + url);
            }

            DataSifter.Capture page = search.Parse(HR.HtmlResult);

            // The following results page is queued ahead of this page's companies.
            string nextPageUrl = page.ValueOf("NextPageUrl");
            if (nextPageUrl != null)
            {
                BotCycle.Add(new SearchNextPageItem(nextPageUrl));
            }

            foreach (DataSifter.Capture hit in page["Product"])
            {
                BotCycle.Add(
                    new CompanyItem(
                        Spider.GetAbsoluteUrl(hit.ValueOf("Url"), url),
                        hit.ValueOf("Name"),
                        hit.ValueOf("City"),
                        hit.ValueOf("State"),
                        hit.ValueOf("Phone")));
            }
        }