Ejemplo n.º 1
0
        /// <summary>
        /// Downloads one listing detail page and copies the values found in its markup
        /// into a new <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result holding only <c>AdUrl</c> when the scraper is terminated or the download
        /// reports failure; otherwise the same object with the remaining fields assigned.
        /// </returns>
        protected MapScrapeResult ParseDetailsAustralia(string url)
        {
            var listing = new MapScrapeResult { AdUrl = url };

            if (this.terminated)
            {
                return listing;
            }

            string html;
            if (!this.downloader.DownloadUrl(url, out html))
            {
                return listing;
            }

            listing.Headline = base.ExtractValue(html, "<h1 class=\"listingName\">(?<value>.*?)</h1>");
            listing.Address = base.ExtractValue(html, "<span class=\"listingAddressText\">(?<value>.*?)</span>");

            // ParseAddress is handed the result right after Address is set and before
            // any other field is filled in.
            base.ParseAddress(listing);

            listing.Latitude = base.ExtractValue(html, "latitude=\"(?<value>.*?)\"");
            listing.Longitude = base.ExtractValue(html, "longitude=\"(?<value>.*?)\"");

            // The static-map URL is always built, whatever the two coordinate values are.
            listing.Map = string.Format(
                "http://www.yellowpages.com.au/app/staticMap?markers={0},{1},1&width=261&height=175&type=bpp",
                listing.Latitude,
                listing.Longitude);

            // The extracted link is prefixed with the site host only when something was found.
            listing.Website = base.ExtractValue(html, "<a class=\"webAddressLink\".*?href=\"(?<value>.*?)\"").Trim();
            if (!string.IsNullOrEmpty(listing.Website))
            {
                listing.Website = string.Format("http://www.yellowpages.com.au{0}", listing.Website);
            }

            listing.Phone = base.ExtractValue(html, "<div class=\"primaryPhoneNumber\">(?<value>.*?)</div>").Trim();
            listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();

            return listing;
        }
        /// <summary>
        /// Downloads one listing detail page and copies the values found in its markup
        /// into a new <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result holding only <c>AdUrl</c> when the scraper is terminated or the download
        /// reports failure; otherwise the same object with the remaining fields assigned.
        /// </returns>
        protected MapScrapeResult ParseDetailsCanada(string url)
        {
            MapScrapeResult mapScrapeResult = new MapScrapeResult();

            mapScrapeResult.AdUrl = url;
            if (this.terminated)
            {
                return mapScrapeResult;
            }

            string text;
            if (!this.downloader.DownloadUrl(url, out text))
            {
                return mapScrapeResult;
            }

            mapScrapeResult.Headline = base.ExtractValue(text, "<h1.*?class=\"fn org\".*?>(?<value>.*?)</h1>");
            mapScrapeResult.Address  = base.ExtractValue(text, "<span.*?class=\"street-address\".*?>(?<value>.*?)</span>");
            mapScrapeResult.City     = base.ExtractValue(text, "<span.*?class=\"locality\".*?>(?<value>.*?)</span>");
            mapScrapeResult.Region   = base.ExtractValue(text, "<span.*?class=\"region\".*?>(?<value>.*?)</span>");
            mapScrapeResult.ZipCode  = base.ExtractValue(text, "<span.*?class=\"postal-code\".*?>(?<value>.*?)</span>");
            mapScrapeResult.Map      = base.ExtractValue(text, "<a.*?href=\"(?<value>http://ca.maps.yahoo.com.*?)\">");

            // The coordinates are taken from the query string of the map link.
            // "lat=" feeds Latitude and "lon=" feeds Longitude; these two patterns were
            // previously crossed, which stored each coordinate in the other field.
            mapScrapeResult.Latitude  = base.ExtractValue(mapScrapeResult.Map, "lat=(?<value>.*?)&");
            mapScrapeResult.Longitude = base.ExtractValue(mapScrapeResult.Map, "lon=(?<value>.*?)&");

            mapScrapeResult.Email = base.ExtractEmails(text).FirstOrDefault<string>();
            mapScrapeResult.Phone = base.ExtractPhones(text).FirstOrDefault<string>();

            return mapScrapeResult;
        }
        /// <summary>
        /// Downloads one listing detail page and copies the values found in its markup
        /// into a new <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result holding only <c>AdUrl</c> when the scraper is terminated or the download
        /// reports failure; otherwise the same object with the remaining fields assigned.
        /// </returns>
        protected MapScrapeResult ParseDetailsAustralia(string url)
        {
            var listing = new MapScrapeResult { AdUrl = url };

            if (this.terminated)
            {
                return listing;
            }

            string html;
            if (!this.downloader.DownloadUrl(url, out html))
            {
                return listing;
            }

            listing.Headline = base.ExtractValue(html, "<h1>(?<value>.*?)</h1>");

            // Address first holds the raw "adr" markup: Region, City and ZipCode are pulled
            // out of that markup, and only afterwards are tags, tabs and newlines removed.
            listing.Address = base.ExtractValue(html, "<div class=\"adr\">(?<value>.*?)</div>");
            listing.Region = base.ExtractValue(listing.Address, "<span class=\"region\".*?>(?<value>.*?)</span>");
            listing.City = base.ExtractValue(listing.Address, "<span.*?class=\"locality\".*?>(?<value>.*?)</span>");
            listing.ZipCode = base.ExtractValue(listing.Address, "<span.*?class=\"postal-code\".*?>(?<value>.*?)</span>");

            string plainAddress = UrlDownloader.SkipHtmlTags(listing.Address);
            plainAddress = plainAddress.Replace("\t", string.Empty);
            plainAddress = plainAddress.Replace("\n", string.Empty);
            listing.Address = plainAddress.Trim();

            listing.Latitude = base.ExtractValue(html, "<span class=\"latitude\">(?<value>.*?)</span>");
            listing.Longitude = base.ExtractValue(html, "<span class=\"longitude\">(?<value>.*?)</span>");

            // The map link is always built from the two coordinate values just extracted.
            listing.Map = string.Format(
                "http://maps.yahoo.com/maps_result?ard=1&lat={0}&lon={1}&zoom=18",
                listing.Latitude,
                listing.Longitude);

            // Website and phone are both looked up inside the "pageTools" list only.
            string pageTools = base.ExtractValue(html, "<ul class=\"pageTools\">(?<value>.*?)</ul>");
            listing.Website = base.ExtractValue(pageTools, "href=\"(?<value>.*?)\"");
            listing.Phone = base.ExtractValue(pageTools, "<span class=\"tl-phone-full\">(?<value>.*?)</span>").Trim();

            // E-mail addresses are searched for in the whole page.
            listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();

            return listing;
        }
        /// <summary>
        /// Finds every listing link in one results page, scrapes the linked detail page and
        /// appends each accepted result to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Output list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> as soon as termination is observed; otherwise <c>true</c> when
        /// <paramref name="text"/> contains the <c>"TL_pagelink_next"</c> marker
        /// (presumably the next-page link; confirm against the caller).
        /// </returns>
        protected bool ProcessAustralia(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex listingLink = new Regex("<a class=\"org\" href=\"/(?<href>.*?)\">", options);

            foreach (Match match in listingLink.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult mapScrapeResult = this.ParseDetailsAustralia(string.Format("{0}{1}", "http://local.yahoo.com.au/", match.Groups["href"].Value));
                mapScrapeResult.Category = Category.Name;
                mapScrapeResult.Radius   = 0.ToString();

                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // CanAddRes is evaluated a second time because WalkWebsite receives the
                // result object (NOTE(review): presumably it can alter it; confirm).
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                // Same monitor as before (the list itself), so any other code that
                // synchronizes on the list keeps working.
                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            // Ordinal search: the marker is literal markup, not linguistic text.
            return text.IndexOf("\"TL_pagelink_next\"", System.StringComparison.Ordinal) >= 0;
        }
        /// <summary>
        /// Finds every "vcard" list item in one results page, scrapes the linked detail page
        /// and appends each accepted result to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Output list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> as soon as termination is observed; otherwise <c>true</c> when
        /// <paramref name="text"/> contains the <c>&lt;span&gt;Next&lt;/span&gt;</c> link marker
        /// (presumably the next-page link; confirm against the caller).
        /// </returns>
        protected bool ProcessCanada(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex listingItem = new Regex("<li.*?class=\"vcard\".*?>.*?<a.*?class=\"ttl\".*?href=\"(?<href>.*?)\".*?</li>", options);

            foreach (Match match in listingItem.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult mapScrapeResult = this.ParseDetailsCanada(match.Groups["href"].Value);
                // Radius comes from the results-page item itself, not from the detail page.
                mapScrapeResult.Radius   = base.ExtractValue(match.Value, "<span.*?class=\"mlg\">(?<value>.*?)</span>");
                mapScrapeResult.Category = Category.Name;

                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // CanAddRes is evaluated a second time because WalkWebsite receives the
                // result object (NOTE(review): presumably it can alter it; confirm).
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                // Same monitor as before (the list itself), so any other code that
                // synchronizes on the list keeps working.
                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            // Ordinal search: the marker is literal markup, not linguistic text.
            return text.IndexOf("<span>Next</span></a></b>", System.StringComparison.Ordinal) >= 0;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Finds every business entry in one results page, scrapes the linked detail page
        /// and appends each accepted result to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Output list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> as soon as termination is observed; otherwise <c>true</c> when
        /// <paramref name="text"/> contains the <c>&lt;li class="next"&gt;</c> marker
        /// (presumably the next-page link; confirm against the caller).
        /// </returns>
        protected bool ProcessUSA(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex listingEntry = new Regex("<h3 class=\"business-name fn org\">.*?<a.*?href=\"(?<href>.*?)\".*?</a>.*?</h3>.*?<div class=\"distance\">(?<radius>.*?)</div>", options);

            foreach (Match match in listingEntry.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult mapScrapeResult = this.ParseDetailsUSA(match.Groups["href"].Value);
                // Radius comes from the results-page entry itself, with its tags stripped.
                mapScrapeResult.Radius   = UrlDownloader.SkipHtmlTags(match.Groups["radius"].Value).Trim();
                mapScrapeResult.Category = Category.Name;

                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // CanAddRes is evaluated a second time because WalkWebsite receives the
                // result object (NOTE(review): presumably it can alter it; confirm).
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                // Same monitor as before (the list itself), so any other code that
                // synchronizes on the list keeps working.
                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            // Ordinal search: the marker is literal markup, not linguistic text.
            return text.IndexOf("<li class=\"next\">", System.StringComparison.Ordinal) >= 0;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Finds every "listingDetail" entry in one results page, scrapes the linked detail
        /// page and appends each accepted result to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Output list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> as soon as termination is observed; otherwise <c>true</c> when
        /// <paramref name="text"/> contains the <c>&gt;Next&lt;/a&gt;</c> marker
        /// (presumably the next-page link; confirm against the caller).
        /// </returns>
        protected bool ProcessCanada(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex listingEntry = new Regex("<div class=\"listingDetail\".*?>.*?<h3 class=\"listingTitleLine\">.*?href=\"(?<href>.*?)\".*?</h3>.*?<h4 class=\"phoneLink\">(?<phone>.*?)</h4>", options);

            foreach (Match match in listingEntry.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult mapScrapeResult = this.ParseDetailsCanada(string.Format("http://www.yellowpages.ca{0}", match.Groups["href"].Value));
                // The phone captured on the results page replaces whatever the detail page gave.
                mapScrapeResult.Phone    = UrlDownloader.SkipHtmlTags(match.Groups["phone"].Value);
                mapScrapeResult.Category = Category.Name;

                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // CanAddRes is evaluated a second time because WalkWebsite receives the
                // result object (NOTE(review): presumably it can alter it; confirm).
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                // Same monitor as before (the list itself), so any other code that
                // synchronizes on the list keeps working.
                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            // Ordinal search: the marker is literal markup, not linguistic text.
            return text.IndexOf(">Next</a>", System.StringComparison.Ordinal) >= 0;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Finds every details link in one results page, scrapes the linked detail page and
        /// appends each accepted result to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Output list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">
        /// Category whose <c>Name</c> is copied onto every result; whether its <c>Url</c> contains
        /// <c>"Real+Estate"</c> is passed to <c>ParseDetails</c> as a flag.
        /// </param>
        /// <returns>
        /// <c>false</c> as soon as termination is observed; otherwise <c>true</c> when
        /// <paramref name="text"/> contains the <c>&lt;/div&gt;Next&lt;/a&gt;</c> marker
        /// (presumably the next-page link; confirm against the caller).
        /// </returns>
        protected override bool ProcessSimpleRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex detailsLink = new Regex("<span\\sclass=detls>.*?href=(?<href>.*?)\\s", options);

            foreach (Match match in detailsLink.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult mapScrapeResult = this.ParseDetails(string.Format("http://maps.google.com{0}", match.Groups["href"].Value), Category.Url.Contains("Real+Estate"));

                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // Category is assigned only after the website walk, before the second check.
                mapScrapeResult.Category = Category.Name;

                // CanAddRes is evaluated a second time because WalkWebsite receives the
                // result object (NOTE(review): presumably it can alter it; confirm).
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                // Same monitor as before (the list itself), so any other code that
                // synchronizes on the list keeps working.
                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            // Ordinal search: the marker is literal markup, not linguistic text.
            return text.IndexOf("</div>Next</a>", System.StringComparison.Ordinal) >= 0;
        }
        /// <summary>
        /// Downloads one listing detail page and copies the vCard-annotated values found in
        /// its markup into a new <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result holding only <c>AdUrl</c> when the scraper is terminated or the download
        /// reports failure; otherwise the same object with the remaining fields assigned.
        /// </returns>
        protected MapScrapeResult ParseDetailsUSA(string url)
        {
            var listing = new MapScrapeResult { AdUrl = url };

            if (this.terminated)
            {
                return listing;
            }

            string html;
            if (!this.downloader.DownloadUrl(url, out html))
            {
                return listing;
            }

            listing.Headline = base.ExtractValue(html, "<span.*?property=\"vcard:Name\".*?>(?<value>.*?)</span>");

            // NOTE(review): this value is replaced further down by the address assembled
            // from the individual vCard parts; the call is kept in case GetAddress has
            // effects that are not visible from here.
            listing.Address = this.GetAddress(html);

            listing.Region = base.ExtractValue(html, "<span.*?property=\"vcard:region\".*?>(?<value>.*?)</span>");
            listing.ZipCode = base.ExtractValue(html, "<span.*?property=\"vcard:postal-code\".*?>(?<value>.*?)</span>");
            string street = base.ExtractValue(html, "<span.*?property=\"vcard:street-address\".*?>(?<value>.*?)</span>");
            listing.City = base.ExtractValue(html, "<span.*?property=\"vcard:locality\".*?>(?<value>.*?)</span>");

            // Street, city and region are joined with ", ", leaving out missing parts;
            // the zip code, when present, follows after a single space.
            string[] commaParts = new string[] { street, listing.City, listing.Region }
                .Where(part => !string.IsNullOrEmpty(part))
                .ToArray();
            string fullAddress = string.Join(", ", commaParts);
            if (!string.IsNullOrEmpty(listing.ZipCode))
            {
                fullAddress = fullAddress.Length > 0
                    ? fullAddress + " " + listing.ZipCode
                    : listing.ZipCode;
            }
            listing.Address = fullAddress;

            listing.Latitude = base.ExtractValue(html, "<span.*?property=\"vcard:latitude\".*?>(?<value>.*?)</span>");
            listing.Longitude = base.ExtractValue(html, "<span.*?property=\"vcard:longitude\".*?>(?<value>.*?)</span>");
            listing.Map = base.ExtractValue(html, "<div.*?id=\"yls-dt-mapcont\".*?>.*?<img.*?src=\"(?<value>.*?)\"");
            listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();
            listing.Phone = base.ExtractValue(html, "<li.*?property=\"vcard:tel\".*?>(?<value>.*?)</li>");

            // The website is the href of the vcard:url anchor inside the web-links list.
            string urlAnchor = base.ExtractValue(html, "<ul.*?id=\"yls-dt-weblinks\">.*?(?<value><a.*?property=\"vcard:url\".*?>)");
            listing.Website = base.ExtractValue(urlAnchor, "href=\"(?<value>.*?)\"").Trim();

            return listing;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Downloads one listing detail page and copies the values found in its markup
        /// and embedded data into a new <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result holding only <c>AdUrl</c> when the scraper is terminated or the download
        /// reports failure; otherwise the same object with the remaining fields assigned.
        /// </returns>
        protected MapScrapeResult ParseDetailsCanada(string url)
        {
            var listing = new MapScrapeResult { AdUrl = url };

            if (this.terminated)
            {
                return listing;
            }

            string html;
            if (!this.downloader.DownloadUrl(url, out html))
            {
                return listing;
            }

            // These four values are searched for in the whole page.
            listing.Website = base.ExtractValue(html, "\"webURL\":{.*?\"href\":\"(?<value>.*?)\"").Trim();
            listing.Map = base.ExtractValue(html, "<a id=\"interactiveMapPrint\" href='(?<value>.*?)'.*?>").Trim();
            listing.Latitude = base.ExtractValue(html, "\"latitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");
            listing.Longitude = base.ExtractValue(html, "\"longitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");

            // From here on only the "busCardLeft" block is searched.
            html = base.ExtractValue(html, "<div id=\"busCardLeft\">(?<value>.*?)</div>");
            listing.Phone = base.ExtractPhones(html).FirstOrDefault<string>();
            listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();

            string rawHeadline = base.ExtractValue(html, "<h1 id=\"ypBusCardBusName\">(?<value>.*?)</h1>");
            listing.Headline = UrlDownloader.SkipHtmlTags(rawHeadline).Trim();
            listing.Address = base.ExtractValue(html, "<p>(?<value>.*?)</p>").Trim();

            // The address is split on commas: the last piece is split at its first space
            // into Region and ZipCode, and the piece before it becomes City.
            string[] pieces = listing.Address.Split(',');
            int lastIndex = pieces.Length - 1;
            if (pieces.Length > 0)
            {
                string tail = pieces[lastIndex].Trim();
                int spaceAt = tail.IndexOf(' ');
                if (spaceAt >= 0)
                {
                    listing.Region = tail.Substring(0, spaceAt).Trim();
                    listing.ZipCode = tail.Substring(spaceAt + 1).Trim();
                }
            }
            if (pieces.Length > 1)
            {
                listing.City = pieces[lastIndex - 1].Trim();
            }

            // The map link is prefixed with the site host only when one was found.
            if (!string.IsNullOrEmpty(listing.Map))
            {
                listing.Map = string.Format("http://www.yellowpages.ca{0}", listing.Map);
            }

            return listing;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Downloads one listing detail page and copies the values found in its markup
        /// into a new <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result holding only <c>AdUrl</c> when the scraper is terminated or the download
        /// reports failure; otherwise the same object with the remaining fields assigned.
        /// <c>Map</c> is left empty when the page has no mini-map link.
        /// </returns>
        protected MapScrapeResult ParseDetailsUSA(string url)
        {
            MapScrapeResult mapScrapeResult = new MapScrapeResult();

            mapScrapeResult.AdUrl = url;
            if (this.terminated)
            {
                return mapScrapeResult;
            }

            string text;
            if (!this.downloader.DownloadUrl(url, out text))
            {
                return mapScrapeResult;
            }

            mapScrapeResult.Headline = UrlDownloader.SkipHtmlTags(base.ExtractValue(text, "<h1 class=\"fn org\">(?<value>.*?)</h1>")).Trim();

            // Prefer the structured "primary-location" block; when it is absent only the
            // flat address span is used and City/Region/ZipCode stay unset.
            string primaryLocation = base.ExtractValue(text, "<p class=\"primary-location\">(?<value>.*?)</p>");
            if (string.IsNullOrEmpty(primaryLocation))
            {
                mapScrapeResult.Address = base.ExtractValue(text, "<span class=\"listing-address adr\">(?<value>.*?)</span>").Trim();
            }
            else
            {
                mapScrapeResult.Address = base.ExtractValue(primaryLocation, "<span class=\"street-address\">(?<value>.*?)</span>").Trim();
                mapScrapeResult.City    = base.ExtractValue(primaryLocation, "<span class=\"locality\">(?<value>.*?)</span>");
                mapScrapeResult.Region  = base.ExtractValue(primaryLocation, "<span class=\"region\">(?<value>.*?)</span>");
                mapScrapeResult.ZipCode = base.ExtractValue(primaryLocation, "<span class=\"postal-code\">(?<value>.*?)</span>");
            }

            mapScrapeResult.Email = base.ExtractEmails(text).FirstOrDefault<string>();
            mapScrapeResult.Phone = base.ExtractValue(text, "<p class=\"phone\">(?<value>.*?)</p>");

            string featureLinks = base.ExtractValue(text, "<ul class=\"feature-links blue-arrow\">(?<value>.*?)</ul>").Trim();
            mapScrapeResult.Website = base.ExtractValue(featureLinks, "href=\"(?<value>.*?)\".*?Visit Website</a>").Trim();

            // A mailto: link is not a website. URI schemes are case-insensitive, so the
            // comparison is ordinal and ignores case instead of using the current culture.
            if (mapScrapeResult.Website.StartsWith("mailto:", System.StringComparison.OrdinalIgnoreCase))
            {
                mapScrapeResult.Website = string.Empty;
            }

            // Prefix the host only when a map link was actually found; otherwise Map would
            // end up as the bare host name, which is not a map URL.
            string mapPath = base.ExtractValue(text, "<div id=\"mip-minimap\">.*?href=\"(?<value>.*?)\".*?</div>").Trim();
            mapScrapeResult.Map = string.IsNullOrEmpty(mapPath)
                ? string.Empty
                : string.Format("http://www.yellowpages.com{0}", mapPath);

            mapScrapeResult.Latitude  = base.ExtractValue(text, "<span class=\"latitude\" id=\"map-latitude\">(?<value>.*?)</span>");
            mapScrapeResult.Longitude = base.ExtractValue(text, "<span class=\"longitude\" id=\"map-longitude\">(?<value>.*?)</span>");

            return mapScrapeResult;
        }
        /// <summary>
        /// Downloads one listing detail page and copies the values found in its markup
        /// into a new <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
        /// <returns>
        /// A result holding only <c>AdUrl</c> when the scraper is terminated or the download
        /// reports failure; otherwise the same object with the remaining fields assigned.
        /// </returns>
        protected MapScrapeResult ParseDetailsUK(string url)
        {
            var listing = new MapScrapeResult { AdUrl = url };

            if (this.terminated)
            {
                return listing;
            }

            string html;
            if (!this.downloader.DownloadUrl(url, out html))
            {
                return listing;
            }

            listing.Headline = base.ExtractValue(html, "<h1 class=\"org fn n\">(?<value>.*?)</h1>");
            listing.Address = base.ExtractValue(html, "<p class=\"street-address\">(?<value>.*?)</p>");
            listing.ZipCode = base.ExtractValue(html, "<span class=\"postal-code\">(?<value>.*?)</span>");
            listing.Address = UrlDownloader.SkipHtmlTags(listing.Address).Trim();

            // The tag-free address is split on tabs, newlines and commas while those
            // characters are still present; whitespace-only pieces are dropped.
            string[] separators = new string[] { "\t", "\n", "," };
            var addressLines = new List<string>(listing.Address.Split(separators, StringSplitOptions.RemoveEmptyEntries));
            addressLines.RemoveAll(line => line.Trim().Length == 0);

            // Only now are tabs and newlines removed from the stored address.
            listing.Address = listing.Address.Replace("\t", string.Empty);
            listing.Address = listing.Address.Replace("\n", string.Empty);

            // The second remaining piece, when there is one, is stored as City as-is.
            if (addressLines.Count > 1)
            {
                listing.City = addressLines[1];
            }

            // Coordinates are looked up inside the "geo" paragraph only.
            string geoBlock = base.ExtractValue(html, "<p class=\"geo\">(?<value>.*?)</p>");
            listing.Latitude = base.ExtractValue(geoBlock, "<span class=\"latitude\">(?<value>.*?)</span>");
            listing.Longitude = base.ExtractValue(geoBlock, "<span class=\"longitude\">(?<value>.*?)</span>");

            listing.Map = base.ExtractValue(html, "<div id=\"ent-page-md-links\">.*?href=\"(?<value>.*?)\".*?</div>");
            listing.Email = base.ExtractEmails(html).FirstOrDefault<string>();
            listing.Phone = base.ExtractValue(html, "<h2 class=\"tel\">.*?<span class=\"value\">(?<value>.*?)</span>").Trim();

            return listing;
        }
        /// <summary>
        /// Finds every address entry in one results page, scrapes the linked detail page and
        /// appends each accepted result to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Markup of the results page.</param>
        /// <param name="result">Output list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> as soon as termination is observed; otherwise <c>true</c> when
        /// <paramref name="text"/> contains the <c>Next&lt;/a&gt;&lt;/li&gt;</c> marker
        /// (presumably the next-page link; confirm against the caller).
        /// </returns>
        protected bool ProcessUK(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex listingEntry = new Regex("<div class=\"addr \">.*?<h2>(?<txt>.*?)</h2>.*?<span class=\"note\">(?<radius>.*?)<span>", options);

            foreach (Match match in listingEntry.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                string relativeLink = base.ExtractValue(match.Groups["txt"].Value, "href=\"(?<value>.*?)\"");
                MapScrapeResult mapScrapeResult = this.ParseDetailsUK(string.Format("{0}{1}", "http://uk.local.yahoo.com", relativeLink));
                mapScrapeResult.Radius = match.Groups["radius"].Value;

                // Region is the first path segment of the link, with underscores shown as spaces.
                mapScrapeResult.Region = relativeLink.Split(new string[]
                {
                    "/"
                }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault<string>();
                if (!string.IsNullOrEmpty(mapScrapeResult.Region))
                {
                    mapScrapeResult.Region = mapScrapeResult.Region.Replace('_', ' ');
                }
                mapScrapeResult.Category = Category.Name;

                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }

                if (!string.IsNullOrEmpty(mapScrapeResult.Website))
                {
                    string website;
                    base.WalkWebsite(mapScrapeResult.Website, mapScrapeResult, out website);
                    mapScrapeResult.Website = website;
                }

                // CanAddRes is evaluated a second time because WalkWebsite receives the
                // result object (NOTE(review): presumably it can alter it; confirm).
                if (!this.CanAddRes(result, mapScrapeResult))
                {
                    continue;
                }

                // Same monitor as before (the list itself), so any other code that
                // synchronizes on the list keeps working.
                lock (result)
                {
                    result.Add(mapScrapeResult);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(mapScrapeResult);
                }
            }

            // Ordinal search: the marker is literal markup, not linguistic text.
            return text.IndexOf("Next</a></li>", System.StringComparison.Ordinal) >= 0;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Parses one page of UK (yell.com) search-result markup, builds a
        /// <see cref="MapScrapeResult"/> for every advert found and appends the
        /// accepted ones to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the search-result page.</param>
        /// <param name="result">Shared result list; it is locked while a new entry is added.</param>
        /// <param name="Location">Not used by this method.</param>
        /// <param name="Category">Category whose <c>Name</c> is stamped on every accepted result.</param>
        /// <returns>
        /// <c>false</c> if the scraper was terminated while processing; otherwise
        /// <c>true</c> exactly when the page contains a <c>&gt;Next&lt;/a&gt;</c> link.
        /// </returns>
        protected bool ProcessUK(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options     = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        advertRegex = new Regex("<div class=\"advert-content\">(?<text>.*?)</div>.*?(?<cta><div class=\"advert-cta\">.*?</div>){1}.*?<ul class=\"tabbed\">(?<tab>.*?)</ul>{1}", options);

            foreach (Match advert in advertRegex.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                string content = advert.Groups["text"].Value;
                string cta     = advert.Groups["cta"].Value;
                string tabs    = advert.Groups["tab"].Value;

                MapScrapeResult listing = new MapScrapeResult();
                listing.AdUrl = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"summaryTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
                listing.Map   = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"mapTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
                listing.Phone = UrlDownloader.SkipHtmlTags(base.ExtractValue(cta, "<ul class=\"(tel-single|tel-multiple)\">.*?<strong>(?<value>.*?)</strong>.*?</ul>")).Trim();

                // The website link lives inside the headline markup, so it has to be
                // extracted before the headline is reduced to plain text.
                string headlineHtml = base.ExtractValue(content, "<h2 class=\"coName\">(?<value>.*?)</h2>");
                listing.Website  = base.ExtractValue(headlineHtml, "href='(?<value>.*?)'").Trim();
                listing.Headline = UrlDownloader.SkipHtmlTags(headlineHtml).Trim();

                listing.Address = UrlDownloader.SkipHtmlTags(base.ExtractValue(content, "<p class=\"address\">(?<value>.*?)</p>")).Trim();
                listing.ZipCode = base.ExtractValue(listing.Address, "(?<value>[A-Z]{1,2}[0-9R][0-9A-Z]?\\s*?[0-9][ABD-HJLNP-UW-Z]{2})");

                // Split never returns an empty array, so the last element always exists.
                string[] parts  = listing.Address.Split(',');
                bool     hasZip = !string.IsNullOrEmpty(listing.ZipCode);

                string lastPart = parts[parts.Length - 1];
                listing.Region = hasZip
                    ? lastPart.Replace(listing.ZipCode, string.Empty).Trim()
                    : lastPart.Trim();

                if (parts.Length > 1)
                {
                    string cityPart = parts[parts.Length - 2];
                    listing.City = hasZip
                        ? cityPart.Replace(listing.ZipCode, string.Empty).Trim()
                        : cityPart.Trim();
                }

                if (this.terminated)
                {
                    return false;
                }

                if (!this.CanAddRes(result, listing))
                {
                    continue;
                }

                if (!string.IsNullOrEmpty(listing.Website))
                {
                    // NOTE(review): WalkWebsite is defined elsewhere; it receives the
                    // listing and hands back the website value to store - confirm what
                    // else it fills in on the listing.
                    string website;
                    base.WalkWebsite(listing.Website, listing, out website);
                    listing.Website = website;
                }
                listing.Category = Category.Name;

                // Checked a second time, as in the original flow, after the website
                // walk and category assignment.
                if (!this.CanAddRes(result, listing))
                {
                    continue;
                }

                lock (result)
                {
                    result.Add(listing);
                }

                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(listing);
                }
            }

            // Ordinal substring test: this is markup, not linguistic text.
            return text.Contains(">Next</a>");
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Downloads a place-details page and extracts headline, address, phone,
        /// map image, e-mail, coordinates and website into a new
        /// <see cref="MapScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the details page; always stored in <c>AdUrl</c>.</param>
        /// <param name="isRealEstate">
        /// When <c>true</c>, the headline is prepended to the address (with ", UK"
        /// appended for the UK country setting) and coordinates are read from the
        /// <c>lat:</c>/<c>lng:</c> fields instead of the <c>*_e6</c> fields.
        /// </param>
        /// <returns>
        /// The populated result, or a result carrying only <c>AdUrl</c> when the
        /// download fails or the scraper has been terminated.
        /// </returns>
        protected MapScrapeResult ParseDetails(string url, bool isRealEstate)
        {
            MapScrapeResult details = new MapScrapeResult();
            details.AdUrl = url;

            string text;
            if (!this.downloader.DownloadUrl(url, out text) || this.terminated)
            {
                return details;
            }

            details.Headline = UrlDownloader.SkipHtmlTags(base.ExtractValue(text, "<span class=\"*?pp-place-title\"*?>(?<value>.*?)</span>")).Trim();
            details.Address  = UrlDownloader.SkipHtmlTags(base.ExtractValue(text, "<span.*?class=\"*?pp-headline-item pp-headline-address\"*?.*?>(?<value>.*?)</span>")).Trim();

            if (isRealEstate)
            {
                details.Address = details.Address.Trim().Length > 0
                    ? details.Headline + "," + details.Address
                    : details.Headline;

                if (this.country == AbstractScraper.Country.UK)
                {
                    details.Address += ", UK";
                }
            }

            details.Phone = base.ExtractValue(text, "<span.*?class=\"*?telephone\"*?.*?>.*?<nobr>(?<value>.*?)</nobr>").Trim();

            string compactStory = base.ExtractValue(text, "<div.*?class=\"*?pp-compact-story\"*?.*?>(?<value>.*?)</div>");
            details.Map = base.ExtractValue(compactStory, "src=\"*?(?<value>.*?)\"*?\\s");

            string emailSection = base.ExtractValue(text, "<div.*?class=\"*?pp-story\"*?.*?>.*?Email.*?(?<value>.*?)</div>");
            details.Email = base.ExtractEmails(emailSection).FirstOrDefault <string>();

            if (isRealEstate)
            {
                // NOTE(review): the result of this call was already discarded in the
                // original code; it is kept only in case ExtractValue has side
                // effects - confirm and remove.
                base.ExtractValue(text, "latlng:\\{(?<value>.*?)\\}");
                details.Latitude  = base.ExtractValue(text, "lat:(?<value>[-+]?[0-9]*\\.?[0-9]+)");
                details.Longitude = base.ExtractValue(text, "lng:\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");
            }
            else
            {
                details.Latitude  = FormatE6Coordinate(base.ExtractValue(text, "latitude_e6:(?<value>.*?),").Trim());
                details.Longitude = FormatE6Coordinate(base.ExtractValue(text, "longitude_e6:(?<value>.*?),").Trim());
            }

            base.ParseAddress(details);
            details.Radius  = base.Radius.ToString();
            details.Website = base.ExtractValue(text, "<span.*?class=\"?pp-authority-page\"?>.*?<a.*?href=\"?(?<value>.*?)\"?(\\s|>)").Trim();
            return details;
        }

        /// <summary>
        /// Converts an integer "E6" coordinate (degrees multiplied by 1,000,000,
        /// optionally signed) into a decimal-degree string by placing the decimal
        /// point six digits from the right, e.g. "51500000" -> "51.500000" and
        /// "-127758" -> "-0.127758".
        /// </summary>
        /// <param name="raw">Trimmed text captured from the page.</param>
        /// <returns>
        /// The decimal-degree string, or <paramref name="raw"/> unchanged when it
        /// is empty or is not an optionally signed run of digits.
        /// </returns>
        private static string FormatE6Coordinate(string raw)
        {
            if (string.IsNullOrEmpty(raw))
            {
                return raw;
            }

            string sign   = string.Empty;
            string digits = raw;
            if (raw[0] == '-' || raw[0] == '+')
            {
                sign   = raw.Substring(0, 1);
                digits = raw.Substring(1);
            }

            if (digits.Length == 0)
            {
                return raw;
            }
            foreach (char c in digits)
            {
                if (c < '0' || c > '9')
                {
                    return raw;
                }
            }

            // Values below one degree have six digits or fewer; pad so that there
            // is always a leading integer digit in front of the decimal point.
            if (digits.Length <= 6)
            {
                digits = digits.PadLeft(7, '0');
            }
            return sign + digits.Insert(digits.Length - 6, ".");
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Splits <c>res.Address</c> on commas and fills <c>City</c>, <c>Region</c>
        /// and <c>ZipCode</c> according to the layout used for the scraper's
        /// current country setting.
        /// </summary>
        /// <param name="res">
        /// Result whose <c>Address</c> is read and whose <c>City</c>, <c>Region</c>
        /// and <c>ZipCode</c> are written. A null address is treated as empty.
        /// Fields for which the address has too few parts are left untouched.
        /// </param>
        protected void ParseAddress(MapScrapeResult res)
        {
            string   address    = res.Address ?? string.Empty;
            string[] parts      = address.Split(',');
            string[] whitespace = new string[] { " " };

            switch (this.country)
            {
            case AbstractScraper.Country.USA:
            {
                // Layout: <street>, <city>, <state> <zip>[, ...]
                if (parts.Length > 1)
                {
                    res.City = parts[1].Trim();
                }
                if (parts.Length > 2)
                {
                    string[] stateZip = parts[2].Trim().Split(whitespace, StringSplitOptions.RemoveEmptyEntries);
                    if (stateZip.Length > 0)
                    {
                        res.Region = stateZip[0].Trim();
                    }
                    if (stateZip.Length > 1)
                    {
                        res.ZipCode = stateZip[1].Trim();
                    }
                }
                break;
            }

            case AbstractScraper.Country.Canada:
            {
                // Layout: ..., <city>, <province> <postal code>, <last part>
                if (parts.Length > 1)
                {
                    string provincePostal = parts[parts.Length - 2].Trim();
                    int    firstSpace     = provincePostal.IndexOf(' ');
                    if (firstSpace >= 0)
                    {
                        res.Region  = provincePostal.Substring(0, firstSpace).Trim();
                        res.ZipCode = provincePostal.Substring(firstSpace + 1).Trim();
                    }
                }
                if (parts.Length > 2)
                {
                    // Trimmed like every other branch; the raw part keeps the
                    // space that follows the comma.
                    res.City = parts[parts.Length - 3].Trim();
                }
                break;
            }

            case AbstractScraper.Country.UK:
            {
                res.ZipCode = base.ExtractValue(address, "(?<value>[A-Z]{1,2}[0-9R][0-9A-Z]? ?[0-9][ABD-HJLNP-UW-Z]{2})");

                // Split never returns an empty array, so the last part always exists.
                res.Region = parts[parts.Length - 1].Trim();
                if (parts.Length > 1)
                {
                    string cityPart = parts[parts.Length - 2];
                    res.City = string.IsNullOrEmpty(res.ZipCode)
                        ? cityPart.Trim()
                        : cityPart.Replace(res.ZipCode, string.Empty).Trim();
                }
                break;
            }

            case AbstractScraper.Country.Australia:
            {
                // Second-to-last part layout: <city words...> <state> <postcode>
                if (parts.Length > 1)
                {
                    string[] tokens = parts[parts.Length - 2].Trim().Split(whitespace, StringSplitOptions.RemoveEmptyEntries);
                    if (tokens.Length > 1)
                    {
                        res.Region = tokens[tokens.Length - 2];
                    }
                    if (tokens.Length > 0)
                    {
                        res.ZipCode = tokens[tokens.Length - 1];
                    }

                    // Everything before the trailing state/postcode tokens is the city.
                    int trailing = Math.Min(tokens.Length, 2);
                    res.City = string.Join(" ", tokens, 0, tokens.Length - trailing).Trim();
                }
                break;
            }

            default:
            {
                // Layout: ..., <city> <zip>, <region>
                res.Region = parts[parts.Length - 1].Trim();
                if (parts.Length > 2)
                {
                    string[] cityZip = parts[parts.Length - 2].Trim().Split(whitespace, StringSplitOptions.RemoveEmptyEntries);
                    if (cityZip.Length > 0)
                    {
                        res.City = cityZip[0].Trim();
                    }
                    if (cityZip.Length > 1)
                    {
                        res.ZipCode = cityZip[1].Trim();
                    }
                }
                break;
            }
            }
        }