/// <summary>
/// Downloads an Australian listing detail page and copies the fields found in its
/// markup into a new <see cref="MapScrapeResult"/>.
/// </summary>
/// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
/// <returns>
/// The result object. Only <c>AdUrl</c> is populated when the scraper has been
/// terminated or the download reports failure.
/// </returns>
protected MapScrapeResult ParseDetailsAustralia(string url)
{
    MapScrapeResult details = new MapScrapeResult();
    details.AdUrl = url;

    if (this.terminated)
    {
        return details;
    }

    string html;
    if (!this.downloader.DownloadUrl(url, out html))
    {
        return details;
    }

    details.Headline = base.ExtractValue(html, "<h1>(?<value>.*?)</h1>");

    // Region, city and postcode are read from the raw address markup, so they are
    // extracted before the tags are stripped from Address further down.
    details.Address = base.ExtractValue(html, "<div class=\"adr\">(?<value>.*?)</div>");
    details.Region = base.ExtractValue(details.Address, "<span class=\"region\".*?>(?<value>.*?)</span>");
    details.City = base.ExtractValue(details.Address, "<span.*?class=\"locality\".*?>(?<value>.*?)</span>");
    details.ZipCode = base.ExtractValue(details.Address, "<span.*?class=\"postal-code\".*?>(?<value>.*?)</span>");

    string plainAddress = UrlDownloader.SkipHtmlTags(details.Address);
    plainAddress = plainAddress.Replace("\t", string.Empty);
    plainAddress = plainAddress.Replace("\n", string.Empty);
    details.Address = plainAddress.Trim();

    details.Latitude = base.ExtractValue(html, "<span class=\"latitude\">(?<value>.*?)</span>");
    details.Longitude = base.ExtractValue(html, "<span class=\"longitude\">(?<value>.*?)</span>");
    details.Map = string.Format(
        "http://maps.yahoo.com/maps_result?ard=1&lat={0}&lon={1}&zoom=18",
        details.Latitude,
        details.Longitude);

    // Website link and phone number are both looked up inside the "pageTools" list.
    string pageTools = base.ExtractValue(html, "<ul class=\"pageTools\">(?<value>.*?)</ul>");
    details.Website = base.ExtractValue(pageTools, "href=\"(?<value>.*?)\"");
    string phone = base.ExtractValue(pageTools, "<span class=\"tl-phone-full\">(?<value>.*?)</span>");
    details.Phone = phone.Trim();

    details.Email = base.ExtractEmails(html).FirstOrDefault<string>();
    return details;
}
/// <summary>
/// Scans a US search-results page for listings, loads each listing's detail page and
/// appends every accepted listing to <paramref name="result"/>.
/// </summary>
/// <param name="text">Markup of the search-results page.</param>
/// <param name="result">Shared result list; additions are made while holding its lock.</param>
/// <param name="Location">Not used by this method.</param>
/// <param name="Category">Category whose <c>Name</c> is stamped on every listing.</param>
/// <returns>
/// <c>false</c> as soon as termination is observed; otherwise whether the page
/// contains the <c>&lt;li class="next"&gt;</c> marker.
/// </returns>
protected bool ProcessUSA(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex listingRegex = new Regex("<h3 class=\"business-name fn org\">.*?<a.*?href=\"(?<href>.*?)\".*?</a>.*?</h3>.*?<div class=\"distance\">(?<radius>.*?)</div>", options);

    foreach (Match match in listingRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        MapScrapeResult listing = this.ParseDetailsUSA(match.Groups["href"].Value);
        listing.Radius = UrlDownloader.SkipHtmlTags(match.Groups["radius"].Value).Trim();
        listing.Category = Category.Name;

        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(listing.Website))
        {
            string website;
            base.WalkWebsite(listing.Website, listing, out website);
            listing.Website = website;
        }

        // Checked again because WalkWebsite receives the listing and may have changed it.
        // NOTE(review): this check runs outside the lock taken for Add below, so two
        // threads could both pass it for equivalent listings - confirm whether
        // CanAddRes synchronises internally.
        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        lock (result)
        {
            result.Add(listing);
        }

        if (this.terminated)
        {
            return false;
        }

        if (base.Callback != null)
        {
            base.Callback.Process(listing);
        }
    }

    // Ordinal search: the marker is literal markup, not linguistic text.
    return text.IndexOf("<li class=\"next\">", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Scans a Canadian search-results page for listings, loads each listing's detail
/// page and appends every accepted listing to <paramref name="result"/>.
/// </summary>
/// <param name="text">Markup of the search-results page.</param>
/// <param name="result">Shared result list; additions are made while holding its lock.</param>
/// <param name="Location">Not used by this method.</param>
/// <param name="Category">Category whose <c>Name</c> is stamped on every listing.</param>
/// <returns>
/// <c>false</c> as soon as termination is observed; otherwise whether the page
/// contains a <c>&gt;Next&lt;/a&gt;</c> link.
/// </returns>
protected bool ProcessCanada(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex listingRegex = new Regex("<div class=\"listingDetail\".*?>.*?<h3 class=\"listingTitleLine\">.*?href=\"(?<href>.*?)\".*?</h3>.*?<h4 class=\"phoneLink\">(?<phone>.*?)</h4>", options);

    foreach (Match match in listingRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        // The href captured from the results page is appended to the site root.
        string detailUrl = string.Format("http://www.yellowpages.ca{0}", match.Groups["href"].Value);
        MapScrapeResult listing = this.ParseDetailsCanada(detailUrl);

        // The phone shown on the results page replaces whatever the detail page produced.
        listing.Phone = UrlDownloader.SkipHtmlTags(match.Groups["phone"].Value);
        listing.Category = Category.Name;

        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(listing.Website))
        {
            string website;
            base.WalkWebsite(listing.Website, listing, out website);
            listing.Website = website;
        }

        // Checked again because WalkWebsite receives the listing and may have changed it.
        // NOTE(review): this check runs outside the lock taken for Add below - confirm
        // whether CanAddRes synchronises internally.
        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        lock (result)
        {
            result.Add(listing);
        }

        if (this.terminated)
        {
            return false;
        }

        if (base.Callback != null)
        {
            base.Callback.Process(listing);
        }
    }

    // Ordinal search: the marker is literal markup, not linguistic text.
    return text.IndexOf(">Next</a>", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Downloads a Canadian listing detail page and copies the fields found in its
/// markup into a new <see cref="MapScrapeResult"/>.
/// </summary>
/// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
/// <returns>
/// The result object. Only <c>AdUrl</c> is populated when the scraper has been
/// terminated or the download reports failure.
/// </returns>
protected MapScrapeResult ParseDetailsCanada(string url)
{
    MapScrapeResult details = new MapScrapeResult();
    details.AdUrl = url;

    if (this.terminated)
    {
        return details;
    }

    string page;
    if (!this.downloader.DownloadUrl(url, out page))
    {
        return details;
    }

    // These four values are looked up in the whole page.
    details.Website = base.ExtractValue(page, "\"webURL\":{.*?\"href\":\"(?<value>.*?)\"").Trim();
    details.Map = base.ExtractValue(page, "<a id=\"interactiveMapPrint\" href='(?<value>.*?)'.*?>").Trim();
    details.Latitude = base.ExtractValue(page, "\"latitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");
    details.Longitude = base.ExtractValue(page, "\"longitude\":\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");

    // Everything below is looked up only inside the "busCardLeft" fragment.
    string card = base.ExtractValue(page, "<div id=\"busCardLeft\">(?<value>.*?)</div>");
    details.Phone = base.ExtractPhones(card).FirstOrDefault<string>();
    details.Email = base.ExtractEmails(card).FirstOrDefault<string>();

    string headlineMarkup = base.ExtractValue(card, "<h1 id=\"ypBusCardBusName\">(?<value>.*?)</h1>");
    details.Headline = UrlDownloader.SkipHtmlTags(headlineMarkup).Trim();
    details.Address = base.ExtractValue(card, "<p>(?<value>.*?)</p>").Trim();

    // The last comma-separated piece is split at its first space into region and
    // postal code; the piece before it is taken as the city.
    string[] pieces = details.Address.Split(new char[] { ',' });
    int last = pieces.Length - 1;
    if (pieces.Length > 0)
    {
        string tail = pieces[last].Trim();
        int space = tail.IndexOf(' ');
        if (space >= 0)
        {
            details.Region = tail.Substring(0, space).Trim();
            details.ZipCode = tail.Substring(space + 1).Trim();
        }
    }

    if (pieces.Length > 1)
    {
        details.City = pieces[last - 1].Trim();
    }

    // The extracted map link is appended to the site root; an empty link stays empty.
    if (!string.IsNullOrEmpty(details.Map))
    {
        details.Map = string.Format("http://www.yellowpages.ca{0}", details.Map);
    }

    return details;
}
/// <summary>
/// Downloads a US listing detail page and copies the fields found in its markup
/// into a new <see cref="MapScrapeResult"/>.
/// </summary>
/// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
/// <returns>
/// The result object. Only <c>AdUrl</c> is populated when the scraper has been
/// terminated or the download reports failure.
/// </returns>
protected MapScrapeResult ParseDetailsUSA(string url)
{
    MapScrapeResult details = new MapScrapeResult();
    details.AdUrl = url;

    if (this.terminated)
    {
        return details;
    }

    string page;
    if (!this.downloader.DownloadUrl(url, out page))
    {
        return details;
    }

    details.Headline = UrlDownloader.SkipHtmlTags(base.ExtractValue(page, "<h1 class=\"fn org\">(?<value>.*?)</h1>")).Trim();

    // Prefer the structured "primary-location" paragraph; when it yields nothing,
    // fall back to the single address span, which gives no city/region/zip breakdown.
    string location = base.ExtractValue(page, "<p class=\"primary-location\">(?<value>.*?)</p>");
    if (string.IsNullOrEmpty(location))
    {
        details.Address = base.ExtractValue(page, "<span class=\"listing-address adr\">(?<value>.*?)</span>").Trim();
    }
    else
    {
        details.Address = base.ExtractValue(location, "<span class=\"street-address\">(?<value>.*?)</span>").Trim();
        details.City = base.ExtractValue(location, "<span class=\"locality\">(?<value>.*?)</span>");
        details.Region = base.ExtractValue(location, "<span class=\"region\">(?<value>.*?)</span>");
        details.ZipCode = base.ExtractValue(location, "<span class=\"postal-code\">(?<value>.*?)</span>");
    }

    details.Email = base.ExtractEmails(page).FirstOrDefault<string>();
    details.Phone = base.ExtractValue(page, "<p class=\"phone\">(?<value>.*?)</p>");

    string featureLinks = base.ExtractValue(page, "<ul class=\"feature-links blue-arrow\">(?<value>.*?)</ul>").Trim();
    details.Website = base.ExtractValue(featureLinks, "href=\"(?<value>.*?)\".*?Visit Website</a>").Trim();

    // A mailto: link is not a website. URI schemes are case-insensitive, so the test
    // is ordinal and ignores case instead of relying on the current culture.
    if (details.Website.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase))
    {
        details.Website = string.Empty;
    }

    details.Map = string.Format(
        "http://www.yellowpages.com{0}",
        base.ExtractValue(page, "<div id=\"mip-minimap\">.*?href=\"(?<value>.*?)\".*?</div>").Trim());
    details.Latitude = base.ExtractValue(page, "<span class=\"latitude\" id=\"map-latitude\">(?<value>.*?)</span>");
    details.Longitude = base.ExtractValue(page, "<span class=\"longitude\" id=\"map-longitude\">(?<value>.*?)</span>");

    return details;
}
/// <summary>
/// Downloads a UK listing detail page and copies the fields found in its markup
/// into a new <see cref="MapScrapeResult"/>.
/// </summary>
/// <param name="url">Address of the detail page; always stored in <c>AdUrl</c>.</param>
/// <returns>
/// The result object. Only <c>AdUrl</c> is populated when the scraper has been
/// terminated or the download reports failure.
/// </returns>
protected MapScrapeResult ParseDetailsUK(string url)
{
    MapScrapeResult details = new MapScrapeResult();
    details.AdUrl = url;

    if (this.terminated)
    {
        return details;
    }

    string page;
    if (!this.downloader.DownloadUrl(url, out page))
    {
        return details;
    }

    details.Headline = base.ExtractValue(page, "<h1 class=\"org fn n\">(?<value>.*?)</h1>");
    details.Address = base.ExtractValue(page, "<p class=\"street-address\">(?<value>.*?)</p>");
    details.ZipCode = base.ExtractValue(page, "<span class=\"postal-code\">(?<value>.*?)</span>");

    string plainAddress = UrlDownloader.SkipHtmlTags(details.Address).Trim();

    // Break the address at tabs, newlines and commas. Each piece is trimmed before
    // use so the city no longer carries the whitespace that follows a separator.
    List<string> pieces = new List<string>();
    string[] separators = new string[] { "\t", "\n", "," };
    foreach (string piece in plainAddress.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        string trimmed = piece.Trim();
        if (trimmed.Length > 0)
        {
            pieces.Add(trimmed);
        }
    }

    details.Address = plainAddress.Replace("\t", string.Empty).Replace("\n", string.Empty);

    // The second non-blank piece of the address is taken as the city.
    if (pieces.Count > 1)
    {
        details.City = pieces[1];
    }

    string geo = base.ExtractValue(page, "<p class=\"geo\">(?<value>.*?)</p>");
    details.Latitude = base.ExtractValue(geo, "<span class=\"latitude\">(?<value>.*?)</span>");
    details.Longitude = base.ExtractValue(geo, "<span class=\"longitude\">(?<value>.*?)</span>");
    details.Map = base.ExtractValue(page, "<div id=\"ent-page-md-links\">.*?href=\"(?<value>.*?)\".*?</div>");
    details.Email = base.ExtractEmails(page).FirstOrDefault<string>();
    details.Phone = base.ExtractValue(page, "<h2 class=\"tel\">.*?<span class=\"value\">(?<value>.*?)</span>").Trim();

    return details;
}
/// <summary>
/// Builds the search-results URL for the given location, category, keyword and
/// page, and records the target country in <c>this.country</c>.
/// </summary>
/// <param name="scLocation">
/// Location; its <c>Meta</c> value selects the site ("canada", "australia", "uk",
/// anything else means USA) and its <c>Url</c> supplies the location term.
/// </param>
/// <param name="scCategory">Category whose <c>Url</c> supplies the search term.</param>
/// <param name="keyword">Optional extra keyword; passed through <c>UrlDownloader.UrlEncode</c>.</param>
/// <param name="pageNum">Result page number. The Canadian URL templates have no page placeholder.</param>
/// <returns>The search URL, with a radius refinement appended when <c>base.Radius</c> is positive.</returns>
protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
{
    string location = scLocation.Url.Replace(" ", "+");
    string category = scCategory.Url.Replace(" ", "+");
    keyword = UrlDownloader.UrlEncode(keyword);
    bool hasKeyword = !string.IsNullOrEmpty(keyword);

    string url;
    switch (scLocation.Meta ?? string.Empty)
    {
        case "canada":
            this.country = AbstractScraper.Country.Canada;
            category = UrlDownloader.SkipHtmlTags(category.Replace('-', ' '));
            url = hasKeyword
                ? string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}+{1}&where={2}", category, keyword, location)
                : string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}&where={1}", category, location);
            break;

        case "australia":
            // Keep-alive is switched off on the downloader for this site.
            if (base.Downloader != null)
            {
                base.Downloader.KeepAlive = false;
            }

            this.country = AbstractScraper.Country.Australia;
            url = hasKeyword
                ? string.Format("http://www.yellowpages.com.au/search/listings?clue={1}+{3}&locationClue={0}&x=0&y=0&pageNumber={2}", new object[] { location, category, pageNum, keyword })
                : string.Format("http://www.yellowpages.com.au/search/listings?clue={1}&locationClue={0}&x=0&y=0&pageNumber={2}", location, category, pageNum);
            break;

        case "uk":
            this.country = AbstractScraper.Country.UK;
            url = hasKeyword
                ? string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}+{1}&location={2}&pageNum={3}", new object[] { category, keyword, location, pageNum })
                : string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}&location={1}&pageNum={2}", category, location, pageNum);
            break;

        default:
            url = hasKeyword
                ? string.Format("http://www.yellowpages.com/{0}/{1}?q={2}&page={3}", new object[] { location, category, keyword, pageNum })
                : string.Format("http://www.yellowpages.com/{0}/{1}?page={2}", location, category, pageNum);
            this.country = AbstractScraper.Country.USA;
            break;
    }

    // The radius refinement is appended for every country. It is formatted with the
    // invariant culture so the decimal separator in the query string is always '.'.
    if (base.Radius > 0f)
    {
        url += string.Format(
            "&refinements[radius]={0}",
            base.Radius.ToString("#0.##", System.Globalization.CultureInfo.InvariantCulture));
    }

    return url;
}
/// <summary>
/// Scans a UK search-results page for adverts, builds a listing from each advert's
/// markup (no detail page is downloaded here) and appends every accepted listing
/// to <paramref name="result"/>.
/// </summary>
/// <param name="text">Markup of the search-results page.</param>
/// <param name="result">Shared result list; additions are made while holding its lock.</param>
/// <param name="Location">Not used by this method.</param>
/// <param name="Category">Category whose <c>Name</c> is stamped on every listing.</param>
/// <returns>
/// <c>false</c> as soon as termination is observed; otherwise whether the page
/// contains a <c>&gt;Next&lt;/a&gt;</c> link.
/// </returns>
protected bool ProcessUK(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex advertRegex = new Regex("<div class=\"advert-content\">(?<text>.*?)</div>.*?(?<cta><div class=\"advert-cta\">.*?</div>){1}.*?<ul class=\"tabbed\">(?<tab>.*?)</ul>{1}", options);

    foreach (Match match in advertRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        string content = match.Groups["text"].Value;
        string callToAction = match.Groups["cta"].Value;
        string tabs = match.Groups["tab"].Value;

        MapScrapeResult listing = new MapScrapeResult();
        listing.AdUrl = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"summaryTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
        listing.Map = string.Format("http://www.yell.com{0}", base.ExtractValue(tabs, "<li class=\"mapTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
        listing.Phone = UrlDownloader.SkipHtmlTags(base.ExtractValue(callToAction, "<ul class=\"(tel-single|tel-multiple)\">.*?<strong>(?<value>.*?)</strong>.*?</ul>")).Trim();

        // The website link is read from the headline markup, so it is extracted
        // before the tags are stripped from Headline.
        listing.Headline = base.ExtractValue(content, "<h2 class=\"coName\">(?<value>.*?)</h2>");
        listing.Website = base.ExtractValue(listing.Headline, "href='(?<value>.*?)'").Trim();
        listing.Headline = UrlDownloader.SkipHtmlTags(listing.Headline).Trim();

        listing.Address = UrlDownloader.SkipHtmlTags(base.ExtractValue(content, "<p class=\"address\">(?<value>.*?)</p>")).Trim();
        listing.ZipCode = base.ExtractValue(listing.Address, "(?<value>[A-Z]{1,2}[0-9R][0-9A-Z]?\\s*?[0-9][ABD-HJLNP-UW-Z]{2})");

        // Region is the last comma-separated piece and city the one before it; any
        // postcode embedded in either piece is removed. string.Replace rejects an
        // empty search string, hence the guard.
        bool hasZip = !string.IsNullOrEmpty(listing.ZipCode);
        string[] pieces = listing.Address.Split(new char[] { ',' });
        if (pieces.Length > 0)
        {
            string region = pieces[pieces.Length - 1];
            listing.Region = (hasZip ? region.Replace(listing.ZipCode, string.Empty) : region).Trim();
        }

        if (pieces.Length > 1)
        {
            string city = pieces[pieces.Length - 2];
            listing.City = (hasZip ? city.Replace(listing.ZipCode, string.Empty) : city).Trim();
        }

        if (this.terminated)
        {
            return false;
        }

        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        if (!string.IsNullOrEmpty(listing.Website))
        {
            string website;
            base.WalkWebsite(listing.Website, listing, out website);
            listing.Website = website;
        }

        listing.Category = Category.Name;

        // Checked again because WalkWebsite receives the listing and may have changed it.
        // NOTE(review): this check runs outside the lock taken for Add below - confirm
        // whether CanAddRes synchronises internally.
        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        lock (result)
        {
            result.Add(listing);
        }

        if (this.terminated)
        {
            return false;
        }

        if (base.Callback != null)
        {
            base.Callback.Process(listing);
        }
    }

    // Ordinal search: the marker is literal markup, not linguistic text.
    return text.IndexOf(">Next</a>", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Downloads a place page and copies the fields found in its markup into a new
/// <see cref="MapScrapeResult"/>.
/// </summary>
/// <param name="url">Address of the place page; always stored in <c>AdUrl</c>.</param>
/// <param name="isRealEstate">
/// When <c>true</c>, the headline is prepended to the address (", UK" is appended
/// for the UK) and coordinates are read from <c>lat:</c>/<c>lng:</c> values instead
/// of the <c>latitude_e6</c>/<c>longitude_e6</c> values.
/// </param>
/// <returns>
/// The result object. Only <c>AdUrl</c> is populated when the download reports
/// failure or termination is observed after the download.
/// </returns>
protected MapScrapeResult ParseDetails(string url, bool isRealEstate)
{
    MapScrapeResult details = new MapScrapeResult();
    details.AdUrl = url;

    string page;
    if (!this.downloader.DownloadUrl(url, out page))
    {
        return details;
    }

    if (this.terminated)
    {
        return details;
    }

    details.Headline = UrlDownloader.SkipHtmlTags(base.ExtractValue(page, "<span class=\"*?pp-place-title\"*?>(?<value>.*?)</span>")).Trim();
    details.Address = UrlDownloader.SkipHtmlTags(base.ExtractValue(page, "<span.*?class=\"*?pp-headline-item pp-headline-address\"*?.*?>(?<value>.*?)</span>")).Trim();

    if (isRealEstate)
    {
        if (details.Address.Trim().Length > 0)
        {
            details.Address = details.Headline + "," + details.Address;
        }
        else
        {
            details.Address = details.Headline;
        }

        if (this.country == AbstractScraper.Country.UK)
        {
            details.Address += ", UK";
        }
    }

    details.Phone = base.ExtractValue(page, "<span.*?class=\"*?telephone\"*?.*?>.*?<nobr>(?<value>.*?)</nobr>").Trim();

    string compactStory = base.ExtractValue(page, "<div.*?class=\"*?pp-compact-story\"*?.*?>(?<value>.*?)</div>");
    details.Map = base.ExtractValue(compactStory, "src=\"*?(?<value>.*?)\"*?\\s");

    string emailStory = base.ExtractValue(page, "<div.*?class=\"*?pp-story\"*?.*?>.*?Email.*?(?<value>.*?)</div>");
    details.Email = base.ExtractEmails(emailStory).FirstOrDefault<string>();

    if (isRealEstate)
    {
        // NOTE(review): the result of this call is discarded. It is kept only in case
        // ExtractValue has side effects - confirm and delete if it is a pure lookup.
        base.ExtractValue(page, "latlng:\\{(?<value>.*?)\\}");
        details.Latitude = base.ExtractValue(page, "lat:(?<value>[-+]?[0-9]*\\.?[0-9]+)");
        details.Longitude = base.ExtractValue(page, "lng:\\s*?(?<value>[-+]?[0-9]*\\.?[0-9]+)");
    }
    else
    {
        // The *_e6 values are whole numbers scaled by 10^6 (assumed from the "_e6"
        // suffix - confirm against a live page), so the decimal point belongs six
        // digits from the end. The previous code put it two digits from the front,
        // which broke any coordinate whose integer part is not exactly two digits.
        details.Latitude = InsertE6DecimalPoint(base.ExtractValue(page, "latitude_e6:(?<value>.*?),").Trim());
        details.Longitude = InsertE6DecimalPoint(base.ExtractValue(page, "longitude_e6:(?<value>.*?),").Trim());
    }

    base.ParseAddress(details);
    details.Radius = base.Radius.ToString();
    details.Website = base.ExtractValue(page, "<span.*?class=\"?pp-authority-page\"?>.*?<a.*?href=\"?(?<value>.*?)\"?(\\s|>)").Trim();

    return details;
}

/// <summary>
/// Turns a signed whole-number string scaled by 10^6 into a decimal string by placing
/// the decimal point six digits from the end, zero-padding short values.
/// Input that is empty or not an optionally signed run of digits is returned unchanged.
/// </summary>
private static string InsertE6DecimalPoint(string raw)
{
    if (string.IsNullOrEmpty(raw))
    {
        return raw;
    }

    string sign = string.Empty;
    string digits = raw;
    if (raw[0] == '-' || raw[0] == '+')
    {
        sign = raw.Substring(0, 1);
        digits = raw.Substring(1);
    }

    if (digits.Length == 0)
    {
        return raw;
    }

    foreach (char c in digits)
    {
        if (c < '0' || c > '9')
        {
            return raw;
        }
    }

    // Guarantee at least one digit in front of the decimal point.
    if (digits.Length <= 6)
    {
        digits = digits.PadLeft(7, '0');
    }

    return sign + digits.Insert(digits.Length - 6, ".");
}
/// <summary>
/// Adds a row for the given scrape result to the list view, re-dispatching itself
/// through <c>listView.Invoke</c> when <c>InvokeRequired</c> is set.
/// </summary>
/// <param name="scr">
/// Result to display. Ignored unless it is a <see cref="SimpleScrapeResult"/>.
/// </param>
public override void Process(IScrapeResult scr)
{
    // Index of the AdUrl entry in the column array built below.
    const int AdUrlColumn = 8;

    SimpleScrapeResult entry = scr as SimpleScrapeResult;
    if (entry == null)
    {
        return;
    }

    if (this.listView == null)
    {
        return;
    }

    if (this.listView.InvokeRequired)
    {
        ScrapeResultCallback.ProcessDelegate reentry = new ScrapeResultCallback.ProcessDelegate(this.Process);
        try
        {
            this.listView.Invoke(reentry, new object[] { entry });
        }
        catch
        {
            // Any failure of the cross-thread call is ignored; the row is simply not added.
        }

        return;
    }

    // Comma-separated list of all e-mail addresses.
    string emailList = string.Empty;
    for (int i = 0; i < entry.Emails.Count; i++)
    {
        if (i > 0)
        {
            emailList += ", ";
        }

        emailList += entry.Emails[i];
    }

    // Comma-separated list of all phone numbers found in the body.
    string phoneList = string.Empty;
    for (int i = 0; i < entry.PhonesInBody.Count; i++)
    {
        if (i > 0)
        {
            phoneList += ", ";
        }

        phoneList += entry.PhonesInBody[i];
    }

    string[] columns = new string[]
    {
        entry.Category,
        entry.Location,
        entry.Headline,
        UrlDownloader.SkipHtmlTags(entry.Description),
        entry.Email,
        emailList,
        phoneList,
        entry.DatePosted,
        entry.AdUrl
    };

    ListViewItem row = new ListViewItem(columns);
    row.Tag = entry;

    // Rows whose e-mail was already sent get a highlighted background on every cell.
    if (entry.IsEmailSent)
    {
        foreach (ListViewItem.ListViewSubItem cell in row.SubItems)
        {
            cell.BackColor = Color.GreenYellow;
        }
    }

    row.Checked = entry.IsSelected;

    // The underlined font is derived from the URL cell's font before the row is
    // added to the list view, and applied to that cell afterwards.
    ListViewItem.ListViewSubItem urlCell = row.SubItems[AdUrlColumn];
    Font underlined = new Font(urlCell.Font, urlCell.Font.Style | FontStyle.Underline);

    this.listView.Items.Add(row);

    row.UseItemStyleForSubItems = false;
    urlCell.Font = underlined;
    // NOTE(review): the first argument of this overload is alpha, so this colour has
    // alpha 0 with blue 255 - confirm an opaque blue was not intended.
    urlCell.ForeColor = Color.FromArgb(0, 0, 0, 255);

    // NOTE(review): CausesValidation is flipped on every added row; purpose is not
    // visible from this method - confirm before changing.
    this.listView.CausesValidation = !this.listView.CausesValidation;
}