/// <summary>
/// Scans a keyword-search results page for dated listing rows and stores every listing whose
/// date lies inside the <c>LowDate</c>/<c>HighDate</c> window.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location; its <c>Name</c> is copied onto each result.</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> as soon as a row dated before <c>LowDate</c> is met or the scraper is terminated;
/// otherwise whether the page contains the "Next" pager marker.
/// </returns>
protected override bool ProcessKeywordRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

    MatchCollection rows = new Regex("<p class=\"row\">.*?<span.*?>.*?</span>(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", flags).Matches(text);
    if (rows.Count <= 0)
    {
        // Nothing matched the strict row pattern: retry with the looser paragraph pattern.
        rows = new Regex("<p.*?>.*?(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", flags).Matches(text);
    }

    foreach (Match row in rows)
    {
        DateTime posted = this.ParseDate(row.Groups["date"].Value.Trim());
        if (posted < base.LowDate)
        {
            // A row older than the window ends processing of this page and of the paging.
            return false;
        }
        if (posted > base.HighDate)
        {
            continue;
        }

        SimpleScrapeResult listing = this.ParseDetails(row.Groups["href"].Value);
        listing.Location = Location.Name;
        listing.Category = Category.Name;
        // Drop the value held in Email from the Emails collection.
        listing.Emails.Remove(listing.Email);

        if (this.terminated)
        {
            return false;
        }
        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        lock (result)
        {
            result.Add(listing);
        }
        if (base.Callback != null)
        {
            base.Callback.Process(listing);
        }
    }

    return text.IndexOf("<b>Next >></b>") >= 0;
}
/// <summary>
/// Walks the listing rows of a results page, loads the details of each listing through
/// <c>ParseDetailsUSA</c> and stores the ones accepted by <c>CanAddRes</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location (not read by this method).</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> when the scraper is terminated while rows are processed; otherwise whether the
/// page contains the "Next" pager marker.
/// </returns>
protected bool ProcessUSA(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex rowRegex = new Regex("<tr class=\"yls-rs-listinfo\">.*?href=\"(?<href>.*?)\".*?</tr>", flags);
    Regex distanceRegex = new Regex("<td class=\"distance\">(?<radius>.*?)</td>", flags);

    foreach (Match row in rowRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        MapScrapeResult entry = this.ParseDetailsUSA(row.Groups["href"].Value);

        // The distance cell is looked up inside the matched row only.
        Match distance = distanceRegex.Match(row.Value);
        if (distance.Success)
        {
            entry.Radius = UrlDownloader.SkipHtmlTags(distance.Groups["radius"].Value).Trim();
        }
        entry.Category = Category.Name;

        if (!this.CanAddRes(result, entry))
        {
            continue;
        }
        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(entry.Website))
        {
            string resolvedWebsite;
            base.WalkWebsite(entry.Website, entry, out resolvedWebsite);
            entry.Website = resolvedWebsite;
        }

        // Checked a second time after the website walk, which receives the entry itself.
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        lock (result)
        {
            result.Add(entry);
        }
        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(entry);
        }
    }

    return text.IndexOf("Next <span>»") >= 0;
}
/// <summary>
/// Routes a results page to the country-specific parser selected by the <c>country</c> field.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list handed on to the selected parser.</param>
/// <param name="Location">Search location handed on to the selected parser.</param>
/// <param name="Category">Search category handed on to the selected parser.</param>
/// <returns>The value returned by the selected parser.</returns>
protected override bool ProcessSimpleRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    AbstractScraper.Country target = this.country;

    if (target == AbstractScraper.Country.Canada)
    {
        return this.ProcessCanada(text, result, Location, Category);
    }
    if (target == AbstractScraper.Country.UK)
    {
        return this.ProcessUK(text, result, Location, Category);
    }
    if (target == AbstractScraper.Country.Australia)
    {
        return this.ProcessAustralia(text, result, Location, Category);
    }

    // Every other value is handled by the USA parser.
    return this.ProcessUSA(text, result, Location, Category);
}
/// <summary>
/// Walks the organisation links of a results page, loads each one through
/// <c>ParseDetailsAustralia</c> and stores the entries accepted by <c>CanAddRes</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location (not read by this method).</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> when the scraper is terminated while links are processed; otherwise whether the
/// page contains the next-page link marker.
/// </returns>
protected bool ProcessAustralia(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex linkRegex = new Regex("<a class=\"org\" href=\"/(?<href>.*?)\">", flags);

    foreach (Match link in linkRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        // The captured href has no leading slash; it is appended to the site root.
        string detailsUrl = string.Format("{0}{1}", "http://local.yahoo.com.au/", link.Groups["href"].Value);
        MapScrapeResult entry = this.ParseDetailsAustralia(detailsUrl);
        entry.Category = Category.Name;
        entry.Radius = 0.ToString();

        if (!this.CanAddRes(result, entry))
        {
            continue;
        }
        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(entry.Website))
        {
            string resolvedWebsite;
            base.WalkWebsite(entry.Website, entry, out resolvedWebsite);
            entry.Website = resolvedWebsite;
        }

        // Checked a second time after the website walk, which receives the entry itself.
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        lock (result)
        {
            result.Add(entry);
        }
        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(entry);
        }
    }

    return text.IndexOf("\"TL_pagelink_next\"") >= 0;
}
/// <summary>
/// Handles a keyword-search page exactly like a simple-search page by forwarding all
/// arguments to <c>ProcessSimpleRequest</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list handed on unchanged.</param>
/// <param name="Location">Search location handed on unchanged.</param>
/// <param name="Category">Search category handed on unchanged.</param>
/// <returns>The value returned by <c>ProcessSimpleRequest</c>.</returns>
protected override bool ProcessKeywordRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    bool outcome = this.ProcessSimpleRequest(text, result, Location, Category);
    return outcome;
}
/// <summary>
/// Parses the ads of a results page, choosing the ad pattern from the state of the
/// "galleryoff" input, and stores every ad dated inside the <c>LowDate</c>/<c>HighDate</c> window.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location; its <c>Name</c> is copied onto each result.</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> when the page reports no results, an ad dated before <c>LowDate</c> is met or the
/// scraper is terminated; otherwise whether the page contains the "Next" pagination label.
/// </returns>
protected override bool ProcessKeywordRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    bool reportsNoResults = text.IndexOf("0 results found in") >= 0 || text.IndexOf("No Results") >= 0;
    if (reportsNoResults)
    {
        return false;
    }

    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

    // The toggle input is searched with default regex options, unlike the ad patterns below.
    Match toggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
    bool toggleChecked = toggle.Success && toggle.Value.IndexOf("checked") >= 0;

    Regex adRegex;
    if (toggleChecked)
    {
        adRegex = new Regex("<div class=\"gallery-ad-title\"><a href=\"(?<href>.*?)\".*?<div class=\"post-date\">Posted: (?<date>.*?)</div>", flags);
    }
    else
    {
        adRegex = new Regex("<li>.*?href=\"(?<href>.*?)\".*?<em>.*?</em>.*?- (?<date>.*?)</li>", flags);
    }

    foreach (Match ad in adRegex.Matches(text))
    {
        DateTime posted = this.ParseDate(ad.Groups["date"].Value);
        if (posted < base.LowDate)
        {
            return false;
        }
        if (posted > base.HighDate)
        {
            continue;
        }

        SimpleScrapeResult listing = this.ParseDetails(ad.Groups["href"].Value);
        listing.Location = Location.Name;
        listing.Category = Category.Name;

        // When Email is empty it is filled from the first entry of Emails; the value held in
        // Email is then removed from Emails.
        if (string.IsNullOrEmpty(listing.Email))
        {
            listing.Email = listing.Emails.FirstOrDefault<string>();
        }
        listing.Emails.Remove(listing.Email);

        if (this.terminated)
        {
            return false;
        }
        if (!this.CanAddRes(result, listing))
        {
            continue;
        }

        lock (result)
        {
            result.Add(listing);
        }
        if (base.Callback != null)
        {
            base.Callback.Process(listing);
        }
    }

    return text.IndexOf("<span class=\"pagination-label\">Next</span>") >= 0;
}
/// <summary>
/// Walks the address blocks of a results page, loads each listing through
/// <c>ParseDetailsUK</c> and stores the entries accepted by <c>CanAddRes</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location (not read by this method).</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> when the scraper is terminated while blocks are processed; otherwise whether the
/// page contains the "Next" pager marker.
/// </returns>
protected bool ProcessUK(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex blockRegex = new Regex("<div class=\"addr \">.*?<h2>(?<txt>.*?)</h2>.*?<span class=\"note\">(?<radius>.*?)<span>", flags);

    foreach (Match block in blockRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        string relativeLink = base.ExtractValue(block.Groups["txt"].Value, "href=\"(?<value>.*?)\"");
        MapScrapeResult entry = this.ParseDetailsUK(string.Format("{0}{1}", "http://uk.local.yahoo.com", relativeLink));
        entry.Radius = block.Groups["radius"].Value;

        // Region is the first non-empty path segment of the link, with underscores shown as spaces.
        string[] separators = new string[] { "/" };
        entry.Region = relativeLink.Split(separators, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault<string>();
        if (!string.IsNullOrEmpty(entry.Region))
        {
            entry.Region = entry.Region.Replace('_', ' ');
        }
        entry.Category = Category.Name;

        if (!this.CanAddRes(result, entry))
        {
            continue;
        }
        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(entry.Website))
        {
            string resolvedWebsite;
            base.WalkWebsite(entry.Website, entry, out resolvedWebsite);
            entry.Website = resolvedWebsite;
        }

        // Checked a second time after the website walk, which receives the entry itself.
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        lock (result)
        {
            result.Add(entry);
        }
        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(entry);
        }
    }

    return text.IndexOf("Next</a></li>") >= 0;
}
/// <summary>
/// Builds a <c>MapScrapeResult</c> for every advert on a results page directly from the page
/// markup (links, phone, headline, website, address, postcode, region, city) and stores the
/// entries accepted by <c>CanAddRes</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location (not read by this method).</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each accepted result.</param>
/// <returns>
/// <c>false</c> when the scraper is terminated while adverts are processed; otherwise whether
/// the page contains the "Next" pager marker.
/// </returns>
protected bool ProcessUK(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex advertRegex = new Regex("<div class=\"advert-content\">(?<text>.*?)</div>.*?(?<cta><div class=\"advert-cta\">.*?</div>){1}.*?<ul class=\"tabbed\">(?<tab>.*?)</ul>{1}", flags);

    foreach (Match advert in advertRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        string contentHtml = advert.Groups["text"].Value;
        string ctaHtml = advert.Groups["cta"].Value;
        string tabsHtml = advert.Groups["tab"].Value;

        MapScrapeResult entry = new MapScrapeResult();
        entry.AdUrl = string.Format("http://www.yell.com{0}", base.ExtractValue(tabsHtml, "<li class=\"summaryTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
        entry.Map = string.Format("http://www.yell.com{0}", base.ExtractValue(tabsHtml, "<li class=\"mapTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
        entry.Phone = UrlDownloader.SkipHtmlTags(base.ExtractValue(ctaHtml, "<ul class=\"(tel-single|tel-multiple)\">.*?<strong>(?<value>.*?)</strong>.*?</ul>")).Trim();

        // The raw headline markup is kept long enough to pull the website link out of it,
        // then replaced by its tag-free text.
        entry.Headline = base.ExtractValue(contentHtml, "<h2 class=\"coName\">(?<value>.*?)</h2>");
        entry.Website = base.ExtractValue(entry.Headline, "href='(?<value>.*?)'").Trim();
        entry.Headline = UrlDownloader.SkipHtmlTags(entry.Headline).Trim();

        entry.Address = UrlDownloader.SkipHtmlTags(base.ExtractValue(contentHtml, "<p class=\"address\">(?<value>.*?)</p>")).Trim();
        entry.ZipCode = base.ExtractValue(entry.Address, "(?<value>[A-Z]{1,2}[0-9R][0-9A-Z]?\\s*?[0-9][ABD-HJLNP-UW-Z]{2})");

        // Region comes from the last comma-separated address part and City from the one before
        // it; the extracted postcode text is removed from either when one was found.
        string[] addressParts = entry.Address.Split(new char[] { ',' });
        int lastIndex = addressParts.Length - 1;
        if (lastIndex >= 0)
        {
            entry.Region = addressParts[lastIndex];
            entry.Region = entry.ZipCode.Length > 0
                ? entry.Region.Replace(entry.ZipCode, string.Empty).Trim()
                : entry.Region.Trim();
        }
        if (lastIndex >= 1)
        {
            entry.City = string.IsNullOrEmpty(entry.ZipCode)
                ? addressParts[lastIndex - 1].Trim()
                : addressParts[lastIndex - 1].Replace(entry.ZipCode, string.Empty).Trim();
        }

        if (this.terminated)
        {
            return false;
        }
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        if (!string.IsNullOrEmpty(entry.Website))
        {
            string resolvedWebsite;
            base.WalkWebsite(entry.Website, entry, out resolvedWebsite);
            entry.Website = resolvedWebsite;
        }
        entry.Category = Category.Name;

        // Checked a second time after the website walk, which receives the entry itself.
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        lock (result)
        {
            result.Add(entry);
        }
        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(entry);
        }
    }

    return text.IndexOf(">Next</a>") >= 0;
}
/// <summary>
/// Builds the search URL for the site selected by <c>scLocation.Meta</c> ("canada",
/// "australia", "uk"; any other value selects the USA site) and records the selected country
/// in the <c>country</c> field.
/// </summary>
/// <param name="scLocation">Location; <c>Url</c> supplies the location text and <c>Meta</c> the country key.</param>
/// <param name="scCategory">Category; <c>Url</c> supplies the search term.</param>
/// <param name="keyword">Optional keyword; it is passed through <c>UrlDownloader.UrlEncode</c> here.</param>
/// <param name="pageNum">Page number inserted into the URL (the Canadian URL does not carry one).</param>
/// <returns>The request URL, with a radius refinement appended when <c>Radius</c> is positive.</returns>
protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
{
    string location = scLocation.Url.Replace(" ", "+");
    string category = scCategory.Url.Replace(" ", "+");

    // NOTE(review): the Search method visible in this file already URL-encodes the keyword;
    // confirm the caller does not hand an encoded keyword to this method a second time.
    keyword = UrlDownloader.UrlEncode(keyword);
    bool noKeyword = string.IsNullOrEmpty(keyword);

    string url;
    switch (scLocation.Meta ?? string.Empty)
    {
        case "canada":
            this.country = AbstractScraper.Country.Canada;
            category = UrlDownloader.SkipHtmlTags(category.Replace('-', ' '));
            // NOTE(review): pageNum is not part of this URL, so every page request is identical.
            url = noKeyword
                ? string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}&where={1}", category, location)
                : string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}+{1}&where={2}", category, keyword, location);
            break;

        case "australia":
            if (base.Downloader != null)
            {
                base.Downloader.KeepAlive = false;
            }
            this.country = AbstractScraper.Country.Australia;
            url = noKeyword
                ? string.Format("http://www.yellowpages.com.au/search/listings?clue={1}&locationClue={0}&x=0&y=0&pageNumber={2}", location, category, pageNum)
                : string.Format("http://www.yellowpages.com.au/search/listings?clue={1}+{3}&locationClue={0}&x=0&y=0&pageNumber={2}", new object[] { location, category, pageNum, keyword });
            break;

        case "uk":
            this.country = AbstractScraper.Country.UK;
            url = noKeyword
                ? string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}&location={1}&pageNum={2}", category, location, pageNum)
                : string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}+{1}&location={2}&pageNum={3}", new object[] { category, keyword, location, pageNum });
            break;

        default:
            url = noKeyword
                ? string.Format("http://www.yellowpages.com/{0}/{1}?page={2}", location, category, pageNum)
                : string.Format("http://www.yellowpages.com/{0}/{1}?q={2}&page={3}", new object[] { location, category, keyword, pageNum });
            this.country = AbstractScraper.Country.USA;
            break;
    }

    if (base.Radius > 0f)
    {
        // Format with the invariant culture: under a comma-decimal culture the default
        // ToString would emit e.g. "2,5" and corrupt the query value.
        string radius = base.Radius.ToString("#0.##", System.Globalization.CultureInfo.InvariantCulture);
        url += string.Format("&refinements[radius]={0}", radius);
    }

    return url;
}
/// <summary>
/// Walks the detail links of a results page, loads each one through <c>ParseDetails</c> and
/// stores the entries accepted by <c>CanAddRes</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location (not read by this method).</param>
/// <param name="Category">
/// Search category; its <c>Name</c> is copied onto accepted results and its <c>Url</c> is
/// tested for "Real+Estate" to set the second <c>ParseDetails</c> argument.
/// </param>
/// <returns>
/// <c>false</c> when the scraper is terminated while links are processed; otherwise whether the
/// page contains the "Next" pager marker.
/// </returns>
protected override bool ProcessSimpleRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex linkRegex = new Regex("<span\\sclass=detls>.*?href=(?<href>.*?)\\s", flags);

    foreach (Match link in linkRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        string detailsUrl = string.Format("http://maps.google.com{0}", link.Groups["href"].Value);
        MapScrapeResult entry = this.ParseDetails(detailsUrl, Category.Url.Contains("Real+Estate"));

        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        if (!string.IsNullOrEmpty(entry.Website))
        {
            string resolvedWebsite;
            base.WalkWebsite(entry.Website, entry, out resolvedWebsite);
            entry.Website = resolvedWebsite;
        }
        entry.Category = Category.Name;

        // Checked a second time after the website walk, which receives the entry itself.
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        lock (result)
        {
            result.Add(entry);
        }
        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(entry);
        }
    }

    return text.IndexOf("</div>Next</a>") >= 0;
}
/// <summary>
/// Walks the listing blocks of a results page, loads each listing through
/// <c>ParseDetailsCanada</c>, takes the phone number from the results page itself and stores
/// the entries accepted by <c>CanAddRes</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location (not read by this method).</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> when the scraper is terminated while listings are processed; otherwise whether
/// the page contains the "Next" pager marker.
/// </returns>
protected bool ProcessCanada(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex listingRegex = new Regex("<div class=\"listingDetail\".*?>.*?<h3 class=\"listingTitleLine\">.*?href=\"(?<href>.*?)\".*?</h3>.*?<h4 class=\"phoneLink\">(?<phone>.*?)</h4>", flags);

    foreach (Match listing in listingRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        string detailsUrl = string.Format("http://www.yellowpages.ca{0}", listing.Groups["href"].Value);
        MapScrapeResult entry = this.ParseDetailsCanada(detailsUrl);
        entry.Phone = UrlDownloader.SkipHtmlTags(listing.Groups["phone"].Value);
        entry.Category = Category.Name;

        if (!this.CanAddRes(result, entry))
        {
            continue;
        }
        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(entry.Website))
        {
            string resolvedWebsite;
            base.WalkWebsite(entry.Website, entry, out resolvedWebsite);
            entry.Website = resolvedWebsite;
        }

        // Checked a second time after the website walk, which receives the entry itself.
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        lock (result)
        {
            result.Add(entry);
        }
        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(entry);
        }
    }

    return text.IndexOf(">Next</a>") >= 0;
}
/// <summary>
/// Runs a complete search: validates the selection, queues one worker thread per
/// location/category pair (or one per location when no category is selected) and keeps at
/// most <c>maxThreadCount</c> workers running until the queue drains or the search is
/// terminated.
/// </summary>
/// <remarks>
/// Validation failures and the final outcome are written through
/// <c>downloader.errorCallback</c> when both objects are present. When <c>terminated</c>
/// becomes true the scheduling loop ends without waiting for the workers still running.
/// </remarks>
public virtual void Search()
{
    if (this.Callback != null)
    {
        // NOTE(review): for a search without categories this passes 0 although one job per
        // location is queued below; confirm the callback tolerates that.
        this.Callback.Init(this.Locations.Count * this.Categories.Count);
    }

    this.terminated = false;
    string keyword = UrlDownloader.UrlEncode(this.Keyword);

    if (this.Categories.Count <= 0)
    {
        if (!this.allowsCategorylessSearch)
        {
            this.LogSearchMessage("No categories selected.");
            return;
        }
        if (string.IsNullOrEmpty(keyword))
        {
            this.LogSearchMessage("No keyword and categories selected.");
            return;
        }
    }
    if (this.Locations.Count <= 0)
    {
        this.LogSearchMessage("No locations selected.");
        return;
    }

    AbstractScraper.PageProcessDelegate pageProcessor = string.IsNullOrEmpty(keyword)
        ? new AbstractScraper.PageProcessDelegate(this.ProcessSimpleRequest)
        : new AbstractScraper.PageProcessDelegate(this.ProcessKeywordRequest);

    // Each queue item pairs a not-yet-started worker with the argument array given to it on start.
    Queue<KeyValuePair<Thread, object>> pending = new Queue<KeyValuePair<Thread, object>>();
    if (this.Categories.Count > 0)
    {
        foreach (SearchCategory location in this.Locations)
        {
            foreach (SearchCategory category in this.Categories)
            {
                object[] jobArgs = new object[] { this.campaign.Leads, location, category, keyword, pageProcessor };
                Thread worker = new Thread(new ParameterizedThreadStart(this.ProcessSearch))
                {
                    Name = string.Format("{0}|{1}", location.Name, category.Name)
                };
                pending.Enqueue(new KeyValuePair<Thread, object>(worker, jobArgs));
            }
        }
    }
    else
    {
        foreach (SearchCategory location in this.Locations)
        {
            // No categories: the encoded keyword stands in as the category and the keyword
            // slot of the argument array is left null.
            SearchCategory keywordCategory = new SearchCategory();
            keywordCategory.Name = keyword;
            keywordCategory.Url = keyword;

            object[] jobArgs = new object[] { this.campaign.Leads, location, keywordCategory, null, pageProcessor };
            Thread worker = new Thread(new ParameterizedThreadStart(this.ProcessSearch))
            {
                Name = string.Format("{0}|{1}", location.Name, keywordCategory.Name)
            };
            pending.Enqueue(new KeyValuePair<Thread, object>(worker, jobArgs));
        }
    }

    const int pollIntervalMs = 50;
    List<Thread> running = new List<Thread>(this.maxThreadCount);

    // The scheduler lowers its own priority while it supervises the workers; the previous
    // value is restored afterwards so the calling thread is not left at Lowest.
    Thread scheduler = Thread.CurrentThread;
    ThreadPriority previousPriority = scheduler.Priority;
    scheduler.Priority = ThreadPriority.Lowest;
    try
    {
        while ((pending.Count > 0 || running.Count > 0) && !this.terminated)
        {
            while (running.Count < this.maxThreadCount && pending.Count > 0)
            {
                KeyValuePair<Thread, object> job = pending.Dequeue();
                running.Add(job.Key);
                job.Key.Start(job.Value);
            }
            if (this.terminated)
            {
                break;
            }

            // IsAlive turns false for every way a started thread can end; comparing
            // ThreadState with Stopped only matched that single exact value.
            bool reapedAny = false;
            for (int i = running.Count - 1; i >= 0; i--)
            {
                if (running[i].IsAlive)
                {
                    continue;
                }
                running.RemoveAt(i);
                reapedAny = true;
                if (this.Callback != null)
                {
                    this.Callback.ShowProgress();
                }
            }

            if (!reapedAny)
            {
                // Nothing finished during this pass: wait briefly instead of spinning.
                Thread.Sleep(pollIntervalMs);
            }
        }
    }
    finally
    {
        scheduler.Priority = previousPriority;
    }

    if (this.Callback != null)
    {
        this.Callback.Finish();
    }
    this.LogSearchMessage(this.terminated ? "Search has been stopped." : "Search finished.");
}

/// <summary>Writes a message through <c>downloader.errorCallback</c> when both objects are set.</summary>
/// <param name="message">Text handed to the callback's <c>Log</c> method.</param>
private void LogSearchMessage(string message)
{
    if (this.downloader != null && this.downloader.errorCallback != null)
    {
        this.downloader.errorCallback.Log(message);
    }
}
/// <summary>
/// Produces the URL to request for one location/category/keyword combination and page.
/// </summary>
/// <param name="scLocation">Location being searched.</param>
/// <param name="scCategory">Category being searched.</param>
/// <param name="keyword">Optional search keyword.</param>
/// <param name="pageNum">Page number of the result listing.</param>
/// <returns>The URL as a string.</returns>
/// <remarks>
/// NOTE(review): the overrides visible in this file read <c>Url</c> (and in one case
/// <c>Meta</c>) from the two categories and treat a null or empty keyword as "no keyword";
/// whether the keyword arrives already URL-encoded differs between them - confirm against the caller.
/// </remarks>
protected abstract string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum);
/// <summary>
/// Processes one downloaded results page of a keyword search.
/// </summary>
/// <param name="text">Content of the downloaded page.</param>
/// <param name="result">List that receives the scraped results.</param>
/// <param name="Location">Location the page was requested for.</param>
/// <param name="Category">Category the page was requested for.</param>
/// <returns>
/// A flag for the caller. NOTE(review): in the overrides visible in this file it is <c>true</c>
/// only when the page contains a "next page" marker and processing was not cut short - presumably
/// the caller uses it to decide whether to request another page; confirm.
/// </returns>
protected abstract bool ProcessKeywordRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category);
/// <summary>
/// Splits a listing page into sections introduced by date banners and stores the listings of
/// every section whose date lies inside the <c>LowDate</c>/<c>HighDate</c> window.
/// </summary>
/// <param name="text">Markup of the listing page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location; its <c>Name</c> is copied onto each result.</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> as soon as a banner dated before <c>LowDate</c> is met or the scraper is
/// terminated; otherwise whether the page contains the "next 100 postings" link.
/// </returns>
protected override bool ProcessSimpleRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex bannerRegex = new Regex("<h4.*?class=\"ban\".*?>(?<date>.*?)</h4>", flags);

    // Built once on first use; compiling it again for every banner repeated the same costly
    // regex compilation.
    Regex linkRegex = null;

    foreach (Match banner in bannerRegex.Matches(text))
    {
        DateTime sectionDate = this.ParseDate(banner.Groups["date"].Value);
        if (sectionDate < base.LowDate)
        {
            return false;
        }
        if (sectionDate > base.HighDate)
        {
            continue;
        }

        if (linkRegex == null)
        {
            linkRegex = new Regex("<p.*?href=\"(?<href>.*?)\".*?</p>", flags);
        }

        // A section runs from this banner up to the next one (or to the end of the page).
        // The "+ 1" also takes the first character of the next banner.
        Match nextBanner = banner.NextMatch();
        string section = nextBanner.Success
            ? text.Substring(banner.Index, nextBanner.Index - banner.Index + 1)
            : text.Substring(banner.Index);

        foreach (Match link in linkRegex.Matches(section))
        {
            SimpleScrapeResult listing = this.ParseDetails(link.Groups["href"].Value);
            listing.Location = Location.Name;
            listing.Category = Category.Name;
            // Drop the value held in Email from the Emails collection.
            listing.Emails.Remove(listing.Email);

            if (this.terminated)
            {
                return false;
            }
            if (!this.CanAddRes(result, listing))
            {
                continue;
            }

            lock (result)
            {
                result.Add(listing);
            }
            if (base.Callback != null)
            {
                base.Callback.Process(listing);
            }
        }
    }

    return text.IndexOf("next 100 postings</a>") >= 0;
}
/// <summary>
/// Walks the vcard items of a results page, loads each listing through
/// <c>ParseDetailsCanada</c> and stores the entries accepted by <c>CanAddRes</c>.
/// </summary>
/// <param name="text">Markup of the results page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location (not read by this method).</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> when the scraper is terminated while items are processed; otherwise whether the
/// page contains the "Next" pager marker.
/// </returns>
protected bool ProcessCanada(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex cardRegex = new Regex("<li.*?class=\"vcard\".*?>.*?<a.*?class=\"ttl\".*?href=\"(?<href>.*?)\".*?</li>", flags);

    foreach (Match card in cardRegex.Matches(text))
    {
        if (this.terminated)
        {
            return false;
        }

        MapScrapeResult entry = this.ParseDetailsCanada(card.Groups["href"].Value);
        // Radius is read from the matched list item, not from the details page.
        entry.Radius = base.ExtractValue(card.Value, "<span.*?class=\"mlg\">(?<value>.*?)</span>");
        entry.Category = Category.Name;

        if (!this.CanAddRes(result, entry))
        {
            continue;
        }
        if (this.terminated)
        {
            return false;
        }

        if (!string.IsNullOrEmpty(entry.Website))
        {
            string resolvedWebsite;
            base.WalkWebsite(entry.Website, entry, out resolvedWebsite);
            entry.Website = resolvedWebsite;
        }

        // Checked a second time after the website walk, which receives the entry itself.
        if (!this.CanAddRes(result, entry))
        {
            continue;
        }

        lock (result)
        {
            result.Add(entry);
        }
        if (this.terminated)
        {
            return false;
        }
        if (base.Callback != null)
        {
            base.Callback.Process(entry);
        }
    }

    return text.IndexOf("<span>Next</span></a></b>") >= 0;
}
/// <summary>
/// Builds the listing or search URL for a location/category pair.
/// </summary>
/// <param name="scLocation">
/// Location; when its <c>Url</c> contains a dot after the first character it is used as the
/// complete host name, otherwise ".en.craigslist.org" is appended to it.
/// </param>
/// <param name="scCategory">Category; its <c>Url</c> is placed after the host as the path.</param>
/// <param name="keyword">Optional keyword; a null or empty value selects the plain listing URL.</param>
/// <param name="pageNum">Page number; 0 selects the first page, any other value is written followed by "00".</param>
/// <returns>The request URL.</returns>
protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
{
    string location = scLocation.Url;
    string category = scCategory.Url;

    bool locationIsHost = location.IndexOf('.') > 0;
    string host = locationIsHost ? location : string.Format("{0}.en.craigslist.org", location);
    bool firstPage = pageNum == 0;

    if (string.IsNullOrEmpty(keyword))
    {
        if (firstPage)
        {
            return string.Format("http://{0}/{1}", host, category);
        }

        // The full-host variant of this format used to read "index{32}00.html", which throws
        // FormatException because only three arguments are supplied; the page number is
        // argument {2}, exactly as in the short-name variant.
        return string.Format("http://{0}/{1}index{2}00.html", host, category, pageNum);
    }

    string searchUrl = firstPage
        ? string.Format("http://{0}/search/{1}?query={2}", host, category, keyword)
        : string.Format("http://{0}/search/{1}?query={2}&s={3}00", new object[] { host, category, keyword, pageNum });

    // A category path that ends with '/' would otherwise leave "/?" in front of the query.
    return searchUrl.Replace("/?", "?");
}
/// <summary>
/// Splits a listing page into sections introduced by dated section titles and stores the
/// listings of every section whose date lies inside the <c>LowDate</c>/<c>HighDate</c> window.
/// Pages whose "galleryoff" input is checked are handed to <c>ProcessKeywordRequest</c> instead.
/// </summary>
/// <param name="text">Markup of the listing page.</param>
/// <param name="result">Result list; it is locked while a new item is appended.</param>
/// <param name="Location">Search location; its <c>Name</c> is copied onto each result.</param>
/// <param name="Category">Search category; its <c>Name</c> is copied onto each result.</param>
/// <returns>
/// <c>false</c> when the page reports no results, a section dated before <c>LowDate</c> is met
/// or the scraper is terminated; the value of <c>ProcessKeywordRequest</c> when the page is
/// delegated; otherwise whether the page contains the "Next" pagination label.
/// </returns>
protected override bool ProcessSimpleRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    if (text.IndexOf("0 results found in") >= 0)
    {
        return false;
    }

    const RegexOptions flags = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

    // The toggle input is searched with default regex options, unlike the patterns below.
    Match toggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
    if (toggle.Success && toggle.Value.IndexOf("checked") >= 0)
    {
        return this.ProcessKeywordRequest(text, result, Location, Category);
    }

    Regex titleRegex = new Regex("<h3 class=\"section-title\">(?<date>.*?)</h3>", flags);

    // Built once on first use; compiling it again for every section repeated the same costly
    // regex compilation.
    Regex linkRegex = null;

    foreach (Match title in titleRegex.Matches(text))
    {
        DateTime sectionDate = this.ParseDate(title.Groups["date"].Value);
        if (sectionDate < base.LowDate)
        {
            return false;
        }
        if (sectionDate > base.HighDate)
        {
            continue;
        }

        if (linkRegex == null)
        {
            linkRegex = new Regex("<span style=\"vertical-align:middle\">.*?<a href=\"(?<href>.*?)\".*?</a>", flags);
        }

        // A section runs from this title up to the next one (or to the end of the page).
        // The "+ 1" also takes the first character of the next title.
        Match nextTitle = title.NextMatch();
        string section = nextTitle.Success
            ? text.Substring(title.Index, nextTitle.Index - title.Index + 1)
            : text.Substring(title.Index);

        foreach (Match link in linkRegex.Matches(section))
        {
            SimpleScrapeResult listing = this.ParseDetails(link.Groups["href"].Value);
            listing.Location = Location.Name;
            listing.Category = Category.Name;
            // Drop the value held in Email from the Emails collection.
            listing.Emails.Remove(listing.Email);

            if (this.terminated)
            {
                return false;
            }
            if (!this.CanAddRes(result, listing))
            {
                continue;
            }

            lock (result)
            {
                result.Add(listing);
            }
            if (base.Callback != null)
            {
                base.Callback.Process(listing);
            }
        }
    }

    return text.IndexOf("<span class=\"pagination-label\">Next</span>") >= 0;
}