/// <summary>
/// Downloads the ad page at <paramref name="url"/> and copies the headline, reply address,
/// posting date, description, e-mail addresses and phone numbers found in it into a new result.
/// </summary>
/// <param name="url">Address of the ad page; always stored in the result's AdUrl.</param>
/// <returns>
/// The new result. Only AdUrl is set when the download reports failure.
/// </returns>
public SimpleScrapeResult ParseDetails(string url)
{
    var ad = new SimpleScrapeResult { AdUrl = url };

    string html;
    if (!this.downloader.DownloadUrl(url, out html))
    {
        return ad;
    }

    ad.Headline = base.ExtractValue(html, "<h1>(?<value>.*?)</h1>");

    // The reply address is normalised (lower case, trimmed) so it can be compared below.
    string replyTo = base.ExtractValue(html, "b>Reply</b>: <a href=\"mailto:(?<value>.*?)\\?subject=");
    ad.Email = replyTo.ToLower().Trim();

    ad.DatePosted = base.ExtractValue(html, ">posted: (?<value>.*?)</div>");

    string body = base.ExtractValue(html, "<div class=\"adBody\">(?<value>.*?)</div>");
    ad.Description = body.Trim();

    // Collect every other address on the page once, skipping the reply address itself.
    foreach (string found in base.ExtractEmails(html))
    {
        string address = found.ToLower().Trim();
        if (ad.Emails.Contains(address) || ad.Email == address)
        {
            continue;
        }

        ad.Emails.Add(address);
    }

    ad.PhonesInBody.AddRange(base.ExtractPhones(html));
    return ad;
}
/// <summary>
/// Downloads the ad page at <paramref name="url"/> and fills a new result with its headline,
/// posting date, description, e-mail addresses and phone numbers.
/// </summary>
/// <param name="url">Address of the ad page; always stored in the result's AdUrl.</param>
/// <returns>
/// The new result. Only AdUrl is set when the download reports failure.
/// </returns>
public SimpleScrapeResult ParseDetails(string url)
{
    var ad = new SimpleScrapeResult { AdUrl = url };

    string html;
    if (!this.downloader.DownloadUrl(url, out html))
    {
        return ad;
    }

    ad.Headline = base.ExtractValue(html, "<h2 class=\"summary\">(?<value>.*?)</h2>");
    ad.DatePosted = base.ExtractValue(html, "<li>.*?<h3>Posted</h3>(?<value>.*?)</li>");

    string description = base.ExtractValue(html, "<div id=\"description\">(?<value>.*?)</div>");
    ad.Description = description.Trim();

    // Add each normalised address once, skipping one equal to the result's Email.
    foreach (string found in base.ExtractEmails(html))
    {
        string address = found.ToLower().Trim();
        if (ad.Emails.Contains(address) || ad.Email == address)
        {
            continue;
        }

        ad.Emails.Add(address);
    }

    // The dedicated telephone element is read from the whole page,
    // while the free-form phone scan only looks at the description text.
    string listedPhone = base.ExtractValue(html, "<span.*?class=\"telephone\".*?>(?<value>.*?)</span>").Trim();
    ad.PhonesInBody.AddRange(base.ExtractPhones(ad.Description));
    ad.PhonesInBody.RemoveAll(string.IsNullOrEmpty);

    if (string.IsNullOrEmpty(listedPhone))
    {
        return ad;
    }

    // Drop entries that are substrings of the listed phone, then put the listed phone first.
    ad.PhonesInBody.RemoveAll(candidate => listedPhone.Contains(candidate));
    ad.PhonesInBody.Insert(0, listedPhone);
    return ad;
}
/// <summary>
/// Downloads the ad page at <paramref name="url"/>, narrows the markup to the contents of its
/// body element and extracts the headline, posting date, description, e-mail addresses and
/// phone numbers from it.
/// </summary>
/// <param name="url">Address of the ad page; always stored in the result's AdUrl.</param>
/// <returns>
/// The new result. Only AdUrl is set when the download reports failure.
/// </returns>
public SimpleScrapeResult ParseDetails(string url)
{
    SimpleScrapeResult simpleScrapeResult = new SimpleScrapeResult();
    simpleScrapeResult.AdUrl = url;

    string text;
    if (!this.downloader.DownloadUrl(url, out text))
    {
        return simpleScrapeResult;
    }

    // Everything below works on the body contents only.
    text = base.ExtractValue(text, "<body>(?<value>.*?)</body>");

    simpleScrapeResult.Headline = base.ExtractValue(text, "<h1 id=\"ad-title\"><span>(?<value>.*?)</span>");

    // The capture group is named "value" like the other patterns in this method. It used to
    // be named "date", which presumably left DatePosted empty if ExtractValue reads the
    // "value" group - NOTE(review): confirm against ExtractValue.
    simpleScrapeResult.DatePosted = base.ExtractValue(text, "<span class=\"listlabel\">Date Posted:</span><span class=\"listvalue\">(?<value>.*?)</span></li>");

    simpleScrapeResult.Description = base.ExtractValue(text, "<span style=\"display:block;\".*?</span>(?<value>.*?)<div id=\"ad-details-stats\">").Trim();

    // Add each normalised address once, skipping one equal to the result's Email.
    foreach (string current in base.ExtractEmails(text))
    {
        string normalized = current.ToLower().Trim();
        if (!simpleScrapeResult.Emails.Contains(normalized) && simpleScrapeResult.Email != normalized)
        {
            simpleScrapeResult.Emails.Add(normalized);
        }
    }

    simpleScrapeResult.PhonesInBody.AddRange(base.ExtractPhones(text));
    return simpleScrapeResult;
}
/// <summary>
/// Downloads the ad page at <paramref name="url"/> and extracts its headline, reply address,
/// posting date, description, e-mail addresses and phone numbers. Phone candidates that are
/// substrings of the reply address, of the url, or of any collected e-mail address are
/// discarded afterwards.
/// </summary>
/// <param name="url">Address of the ad page; always stored in the result's AdUrl.</param>
/// <returns>
/// The new result. Only AdUrl is set when the download reports failure.
/// </returns>
public SimpleScrapeResult ParseDetails(string url)
{
    SimpleScrapeResult simpleScrapeResult = new SimpleScrapeResult();
    simpleScrapeResult.AdUrl = url;

    string text;
    if (!this.downloader.DownloadUrl(url, out text))
    {
        return simpleScrapeResult;
    }

    simpleScrapeResult.Headline = base.ExtractValue(text, "<h2>(?<value>.*?)</h2>");
    simpleScrapeResult.Email = base.ExtractValue(text, "<a href=\"mailto:(?<value>.*?)\\?subject=").ToLower().Trim();
    simpleScrapeResult.DatePosted = base.ExtractValue(text, "Date: (?<value>.*?)<br>");
    simpleScrapeResult.Description = base.ExtractValue(text, "<div id=\"userbody\">(?<value>.*?)(?:(<table summary=\"craigslist hosted images\">)|(?:</div>))").Trim();

    // Add each normalised address once, skipping the reply address itself.
    foreach (string current in base.ExtractEmails(text))
    {
        string normalized = current.ToLower().Trim();
        if (!simpleScrapeResult.Emails.Contains(normalized) && simpleScrapeResult.Email != normalized)
        {
            simpleScrapeResult.Emails.Add(normalized);
        }
    }

    simpleScrapeResult.PhonesInBody.AddRange(base.ExtractPhones(text));

    // Discard candidates that are really fragments of an address or of the url.
    // This replaces the former goto/Stack/manual-enumerator sequence. The predicate depends
    // only on the candidate's value, so removing every match leaves the same final list.
    simpleScrapeResult.PhonesInBody.RemoveAll((string candidate) =>
    {
        if (simpleScrapeResult.Email.Contains(candidate) || url.Contains(candidate))
        {
            return true;
        }

        foreach (string address in simpleScrapeResult.Emails)
        {
            if (address.Contains(candidate))
            {
                return true;
            }
        }

        return false;
    });

    return simpleScrapeResult;
}
/// <summary>
/// Scans one page of keyword search results, loads the details of every listed ad whose date
/// lies between LowDate and HighDate, and appends the accepted ads to <paramref name="result"/>.
/// </summary>
/// <param name="text">Markup of the search result page.</param>
/// <param name="result">Shared list receiving accepted ads; it is locked while an ad is added.</param>
/// <param name="Location">Category whose Name is copied into each ad's Location.</param>
/// <param name="Category">Category whose Name is copied into each ad's Category.</param>
/// <returns>
/// <c>false</c> as soon as a row is dated before LowDate or the terminated flag is observed;
/// otherwise whether the page contains the "Next" marker.
/// </returns>
protected override bool ProcessKeywordRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

    Regex rowRegex = new Regex("<p class=\"row\">.*?<span.*?>.*?</span>(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", options);
    MatchCollection rows = rowRegex.Matches(text);
    if (rows.Count <= 0)
    {
        // Fall back to a looser row pattern when the strict one finds nothing.
        rowRegex = new Regex("<p.*?>.*?(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", options);
        rows = rowRegex.Matches(text);
    }

    foreach (Match row in rows)
    {
        DateTime posted = this.ParseDate(row.Groups["date"].Value.Trim());
        if (posted < base.LowDate)
        {
            return false;
        }

        if (posted > base.HighDate)
        {
            continue;
        }

        SimpleScrapeResult ad = this.ParseDetails(row.Groups["href"].Value);
        ad.Location = Location.Name;
        ad.Category = Category.Name;
        ad.Emails.Remove(ad.Email);

        if (this.terminated)
        {
            return false;
        }

        if (this.CanAddRes(result, ad))
        {
            // NOTE(review): CanAddRes runs outside the lock; confirm that is intended.
            lock (result)
            {
                result.Add(ad);
            }

            if (base.Callback != null)
            {
                base.Callback.Process(ad);
            }
        }
    }

    // Ordinal search: the marker is fixed markup, not linguistic text.
    return text.IndexOf("<b>Next >></b>", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Scans one listing page that is grouped under dated headers. For every header whose date
/// lies between LowDate and HighDate, the ads linked between that header and the next one are
/// loaded and the accepted ones appended to <paramref name="result"/>.
/// </summary>
/// <param name="text">Markup of the listing page.</param>
/// <param name="result">Shared list receiving accepted ads; it is locked while an ad is added.</param>
/// <param name="Location">Category whose Name is copied into each ad's Location.</param>
/// <param name="Category">Category whose Name is copied into each ad's Category.</param>
/// <returns>
/// <c>false</c> as soon as a header is dated before LowDate or the terminated flag is observed;
/// otherwise whether the page contains the "next 100 postings" link text.
/// </returns>
protected override bool ProcessSimpleRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
    Regex headerRegex = new Regex("<h4.*?class=\"ban\".*?>(?<date>.*?)</h4>", options);

    // Built once per call instead of once per dated section.
    Regex linkRegex = new Regex("<p.*?href=\"(?<href>.*?)\".*?</p>", options);

    foreach (Match header in headerRegex.Matches(text))
    {
        DateTime posted = this.ParseDate(header.Groups["date"].Value);
        if (posted < base.LowDate)
        {
            return false;
        }

        if (posted > base.HighDate)
        {
            continue;
        }

        // The section runs from this header up to (not including) the next header, or to
        // the end of the page for the last header.
        Match nextHeader = header.NextMatch();
        string section = nextHeader.Success
            ? text.Substring(header.Index, nextHeader.Index - header.Index)
            : text.Substring(header.Index);

        foreach (Match link in linkRegex.Matches(section))
        {
            SimpleScrapeResult ad = this.ParseDetails(link.Groups["href"].Value);
            ad.Location = Location.Name;
            ad.Category = Category.Name;
            ad.Emails.Remove(ad.Email);

            if (this.terminated)
            {
                return false;
            }

            if (this.CanAddRes(result, ad))
            {
                // NOTE(review): CanAddRes runs outside the lock; confirm that is intended.
                lock (result)
                {
                    result.Add(ad);
                }

                if (base.Callback != null)
                {
                    base.Callback.Process(ad);
                }
            }
        }
    }

    // Ordinal search: the marker is fixed markup, not linguistic text.
    return text.IndexOf("next 100 postings</a>", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Scans one page of search results, rendered either as a gallery or as a plain list, loads
/// the details of every ad whose date lies between LowDate and HighDate, and appends the
/// accepted ads to <paramref name="result"/>.
/// </summary>
/// <param name="text">Markup of the search result page.</param>
/// <param name="result">Shared list receiving accepted ads; it is locked while an ad is added.</param>
/// <param name="Location">Category whose Name is copied into each ad's Location.</param>
/// <param name="Category">Category whose Name is copied into each ad's Category.</param>
/// <returns>
/// <c>false</c> when the page reports no results, as soon as an ad is dated before LowDate,
/// or when the terminated flag is observed; otherwise whether the page contains the
/// pagination "Next" label.
/// </returns>
protected override bool ProcessKeywordRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    // Ordinal searches throughout: the markers are fixed markup, not linguistic text.
    if (text.IndexOf("0 results found in", StringComparison.Ordinal) >= 0
        || text.IndexOf("No Results", StringComparison.Ordinal) >= 0)
    {
        return false;
    }

    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

    // A "galleryoff" input carrying "checked" selects the gallery pattern below.
    // This probe is deliberately left without regex options, as in the original.
    Match galleryToggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
    bool galleryLayout = galleryToggle.Success
        && galleryToggle.Value.IndexOf("checked", StringComparison.Ordinal) >= 0;

    Regex adRegex = galleryLayout
        ? new Regex("<div class=\"gallery-ad-title\"><a href=\"(?<href>.*?)\".*?<div class=\"post-date\">Posted: (?<date>.*?)</div>", options)
        : new Regex("<li>.*?href=\"(?<href>.*?)\".*?<em>.*?</em>.*?- (?<date>.*?)</li>", options);

    foreach (Match entry in adRegex.Matches(text))
    {
        DateTime posted = this.ParseDate(entry.Groups["date"].Value);
        if (posted < base.LowDate)
        {
            return false;
        }

        if (posted > base.HighDate)
        {
            continue;
        }

        SimpleScrapeResult ad = this.ParseDetails(entry.Groups["href"].Value);
        ad.Location = Location.Name;
        ad.Category = Category.Name;

        // Promote the first collected address when the ad has no primary address.
        if (string.IsNullOrEmpty(ad.Email))
        {
            ad.Email = ad.Emails.FirstOrDefault<string>();
        }

        ad.Emails.Remove(ad.Email);

        if (this.terminated)
        {
            return false;
        }

        if (this.CanAddRes(result, ad))
        {
            // NOTE(review): CanAddRes runs outside the lock; confirm that is intended.
            lock (result)
            {
                result.Add(ad);
            }

            if (base.Callback != null)
            {
                base.Callback.Process(ad);
            }
        }
    }

    return text.IndexOf("<span class=\"pagination-label\">Next</span>", StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Scans one listing page. A page in gallery layout is handed to ProcessKeywordRequest.
/// Otherwise the page is read as sections under dated titles: for every title whose date lies
/// between LowDate and HighDate, the ads linked between that title and the next one are loaded
/// and the accepted ones appended to <paramref name="result"/>.
/// </summary>
/// <param name="text">Markup of the listing page.</param>
/// <param name="result">Shared list receiving accepted ads; it is locked while an ad is added.</param>
/// <param name="Location">Category whose Name is copied into each ad's Location.</param>
/// <param name="Category">Category whose Name is copied into each ad's Category.</param>
/// <returns>
/// <c>false</c> when the page reports no results, as soon as a title is dated before LowDate,
/// or when the terminated flag is observed; the result of ProcessKeywordRequest for gallery
/// pages; otherwise whether the page contains the pagination "Next" label.
/// </returns>
protected override bool ProcessSimpleRequest(string text, List<IScrapeResult> result, SearchCategory Location, SearchCategory Category)
{
    // Ordinal searches throughout: the markers are fixed markup, not linguistic text.
    if (text.IndexOf("0 results found in", StringComparison.Ordinal) >= 0)
    {
        return false;
    }

    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

    // A "galleryoff" input carrying "checked" sends the page to the keyword parser.
    // This probe is deliberately left without regex options, as in the original.
    Match galleryToggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
    if (galleryToggle.Success && galleryToggle.Value.IndexOf("checked", StringComparison.Ordinal) >= 0)
    {
        return this.ProcessKeywordRequest(text, result, Location, Category);
    }

    Regex titleRegex = new Regex("<h3 class=\"section-title\">(?<date>.*?)</h3>", options);

    // Built once per call instead of once per dated section.
    Regex linkRegex = new Regex("<span style=\"vertical-align:middle\">.*?<a href=\"(?<href>.*?)\".*?</a>", options);

    foreach (Match title in titleRegex.Matches(text))
    {
        DateTime posted = this.ParseDate(title.Groups["date"].Value);
        if (posted < base.LowDate)
        {
            return false;
        }

        if (posted > base.HighDate)
        {
            continue;
        }

        // The section runs from this title up to (not including) the next title, or to
        // the end of the page for the last title.
        Match nextTitle = title.NextMatch();
        string section = nextTitle.Success
            ? text.Substring(title.Index, nextTitle.Index - title.Index)
            : text.Substring(title.Index);

        foreach (Match link in linkRegex.Matches(section))
        {
            SimpleScrapeResult ad = this.ParseDetails(link.Groups["href"].Value);
            ad.Location = Location.Name;
            ad.Category = Category.Name;
            ad.Emails.Remove(ad.Email);

            if (this.terminated)
            {
                return false;
            }

            if (this.CanAddRes(result, ad))
            {
                // NOTE(review): CanAddRes runs outside the lock; confirm that is intended.
                lock (result)
                {
                    result.Add(ad);
                }

                if (base.Callback != null)
                {
                    base.Callback.Process(ad);
                }
            }
        }
    }

    return text.IndexOf("<span class=\"pagination-label\">Next</span>", StringComparison.Ordinal) >= 0;
}