コード例 #1
0
        /// <summary>
        /// Downloads the ad page at <paramref name="url"/> and copies the headline,
        /// reply e-mail, posted date, description, additional e-mail addresses and
        /// phone numbers found in the page into a new <see cref="SimpleScrapeResult"/>.
        /// </summary>
        /// <param name="url">Address of the ad page; also stored in the result's AdUrl.</param>
        /// <returns>
        /// The populated result. When the download call returns false, only AdUrl
        /// has been assigned by this method.
        /// </returns>
        public SimpleScrapeResult ParseDetails(string url)
        {
            SimpleScrapeResult ad = new SimpleScrapeResult();
            ad.AdUrl = url;

            string page;
            if (!this.downloader.DownloadUrl(url, out page))
            {
                return ad;
            }

            ad.Headline = base.ExtractValue(page, "<h1>(?<value>.*?)</h1>");

            string replyAddress = base.ExtractValue(page, "b>Reply</b>:&nbsp;<a href=\"mailto:(?<value>.*?)\\?subject=");
            ad.Email = replyAddress.ToLower().Trim();

            ad.DatePosted = base.ExtractValue(page, ">posted: (?<value>.*?)</div>");

            string body = base.ExtractValue(page, "<div class=\"adBody\">(?<value>.*?)</div>");
            ad.Description = body.Trim();

            // Collect every other address on the page, normalised to lower case,
            // skipping duplicates and the primary reply address.
            foreach (string rawAddress in base.ExtractEmails(page))
            {
                string address = rawAddress.ToLower().Trim();
                if (ad.Emails.Contains(address) || ad.Email == address)
                {
                    continue;
                }
                ad.Emails.Add(address);
            }

            ad.PhonesInBody.AddRange(base.ExtractPhones(page));
            return ad;
        }
コード例 #2
0
        /// <summary>
        /// Downloads the ad page at <paramref name="url"/> and fills a new
        /// <see cref="SimpleScrapeResult"/> with the headline, posted date,
        /// description, e-mail addresses and phone numbers.
        /// </summary>
        /// <param name="url">Address of the ad page; also stored in the result's AdUrl.</param>
        /// <returns>
        /// The populated result. When the download call returns false, only AdUrl
        /// has been assigned by this method.
        /// </returns>
        public SimpleScrapeResult ParseDetails(string url)
        {
            SimpleScrapeResult ad = new SimpleScrapeResult();
            ad.AdUrl = url;

            string page;
            if (!this.downloader.DownloadUrl(url, out page))
            {
                return ad;
            }

            ad.Headline   = base.ExtractValue(page, "<h2 class=\"summary\">(?<value>.*?)</h2>");
            ad.DatePosted = base.ExtractValue(page, "<li>.*?<h3>Posted</h3>(?<value>.*?)</li>");

            string body = base.ExtractValue(page, "<div id=\"description\">(?<value>.*?)</div>");
            ad.Description = body.Trim();

            // Normalise each address to lower case and keep it only if it is
            // neither already listed nor equal to the result's Email.
            foreach (string rawAddress in base.ExtractEmails(page))
            {
                string address = rawAddress.ToLower().Trim();
                if (ad.Emails.Contains(address) || ad.Email == address)
                {
                    continue;
                }
                ad.Emails.Add(address);
            }

            // The number from the dedicated telephone element, if any.
            string listedPhone = base.ExtractValue(page, "<span.*?class=\"telephone\".*?>(?<value>.*?)</span>").Trim();

            // Phones are searched in the description only, not in the whole page.
            ad.PhonesInBody.AddRange(base.ExtractPhones(ad.Description));

            // Empty entries must go first: the Contains check below would match
            // an empty string and throw on null.
            ad.PhonesInBody.RemoveAll(string.IsNullOrEmpty);

            if (!string.IsNullOrEmpty(listedPhone))
            {
                // Drop fragments that are part of the listed number, then put the
                // listed number at the head of the list.
                ad.PhonesInBody.RemoveAll(fragment => listedPhone.Contains(fragment));
                ad.PhonesInBody.Insert(0, listedPhone);
            }

            return ad;
        }
コード例 #3
0
        /// <summary>
        /// Downloads the ad page at <paramref name="url"/>, narrows the text to the
        /// content of the body element and fills a new <see cref="SimpleScrapeResult"/>
        /// with the headline, posted date, description, e-mail addresses and phone
        /// numbers found there.
        /// </summary>
        /// <param name="url">Address of the ad page; also stored in the result's AdUrl.</param>
        /// <returns>
        /// The populated result. When the download call returns false, only AdUrl
        /// has been assigned by this method.
        /// </returns>
        public SimpleScrapeResult ParseDetails(string url)
        {
            SimpleScrapeResult simpleScrapeResult = new SimpleScrapeResult();
            simpleScrapeResult.AdUrl = url;

            string text;
            if (!this.downloader.DownloadUrl(url, out text))
            {
                return simpleScrapeResult;
            }

            // Everything below works on the body content only.
            text = base.ExtractValue(text, "<body>(?<value>.*?)</body>");

            simpleScrapeResult.Headline = base.ExtractValue(text, "<h1 id=\"ad-title\"><span>(?<value>.*?)</span>");

            // The capture group is named "value" like every other ExtractValue
            // pattern here; it was previously named "date".
            // NOTE(review): ExtractValue is not visible from this method; this assumes
            // it reads the "value" group, as all sibling patterns suggest - confirm.
            simpleScrapeResult.DatePosted = base.ExtractValue(text, "<span class=\"listlabel\">Date Posted:</span><span class=\"listvalue\">(?<value>.*?)</span></li>");

            simpleScrapeResult.Description = base.ExtractValue(text, "<span style=\"display:block;\".*?</span>(?<value>.*?)<div id=\"ad-details-stats\">").Trim();

            // Normalise each address to lower case and keep it only if it is
            // neither already listed nor equal to the result's Email.
            foreach (string current in base.ExtractEmails(text))
            {
                string normalized = current.ToLower().Trim();
                if (!simpleScrapeResult.Emails.Contains(normalized) && simpleScrapeResult.Email != normalized)
                {
                    simpleScrapeResult.Emails.Add(normalized);
                }
            }

            simpleScrapeResult.PhonesInBody.AddRange(base.ExtractPhones(text));
            return simpleScrapeResult;
        }
コード例 #4
0
        /// <summary>
        /// Downloads the ad page at <paramref name="url"/> and fills a new
        /// <see cref="SimpleScrapeResult"/> with the headline, reply e-mail, posted
        /// date, description, additional e-mail addresses and phone numbers.
        /// Phone candidates that occur inside the reply e-mail, the URL or any
        /// collected e-mail address are removed again.
        /// </summary>
        /// <param name="url">Address of the ad page; also stored in the result's AdUrl.</param>
        /// <returns>
        /// The populated result. When the download call returns false, only AdUrl
        /// has been assigned by this method.
        /// </returns>
        public SimpleScrapeResult ParseDetails(string url)
        {
            SimpleScrapeResult ad = new SimpleScrapeResult();
            ad.AdUrl = url;

            string page;
            if (!this.downloader.DownloadUrl(url, out page))
            {
                return ad;
            }

            ad.Headline = base.ExtractValue(page, "<h2>(?<value>.*?)</h2>");

            string replyAddress = base.ExtractValue(page, "<a href=\"mailto:(?<value>.*?)\\?subject=");
            ad.Email = replyAddress.ToLower().Trim();

            ad.DatePosted = base.ExtractValue(page, "Date: (?<value>.*?)<br>");

            string body = base.ExtractValue(page, "<div id=\"userbody\">(?<value>.*?)(?:(<table summary=\"craigslist hosted images\">)|(?:</div>))");
            ad.Description = body.Trim();

            // Collect every other address on the page, normalised to lower case,
            // skipping duplicates and the primary reply address.
            foreach (string rawAddress in base.ExtractEmails(page))
            {
                string address = rawAddress.ToLower().Trim();
                if (ad.Emails.Contains(address) || ad.Email == address)
                {
                    continue;
                }
                ad.Emails.Add(address);
            }

            ad.PhonesInBody.AddRange(base.ExtractPhones(page));

            // First pass: remember every phone candidate that is really a piece of
            // an e-mail address or of the URL. The list is not modified while it
            // is being enumerated.
            Stack<string> rejected = new Stack<string>();
            foreach (string candidate in ad.PhonesInBody)
            {
                bool embedded = ad.Email.Contains(candidate) || url.Contains(candidate);
                if (!embedded)
                {
                    foreach (string address in ad.Emails)
                    {
                        if (address.Contains(candidate))
                        {
                            embedded = true;
                            break;
                        }
                    }
                }

                if (embedded)
                {
                    rejected.Push(candidate);
                }
            }

            // Second pass: remove one list entry per remembered candidate.
            while (rejected.Count > 0)
            {
                ad.PhonesInBody.Remove(rejected.Pop());
            }

            return ad;
        }
コード例 #5
0
        /// <summary>
        /// Scans one keyword-search result page, parses the details of every listed
        /// ad whose date lies between LowDate and HighDate, and appends accepted ads
        /// to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the result page.</param>
        /// <param name="result">List receiving accepted ads; locked while it is being modified.</param>
        /// <param name="Location">Search location; its Name is copied to each ad.</param>
        /// <param name="Category">Search category; its Name is copied to each ad.</param>
        /// <returns>
        /// True when the page contains the "Next" marker; false when an ad dated
        /// before LowDate is met or when the terminated flag is set.
        /// </returns>
        protected override bool ProcessKeywordRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

            Regex rowRegex = new Regex("<p class=\"row\">.*?<span.*?>.*?</span>(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", options);
            MatchCollection rows = rowRegex.Matches(text);
            if (rows.Count <= 0)
            {
                // Fall back to a looser row pattern when the primary one finds nothing.
                rowRegex = new Regex("<p.*?>.*?(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", options);
                rows     = rowRegex.Matches(text);
            }

            foreach (Match row in rows)
            {
                DateTime posted = this.ParseDate(row.Groups["date"].Value.Trim());
                if (posted < base.LowDate)
                {
                    // NOTE(review): stopping here presumably relies on rows being
                    // listed newest first - confirm against the site.
                    return false;
                }
                if (posted > base.HighDate)
                {
                    continue;
                }

                SimpleScrapeResult ad = this.ParseDetails(row.Groups["href"].Value);
                ad.Location = Location.Name;
                ad.Category = Category.Name;
                // The primary address must not also appear in the secondary list.
                ad.Emails.Remove(ad.Email);

                if (this.terminated)
                {
                    return false;
                }

                if (this.CanAddRes(result, ad))
                {
                    // Same monitor as the former Monitor.Enter/Exit pair.
                    lock (result)
                    {
                        result.Add(ad);
                    }
                    if (base.Callback != null)
                    {
                        base.Callback.Process(ad);
                    }
                }
            }

            return text.IndexOf("<b>Next &gt;&gt;</b>") >= 0;
        }
コード例 #6
0
        /// <summary>
        /// Scans one browse page that groups ads under date headers. For every
        /// header whose date lies between LowDate and HighDate, the ads linked in
        /// that header's section are parsed and accepted ones are appended to
        /// <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the listing page.</param>
        /// <param name="result">List receiving accepted ads; locked while it is being modified.</param>
        /// <param name="Location">Search location; its Name is copied to each ad.</param>
        /// <param name="Category">Search category; its Name is copied to each ad.</param>
        /// <returns>
        /// True when the page contains the "next 100 postings" link; false when a
        /// header dated before LowDate is met or when the terminated flag is set.
        /// </returns>
        protected override bool ProcessSimpleRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

            Regex headerRegex = new Regex("<h4.*?class=\"ban\".*?>(?<date>.*?)</h4>", options);
            // Built once instead of once per date header; the pattern never changes.
            Regex linkRegex = new Regex("<p.*?href=\"(?<href>.*?)\".*?</p>", options);

            foreach (Match header in headerRegex.Matches(text))
            {
                DateTime posted = this.ParseDate(header.Groups["date"].Value);
                if (posted < base.LowDate)
                {
                    // NOTE(review): stopping here presumably relies on headers being
                    // listed newest first - confirm against the site.
                    return false;
                }
                if (posted > base.HighDate)
                {
                    continue;
                }

                // A section runs from this header up to the next one (including the
                // next header's first character), or to the end of the page.
                Match  nextHeader = header.NextMatch();
                string section;
                if (nextHeader.Success)
                {
                    section = text.Substring(header.Index, nextHeader.Index - header.Index + 1);
                }
                else
                {
                    section = text.Substring(header.Index);
                }

                foreach (Match link in linkRegex.Matches(section))
                {
                    SimpleScrapeResult ad = this.ParseDetails(link.Groups["href"].Value);
                    ad.Location = Location.Name;
                    ad.Category = Category.Name;
                    // The primary address must not also appear in the secondary list.
                    ad.Emails.Remove(ad.Email);

                    if (this.terminated)
                    {
                        return false;
                    }

                    if (this.CanAddRes(result, ad))
                    {
                        // Same monitor as the former Monitor.Enter/Exit pair.
                        lock (result)
                        {
                            result.Add(ad);
                        }
                        if (base.Callback != null)
                        {
                            base.Callback.Process(ad);
                        }
                    }
                }
            }

            return text.IndexOf("next 100 postings</a>") >= 0;
        }
コード例 #7
0
        /// <summary>
        /// Scans one keyword-search result page, in either gallery or list layout,
        /// parses the details of every ad whose date lies between LowDate and
        /// HighDate, and appends accepted ads to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the result page.</param>
        /// <param name="result">List receiving accepted ads; locked while it is being modified.</param>
        /// <param name="Location">Search location; its Name is copied to each ad.</param>
        /// <param name="Category">Search category; its Name is copied to each ad.</param>
        /// <returns>
        /// True when the page contains the pagination "Next" label; false when the
        /// page reports no results, when an ad dated before LowDate is met, or when
        /// the terminated flag is set.
        /// </returns>
        protected override bool ProcessKeywordRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            if (text.IndexOf("0 results found in") >= 0 || text.IndexOf("No Results") >= 0)
            {
                return false;
            }

            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

            // The page layout is chosen from the "galleryoff" input: when that input
            // carries "checked", the gallery pattern is used, otherwise the list one.
            Match galleryToggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
            Regex adRegex;
            if (galleryToggle.Success && galleryToggle.Value.IndexOf("checked") >= 0)
            {
                adRegex = new Regex("<div class=\"gallery-ad-title\"><a href=\"(?<href>.*?)\".*?<div class=\"post-date\">Posted: (?<date>.*?)</div>", options);
            }
            else
            {
                adRegex = new Regex("<li>.*?href=\"(?<href>.*?)\".*?<em>.*?</em>.*?- (?<date>.*?)</li>", options);
            }

            foreach (Match adMatch in adRegex.Matches(text))
            {
                DateTime posted = this.ParseDate(adMatch.Groups["date"].Value);
                if (posted < base.LowDate)
                {
                    // NOTE(review): stopping here presumably relies on ads being
                    // listed newest first - confirm against the site.
                    return false;
                }
                if (posted > base.HighDate)
                {
                    continue;
                }

                SimpleScrapeResult ad = this.ParseDetails(adMatch.Groups["href"].Value);
                ad.Location = Location.Name;
                ad.Category = Category.Name;

                // Promote the first collected address when no primary one was found.
                if (string.IsNullOrEmpty(ad.Email))
                {
                    ad.Email = ad.Emails.FirstOrDefault <string>();
                }
                // The primary address must not also appear in the secondary list.
                ad.Emails.Remove(ad.Email);

                if (this.terminated)
                {
                    return false;
                }

                if (this.CanAddRes(result, ad))
                {
                    // Same monitor as the former Monitor.Enter/Exit pair.
                    lock (result)
                    {
                        result.Add(ad);
                    }
                    if (base.Callback != null)
                    {
                        base.Callback.Process(ad);
                    }
                }
            }

            return text.IndexOf("<span class=\"pagination-label\">Next</span>") >= 0;
        }
コード例 #8
0
        /// <summary>
        /// Scans one browse page. In gallery layout the work is delegated to
        /// <c>ProcessKeywordRequest</c>; otherwise ads are grouped under date
        /// headers, and for every header whose date lies between LowDate and
        /// HighDate the ads linked in that section are parsed and accepted ones
        /// are appended to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the listing page.</param>
        /// <param name="result">List receiving accepted ads; locked while it is being modified.</param>
        /// <param name="Location">Search location; its Name is copied to each ad.</param>
        /// <param name="Category">Search category; its Name is copied to each ad.</param>
        /// <returns>
        /// In gallery layout, whatever <c>ProcessKeywordRequest</c> returns.
        /// Otherwise true when the page contains the pagination "Next" label; false
        /// when the page reports no results, when a header dated before LowDate is
        /// met, or when the terminated flag is set.
        /// </returns>
        protected override bool ProcessSimpleRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            if (text.IndexOf("0 results found in") >= 0)
            {
                return false;
            }

            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;

            // Gallery layout ("galleryoff" input carries "checked") has no date
            // headers; it is handled by the keyword-request parser.
            Match galleryToggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
            if (galleryToggle.Success && galleryToggle.Value.IndexOf("checked") >= 0)
            {
                return this.ProcessKeywordRequest(text, result, Location, Category);
            }

            Regex headerRegex = new Regex("<h3 class=\"section-title\">(?<date>.*?)</h3>", options);
            // Built once instead of once per date header; the pattern never changes.
            Regex linkRegex = new Regex("<span style=\"vertical-align:middle\">.*?<a href=\"(?<href>.*?)\".*?</a>", options);

            foreach (Match header in headerRegex.Matches(text))
            {
                DateTime posted = this.ParseDate(header.Groups["date"].Value);
                if (posted < base.LowDate)
                {
                    // NOTE(review): stopping here presumably relies on headers being
                    // listed newest first - confirm against the site.
                    return false;
                }
                if (posted > base.HighDate)
                {
                    continue;
                }

                // A section runs from this header up to the next one (including the
                // next header's first character), or to the end of the page.
                Match  nextHeader = header.NextMatch();
                string section;
                if (nextHeader.Success)
                {
                    section = text.Substring(header.Index, nextHeader.Index - header.Index + 1);
                }
                else
                {
                    section = text.Substring(header.Index);
                }

                foreach (Match link in linkRegex.Matches(section))
                {
                    SimpleScrapeResult ad = this.ParseDetails(link.Groups["href"].Value);
                    ad.Location = Location.Name;
                    ad.Category = Category.Name;
                    // The primary address must not also appear in the secondary list.
                    ad.Emails.Remove(ad.Email);

                    if (this.terminated)
                    {
                        return false;
                    }

                    if (this.CanAddRes(result, ad))
                    {
                        // Same monitor as the former Monitor.Enter/Exit pair.
                        lock (result)
                        {
                            result.Add(ad);
                        }
                        if (base.Callback != null)
                        {
                            base.Callback.Process(ad);
                        }
                    }
                }
            }

            return text.IndexOf("<span class=\"pagination-label\">Next</span>") >= 0;
        }