Example #1
0
        /// <summary>
        /// Parses one listing page made of dated rows, scrapes the detail page behind every row whose
        /// date falls inside the LowDate..HighDate window and appends the outcome to
        /// <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the listing page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Location whose name is copied onto every scraped item.</param>
        /// <param name="Category">Category whose name is copied onto every scraped item.</param>
        /// <returns>
        /// <c>true</c> when the page contains the "Next" pager marker; <c>false</c> when that marker is
        /// missing, when a row dated before LowDate is met, or when the scraper has been terminated.
        /// </returns>
        protected override bool ProcessKeywordRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions    options = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            MatchCollection rows    = new Regex("<p class=\"row\">.*?<span.*?>.*?</span>(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", options).Matches(text);

            if (rows.Count <= 0)
            {
                // Nothing matched the strict row markup: retry with a looser paragraph pattern.
                rows = new Regex("<p.*?>.*?(?<date>.*?) - <a.*?href=\"(?<href>.*?)\".*?>.*?</p>", options).Matches(text);
            }

            foreach (Match row in rows)
            {
                DateTime posted = this.ParseDate(row.Groups["date"].Value.Trim());

                if (posted < base.LowDate)
                {
                    // The first row older than the window ends the whole scan (no further paging).
                    // NOTE(review): presumably rows are ordered newest first - confirm.
                    return false;
                }
                if (posted > base.HighDate)
                {
                    // Too recent for the window: skip this row but keep scanning.
                    continue;
                }

                SimpleScrapeResult item = this.ParseDetails(row.Groups["href"].Value);
                item.Location = Location.Name;
                item.Category = Category.Name;
                // The primary address is not kept in the secondary e-mail list.
                item.Emails.Remove(item.Email);

                if (this.terminated)
                {
                    return false;
                }
                if (!this.CanAddRes(result, item))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(item);
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(item);
                }
            }

            // Ordinal search: the marker is markup, not linguistic text, so culture rules must not apply.
            return text.Contains("<b>Next &gt;&gt;</b>");
        }
        /// <summary>
        /// Parses one results page of listing table rows, scrapes the detail page of every listing and
        /// appends accepted entries to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this parser.</param>
        /// <param name="Category">Category whose name is copied onto every scraped entry.</param>
        /// <returns>
        /// <c>true</c> when the page contains the "Next" pager marker; <c>false</c> when it does not or
        /// when the scraper has been terminated.
        /// </returns>
        protected bool ProcessUSA(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options         = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        listingPattern  = new Regex("<tr class=\"yls-rs-listinfo\">.*?href=\"(?<href>.*?)\".*?</tr>", options);
            Regex        distancePattern = new Regex("<td class=\"distance\">(?<radius>.*?)</td>", options);

            foreach (Match listing in listingPattern.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult entry    = this.ParseDetailsUSA(listing.Groups["href"].Value);
                Match           distance = distancePattern.Match(listing.Value);
                if (distance.Success)
                {
                    entry.Radius = UrlDownloader.SkipHtmlTags(distance.Groups["radius"].Value).Trim();
                }
                entry.Category = Category.Name;

                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }
                if (!string.IsNullOrEmpty(entry.Website))
                {
                    string website;
                    base.WalkWebsite(entry.Website, entry, out website);
                    entry.Website = website;
                }
                // Checked a second time after the website walk, which receives the entry itself.
                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(entry);
                }
                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(entry);
                }
            }

            // Ordinal search: the marker is markup, not linguistic text, so culture rules must not apply.
            return text.Contains("Next <span>&#187;");
        }
        /// <summary>
        /// Hands the page to the parser that belongs to the country currently stored on this scraper.
        /// </summary>
        /// <param name="text">HTML of the results page.</param>
        /// <param name="result">Result list passed through to the country parser.</param>
        /// <param name="Location">Location passed through to the country parser.</param>
        /// <param name="Category">Category passed through to the country parser.</param>
        /// <returns>Whatever the selected country parser returns.</returns>
        protected override bool ProcessSimpleRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            AbstractScraper.Country selected = this.country;

            if (selected == AbstractScraper.Country.Canada)
            {
                return this.ProcessCanada(text, result, Location, Category);
            }
            if (selected == AbstractScraper.Country.UK)
            {
                return this.ProcessUK(text, result, Location, Category);
            }
            if (selected == AbstractScraper.Country.Australia)
            {
                return this.ProcessAustralia(text, result, Location, Category);
            }

            // Any other country value is handled by the USA parser.
            return this.ProcessUSA(text, result, Location, Category);
        }
        /// <summary>
        /// Parses one results page of organisation links, scrapes the detail page of every organisation
        /// and appends accepted entries to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this parser.</param>
        /// <param name="Category">Category whose name is copied onto every scraped entry.</param>
        /// <returns>
        /// <c>true</c> when the page contains the next-page link marker; <c>false</c> when it does not
        /// or when the scraper has been terminated.
        /// </returns>
        protected bool ProcessAustralia(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options     = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        linkPattern = new Regex("<a class=\"org\" href=\"/(?<href>.*?)\">", options);

            foreach (Match link in linkPattern.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                // The captured href has no leading slash; it is appended to the site root.
                MapScrapeResult entry = this.ParseDetailsAustralia("http://local.yahoo.com.au/" + link.Groups["href"].Value);
                entry.Category = Category.Name;
                // No distance is read from this page; the radius is always reported as zero.
                entry.Radius   = 0.ToString();

                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }
                if (!string.IsNullOrEmpty(entry.Website))
                {
                    string website;
                    base.WalkWebsite(entry.Website, entry, out website);
                    entry.Website = website;
                }
                // Checked a second time after the website walk, which receives the entry itself.
                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(entry);
                }
                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(entry);
                }
            }

            // Ordinal search: the marker is markup, not linguistic text, so culture rules must not apply.
            return text.Contains("\"TL_pagelink_next\"");
        }
 /// <summary>
 /// Keyword pages are parsed exactly like simple ones: the call is forwarded unchanged to
 /// ProcessSimpleRequest and its result is returned.
 /// </summary>
 protected override bool ProcessKeywordRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
 {
     bool hasMorePages = ProcessSimpleRequest(text, result, Location, Category);

     return hasMorePages;
 }
        /// <summary>
        /// Parses one listing page (gallery or plain list layout), scrapes the detail page behind every
        /// ad whose date falls inside the LowDate..HighDate window and appends the outcome to
        /// <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the listing page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Location whose name is copied onto every scraped item.</param>
        /// <param name="Category">Category whose name is copied onto every scraped item.</param>
        /// <returns>
        /// <c>true</c> when the page contains the "Next" pagination label; <c>false</c> when it does
        /// not, when the page reports no results, when an ad dated before LowDate is met, or when the
        /// scraper has been terminated.
        /// </returns>
        protected override bool ProcessKeywordRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            // All page markers below are matched ordinally: they are markup, not linguistic text.
            if (text.Contains("0 results found in") || text.Contains("No Results"))
            {
                return false;
            }

            RegexOptions options       = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Match        galleryToggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
            Regex        adPattern;

            // The "galleryoff" input being checked selects the gallery markup; otherwise list markup.
            if (galleryToggle.Success && galleryToggle.Value.Contains("checked"))
            {
                adPattern = new Regex("<div class=\"gallery-ad-title\"><a href=\"(?<href>.*?)\".*?<div class=\"post-date\">Posted: (?<date>.*?)</div>", options);
            }
            else
            {
                adPattern = new Regex("<li>.*?href=\"(?<href>.*?)\".*?<em>.*?</em>.*?- (?<date>.*?)</li>", options);
            }

            foreach (Match ad in adPattern.Matches(text))
            {
                DateTime posted = this.ParseDate(ad.Groups["date"].Value);

                if (posted < base.LowDate)
                {
                    // The first ad older than the window ends the whole scan (no further paging).
                    // NOTE(review): presumably ads are ordered newest first - confirm.
                    return false;
                }
                if (posted > base.HighDate)
                {
                    // Too recent for the window: skip this ad but keep scanning.
                    continue;
                }

                SimpleScrapeResult item = this.ParseDetails(ad.Groups["href"].Value);
                item.Location = Location.Name;
                item.Category = Category.Name;
                if (string.IsNullOrEmpty(item.Email))
                {
                    // No primary address: promote the first secondary one (may still be null).
                    item.Email = item.Emails.FirstOrDefault <string>();
                }
                // The primary address is not kept in the secondary e-mail list.
                item.Emails.Remove(item.Email);

                if (this.terminated)
                {
                    return false;
                }
                if (!this.CanAddRes(result, item))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(item);
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(item);
                }
            }

            return text.Contains("<span class=\"pagination-label\">Next</span>");
        }
        /// <summary>
        /// Parses one results page of address blocks, scrapes the detail page of every listing and
        /// appends accepted entries to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this parser.</param>
        /// <param name="Category">Category whose name is copied onto every scraped entry.</param>
        /// <returns>
        /// <c>true</c> when the page contains the "Next" pager marker; <c>false</c> when it does not or
        /// when the scraper has been terminated.
        /// </returns>
        protected bool ProcessUK(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options        = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        listingPattern = new Regex("<div class=\"addr \">.*?<h2>(?<txt>.*?)</h2>.*?<span class=\"note\">(?<radius>.*?)<span>", options);

            foreach (Match listing in listingPattern.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                string          relativeUrl = base.ExtractValue(listing.Groups["txt"].Value, "href=\"(?<value>.*?)\"");
                MapScrapeResult entry       = this.ParseDetailsUK("http://uk.local.yahoo.com" + relativeUrl);
                entry.Radius = listing.Groups["radius"].Value;

                // The region is the first path segment of the listing link, with '_' shown as spaces.
                entry.Region = relativeUrl.Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault <string>();
                if (!string.IsNullOrEmpty(entry.Region))
                {
                    entry.Region = entry.Region.Replace('_', ' ');
                }
                entry.Category = Category.Name;

                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }
                if (!string.IsNullOrEmpty(entry.Website))
                {
                    string website;
                    base.WalkWebsite(entry.Website, entry, out website);
                    entry.Website = website;
                }
                // Checked a second time after the website walk, which receives the entry itself.
                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(entry);
                }
                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(entry);
                }
            }

            // Ordinal search: the marker is markup, not linguistic text, so culture rules must not apply.
            return text.Contains("Next</a></li>");
        }
Example #8
0
        /// <summary>
        /// Parses one results page of adverts. Every field of an entry is extracted from the results
        /// page itself (no detail page is fetched here); accepted entries are appended to
        /// <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this parser.</param>
        /// <param name="Category">Category whose name is copied onto every scraped entry.</param>
        /// <returns>
        /// <c>true</c> when the page contains the "Next" pager marker; <c>false</c> when it does not or
        /// when the scraper has been terminated.
        /// </returns>
        protected bool ProcessUK(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options       = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        advertPattern = new Regex("<div class=\"advert-content\">(?<text>.*?)</div>.*?(?<cta><div class=\"advert-cta\">.*?</div>){1}.*?<ul class=\"tabbed\">(?<tab>.*?)</ul>{1}", options);

            foreach (Match advert in advertPattern.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                string contentHtml = advert.Groups["text"].Value;
                string ctaHtml     = advert.Groups["cta"].Value;
                string tabsHtml    = advert.Groups["tab"].Value;

                MapScrapeResult entry = new MapScrapeResult();
                entry.AdUrl    = string.Format("http://www.yell.com{0}", base.ExtractValue(tabsHtml, "<li class=\"summaryTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
                entry.Map      = string.Format("http://www.yell.com{0}", base.ExtractValue(tabsHtml, "<li class=\"mapTL\">.*?href=\"(?<value>.*?)\".*?</li>"));
                entry.Phone    = UrlDownloader.SkipHtmlTags(base.ExtractValue(ctaHtml, "<ul class=\"(tel-single|tel-multiple)\">.*?<strong>(?<value>.*?)</strong>.*?</ul>")).Trim();
                // The headline is first kept as raw HTML so the website link can be read out of it,
                // and only then reduced to plain text.
                entry.Headline = base.ExtractValue(contentHtml, "<h2 class=\"coName\">(?<value>.*?)</h2>");
                entry.Website  = base.ExtractValue(entry.Headline, "href='(?<value>.*?)'").Trim();
                entry.Headline = UrlDownloader.SkipHtmlTags(entry.Headline).Trim();
                entry.Address  = UrlDownloader.SkipHtmlTags(base.ExtractValue(contentHtml, "<p class=\"address\">(?<value>.*?)</p>")).Trim();
                entry.ZipCode  = base.ExtractValue(entry.Address, "(?<value>[A-Z]{1,2}[0-9R][0-9A-Z]?\\s*?[0-9][ABD-HJLNP-UW-Z]{2})");

                // Region is the last comma-separated part of the address and city the one before it;
                // the postcode, when one was found, is stripped from both. Split always yields at
                // least one part, so the last part needs no length guard. The same null-safe test is
                // used for both fields because Replace rejects an empty search string.
                string[] addressParts = entry.Address.Split(',');
                bool     hasZipCode   = !string.IsNullOrEmpty(entry.ZipCode);
                string   lastPart     = addressParts[addressParts.Length - 1];

                entry.Region = hasZipCode ? lastPart.Replace(entry.ZipCode, string.Empty).Trim() : lastPart.Trim();
                if (addressParts.Length > 1)
                {
                    string cityPart = addressParts[addressParts.Length - 2];
                    entry.City = hasZipCode ? cityPart.Replace(entry.ZipCode, string.Empty).Trim() : cityPart.Trim();
                }

                if (this.terminated)
                {
                    return false;
                }
                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }
                if (!string.IsNullOrEmpty(entry.Website))
                {
                    string website;
                    base.WalkWebsite(entry.Website, entry, out website);
                    entry.Website = website;
                }
                entry.Category = Category.Name;
                // Checked a second time after the website walk, which receives the entry itself.
                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(entry);
                }
                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(entry);
                }
            }

            // Ordinal search: the marker is markup, not linguistic text, so culture rules must not apply.
            return text.Contains(">Next</a>");
        }
Example #9
0
        /// <summary>
        /// Builds the search URL for one results page and, as a side effect, records on this scraper
        /// which country the location belongs to (taken from the location's Meta value).
        /// </summary>
        /// <param name="scLocation">Location; its Url goes into the query and its Meta selects the site.</param>
        /// <param name="scCategory">Category; its Url goes into the query.</param>
        /// <param name="keyword">Optional keyword; it is URL-encoded here before use.</param>
        /// <param name="pageNum">Page number placed into the URL (the Canadian URL carries none).</param>
        /// <returns>The URL, with a radius refinement appended when Radius is greater than zero.</returns>
        protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
        {
            string where = scLocation.Url.Replace(" ", "+");
            string what  = scCategory.Url.Replace(" ", "+");
            string url;

            keyword = UrlDownloader.UrlEncode(keyword);
            bool hasKeyword = !string.IsNullOrEmpty(keyword);

            switch (scLocation.Meta ?? string.Empty)
            {
            case "canada":
                this.country = AbstractScraper.Country.Canada;
                what         = UrlDownloader.SkipHtmlTags(what.Replace('-', ' '));
                // NOTE(review): pageNum is not part of the Canadian URL - confirm how paging works there.
                url = hasKeyword
                    ? string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}+{1}&where={2}", what, keyword, where)
                    : string.Format("http://www.yellowpages.ca/search/?stype=si&what={0}&where={1}", what, where);
                break;

            case "australia":
                if (base.Downloader != null)
                {
                    // Keep-alive is switched off on the downloader for Australian requests.
                    base.Downloader.KeepAlive = false;
                }
                this.country = AbstractScraper.Country.Australia;
                url = hasKeyword
                    ? string.Format("http://www.yellowpages.com.au/search/listings?clue={1}+{3}&locationClue={0}&x=0&y=0&pageNumber={2}", where, what, pageNum, keyword)
                    : string.Format("http://www.yellowpages.com.au/search/listings?clue={1}&locationClue={0}&x=0&y=0&pageNumber={2}", where, what, pageNum);
                break;

            case "uk":
                this.country = AbstractScraper.Country.UK;
                url = hasKeyword
                    ? string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}+{1}&location={2}&pageNum={3}", what, keyword, where, pageNum)
                    : string.Format("http://www.yell.com/ucs/UcsSearchAction.do?keywords={0}&location={1}&pageNum={2}", what, where, pageNum);
                break;

            default:
                // Any other (or missing) Meta value selects the USA site.
                this.country = AbstractScraper.Country.USA;
                url = hasKeyword
                    ? string.Format("http://www.yellowpages.com/{0}/{1}?q={2}&page={3}", where, what, keyword, pageNum)
                    : string.Format("http://www.yellowpages.com/{0}/{1}?page={2}", where, what, pageNum);
                break;
            }

            if (base.Radius > 0f)
            {
                // Invariant culture keeps '.' as the decimal separator; the current culture could
                // put a comma into the query string.
                url += string.Format("&refinements[radius]={0}", base.Radius.ToString("#0.##", System.Globalization.CultureInfo.InvariantCulture));
            }
            return url;
        }
Example #10
0
        /// <summary>
        /// Parses one results page of detail links, scrapes the detail page of every listing and
        /// appends accepted entries to <paramref name="result"/>.
        /// </summary>
        /// <param name="text">HTML of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this parser.</param>
        /// <param name="Category">
        /// Category whose name is copied onto every entry; whether its Url contains "Real+Estate" is
        /// passed to the detail parser as a flag.
        /// </param>
        /// <returns>
        /// <c>true</c> when the page contains the "Next" pager marker; <c>false</c> when it does not or
        /// when the scraper has been terminated.
        /// </returns>
        protected override bool ProcessSimpleRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options       = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        detailPattern = new Regex("<span\\sclass=detls>.*?href=(?<href>.*?)\\s", options);

            foreach (Match detail in detailPattern.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult entry = this.ParseDetails(string.Format("http://maps.google.com{0}", detail.Groups["href"].Value), Category.Url.Contains("Real+Estate"));

                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }
                if (!string.IsNullOrEmpty(entry.Website))
                {
                    string website;
                    base.WalkWebsite(entry.Website, entry, out website);
                    entry.Website = website;
                }
                entry.Category = Category.Name;
                // Checked a second time after the website walk, which receives the entry itself.
                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(entry);
                }
                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(entry);
                }
            }

            // Ordinal search: the marker is markup, not linguistic text, so culture rules must not apply.
            return text.Contains("</div>Next</a>");
        }
Example #11
0
        /// <summary>
        /// Parses one results page of listing blocks, scrapes the detail page of every listing and
        /// appends accepted entries to <paramref name="result"/>. The phone number is taken from the
        /// results page itself.
        /// </summary>
        /// <param name="text">HTML of the results page.</param>
        /// <param name="result">Shared result list; it is locked while an item is appended.</param>
        /// <param name="Location">Not used by this parser.</param>
        /// <param name="Category">Category whose name is copied onto every scraped entry.</param>
        /// <returns>
        /// <c>true</c> when the page contains the "Next" pager marker; <c>false</c> when it does not or
        /// when the scraper has been terminated.
        /// </returns>
        protected bool ProcessCanada(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            RegexOptions options        = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        listingPattern = new Regex("<div class=\"listingDetail\".*?>.*?<h3 class=\"listingTitleLine\">.*?href=\"(?<href>.*?)\".*?</h3>.*?<h4 class=\"phoneLink\">(?<phone>.*?)</h4>", options);

            foreach (Match listing in listingPattern.Matches(text))
            {
                if (this.terminated)
                {
                    return false;
                }

                MapScrapeResult entry = this.ParseDetailsCanada(string.Format("http://www.yellowpages.ca{0}", listing.Groups["href"].Value));
                entry.Phone    = UrlDownloader.SkipHtmlTags(listing.Groups["phone"].Value);
                entry.Category = Category.Name;

                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return false;
                }
                if (!string.IsNullOrEmpty(entry.Website))
                {
                    string website;
                    base.WalkWebsite(entry.Website, entry, out website);
                    entry.Website = website;
                }
                // Checked a second time after the website walk, which receives the entry itself.
                if (!this.CanAddRes(result, entry))
                {
                    continue;
                }

                // The result list is shared, so appending is serialised on the list itself.
                lock (result)
                {
                    result.Add(entry);
                }
                if (this.terminated)
                {
                    return false;
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(entry);
                }
            }

            // Ordinal search: the marker is markup, not linguistic text, so culture rules must not apply.
            return text.Contains(">Next</a>");
        }
Example #12
0
        /// <summary>
        /// Runs the whole search: validates the selection, queues one worker thread per
        /// location/category pair (or per location when no category is selected), keeps at most
        /// maxThreadCount workers running and reports progress and completion through Callback.
        /// </summary>
        /// <remarks>
        /// The calling thread blocks until every worker has ended or the search is terminated; on
        /// termination running workers are not waited for. While supervising, the calling thread runs
        /// at lowest priority; its previous priority is restored before the method returns.
        /// </remarks>
        public virtual void Search()
        {
            if (this.Callback != null)
            {
                // NOTE(review): with no categories this product is 0 although one worker per location
                // is queued below - confirm what Init expects.
                this.Callback.Init(this.Locations.Count * this.Categories.Count);
            }
            this.terminated = false;
            string encodedKeyword = UrlDownloader.UrlEncode(this.Keyword);

            if (this.Categories.Count <= 0)
            {
                if (!this.allowsCategorylessSearch)
                {
                    this.LogSearchMessage("No categories selected.");
                    return;
                }
                if (string.IsNullOrEmpty(encodedKeyword))
                {
                    this.LogSearchMessage("No keyword and categories selected.");
                    return;
                }
            }
            if (this.Locations.Count <= 0)
            {
                this.LogSearchMessage("No locations selected.");
                return;
            }

            // Keyword searches and plain searches use different page parsers.
            AbstractScraper.PageProcessDelegate pageProcessor = string.IsNullOrEmpty(encodedKeyword)
                ? new AbstractScraper.PageProcessDelegate(this.ProcessSimpleRequest)
                : new AbstractScraper.PageProcessDelegate(this.ProcessKeywordRequest);
            Queue <KeyValuePair <Thread, object> > pending = new Queue <KeyValuePair <Thread, object> >();

            if (this.Categories.Count > 0)
            {
                foreach (SearchCategory location in this.Locations)
                {
                    foreach (SearchCategory category in this.Categories)
                    {
                        object[] arguments = new object[]
                        {
                            this.campaign.Leads,
                            location,
                            category,
                            encodedKeyword,
                            pageProcessor
                        };
                        Thread worker = new Thread(new ParameterizedThreadStart(this.ProcessSearch));
                        worker.Name = string.Format("{0}|{1}", location.Name, category.Name);
                        pending.Enqueue(new KeyValuePair <Thread, object>(worker, arguments));
                    }
                }
            }
            else
            {
                foreach (SearchCategory location in this.Locations)
                {
                    // Without categories the encoded keyword stands in as the category, and no
                    // separate keyword is handed to the worker.
                    SearchCategory keywordCategory = new SearchCategory();
                    keywordCategory.Name = encodedKeyword;
                    keywordCategory.Url  = encodedKeyword;
                    object[] arguments = new object[]
                    {
                        this.campaign.Leads,
                        location,
                        keywordCategory,
                        null,
                        pageProcessor
                    };
                    Thread worker = new Thread(new ParameterizedThreadStart(this.ProcessSearch));
                    worker.Name = string.Format("{0}|{1}", location.Name, keywordCategory.Name);
                    pending.Enqueue(new KeyValuePair <Thread, object>(worker, arguments));
                }
            }

            const int      pollIntervalMs   = 50;
            List <Thread>  running          = new List <Thread>(this.maxThreadCount);
            Thread         supervisor       = Thread.CurrentThread;
            ThreadPriority previousPriority = supervisor.Priority;

            supervisor.Priority = ThreadPriority.Lowest;
            try
            {
                while ((pending.Count > 0 || running.Count > 0) && !this.terminated)
                {
                    // Top up the pool.
                    while (running.Count < this.maxThreadCount && pending.Count > 0)
                    {
                        KeyValuePair <Thread, object> job = pending.Dequeue();
                        running.Add(job.Key);
                        job.Key.Start(job.Value);
                    }
                    if (this.terminated)
                    {
                        break;
                    }

                    // Reap ended workers. IsAlive also covers aborted threads, which an equality
                    // test against ThreadState.Stopped would never match.
                    for (int i = running.Count - 1; i >= 0; i--)
                    {
                        if (!running[i].IsAlive)
                        {
                            running.RemoveAt(i);
                            if (this.Callback != null)
                            {
                                this.Callback.ShowProgress();
                            }
                        }
                    }

                    // Wait briefly between polls instead of spinning on a CPU core.
                    Thread.Sleep(pollIntervalMs);
                }
            }
            finally
            {
                // Do not leave the caller's thread at lowest priority.
                supervisor.Priority = previousPriority;
            }

            if (this.Callback != null)
            {
                this.Callback.Finish();
            }
            this.LogSearchMessage(this.terminated ? "Search has been stopped." : "Search finished.");
        }

        /// <summary>Writes a message to the downloader's error callback when both are present.</summary>
        private void LogSearchMessage(string message)
        {
            if (this.downloader != null && this.downloader.errorCallback != null)
            {
                this.downloader.errorCallback.Log(message);
            }
        }
Example #13
0
 /// <summary>
 /// Returns the URL to request for one page of a search. Implemented by derived classes.
 /// </summary>
 /// <param name="scLocation">Location being searched.</param>
 /// <param name="scCategory">Category being searched.</param>
 /// <param name="keyword">Search keyword. NOTE(review): overrides appear to treat null/empty as "browse without keyword" - confirm.</param>
 /// <param name="pageNum">Page number. NOTE(review): presumably zero-based, with 0 meaning the first page - confirm against callers.</param>
 /// <returns>The URL string for the requested page.</returns>
 protected abstract string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum);
Example #14
0
 /// <summary>
 /// Processes the text of one keyword-search result page. Implemented by derived classes.
 /// </summary>
 /// <param name="text">Page content to process (presumably the downloaded HTML - confirm).</param>
 /// <param name="result">List that receives the scrape results found on the page.</param>
 /// <param name="Location">Location the page was requested for.</param>
 /// <param name="Category">Category the page was requested for.</param>
 /// <returns>
 /// NOTE(review): presumably <c>true</c> when a further page should be requested and
 /// <c>false</c> when paging should stop - confirm against the caller.
 /// </returns>
 protected abstract bool ProcessKeywordRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category);
Example #15
0
        /// <summary>
        /// Processes a listing page whose entries are grouped under date headers
        /// (<c>&lt;h4 class="ban"&gt;</c>). For every header whose date lies between
        /// <c>LowDate</c> and <c>HighDate</c>, each <c>&lt;p&gt;</c> link in that header's
        /// section is resolved through <c>ParseDetails</c> and, if <c>CanAddRes</c> accepts it,
        /// appended to <paramref name="result"/> and reported to the callback.
        /// </summary>
        /// <param name="text">HTML of the listing page. <c>null</c> or empty yields <c>false</c>.</param>
        /// <param name="result">Result list; additions are made while holding a lock on the list object.</param>
        /// <param name="Location">Location whose <c>Name</c> is copied onto every result.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> if the search was terminated or a header older than <c>LowDate</c> was reached;
        /// otherwise whether the page contains the "next 100 postings" link.
        /// </returns>
        protected override bool ProcessSimpleRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            if (string.IsNullOrEmpty(text))
            {
                // Nothing to scan and no "next" link can be present.
                return(false);
            }
            RegexOptions options     = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        headerRegex = new Regex("<h4.*?class=\"ban\".*?>(?<date>.*?)</h4>", options);
            // Built on first use and then reused: compiling it once per header is wasted work.
            Regex        linkRegex   = null;

            foreach (Match header in headerRegex.Matches(text))
            {
                DateTime posted = this.ParseDate(header.Groups["date"].Value);
                if (posted < base.LowDate)
                {
                    // Stop paging as soon as a header falls below the lower date bound.
                    return(false);
                }
                if (posted > base.HighDate)
                {
                    // Too recent: skip this section but keep scanning later headers.
                    continue;
                }

                // The section runs from this header up to the start of the next one
                // (the original slice length, including the "+ 1", is kept as is),
                // or to the end of the page for the last header.
                Match  nextHeader = header.NextMatch();
                string section;
                if (nextHeader.Success)
                {
                    section = text.Substring(header.Index, nextHeader.Index - header.Index + 1);
                }
                else
                {
                    section = text.Substring(header.Index);
                }

                if (linkRegex == null)
                {
                    linkRegex = new Regex("<p.*?href=\"(?<href>.*?)\".*?</p>", options);
                }
                foreach (Match link in linkRegex.Matches(section))
                {
                    SimpleScrapeResult posting = this.ParseDetails(link.Groups["href"].Value);
                    posting.Location = Location.Name;
                    posting.Category = Category.Name;
                    posting.Emails.Remove(posting.Email);
                    if (this.terminated)
                    {
                        return(false);
                    }
                    if (this.CanAddRes(result, posting))
                    {
                        lock (result)
                        {
                            result.Add(posting);
                        }
                        if (base.Callback != null)
                        {
                            base.Callback.Process(posting);
                        }
                    }
                }
            }
            // Ordinal search: this is an HTML marker, not linguistic text.
            return(text.IndexOf("next 100 postings</a>", StringComparison.Ordinal) >= 0);
        }
        /// <summary>
        /// Processes a result page made of <c>&lt;li class="vcard"&gt;</c> entries. Each entry's
        /// title link is resolved through <c>ParseDetailsCanada</c>; accepted results are
        /// appended to <paramref name="result"/> and reported to the callback.
        /// </summary>
        /// <param name="text">HTML of the result page. <c>null</c> or empty yields <c>false</c>.</param>
        /// <param name="result">Result list; additions are made while holding a lock on the list object.</param>
        /// <param name="Location">Not read by this method.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> if the search was terminated; otherwise whether the page contains
        /// the "Next" pager link.
        /// </returns>
        protected bool ProcessCanada(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            if (string.IsNullOrEmpty(text))
            {
                // Nothing to scan and no "Next" link can be present.
                return(false);
            }
            RegexOptions options    = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        entryRegex = new Regex("<li.*?class=\"vcard\".*?>.*?<a.*?class=\"ttl\".*?href=\"(?<href>.*?)\".*?</li>", options);

            foreach (Match entry in entryRegex.Matches(text))
            {
                if (this.terminated)
                {
                    return(false);
                }
                MapScrapeResult listing = this.ParseDetailsCanada(entry.Groups["href"].Value);
                listing.Radius   = base.ExtractValue(entry.Value, "<span.*?class=\"mlg\">(?<value>.*?)</span>");
                listing.Category = Category.Name;
                if (!this.CanAddRes(result, listing))
                {
                    continue;
                }
                if (this.terminated)
                {
                    return(false);
                }
                if (!string.IsNullOrEmpty(listing.Website))
                {
                    string website;
                    base.WalkWebsite(listing.Website, listing, out website);
                    listing.Website = website;
                }
                // CanAddRes is asked again because WalkWebsite received the listing and
                // its Website has just been reassigned.
                if (!this.CanAddRes(result, listing))
                {
                    continue;
                }
                lock (result)
                {
                    result.Add(listing);
                }
                if (this.terminated)
                {
                    return(false);
                }
                if (base.Callback != null)
                {
                    base.Callback.Process(listing);
                }
            }
            // Ordinal search: this is an HTML marker, not linguistic text.
            return(text.IndexOf("<span>Next</span></a></b>", StringComparison.Ordinal) >= 0);
        }
Example #17
0
        /// <summary>
        /// Builds the request URL for one page of a category listing or keyword search.
        /// </summary>
        /// <param name="scLocation">
        /// Location whose <c>Url</c> is either a full host name (contains a dot after the first
        /// character) or a bare sub-domain that gets <c>.en.craigslist.org</c> appended.
        /// </param>
        /// <param name="scCategory">Category whose <c>Url</c> is used as the path segment.</param>
        /// <param name="keyword">
        /// Search keyword; null or empty selects the plain category listing.
        /// NOTE(review): the keyword is inserted verbatim - confirm callers URL-encode it.
        /// </param>
        /// <param name="pageNum">
        /// Page number; 0 selects the first page, any other value is written into the URL
        /// followed by a literal <c>00</c>.
        /// </param>
        /// <returns>The absolute <c>http://</c> URL for the requested page.</returns>
        protected override string GetUrl(SearchCategory scLocation, SearchCategory scCategory, string keyword, int pageNum)
        {
            string url  = scLocation.Url;
            string url2 = scCategory.Url;

            // A dotted location URL is already a host name; otherwise it is a sub-domain.
            string host      = url.IndexOf('.') > 0 ? url : url + ".en.craigslist.org";
            bool   firstPage = pageNum == 0;

            if (string.IsNullOrEmpty(keyword))
            {
                if (firstPage)
                {
                    return(string.Format("http://{0}/{1}", host, url2));
                }
                // Fixed: the dotted-host variant of this format used "{32}", which throws
                // FormatException because only three arguments are supplied.
                return(string.Format("http://{0}/{1}index{2}00.html", host, url2, pageNum));
            }

            string searchUrl;
            if (firstPage)
            {
                searchUrl = string.Format("http://{0}/search/{1}?query={2}", host, url2, keyword);
            }
            else
            {
                searchUrl = string.Format("http://{0}/search/{1}?query={2}&s={3}00", host, url2, keyword, pageNum);
            }
            // Collapse "/?" into "?" (e.g. when the category URL ends with a slash).
            return(searchUrl.Replace("/?", "?"));
        }
        /// <summary>
        /// Processes a listing page whose entries are grouped under
        /// <c>&lt;h3 class="section-title"&gt;</c> date headers. Pages reporting
        /// "0 results found in" are rejected immediately, and pages whose <c>galleryoff</c>
        /// input is checked are handed to <c>ProcessKeywordRequest</c>. Otherwise every link in
        /// an in-range section is resolved through <c>ParseDetails</c> and, if <c>CanAddRes</c>
        /// accepts it, appended to <paramref name="result"/> and reported to the callback.
        /// </summary>
        /// <param name="text">HTML of the listing page. <c>null</c> or empty yields <c>false</c>.</param>
        /// <param name="result">Result list; additions are made while holding a lock on the list object.</param>
        /// <param name="Location">Location whose <c>Name</c> is copied onto every result.</param>
        /// <param name="Category">Category whose <c>Name</c> is copied onto every result.</param>
        /// <returns>
        /// <c>false</c> if the page has no results, the search was terminated, or a header older
        /// than <c>LowDate</c> was reached; the result of <c>ProcessKeywordRequest</c> when the
        /// page is delegated; otherwise whether the page contains the "Next" pagination label.
        /// </returns>
        protected override bool ProcessSimpleRequest(string text, List <IScrapeResult> result, SearchCategory Location, SearchCategory Category)
        {
            if (string.IsNullOrEmpty(text))
            {
                // Nothing to scan and no pagination label can be present.
                return(false);
            }
            // Marker probes are ordinal: these are HTML fragments, not linguistic text.
            if (text.IndexOf("0 results found in", StringComparison.Ordinal) >= 0)
            {
                return(false);
            }

            // This probe is deliberately built without options (case-sensitive), as before.
            Match galleryToggle = new Regex("<input [^>]*?name=\"galleryoff\".*?/>").Match(text);
            if (galleryToggle.Success && galleryToggle.Value.IndexOf("checked", StringComparison.Ordinal) >= 0)
            {
                return(this.ProcessKeywordRequest(text, result, Location, Category));
            }

            RegexOptions options     = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline;
            Regex        headerRegex = new Regex("<h3 class=\"section-title\">(?<date>.*?)</h3>", options);
            // Built on first use and then reused: compiling it once per header is wasted work.
            Regex        linkRegex   = null;

            foreach (Match header in headerRegex.Matches(text))
            {
                DateTime posted = this.ParseDate(header.Groups["date"].Value);
                if (posted < base.LowDate)
                {
                    // Stop paging as soon as a header falls below the lower date bound.
                    return(false);
                }
                if (posted > base.HighDate)
                {
                    // Too recent: skip this section but keep scanning later headers.
                    continue;
                }

                // The section runs from this header up to the start of the next one
                // (the original slice length, including the "+ 1", is kept as is),
                // or to the end of the page for the last header.
                Match  nextHeader = header.NextMatch();
                string section;
                if (nextHeader.Success)
                {
                    section = text.Substring(header.Index, nextHeader.Index - header.Index + 1);
                }
                else
                {
                    section = text.Substring(header.Index);
                }

                if (linkRegex == null)
                {
                    linkRegex = new Regex("<span style=\"vertical-align:middle\">.*?<a href=\"(?<href>.*?)\".*?</a>", options);
                }
                foreach (Match link in linkRegex.Matches(section))
                {
                    SimpleScrapeResult posting = this.ParseDetails(link.Groups["href"].Value);
                    posting.Location = Location.Name;
                    posting.Category = Category.Name;
                    posting.Emails.Remove(posting.Email);
                    if (this.terminated)
                    {
                        return(false);
                    }
                    if (this.CanAddRes(result, posting))
                    {
                        lock (result)
                        {
                            result.Add(posting);
                        }
                        if (base.Callback != null)
                        {
                            base.Callback.Process(posting);
                        }
                    }
                }
            }
            return(text.IndexOf("<span class=\"pagination-label\">Next</span>", StringComparison.Ordinal) >= 0);
        }