Exemple #1
0
        /// <summary>
        /// Runs the current query against the Yahoo web-search REST endpoint and fills
        /// <c>mResultSet</c> with at most <c>mResultSetMaxSz</c> items, requesting the
        /// results page by page (no more than 100 per request).
        /// </summary>
        /// <remarks>
        /// When a cache is configured and <c>GetFromCache</c> reports a hit, no request is sent.
        /// Otherwise the collected result set and the total hit count are handed to
        /// <c>PutIntoCache</c> once paging has finished.
        /// Paging stops when the requested number of items has been collected, when the service
        /// answers with an unexpected first-result position, when a page comes back short, or
        /// when a page yields no parsable items (guard against re-sending the same request forever).
        /// </remarks>
        public override void Search()
        {
            // An ArgumentValueException is handed to Utils.ThrowException when more than 1000 results are requested.
            Utils.ThrowException(mResultSetMaxSz > 1000 ? new ArgumentValueException("ResultSetMaxSize") : null);
            string langStr = TextMiningUtils.GetLanguageCode(mLanguage);

            mResultSet.Inner.Clear();
            if (mCache == null || !mCache.GetFromCache("YahooSearchEngine", mLanguage, mQuery, mResultSetMaxSz, ref mTotalHits, ref mResultSet))
            {
                int resultsPerPage = mResultSetMaxSz > 100 ? 100 : mResultSetMaxSz;
                // i counts the items collected so far; it is advanced inside the parsing loop below
                for (int i = 0; i < mResultSetMaxSz;)
                {
                    // the service uses 1-based result positions, hence start = i + 1
                    string request = string.Format("http://search.yahooapis.com/WebSearchService/V1/webSearch?appid={0}&query={1}&results={2}&start={3}{4}",
                                                   HttpUtility.UrlEncode(mAppId), HttpUtility.UrlEncode(mQuery), resultsPerPage, i + 1,
                                                   mLanguage == Language.Unspecified ? "" : string.Format("&language={0}", langStr));
                    int    firstResult, resultsReturned;
                    string response = SendRequest(request, out firstResult, out resultsReturned); // throws WebException, QuotaExceededException
                    // NOTE(review): mTotalHits is presumably updated by SendRequest - confirm
                    if (mRetry && mTotalHits == 0)                                                // *** Yahoo sometimes returns 0 results even if this is not the case (do a retry)
                    {
                        Thread.Sleep(2000);                                                       // delay for 2 seconds
                        response = SendRequest(request, out firstResult, out resultsReturned);    // throws WebException, QuotaExceededException
                    }
                    if (firstResult != i + 1)
                    {
                        // the service did not start where we asked it to; treat what we have as everything
                        mTotalHits = i;
                        break;
                    }
                    int itemsBeforePage = i;
                    Match regexMatch = mResultItemRegex.Match(response);
                    while (regexMatch.Success)
                    {
                        string title   = HttpUtility.HtmlDecode(regexMatch.Result("${title}"));
                        string snippet = HttpUtility.HtmlDecode(regexMatch.Result("${summary}"));
                        string url     = HttpUtility.HtmlDecode(regexMatch.Result("${url}"));
                        mResultSet.Inner.Add(new SearchEngineResultItem(title, snippet, url, mResultSet.Count + 1));
                        regexMatch = regexMatch.NextMatch();
                        if (++i == mResultSetMaxSz)
                        {
                            break;
                        }
                    }
                    if (resultsReturned < resultsPerPage)
                    {
                        // short page: this was the last one, so the total can be computed exactly
                        mTotalHits = firstResult + resultsReturned - 1;
                        break;
                    }
                    if (i == itemsBeforePage)
                    {
                        // The page claimed to be full but no item could be parsed from it. Without this
                        // check the very same request would be issued again and again (i never advances).
                        break;
                    }
                }
                mTotalHits = Math.Max(mTotalHits, (long)mResultSet.Count); // just to make sure ...
                if (mCache != null)
                {
                    mCache.PutIntoCache("YahooSearchEngine", mLanguage, mQuery, mTotalHits, mResultSet);
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Looks up definitions of the current query through a Google "define:" search and
        /// fills <c>mResultSet</c> with one item per definition found in the returned HTML.
        /// </summary>
        /// <remarks>
        /// When a cache is configured and <c>GetFromCache</c> reports a hit, no page is downloaded.
        /// Otherwise every parsed definition is stored in the cache first, and only afterwards
        /// is the in-memory result set trimmed to <c>mResultSetMaxSz</c> items.
        /// </remarks>
        public override void Search()
        {
            string langStr = TextMiningUtils.GetLanguageCode(mLanguage);

            mResultSet.Inner.Clear();
            if (mCache == null || !mCache.GetFromCache("GoogleDefine", mLanguage, mQuery, mResultSetMaxSz, ref mTotalHits, ref mResultSet))
            {
                // Both patterns are loop-invariant, so they are built once per call rather than once per definition.
                Regex defRegex = new Regex("<li>(?<def>[^<]*)(<br><a href=\"(?<href>[^\"]*))?", RegexOptions.Singleline);
                Regex urlRegex = new Regex("&q=(?<url>[^&]*)");

                int    i        = 0;
                string defHtml  = WebUtils.GetWebPage(string.Format("http://www.google.com/search?defl={0}&q=define%3A{1}", langStr, HttpUtility.UrlEncode(mQuery))); // throws WebException
                Match  defMatch = defRegex.Match(defHtml);
                while (defMatch.Success)
                {
                    string def      = HttpUtility.HtmlDecode(defMatch.Result("${def}").Trim());
                    string href     = defMatch.Result("${href}"); // empty when the list item carries no link
                    string url      = null;
                    Match  matchUrl = urlRegex.Match(href);
                    if (matchUrl.Success)
                    {
                        // the source URL is taken from the "q" parameter of the link
                        url = HttpUtility.UrlDecode(matchUrl.Result("${url}"));
                    }
                    mResultSet.Inner.Add(new SearchEngineResultItem(mQuery, def, url, ++i));
                    defMatch = defMatch.NextMatch();
                }
                // Walk backwards so that an item without a URL gets the URL of the nearest following
                // item that has one (it stays null when no later item has a URL).
                string lastUrl = null;
                for (int j = mResultSet.Count - 1; j >= 0; j--)
                {
                    if (mResultSet[j].Url == null)
                    {
                        mResultSet[j].SetUrl(lastUrl);
                    }
                    else
                    {
                        lastUrl = mResultSet[j].Url;
                    }
                }
                mTotalHits = mResultSet.Count;
                if (mCache != null)
                {
                    // cache the complete list before it is trimmed below
                    mCache.PutIntoCache("GoogleDefine", mLanguage, mQuery, mTotalHits, mResultSet);
                }
                if (mResultSetMaxSz < mResultSet.Count)
                {
                    mResultSet.Inner.RemoveRange(mResultSetMaxSz, mResultSet.Count - mResultSetMaxSz);
                }
            }
        }