/// <summary>
/// Runs the current query against the Yahoo web search service and fills the result set,
/// consulting the cache first when one is configured.
/// </summary>
/// <remarks>
/// Results are requested page by page (at most 100 items per request) until
/// <c>mResultSetMaxSz</c> items have been collected or the service stops supplying more.
/// Freshly fetched results are written back to the cache. <c>SendRequest</c> may throw
/// WebException or QuotaExceededException.
/// </remarks>
public override void Search()
{
    // A non-null exception is handed to Utils.ThrowException only when more than 1000 results are requested.
    Utils.ThrowException(mResultSetMaxSz > 1000 ? new ArgumentValueException("ResultSetMaxSize") : null);
    string langStr = TextMiningUtils.GetLanguageCode(mLanguage);
    mResultSet.Inner.Clear();

    if (mCache != null && mCache.GetFromCache("YahooSearchEngine", mLanguage, mQuery, mResultSetMaxSz, ref mTotalHits, ref mResultSet))
    {
        return; // cache hit: total hits and result set were filled by the cache
    }

    // Request-invariant pieces of the URL are computed once instead of once per page.
    int resultsPerPage = Math.Min(mResultSetMaxSz, 100);
    string encodedAppId = HttpUtility.UrlEncode(mAppId);
    string encodedQuery = HttpUtility.UrlEncode(mQuery);
    string languageParam = mLanguage == Language.Unspecified ? "" : string.Format("&language={0}", langStr);

    int i = 0; // number of items collected so far; the service's "start" parameter is 1-based
    while (i < mResultSetMaxSz)
    {
        string request = string.Format(
            "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid={0}&query={1}&results={2}&start={3}{4}",
            encodedAppId,
            encodedQuery,
            resultsPerPage,
            i + 1,
            languageParam);

        int firstResult, resultsReturned;
        string response = SendRequest(request, out firstResult, out resultsReturned); // throws WebException, QuotaExceededException
        // NOTE(review): mTotalHits is presumably updated inside SendRequest - confirm.
        if (mRetry && mTotalHits == 0) // *** Yahoo sometimes returns 0 results even if this is not the case (do a retry)
        {
            Thread.Sleep(2000); // delay for 2 seconds
            response = SendRequest(request, out firstResult, out resultsReturned); // throws WebException, QuotaExceededException
        }

        if (firstResult != i + 1)
        {
            // The response does not start at the requested position; treat what was collected so far as the total.
            mTotalHits = i;
            break;
        }

        int collectedBeforePage = i;
        for (Match itemMatch = mResultItemRegex.Match(response); itemMatch.Success; )
        {
            string title = HttpUtility.HtmlDecode(itemMatch.Result("${title}"));
            string snippet = HttpUtility.HtmlDecode(itemMatch.Result("${summary}"));
            string url = HttpUtility.HtmlDecode(itemMatch.Result("${url}"));
            mResultSet.Inner.Add(new SearchEngineResultItem(title, snippet, url, mResultSet.Count + 1));
            itemMatch = itemMatch.NextMatch();
            if (++i == mResultSetMaxSz)
            {
                break;
            }
        }

        if (resultsReturned < resultsPerPage)
        {
            // Short page: this was the last one, so the total is the index of the last returned item.
            mTotalHits = firstResult + resultsReturned - 1;
            break;
        }

        if (i == collectedBeforePage)
        {
            // A full page was reported but no item could be parsed from it. Without this guard the
            // same request would be re-issued forever because the start offset never advances.
            break;
        }
    }

    mTotalHits = Math.Max(mTotalHits, (long)mResultSet.Count); // just to make sure ...
    if (mCache != null)
    {
        mCache.PutIntoCache("YahooSearchEngine", mLanguage, mQuery, mTotalHits, mResultSet);
    }
}
/// <summary>
/// Looks up definitions of the current query through Google's "define:" search and fills
/// the result set, consulting the cache first when one is configured.
/// </summary>
/// <remarks>
/// Every <c>&lt;li&gt;</c> entry of the returned page becomes one result item whose title is the
/// query and whose snippet is the definition text. The complete list is cached before it is
/// truncated to <c>mResultSetMaxSz</c> items. <c>WebUtils.GetWebPage</c> may throw WebException.
/// </remarks>
public override void Search()
{
    string langStr = TextMiningUtils.GetLanguageCode(mLanguage);
    mResultSet.Inner.Clear();

    if (mCache != null && mCache.GetFromCache("GoogleDefine", mLanguage, mQuery, mResultSetMaxSz, ref mTotalHits, ref mResultSet))
    {
        return; // cache hit: total hits and result set were filled by the cache
    }

    string requestUrl = string.Format("http://www.google.com/search?defl={0}&q=define%3A{1}", langStr, HttpUtility.UrlEncode(mQuery));
    string defHtml = WebUtils.GetWebPage(requestUrl); // throws WebException

    // Both patterns are built once up front; the URL pattern used to be re-created for every definition.
    Regex definitionRegex = new Regex("<li>(?<def>[^<]*)(<br><a href=\"(?<href>[^\"]*))?", RegexOptions.Singleline);
    Regex targetUrlRegex = new Regex("&q=(?<url>[^&]*)");

    int rank = 0;
    for (Match defMatch = definitionRegex.Match(defHtml); defMatch.Success; defMatch = defMatch.NextMatch())
    {
        string def = HttpUtility.HtmlDecode(defMatch.Result("${def}").Trim());
        string href = defMatch.Result("${href}");

        // The source URL is taken from the "q" parameter of the link; it stays null when the entry has no such link.
        string url = null;
        Match urlMatch = targetUrlRegex.Match(href);
        if (urlMatch.Success)
        {
            url = HttpUtility.UrlDecode(urlMatch.Result("${url}"));
        }

        mResultSet.Inner.Add(new SearchEngineResultItem(mQuery, def, url, ++rank));
    }

    // Walk backwards so that an item without a URL inherits the URL of the nearest later item
    // that has one; trailing items with no later URL keep null.
    string lastUrl = null;
    for (int j = mResultSet.Count - 1; j >= 0; j--)
    {
        if (mResultSet[j].Url == null)
        {
            mResultSet[j].SetUrl(lastUrl);
        }
        else
        {
            lastUrl = mResultSet[j].Url;
        }
    }

    mTotalHits = mResultSet.Count;
    if (mCache != null)
    {
        // Cache the full list; truncation to the requested size happens afterwards.
        mCache.PutIntoCache("GoogleDefine", mLanguage, mQuery, mTotalHits, mResultSet);
    }

    if (mResultSetMaxSz < mResultSet.Count)
    {
        mResultSet.Inner.RemoveRange(mResultSetMaxSz, mResultSet.Count - mResultSetMaxSz);
    }
}