/// <summary>
/// Copies the search parameters out of <paramref name="query"/> into this composer's fields
/// and positions the moving end marker at the end of the requested range.
/// </summary>
/// <param name="query">
/// Query supplying the start/end dates, keyword, additional query string and the
/// isOrigin / isVip flags. The date casts throw if the dates cannot be converted to
/// <see cref="DateTime"/>.
/// </param>
public void Initialize(SinaSearch.WeiboSearchQuery query)
{
    this._start = (DateTime)query.StartDate;
    this._end = (DateTime)query.EndDate;
    this._keyword = query.Keyword;

    // "nodup=1" is appended so that all results are returned and none are left out
    // (per the original author's note).
    this._addtionQuery = String.Format("{0}&nodup=1", query.AddtionQuery);

    this._isOrigin = (query.isOrigin == true);
    this._isVip = (query.isVip == true);

    // The current end marker starts out at the end of the requested range.
    this._currentEnd = this._end;
}
/// <summary>
/// Creates the query composer that matches <paramref name="query"/> and initializes it
/// with that same query.
/// </summary>
/// <param name="query">Search query; its SearchAll flag selects the composer type.</param>
/// <returns>
/// An initialized <c>TimePeriodQueryComposer</c> when SearchAll is true, otherwise an
/// initialized <c>SimpleQueryComposer</c>.
/// </returns>
internal static IWeiboQueryComposer GetQueryComposer(SinaSearch.WeiboSearchQuery query)
{
    // The explicit cast gives both arms of the conditional a common type.
    IWeiboQueryComposer selected = query.SearchAll == true
        ? (IWeiboQueryComposer)new TimePeriodQueryComposer()
        : new SimpleQueryComposer();

    selected.Initialize(query);
    return selected;
}
/// <summary>
/// Stores <paramref name="query"/> in this composer for later use.
/// </summary>
/// <param name="query">The search query to keep; it is stored as-is without validation.</param>
public void Initialize(SinaSearch.WeiboSearchQuery query)
{
    this._query = query;
}
/// <summary>
/// Feeds the outcome of the most recent search request back into the composer so it can
/// move its current end marker, switch to (or continue with) the region-based composer,
/// and detect when the whole range has been processed.
/// </summary>
/// <param name="result">
/// Items returned by the request. When the method reaches the de-duplication step, the
/// array is replaced with the items whose URL was not recorded by the previous call.
/// Must not be null.
/// </param>
/// <param name="count">
/// Hit count reported for the request. A negative value is treated as a failed request
/// and leaves all state untouched.
/// </param>
public void ReportStatus(ref Item[] result, int count)
{
    Logger.Warn(String.Format("ReportStatus count {0}, returned {1}", count, result.Length));

    if (count < 0)
    {
        // Probably a captcha challenge was shown. Nothing is updated, so the same
        // request can be tried again (a 10-minute sleep used to live here but was disabled).
        return;
    }

    // While a region composer is active, it owns all status reports.
    if (_fallToRegion && _regionComposer != null)
    {
        _regionComposer.ReportStatus(ref result, count);
        if (_regionComposer.Empty)
        {
            _fallToRegion = false;
            _regionComposer = null;
            Logger.Warn(String.Format("Region composer finished, next time end {0}", _currentEnd));
        }
        return;
    }

    // If the reported count is above MaximumItemPerRequest but fewer than
    // ConservativeItemPerRequest items actually came back, treat the request as
    // incomplete and leave the state alone so it gets sent again.
    if (count > MaximumItemPerRequest && result.Length < ConservativeItemPerRequest)
    {
        return;
    }

    // Sometimes count comes back as 0 even though items were returned, so the items
    // themselves (not count) decide where the window moves to.
    if (count <= MaximumItemPerRequest)
    {
        // Negative counts already returned at the top of the method, so no further
        // failure check is needed here.
        Logger.Warn(String.Format("ReportStatus count {0} is less than {1}", count, MaximumItemPerRequest));
        var latest = result.LastOrDefault();
        _currentEnd = latest == null ? _currentEnd : latest.PubDate.Value;
    }
    else
    {
        // The incomplete-request guard above means result.Length is at least
        // ConservativeItemPerRequest here; this assumes that constant is positive,
        // otherwise 'last' could be null.
        Item last = result.LastOrDefault();
        DateTime lastDate = last.PubDate.Value;
        Logger.Warn(String.Format("First {0}, Last {1}", result[0].PubDate, last.PubDate));

        // Too many items within a single clock hour (the original note says more than
        // 1000): handle that entire hour with the region-grouped algorithm instead.
        if (_currentEnd - lastDate < _hour && _currentEnd.Hour == lastDate.Hour)
        {
            _fallToRegion = true;
            _regionComposer = new RegionQueryComposer();
            DateTime hour = new DateTime(_currentEnd.Year, _currentEnd.Month, _currentEnd.Day, _currentEnd.Hour, 0, 0);
            Logger.Warn(String.Format("Fall back to region composer {0}", hour));
            var query = new SinaSearch.WeiboSearchQuery()
            {
                AddtionQuery = _addtionQuery,
                Keyword = _keyword,
                isOrigin = _isOrigin,
                isVip = _isVip,
                StartPage = 1,
                EndPage = 50,
                Option = SearchWeiboOption.RealTime,
                SearchAll = true,
                StartDate = hour,
                EndDate = hour,
            };
            _regionComposer.Initialize(query);

            // Once the region algorithm is done, carry on from the hour before this one.
            _currentEnd = hour.AddHours(-1);
            return;
        }

        _currentEnd = lastDate;
    }

    // De-duplication against the previous call's items; may not be strictly necessary.
    result = result.Where(model => !_lastQueryItems.Contains(model.Url)).ToArray();
    _lastQueryItems = new HashSet<String>(result.Select(model => model.Url));

    // Use <= rather than ==: the marker can move past _start (for example after a
    // region fallback steps back one hour, or when an item is older than _start),
    // and an equality test would then never report completion.
    if (_currentEnd <= _start)
    {
        _finished = true;
    }
}