コード例 #1
0
        /// <summary>
        /// Copies the search parameters out of <paramref name="query"/> into this composer's
        /// fields and positions the moving end-of-window marker at the query's end date.
        /// </summary>
        /// <param name="query">Query supplying the date range, keyword, extra query string and filter flags.</param>
        public void Initialize(SinaSearch.WeiboSearchQuery query)
        {
            // Overall time window requested by the caller.
            _start = (DateTime)query.StartDate;
            _end   = (DateTime)query.EndDate;

            _keyword = query.Keyword;

            // "nodup=1" is appended so that all results are returned rather than some being omitted.
            _addtionQuery = String.Concat(query.AddtionQuery, "&", "nodup=1");

            // Only an explicit true enables a filter; any other value leaves it off.
            _isOrigin = query.isOrigin == true;
            _isVip    = query.isVip == true;

            // The moving cursor starts at the end of the requested window.
            _currentEnd = _end;
        }
コード例 #2
0
        /// <summary>
        /// Creates and initializes the query composer matching <paramref name="query"/>:
        /// a <c>TimePeriodQueryComposer</c> when <c>SearchAll</c> is true, otherwise a
        /// <c>SimpleQueryComposer</c>.
        /// </summary>
        /// <param name="query">The search query used both to pick and to initialize the composer.</param>
        /// <returns>The composer, already initialized with <paramref name="query"/>.</returns>
        internal static IWeiboQueryComposer GetQueryComposer(SinaSearch.WeiboSearchQuery query)
        {
            bool searchAll = query.SearchAll == true;

            // The explicit cast gives both branches of the conditional a common type.
            IWeiboQueryComposer selected = searchAll
                ? (IWeiboQueryComposer)new TimePeriodQueryComposer()
                : new SimpleQueryComposer();

            selected.Initialize(query);
            return selected;
        }
コード例 #3
0
 /// <summary>
 /// Stores a reference to the supplied query in the <c>_query</c> field.
 /// </summary>
 /// <param name="query">The search query to keep.</param>
 public void Initialize(SinaSearch.WeiboSearchQuery query)
 {
     this._query = query;
 }
コード例 #4
0
        /// <summary>
        /// Feeds the outcome of the latest search request back into the composer and updates
        /// the moving end-of-window marker (<c>_currentEnd</c>), the region fallback state and
        /// the finished flag accordingly.
        /// </summary>
        /// <param name="result">
        /// Items returned by the request. When the method runs through to the de-duplication
        /// step, the array is replaced by the items whose URL was not in the previous batch.
        /// </param>
        /// <param name="count">
        /// Total hit count reported for the request; a negative value makes the method return
        /// without changing any state.
        /// </param>
        public void ReportStatus(ref Item[] result, int count)
        {
            Logger.Warn(String.Format("ReportStatus count {0}, returned {1}", count, result.Length));

            // A negative count probably means a captcha was served. Leave all state untouched.
            // (A 10-minute back-off sleep used to be here and is disabled.)
            if (count < 0)
            {
                return;
            }

            // While the region fallback is active, delegate to it until it reports empty.
            if (_fallToRegion && _regionComposer != null)
            {
                _regionComposer.ReportStatus(ref result, count);
                if (_regionComposer.Empty)
                {
                    _fallToRegion   = false;
                    _regionComposer = null;
                    Logger.Warn(String.Format("Region composer finished, next time end {0}", _currentEnd));
                }
                return;
            }

            // If the displayed count exceeds MaximumItemPerRequest but fewer than
            // ConservativeItemPerRequest items came back, the response is considered
            // incomplete: skip it so the request is sent again.
            if (count > MaximumItemPerRequest && result.Length < ConservativeItemPerRequest)
            {
                return;
            }

            if (count <= MaximumItemPerRequest)
            {
                // Sometimes count comes back as 0 even though items were returned, so the
                // cursor is moved based on the items themselves, not on the count.
                // (count cannot be negative here; that case returned above.)
                Logger.Warn(String.Format("ReportStatus count {0} is less than {1}", count, MaximumItemPerRequest));
                var latest = result.LastOrDefault();
                _currentEnd = latest == null ? _currentEnd : latest.PubDate.Value;
            }
            else
            {
                // Reaching this branch means result.Length >= ConservativeItemPerRequest, so
                // the array is non-empty as long as that constant is positive (declared
                // outside this block - TODO confirm).
                Item     last     = result.LastOrDefault();
                DateTime lastDate = last.PubDate.Value;
                Logger.Warn(String.Format("First {0}, Last {1}", result[0].PubDate, last.PubDate));

                // More items than one request can cover, all within the same clock hour:
                // handle that whole hour with the region-grouping composer instead.
                if (_currentEnd - lastDate < _hour && _currentEnd.Hour == lastDate.Hour)
                {
                    _fallToRegion   = true;
                    _regionComposer = new RegionQueryComposer();
                    DateTime hour = new DateTime(_currentEnd.Year, _currentEnd.Month, _currentEnd.Day, _currentEnd.Hour, 0, 0);
                    Logger.Warn(String.Format("Fall back to region composer {0}", hour));

                    var query = new SinaSearch.WeiboSearchQuery()
                    {
                        AddtionQuery = _addtionQuery,
                        Keyword      = _keyword,
                        isOrigin     = _isOrigin,
                        isVip        = _isVip,
                        StartPage    = 1,
                        EndPage      = 50,
                        Option       = SearchWeiboOption.RealTime,
                        SearchAll    = true,
                        StartDate    = hour,
                        EndDate      = hour,
                    };
                    _regionComposer.Initialize(query);

                    // Once the region algorithm is done, carry on from the next (earlier) hour.
                    _currentEnd = hour.AddHours(-1);

                    return;
                }

                _currentEnd = lastDate;
            }

            // De-duplicate against the previous batch (possibly unnecessary), then remember
            // this batch's URLs for the next call. The old set is simply replaced.
            result = result.Where(model => !_lastQueryItems.Contains(model.Url)).ToArray();
            _lastQueryItems = new HashSet <String>(result.Select(model => model.Url));

            // Finish once the cursor reaches OR passes the start. Exact equality is not enough:
            // the cursor is set from item timestamps or hour.AddHours(-1) and may never equal _start.
            if (_currentEnd <= _start)
            {
                _finished = true;
            }
        }