コード例 #1
0
 /// <summary>
 /// Decides whether a crawl result should be kept.
 /// </summary>
 /// <param name="input">The page wrapper to evaluate; may be null.</param>
 /// <returns>
 /// <c>false</c> when <paramref name="input"/> is null or its title is null/empty;
 /// <c>true</c> when no title keyword is configured; otherwise whether the title
 /// matches the configured keyword, which is used as a regular-expression pattern.
 /// </returns>
 private bool IfHtmlPackerIsWanted(HtmlPacker input)
 {
     // A missing wrapper, or one without a title, is never wanted.
     if (input == null || string.IsNullOrEmpty(input.NowTitle))
     {
         return false;
     }

     // With no keyword configured, filtering is disabled and every titled page is accepted.
     bool keywordMissing = string.IsNullOrEmpty(m_strTitlKeyWord);

     // Short-circuit keeps the regex from running when there is no keyword.
     return keywordMissing || Regex.IsMatch(input.NowTitle, m_strTitlKeyWord);
 }
コード例 #2
0
        /// <summary>
        /// Performs one crawl step: takes a pending entry from the queue, requests its
        /// <c>HtmlPacker</c>, stores the packer in <c>LstResult</c> when it is wanted,
        /// and pushes its usable sub-URLs back onto the queue one level deeper.
        /// Does nothing when the queue has no pending entries.
        /// </summary>
        private void DoOneSpiderWork()
        {
            // Nothing pending: this step is a no-op.
            if (m_useQueue.UnUseSize <= 0)
            {
                return;
            }

            // The popped pair's key and value are handed to the request manager;
            // the value is treated as the entry's depth below.
            KeyValuePair<string, int> pending = m_useQueue.Pop();
            HtmlPacker page = m_useRequestManger.GetHtmlPacker(pending.Key, pending.Value);

            // Depth assigned to anything discovered from this entry.
            int childDepth = pending.Value + 1;

            // The wanted check also rejects a null packer, so this is safe before the null test below.
            if (IfHtmlPackerIsWanted(page))
            {
                LstResult.Add(page);
            }

            // No packer, or children would exceed the depth limit: do not expand further.
            if (page == null || childDepth > m_nMaxDeep)
            {
                return;
            }

            foreach (var candidateUrl in page.SubUrls)
            {
                // The URL check runs first for every sub-URL; the queue-size limit is
                // only consulted for URLs that passed it.
                if (IfUrlCanUse(candidateUrl) && m_useQueue.UnUseSize <= m_nMaxCount)
                {
                    m_useQueue.Push(candidateUrl, childDepth);
                }
            }
        }