Пример #1
0
        /// <summary>
        /// Worker loop. For as long as <c>ParserWork</c> is true, repeatedly empties
        /// <c>WaitingQueue</c>: each element is dequeued, its <c>DoJob()</c> is run,
        /// and it is then appended to <c>DoneQueue</c>. After every drain pass the
        /// loop sleeps 50 ms before polling again.
        /// </summary>
        private static void Parser()
        {
            while (true)
            {
                // Stop flag is re-read once per polling cycle.
                if (!ParserWork)
                {
                    return;
                }

                // Process everything that is currently waiting.
                while (WaitingQueue.Count != 0)
                {
                    QueryElement current = WaitingQueue.Dequeue();
                    current.DoJob();
                    DoneQueue.Enqueue(current);
                }

                // Nothing left to do right now; back off briefly before the next poll.
                Thread.Sleep(50);
            }
        }
Пример #2
0
        /// <summary>
        /// Processes a single URL: downloads the page, records the URL in
        /// <c>DoneQueue</c>, extracts the links from the HTML, runs
        /// <c>SepcialMatch</c> on every link, and enqueues the links that are not
        /// static files and are not already known into <c>UrlQueue</c> at depth
        /// <paramref name="depath"/> + 1.
        /// </summary>
        /// <param name="urlGroup">The URL entry to fetch; skipped if already in <c>DoneQueue</c>.</param>
        /// <param name="depath">
        /// Depth of <paramref name="urlGroup"/>. Discovered links are not enqueued
        /// when <c>depath + 1</c> exceeds <c>MaxSearchDepth</c>.
        /// </param>
        public void HandUrl(UrlGroup urlGroup, int depath)
        {
            // NOTE(review): this membership check reads DoneQueue without taking
            // DoneQueueLock, unlike the Enqueue below — confirm that is intended.
            if (DoneQueue.Contains(urlGroup))
            {
                return;
            }
            string html = HttpHelper.GetString(urlGroup.Url, Encoding.Default);

            lock (DoneQueueLock)
            {
                DoneQueue.Enqueue(urlGroup);
            }

            // 1. Collect every URL found on the page.
            List<string> listAllUrl = MatchDomainURL(html);

            // 2. Filter out URLs that point at scripts/images (and, presumably via
            //    MatchDomainURL, external sites — verify against that helper).
            string[] extArray = { ".jpg", ".png", ".gif", ".js" };
            List<string> mlstUrl = new List<string>();

            foreach (string url in listAllUrl)
            {
                // Invariant lower-casing: culture-sensitive ToLower() can map 'I'
                // to a dotless 'ı' (e.g. tr-TR), which would make ".GIF" miss ".gif".
                string tmpurl = url.ToLowerInvariant();
                bool isfile = false;
                foreach (string ext in extArray)
                {
                    // NOTE(review): substring match, so ".js" also matches e.g.
                    // ".jsp"/".json" — kept as in the original; confirm intent.
                    if (tmpurl.Contains(ext))
                    {
                        isfile = true;
                        break;
                    }
                }

                // Match special URLs (runs for every link, regardless of filtering).
                SepcialMatch(url);

                if (!isfile && !DoneQueue.Contains(url) && !UrlQueue.Contains(url))
                {
                    mlstUrl.Add(url);
                }
            }

            if (depath + 1 > MaxSearchDepth)
            {
                return; // Maximum search depth reached.
            }

            foreach (string url in mlstUrl)
            {
                int pauseMilliseconds = 0;

                lock (UrlQueueLock)
                {
                    UrlQueue.Enqueue(new UrlGroup()
                    {
                        Url = url, DepathNumber = depath + 1
                    });

                    // Test the larger threshold first; in the original order the
                    // "> 50000" branch was unreachable behind "> 10000".
                    if (UrlQueue.Count > 50000)
                    {
                        pauseMilliseconds = 10000;
                    }
                    else if (UrlQueue.Count > 10000)
                    {
                        pauseMilliseconds = 3000;
                    }
                }

                // Throttle outside the lock so that code draining UrlQueue under
                // UrlQueueLock is not blocked while this producer backs off.
                if (pauseMilliseconds > 0)
                {
                    Thread.Sleep(pauseMilliseconds);
                }
            }
        }