Example #1
0
        /// <summary>
        /// Initializes the spider, seeds the URL manager with the root URL and
        /// starts all <c>threadsNum</c> worker threads.
        /// </summary>
        /// <remarks>
        /// The root URL is queued with an associated value of 0
        /// (presumably the crawl depth; confirm against the URL manager).
        /// Each thread receives its own index as the start argument.
        /// </remarks>
        public void Crawl()
        {
            Init();
            Console.WriteLine("Spider Initialized");
            UrlsMng.AddUrl(new KeyValuePair<string, int>(rootUrl, 0));

            for (int i = 0; i < threadsNum; i++)
            {
                // Mark the slot busy BEFORE the thread starts: once the worker is
                // running it maintains this flag itself, and a late write from here
                // could overwrite the value the worker has already set.
                idleThreads[i] = false;
                threads[i].Start(i);
                Console.WriteLine("第" + (i + 1) + "条线程开启");
            }
        }
Example #2
0
        /// <summary>
        /// Worker loop: repeatedly takes a URL from the URL manager, downloads it,
        /// queues the links found on the page and passes the page to the data handler.
        /// The loop ends when no URL is pending and every worker is flagged idle.
        /// </summary>
        /// <param name="threadIndex">
        /// Boxed <see cref="int"/>: index of this worker's slot in <c>idleThreads</c>.
        /// </param>
        void CrawlProc(object threadIndex)
        {
            var currentIndex = (int)threadIndex;

            while (true)
            {
                if (!UrlsMng.HasNewUrl)
                {
                    idleThreads[currentIndex] = true;
                    // Nothing pending and nobody busy: no new URL can appear, so exit.
                    if (idleThreads.All(t => t))
                    {
                        // Report the 1-based thread number, matching the start-up message.
                        Console.WriteLine("第" + (currentIndex + 1) + "条线程退出");
                        break;
                    }
                    // Another worker is still busy and may queue more URLs; poll again later.
                    Thread.Sleep(2000);
                    continue;
                }

                // Clear the flag before dequeuing: while this worker holds a URL,
                // the all-idle exit check of the other workers must fail.
                idleThreads[currentIndex] = false;

                KeyValuePair<string, int> current;
                lock (UrlsMng)
                {
                    // Re-check under the lock: another worker may have taken the
                    // last URL since the unsynchronized check above.
                    if (!UrlsMng.HasNewUrl)
                    {
                        continue;
                    }
                    current = UrlsMng.GetUrl();
                }

                var html = Downloader.Download(current.Key);
                var parseResult = Parser.ParseURLS(html, current.Value);

                lock (UrlsMng)
                {
                    // Discovered links are queued with the current value incremented by one.
                    foreach (var url in parseResult.Keys)
                    {
                        UrlsMng.AddUrl(new KeyValuePair<string, int>(url, current.Value + 1));
                    }
                }

                // Fall back to the raw HTML when the parser yields no filtered content.
                var filteredContent = Parser.ParseHTML(html) ?? html;

                lock (DataHdler)
                {
                    DataHdler.CollectData(current.Key, current.Value, html, filteredContent);
                }
            }
        }