Beispiel #1
0
 /// <summary>
 /// Worker-thread entry point. Repeatedly removes the first entry of
 /// <c>MainForm.UrlManager.Urls</c>, fetches and word-segments that page via
 /// <c>HtmlContent.SetupSingleUrl</c>, adds the returned word counts into
 /// <c>NewWords</c>, and posts a progress update to the main form.
 /// Returns as soon as the URL list is found empty.
 /// </summary>
 void StartCrawlePage()
 {
     while (true)
     {
         UrlItem url;
         // The emptiness check and the removal happen under the same lock, so the
         // list is never inspected while another thread may be modifying it.
         lock (MainForm.UrlManager.Urls)
         {
             if (MainForm.UrlManager.Urls.Count == 0)
             {
                 return;
             }
             url = MainForm.UrlManager.Urls[0];
             MainForm.UrlManager.Urls.Remove(url);
         }

         WordSeg seg = Service.WordSegment;
         // Fetch a single page and segment it into words.
         Dictionary<string, ulong> words = HtmlContent.SetupSingleUrl(url.Url, false, seg);

         // Merge this page's counts into the table shared by all crawler threads.
         if (words != null && words.Count > 0)
         {
             lock (NewWords)
             {
                 foreach (KeyValuePair<string, ulong> word in words)
                 {
                     ulong existing;
                     if (NewWords.TryGetValue(word.Key, out existing))
                     {
                         NewWords[word.Key] = existing + word.Value;
                     }
                     else
                     {
                         NewWords.Add(word.Key, word.Value);
                     }
                 }
             }
         }

         // One more page done. Keep the value returned by the atomic increment:
         // re-reading _getedPage afterwards could observe another thread's
         // increment and make two threads both believe they finished the last page.
         var fetched = Interlocked.Increment(ref _getedPage);
         _mainForm.BeginInvoke(new Action(UpdateProgress));

         // Only the thread whose increment reached the total schedules post-processing.
         if (fetched == _totalPages)
         {
             _mainForm.BeginInvoke(new Action(PostCrawlerProcess));
         }
     }
 }
Beispiel #2
0
        /// <summary>
        /// KeyPress handler for the URL text box. When Enter is pressed, runs
        /// <c>HtmlContent.SetupSingleUrl</c> on the entered text, logs the elapsed
        /// milliseconds to the message list box, and shows the result in the property grid.
        /// </summary>
        private void urlTextBox_KeyPress(object sender, KeyPressEventArgs e)
        {
            // Any key other than Enter is ignored.
            if (e.KeyChar != (char)Keys.Enter)
            {
                return;
            }

            string address = this.urlTextBox.Text;

            // Time only the extraction call itself.
            Stopwatch timer = Stopwatch.StartNew();
            Dictionary<string, ulong> result = HtmlContent.SetupSingleUrl(address, false, _JWordSegmentor, wrcList);
            timer.Stop();

            long elapsedMs = timer.ElapsedMilliseconds;
            this.messageListBox.Items.Add("ExtractContent: " + elapsedMs.ToString());

            this.propertyGrid.SelectedObject = result;
        }