Example #1
0
        /// <summary>
        /// Collects the inner text of every <c>&lt;div&gt;</c> element found in
        /// <c>c.HTMLText</c> and stores it, one div per line, in <c>c.ExtractedDivs</c>.
        /// </summary>
        /// <param name="c">Crawler whose <c>HTMLText</c> is scanned and whose <c>ExtractedDivs</c> is overwritten.</param>
        private void ExtractDivs(Crawler c)
        {
            // Singleline (not Multiline) is the option that lets '.' cross line breaks, so a div
            // whose content spans several lines is matched; IgnoreCase also accepts <DIV>.
            const RegexOptions options = RegexOptions.Singleline | RegexOptions.IgnoreCase;

            // A crawler without markup yields no divs instead of an ArgumentNullException.
            string html = c.HTMLText ?? String.Empty;

            StringBuilder s = new StringBuilder();
            foreach (Match m in Regex.Matches(html, @"<div\s?(.*?)>(.*?)</div", options))
            {
                // Replace every nested tag with a space so only the text content remains.
                string subres = Regex.Replace(m.Groups[2].Value, "(<.*?>)", " ", RegexOptions.Singleline);
                if (!String.IsNullOrWhiteSpace(subres))
                {
                    s.Append(subres);
                    s.Append("\n");
                }
            }

            c.ExtractedDivs = s.ToString();
        }
Example #2
0
 /// <summary>
 /// Click handler that starts <c>Crawler.CrawlNext</c> on a new thread, using the depth
 /// read from <c>DepthTextBox</c>, and adds an entry for it to the task list box.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void StartCrawlbutton_Copy_Click(object sender, RoutedEventArgs e)
 {
     const string taskName = "Crawling from last visited links";
     try
     {
         // Read the UI value here; the lambda below runs on the worker thread.
         int tmp_depth = (int)DepthTextBox.Value;
         Thread t = new Thread(() =>
         {
             Crawler c = new Crawler();
             c.CrawlNext(tmp_depth);
         });
         Tasks.Add(t);
         // Name the thread like the sibling start handler does, so it is identifiable when debugging.
         t.Name = taskName;
         t.Start();
         ThreadslistBox.BeginInit();
         TaskNames.Add(taskName);
         ThreadslistBox.DataContext = TaskNames;
         ThreadslistBox.EndInit();
     }
     catch (System.Exception ex)
     {
         // Starting a crawl stays best-effort (the handler never throws into the UI),
         // but the failure is recorded instead of disappearing silently.
         System.Diagnostics.Trace.TraceError("Failed to start crawl from last visited links: {0}", ex);
     }
 }
Example #3
0
 /// <summary>
 /// Click handler that starts <c>Crawler.Crawl</c> on a new thread, using the start link
 /// from <c>StartLinkTextBox</c> and the depth from <c>DepthTextBox</c>, and adds the
 /// thread's name to the task list box.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void StartCrawlbutton_Click(object sender, RoutedEventArgs e)
 {
     try
     {
         // Read the UI values here; the lambda below runs on the worker thread.
         string tmp_link = StartLinkTextBox.Text;
         int tmp_depth = (int)DepthTextBox.Value;
         Thread t = new Thread(() =>
         {
             Crawler c = new Crawler();
             c.Crawl(tmp_link, tmp_depth);
         });
         Tasks.Add(t);
         // The thread is named after the start link; the same name is shown in the list.
         t.Name = tmp_link;
         t.Start();
         ThreadslistBox.BeginInit();
         TaskNames.Add(t.Name);
         ThreadslistBox.DataContext = TaskNames;
         ThreadslistBox.EndInit();
     }
     catch (System.Exception ex)
     {
         // Starting a crawl stays best-effort (the handler never throws into the UI),
         // but the failure is recorded instead of disappearing silently.
         System.Diagnostics.Trace.TraceError("Failed to start crawl: {0}", ex);
     }
 }
Example #4
0
 /// <summary>
 /// Forwards <paramref name="url"/> to <c>Crawler.ParseLinkText</c>, creating the
 /// <c>_crawler</c> instance first if it does not exist yet.
 /// </summary>
 /// <param name="url">Value passed through unchanged to the crawler.</param>
 public void ParseLinkText(String url)
 {
     Crawler crawler = _crawler;
     if (crawler == null)
     {
         // First use: create the crawler and keep it for later calls.
         crawler = new Crawler();
         _crawler = crawler;
     }

     crawler.ParseLinkText(url);
 }
Example #5
0
 /// <summary>
 /// Forwards the request to <c>Crawler.Crawl</c>, creating the <c>_crawler</c>
 /// instance first if it does not exist yet.
 /// </summary>
 /// <param name="start_url">Value passed through unchanged as the crawler's first argument.</param>
 /// <param name="depth">Value passed through unchanged as the crawler's second argument.</param>
 public void Crawl(String start_url, int depth)
 {
     Crawler crawler = _crawler;
     if (crawler == null)
     {
         // First use: create the crawler and keep it for later calls.
         crawler = new Crawler();
         _crawler = crawler;
     }

     crawler.Crawl(start_url, depth);
 }
Example #6
0
 /// <summary>
 /// Runs <c>ExtractHeaders</c> on the crawler, then hands it to the base
 /// implementation of <c>SendNext</c>.
 /// </summary>
 /// <param name="c">Crawler passed to both calls.</param>
 public override void SendNext(Crawler c)
 {
     // This stage's own work comes first.
     ExtractHeaders(c);

     // Forwarding is unconditional for this stage.
     base.SendNext(c);
 }
Example #7
0
 /// <summary>
 /// Passes the crawler to <c>Next.SendNext</c> when <c>Next</c> is set;
 /// does nothing otherwise.
 /// </summary>
 /// <param name="c">Crawler forwarded to <c>Next</c>.</param>
 public virtual void SendNext(Crawler c)
 {
     if (Next == null)
     {
         // Nothing to forward to.
         return;
     }

     Next.SendNext(c);
 }
Example #8
0
 /// <summary>
 /// Stores the content of the first <c>&lt;title&gt;</c> element of <c>c.HTMLText</c> in
 /// <c>c.ExtractedTitle</c>. The title is set to <c>null</c> when there is no title
 /// element or its content is blank.
 /// </summary>
 /// <param name="c">Crawler whose <c>HTMLText</c> is scanned and whose <c>ExtractedTitle</c> is overwritten.</param>
 private void ExtractTitle(Crawler c)
 {
     // Singleline (not Multiline) is the option that lets '.' cross line breaks, so a title
     // written over several lines is matched; IgnoreCase also accepts <TITLE>.
     // Null markup is treated as empty, i.e. "no title".
     Match first = Regex.Match(
         c.HTMLText ?? String.Empty,
         @"<title\s?(.*?)>(.*?)</",
         RegexOptions.Singleline | RegexOptions.IgnoreCase);

     // Test Success instead of indexing a possibly empty collection and catching the exception.
     string subres = first.Success ? first.Groups[2].Value : null;

     // A blank title clears the property too, so a value left over from an earlier
     // extraction can never be mistaken for this page's title.
     c.ExtractedTitle = String.IsNullOrWhiteSpace(subres) ? null : subres;
 }
Example #9
0
 /// <summary>
 /// Runs <c>ExtractTitle</c> on the crawler and hands it to the base implementation of
 /// <c>SendNext</c> only when <c>c.ExtractedTitle</c> is not null, empty or whitespace.
 /// </summary>
 /// <param name="c">Crawler to process and, possibly, forward.</param>
 public override void SendNext(Crawler c)
 {
     ExtractTitle(c);

     if (String.IsNullOrWhiteSpace(c.ExtractedTitle))
     {
         // No usable title: the crawler is not forwarded.
         return;
     }

     base.SendNext(c);
 }
Example #10
0
 /// <summary>
 /// Runs <c>ExtractParagraphs</c> on the crawler, then hands it to the base
 /// implementation of <c>SendNext</c>.
 /// </summary>
 /// <param name="c">Crawler passed to both calls.</param>
 public override void SendNext(Crawler c)
 {
     // This stage's own work comes first.
     ExtractParagraphs(c);

     // Forwarding is unconditional for this stage.
     base.SendNext(c);
 }
Example #11
0
 /// <summary>
 /// Runs <c>ExtractLinksText</c> on the crawler, then hands it to the base
 /// implementation of <c>SendNext</c>.
 /// </summary>
 /// <param name="c">Crawler passed to both calls.</param>
 public override void SendNext(Crawler c)
 {
     // This stage's own work comes first.
     ExtractLinksText(c);

     // Forwarding is unconditional for this stage.
     base.SendNext(c);
 }
Example #12
0
 /// <summary>
 /// Finds every <c>href=</c> occurrence in <c>c.HTMLText</c>, converts each match with
 /// <c>ExtractSingleLink</c> (given the match text and <c>c.URL</c>) and adds every
 /// non-blank result to <c>c.LINKS</c>.
 /// </summary>
 /// <param name="c">Crawler whose <c>HTMLText</c> is scanned and whose <c>LINKS</c> collection is appended to.</param>
 private void ExtractLinks(Crawler c)
 {
     // The pattern is greedy and '.' stops at a line break, so each match runs
     // from "href=" to the end of that line.
     foreach (Match hrefMatch in Regex.Matches(c.HTMLText, @"href=(.*)"))
     {
         string candidate = ExtractSingleLink(hrefMatch.Value, c.URL);
         if (String.IsNullOrWhiteSpace(candidate))
         {
             continue;
         }

         c.LINKS.Add(candidate);
     }
 }