-
Notifications
You must be signed in to change notification settings - Fork 0
/
LinkScraper.cs
55 lines (48 loc) · 1.28 KB
/
LinkScraper.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
using System;
using System.Collections.Generic;
using System.Net;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using System.Xml;
namespace WpfEmailScraper
{
    /// <summary>
    /// Collects the absolute link targets found in the <c>&lt;a href="..."&gt;</c>
    /// elements of web pages.
    /// </summary>
    internal class LinkScraper
    {
        // Accumulates links across every Scrape call; the set drops duplicates.
        private readonly HashSet<Uri> _results = new HashSet<Uri>();

        /// <summary>
        /// Gets the set of links gathered so far. This is the live backing set,
        /// so it keeps growing as <see cref="Scrape"/> is called again.
        /// </summary>
        public HashSet<Uri> Results
        {
            get { return _results; }
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and adds the target of every
        /// anchor whose <c>href</c> is an absolute URI to <see cref="Results"/>.
        /// Each <c>href</c> value is also echoed to the console.
        /// </summary>
        /// <param name="url">Address of the page to scan. A null or blank value is ignored.</param>
        /// <remarks>
        /// Scraping is best-effort: a failure to load or parse the page is reported
        /// through <see cref="System.Diagnostics.Trace"/> and never thrown to the caller.
        /// </remarks>
        public void Scrape(string url)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                return;
            }

            try
            {
                HtmlWeb web = new HtmlWeb();
                HtmlDocument document = web.Load(url);

                // SelectNodes yields null (not an empty collection) when nothing matches.
                HtmlNodeCollection anchors = document.DocumentNode.SelectNodes("//a[@href]");
                if (anchors == null)
                {
                    return;
                }

                foreach (HtmlNode anchor in anchors)
                {
                    HtmlAttribute hrefAttribute = anchor.Attributes["href"];
                    if (hrefAttribute == null)
                    {
                        continue;
                    }

                    string href = hrefAttribute.Value;
                    Console.WriteLine(href);

                    // Only absolute URIs are kept; relative or malformed hrefs are
                    // skipped without relying on exceptions for control flow.
                    Uri link;
                    if (Uri.TryCreate(href, UriKind.Absolute, out link))
                    {
                        _results.Add(link);
                    }
                }
            }
            catch (Exception ex)
            {
                // Deliberately non-fatal: one unreachable or broken page must not stop
                // the caller, but the failure should still be visible in diagnostics.
                System.Diagnostics.Trace.TraceWarning(
                    "LinkScraper: failed to scrape '{0}': {1}", url, ex.Message);
            }
        }
    }
}