public void Crawl()
{
    currentFileIndex = 0;
    noOfErrors = 0;

    using (WebClient wc = new WebClient())
    {
        SetProxy(wc);

        // Breadth-first traversal: the queue holds URLs waiting to be
        // downloaded, the set guards against visiting a URL twice.
        HashSet<string> visited = new HashSet<string>();
        Queue<string> queue = new Queue<string>();
        int counter = 0;

        string url = Seed;
        visited.Add("");   // pre-seed so empty/unresolvable hrefs are never enqueued
        visited.Add(url);
        queue.Enqueue(url);
        counter++;

        while (queue.Count > 0)
        {
            url = queue.Dequeue();
            string page = Download(wc, url);

            foreach (LinkItem link in LinkFinder.Find(page))
            {
                string newUrl = GetNewUrl(url, link.Href);

                // Stop collecting once the limit is reached; URLs that
                // are already queued will still be downloaded.
                if (counter >= MaxNbrOfLinks)
                {
                    break;
                }
                if (visited.Contains(newUrl))
                {
                    continue;
                }
                visited.Add(newUrl);
                queue.Enqueue(newUrl);
                counter++;
            }
        }
    }
}
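Crawl() leans on three helpers that are not shown in this listing: SetProxy, Download, and GetNewUrl. The sketch below is one plausible implementation, inferred from the fields the crawler resets (currentFileIndex, noOfErrors) and from the inline proxy logic in RunTest() further down; the file-naming scheme and the fallback behaviors are assumptions, not part of the original code.

// NOTE: these helpers are a sketch, not the original implementation.

// Route requests through the configured proxy, or disable the default
// proxy when no credentials are set (assumed to mirror RunTest()).
private void SetProxy(WebClient wc)
{
    if (Credentials != null)
    {
        wc.Proxy = new WebProxy(Config.proxyIp, Config.proxyPort);
        wc.Proxy.Credentials = Credentials;
    }
    else
    {
        WebRequest.DefaultWebProxy = null;
    }
}

// Fetch one page, persist it under a running index, and count failures.
// (Assumed: the naming scheme "<index>.html" is illustrative only.)
private string Download(WebClient wc, string url)
{
    try
    {
        string page = wc.DownloadString(url);
        string file = Config.filesDirectory + "/" + currentFileIndex + ".html";
        File.WriteAllText(file, page);
        currentFileIndex++;
        return page;
    }
    catch (WebException)
    {
        noOfErrors++;
        return string.Empty;   // an empty page simply yields no links
    }
}

// Resolve a possibly relative href against the page it was found on.
private string GetNewUrl(string currentUrl, string href)
{
    Uri result;
    if (Uri.TryCreate(new Uri(currentUrl), href, out result))
    {
        return result.AbsoluteUri;
    }
    return "";   // unparsable hrefs collapse onto the pre-seeded "" entry
}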
public void RunTest()
{
    using (WebClient wc = new WebClient())
    {
        // Same proxy setup as the crawler: authenticate against the
        // configured proxy, or bypass the default proxy entirely.
        if (Credentials != null)
        {
            wc.Proxy = new WebProxy(Config.proxyIp, Config.proxyPort);
            wc.Proxy.Credentials = Credentials;
        }
        else
        {
            WebRequest.DefaultWebProxy = null;
        }

        // Download the seed page and keep a copy for inspection.
        string page = wc.DownloadString(Seed);
        string file = Config.filesDirectory + "/test.html";
        File.WriteAllText(file, page);

        // Exercise the link extractor; the result is discarded, the
        // loop only verifies that parsing completes without throwing.
        foreach (LinkItem i in LinkFinder.Find(page))
        {
            string s = i.ToString();
        }
    }
}
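LinkItem and LinkFinder are likewise used without being defined. The sketch below follows the regex-based pattern that this API shape suggests (a LinkItem carrying an Href plus a ToString() override, and a static Find(string) that returns every anchor in a page); it assumes using System.Text.RegularExpressions and System.Collections.Generic, and a real HTML parser would be more robust against malformed markup.

// NOTE: a sketch of the assumed link-extraction types, not the original.
public struct LinkItem
{
    public string Href;
    public string Text;

    public override string ToString()
    {
        return Href + "\n\t" + Text;
    }
}

static class LinkFinder
{
    public static List<LinkItem> Find(string page)
    {
        List<LinkItem> list = new List<LinkItem>();

        // Match each <a ...>...</a> element in the page.
        MatchCollection matches = Regex.Matches(page, @"(<a.*?>.*?</a>)",
            RegexOptions.Singleline);

        foreach (Match m in matches)
        {
            string value = m.Groups[1].Value;
            LinkItem item = new LinkItem();

            // Pull out the href attribute, if present.
            Match href = Regex.Match(value, @"href=""(.*?)""",
                RegexOptions.Singleline);
            if (href.Success)
            {
                item.Href = href.Groups[1].Value;
            }

            // Strip inner tags to recover the visible link text.
            item.Text = Regex.Replace(value, @"\s*<.*?>\s*", "",
                RegexOptions.Singleline);

            list.Add(item);
        }
        return list;
    }
}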