// Form constructor: builds a minimal 300x200 window holding a single
// centered "Cancel" button, shows it, then starts the crawl from the
// root address (rootAddress, setTimer, sw, startThreadTask are fields /
// helpers declared elsewhere in this class).
Program()
{
    this.Size = new Size(300, 200);
    var b = new Button();
    b.Parent = this;
    b.Text = "Cancel";
    b.Anchor = AnchorStyles.None;
    // Cancel click: signal the cancellation token, poison the shared crawl
    // state under the lock, wake every waiting thread, then close the form.
    b.Click += (object sender, EventArgs e) =>
    {
        tokenSource.Cancel();
        lock (Lock)
        {
            PoisonTheWater();
            Monitor.PulseAll(Lock);
        }
        this.Close();
    };
    // Center the button inside the client area.
    b.Left = b.Parent.ClientSize.Width / 2 - b.Width / 2;
    b.Top = b.Parent.ClientSize.Height / 2 - b.Height / 2;
    this.Show();
    // Seed link for the crawl: points at itself with depth 0.
    webLink rootLink = new webLink();
    rootLink.link = rootAddress;
    rootLink.origin = rootAddress;
    rootLink.depth = 0;
    //start thread pool and wait till finish
    Console.WriteLine("Working...\n");
    setTimer();
    sw.Start();
    startThreadTask(rootLink);
    return;
}
// Registers one unit of crawl work and hands it to the thread pool.
// The worker that brings the active count back to zero poisons the
// shared state so every waiting thread can shut down.
public static void startThreadTask(webLink link)
{
    lock (Lock)
    {
        currentWorkers++;
    }
    WaitCallback work = delegate(object si)
    {
        Consumer(link);
        lock (Lock)
        {
            currentWorkers--;
            if (currentWorkers == 0) //last worker poisons everyone else
            {
                PoisonTheWater();
            }
        }
    };
    ThreadPool.QueueUserWorkItem(work);
}
// Registers one unit of crawl work and runs it on the thread pool via
// Task.Run. The worker that brings the active count back to zero poisons
// the shared state so every waiting thread can shut down.
//
// Fix: the original passed tokenSource.Token to Task.Run. When cancellation
// is requested before the task starts, Task.Run never executes the delegate,
// so currentWorkers was left incremented and the "last worker poisons"
// shutdown could never fire (the program would hang waiting for poison).
// Cancellation is already observed inside Consumer, so the pre-start token
// is dropped, and the bookkeeping now runs in a finally block so the count
// is decremented no matter how Consumer exits.
public static void startThreadTask(webLink link)
{
    lock (Lock)
    {
        currentWorkers++;
    }
    Task.Run(() =>
    {
        try
        {
            Consumer(link);
        }
        finally
        {
            lock (Lock)
            {
                currentWorkers--;
                if (currentWorkers == 0) //last worker poisons everyone else
                {
                    PoisonTheWater();
                }
            }
        }
    });
}
// Downloads a single link to a numbered local file; on success, and while
// still under maxDepth, hands the saved file to Producer to harvest more
// links. A failed download records the link in deadLinks exactly once.
//
// Fix: WebClient is IDisposable and was never disposed; it is now wrapped
// in a using block so its underlying resources are released even when the
// download throws.
public static void Consumer(webLink linkToTry)
{
    int num = 0;
    using (WebClient Client = new WebClient())
    {
        try
        {
            lock (Lock)
            {
                if (poison)
                {
                    return; // crawl is shutting down — do no work
                }
                num = fileNum++; //increment file name for link
            }
            Client.DownloadFile(linkToTry.link.AbsoluteUri, num.ToString()); //Try to download, throws exception if link is dead or doesnt work
            if (linkToTry.depth < maxDepth) //start producing more consumer tasks if maxDepth not reached
            {
                Producer(new Tuple <string, webLink>(num.ToString(), linkToTry));
            }
        }
        catch (Exception e) //link is dead or doesnt respond, add to deadlinks and incriment death count.
        {
            lock (Lock)
            {
                if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                {
                    linkToTry.exception = e;
                    deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                    numDeadLinks++;
                }
            }
        }
    }
}
// Producer thread body: repeatedly takes a (fileName, webLink) pair off
// producerLinks, scans the downloaded file for href matches, and enqueues
// every same-host link not seen before onto clientLinks for the consumers.
// Exits when the shared 'poison' flag is observed; an unexpected exception
// sets the flag itself so all threads shut down.
public static void Producer()
{
    //string regMatch = "<\\s*a\\s+[^>]*href\\s*=\\s*['\"][^'\"]*['\"]";
    string regMatch = "href\\s*=\\s*(?:[\"'](?<1>[^\"']*)[\"']|(?<1>\\S+))"; //this regex is provided on the microsoft C# documentation website
    string data, address = "";
    Regex URLmatch = new Regex(regMatch);
    MatchCollection MC;
    Tuple <string, webLink> link = null; //fileName, weblink struct
    webLink newLink;
    Uri newAddress;
    bool poisoned = false; // NOTE(review): never set to true; the loop only exits via the returns below
    while (!poisoned)
    {
        try
        {
            //Check the Queue for contents, else wait or die if poisoned
            lock (Lock)
            {
                while (producerLinks.Count == 0 && poison == false)
                {
                    Monitor.Wait(Lock); // sleep until a consumer enqueues work or poison is set
                }
                if (poison)
                {
                    return;
                }
                link = producerLinks.Dequeue();
                // A link becomes "visited" the moment a producer claims it, so
                // finding it already present means the queue held a duplicate.
                if (visitedLinks.ContainsKey(link.Item2.link.AbsoluteUri))
                {
                    throw new Exception("Error: Link already added to Dictionary!!! Why is it in Queue!?");
                }
                visitedLinks.Add(link.Item2.link.AbsoluteUri, link.Item2);
                currentWorkers++;
                //Console.WriteLine("Producer Working on: {0}, depth: {1}", link.Item2.link.AbsoluteUri, link.Item2.depth);
            }
            //read the entire document and match all instances of an address to a collention
            data = File.ReadAllText(link.Item1);
            if (data.Length > 0)
            {
                MC = URLmatch.Matches(data);
                foreach (Match m in MC)
                {
                    int testEqPos = m.Value.IndexOf('=');
                    if (testEqPos > 0) // skip matches with no attribute value
                    {
                        address = m.Value.Substring(m.Value.IndexOf('=') + 1).Trim(); //get the address after the = and href
                        if (address.Length > 3)
                        {
                            address = address.Substring(1, address.Length - 2); //remove the " " from the address
                        }
                        // Absolute-looking addresses are used verbatim; anything else
                        // is resolved relative to the page the link came from.
                        if (address.Contains(link.Item2.link.Host) || address.StartsWith("http") || address.StartsWith("https"))
                        {
                            newAddress = new Uri(address);
                        }
                        else
                        {
                            newAddress = new Uri(link.Item2.link, address);
                        }
                        if (newAddress.Host == link.Item2.link.Host) // only crawl within the original host
                        {
                            newLink = new webLink();
                            newLink.link = newAddress;
                            newLink.origin = link.Item2.link;
                            newLink.depth = link.Item2.depth + 1;
                            lock (Lock)
                            {
                                //Console.WriteLine("match address: '{0}'", address);
                                // Queue only links that are not visited, not known dead,
                                // and not already waiting in the queue.
                                if (!visitedLinks.ContainsKey(newLink.link.AbsoluteUri) &&
                                    !deadLinks.ContainsKey(newLink.link.AbsoluteUri) && !quedLinks.ContainsKey(newLink.link.AbsoluteUri))
                                {
                                    quedLinks.Add(newLink.link.AbsoluteUri, newLink);
                                    clientLinks.Enqueue(newLink);
                                    Monitor.PulseAll(Lock); // wake consumers waiting for work
                                }
                            }
                        }
                    }
                }
            }
        }
        catch (Exception e)
        {
            // Unexpected failure: report it, poison every thread, and exit.
            lock (Lock)
            {
                Console.WriteLine("Error This shouldnt happen, Trying to Read from File:'{0}' for : '{1}'\nAddress:'{2}', Origin:'{3}'\nException: '{4}'\n"
                                  , link.Item1, link.Item2.link.AbsoluteUri, address, link.Item2.link.AbsoluteUri, e.Message);
                poison = true;
                Monitor.PulseAll(Lock);
                return;
            }
        }
        lock (Lock)
        {
            currentWorkers--; // finished one unit of work
        }
    }
}
// Entry point (thread-per-worker version): parses the root URL and crawl
// depth from the command line (or via an OpenFileDialog when no args are
// given), initializes the shared crawl state, runs 4 consumer and
// 4 producer threads, and reports dead links once all threads have joined.
//
// Fixes: args[1] was read whenever args[0] was non-empty, so running with a
// single argument crashed with IndexOutOfRangeException — the depth argument
// is now optional and defaults to 1. The file-dialog result is validated
// before being turned into a Uri (a cancelled dialog has an empty FileName,
// which made the Uri constructor throw). The "Please Input a URL string!"
// error, previously unreachable, now fires for an empty first argument.
static void Main(string[] args)
{
    Uri rootAddress = null;
    bool gotDistance = false;
    if (args.Length != 0)
    {
        if (args[0].Length > 0)
        {
            try
            {
                rootAddress = new Uri(args[0]);
            }
            catch (Exception e)
            {
                ErrorMessageExit("Issue with creating root Uri", e);
            }
            // Depth argument is optional; only parse it when present.
            if (args.Length > 1)
            {
                gotDistance = Int32.TryParse(args[1], out maxDepth);
            }
        }
        else
        {
            ErrorMessageExit("Please Input a URL string!");
        }
    }
    else
    {
        // No command-line args: let the user pick a local file as the root.
        OpenFileDialog dlg = new OpenFileDialog();
        dlg.Filter = "All files|*.*";
        if (dlg.ShowDialog() != DialogResult.OK || string.IsNullOrWhiteSpace(dlg.FileName))
        {
            dlg.Dispose();
            return; // dialog cancelled
        }
        rootAddress = new Uri(dlg.FileName.Trim());
        dlg.Dispose();
        maxDepth = 1;
    }
    if (gotDistance == false)
    {
        maxDepth = 1; // default crawl depth
    }
    if (maxDepth < 0)
    {
        ErrorMessageExit("Please Input a positive integer for distance!");
    }
    //INITIALLIZE EVERYTHING
    visitedLinks = new Dictionary <string, webLink>();
    deadLinks = new Dictionary <string, webLink>();
    quedLinks = new Dictionary <string, webLink>();
    clientLinks = new Queue <webLink>();
    producerLinks = new Queue <Tuple <string, webLink> >();
    Threads = new List <Thread>();
    Lock = new object();
    webLink rootLink = new webLink();
    setTimer();
    numDeadLinks = 0;
    currentWorkers = 0;
    fileNum = 0;
    rootLink.link = rootAddress;
    rootLink.origin = rootAddress;
    rootLink.depth = 0;
    clientLinks.Enqueue(rootLink);
    Console.WriteLine("Working...\n");
    //create 4 producers and consumers
    for (int t = 0; t < 4; t++)
    {
        Thread newConsumer = new Thread(() => Consumer());
        Thread newProducer = new Thread(() => Producer());
        newConsumer.Start();
        newProducer.Start();
        Threads.Add(newConsumer);
        Threads.Add(newProducer);
    }
    //wait for the work to be done
    foreach (Thread t in Threads)
    {
        t.Join();
    }
    //get rid of the timer
    onTheClock.Stop();
    onTheClock.Dispose();
    //print out the bad links
    if (visitedLinks.Count > 0)
    {
        foreach (KeyValuePair <string, webLink> key in deadLinks)
        {
            Console.WriteLine("DeadLinkOrigin: {0}\nDeadLink: {1}\nDepth: {2}\nException: {3}\n", key.Value.origin, key.Key, key.Value.depth, key.Value.exception.Message);
        }
        Console.WriteLine("number of links visited: {0}\nnumber of deadLinks: {1}\n", visitedLinks.Count, numDeadLinks);
    }
    Console.WriteLine("Done");
    Console.Read();
}
// Consumer thread body: dequeues a webLink from clientLinks, downloads it
// to a numbered local file, and (below maxDepth, same host as its origin)
// hands the file to the producers via producerLinks. The thread that
// observes both queues empty with no active workers declares the crawl
// finished by setting 'poison' and waking everyone.
public static void Consumer()
{
    WebClient Client = new WebClient();
    webLink linkToTry = new webLink();
    Tuple <string, webLink> linkToAdd = null;
    int num = 0;
    bool poisoned = false; // NOTE(review): never set to true; the loop only exits via the returns below
    while (!poisoned)
    {
        try
        {
            lock (Lock)
            {
                if (clientLinks.Count == 0 && producerLinks.Count == 0 && currentWorkers == 0) //last worker poisons everyone else
                {
                    poison = true;
                    Monitor.PulseAll(Lock);
                    return;
                }
                while (clientLinks.Count == 0 && poison == false) //no work, no poison? wait
                {
                    Monitor.Wait(Lock);
                }
                if (poison) //Is there poison to partake of and leave?
                {
                    return;
                }
                linkToTry = clientLinks.Dequeue(); //no snack, just work, get item to work on
                currentWorkers++; //incriment workers
                num = fileNum++; //increment file name for link
                //Console.WriteLine("Consumer working on: {0}\nDepth: {1}", linkToTry.link.AbsoluteUri, linkToTry.depth);
            }
            Client.DownloadFile(linkToTry.link.AbsoluteUri, num.ToString()); //Try to download, throws exception if link is dead or doesnt work
            if (linkToTry.depth < maxDepth) //add links file to queue for more work, and wake everyone up
            {
                // Only harvest links from pages on the same host as their origin.
                if (linkToTry.link.Host == linkToTry.origin.Host)
                {
                    linkToAdd = new Tuple <string, webLink>(num.ToString(), linkToTry);
                    lock (Lock)
                    {
                        producerLinks.Enqueue(linkToAdd);
                        Monitor.PulseAll(Lock);
                    }
                }
            }
        }
        catch (Exception e) //link is dead or doesnt respond, add to deadlinks and incriment death count.
        {
            lock (Lock)
            {
                if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                {
                    linkToTry.exception = e;
                    deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                    numDeadLinks++;
                }
            }
        }
        lock (Lock)
        {
            currentWorkers--; // finished one unit of work
        }
    }
}
// Parses a downloaded page (link.Item1 = local file name) for href matches,
// resolves each candidate against the page's Uri, and schedules a new
// consumer task for every same-host link not yet visited or known dead.
// Any unexpected failure poisons the shared state to shut the crawl down.
//
// Fixes (made consistent with the threaded Producer() overload): matches
// with no '=' or with a too-short value previously crashed in
// Substring(1, length - 2) — and that exception poisoned the entire crawl —
// so the same testEqPos > 0 and address.Length > 3 guards the sibling uses
// are applied here; the previously-unused testEqPos local is now consulted.
public static void Producer(Tuple <string, webLink> link)
{
    //string regMatch = "<\\s*a\\s+[^>]*href\\s*=\\s*['\"][^'\"]*['\"]";
    string regMatch = "href\\s*=\\s*(?:[\"'](?<1>[^\"']*)[\"']|(?<1>\\S+))"; //this regex is provided on the microsoft C# documentation website
    string data, address = "";
    Regex URLmatch = new Regex(regMatch);
    webLink newLink;
    Uri newAddress;
    try
    {
        //Bail out early if the crawl has been poisoned
        lock (Lock)
        {
            if (poison)
            {
                return;
            }
        }
        //read the entire document and match all instances of an address
        data = File.ReadAllText(link.Item1);
        if (data.Length > 0)
        {
            foreach (Match m in URLmatch.Matches(data))
            {
                int testEqPos = m.Value.IndexOf('=');
                if (testEqPos > 0) // skip matches with no attribute value
                {
                    address = m.Value.Substring(testEqPos + 1).Trim(); //get the address after the = and href
                    if (address.Length > 3)
                    {
                        address = address.Substring(1, address.Length - 2); //remove the " " from the address
                    }
                    // Absolute-looking addresses are used verbatim; anything else
                    // is resolved relative to the page the link came from.
                    if (address.Contains(link.Item2.link.Host) || address.StartsWith("http") || address.StartsWith("https"))
                    {
                        newAddress = new Uri(address);
                    }
                    else
                    {
                        newAddress = new Uri(link.Item2.link, address);
                    }
                    if (newAddress.Host == link.Item2.link.Host) // only crawl within the original host
                    {
                        newLink = new webLink();
                        newLink.link = newAddress;
                        newLink.origin = link.Item2.link;
                        newLink.depth = link.Item2.depth + 1;
                        lock (Lock)
                        {
                            if (!visitedLinks.ContainsKey(newAddress.AbsoluteUri) && !deadLinks.ContainsKey(newLink.link.AbsoluteUri))
                            {
                                visitedLinks.Add(newAddress.AbsoluteUri, newLink);
                                startThreadTask(newLink);
                            }
                        }
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        // Unexpected failure: report it, poison every worker, and exit.
        lock (Lock)
        {
            Console.WriteLine("Error!!! This shouldnt happen; Trying to Read from File:'{0}' for : '{1}'\nAddress:'{2}', Origin:'{3}'\nException: '{4}'\n"
                              , link.Item1, link.Item2.link.AbsoluteUri, address, link.Item2.link.AbsoluteUri, e.Message);
            PoisonTheWater();
            return;
        }
    }
}
// Entry point (task-based version): parses the root URL and crawl depth
// from the command line (or via an OpenFileDialog when no args are given),
// initializes the shared crawl state, starts the first worker task, waits
// until the crawl is poisoned (finished or cancelled), then prints the
// dead-link report with the elapsed time.
//
// Fixes: args[1] was read whenever args[0] was non-empty, so running with a
// single argument crashed with IndexOutOfRangeException — the depth
// argument is now optional and defaults to 1. The file-dialog result is
// validated before being turned into a Uri (a cancelled dialog has an empty
// FileName, which made the Uri constructor throw). The "Please Input a URL
// string!" error, previously unreachable, now fires for an empty first
// argument.
static void Main(string[] args)
{
    Uri rootAddress = null;
    bool gotDistance = false;
    if (args.Length != 0)
    {
        if (args[0].Length > 0)
        {
            try
            {
                rootAddress = new Uri(args[0]);
            }
            catch (Exception e)
            {
                ErrorMessageExit("Issue with creating root Uri", e);
            }
            // Depth argument is optional; only parse it when present.
            if (args.Length > 1)
            {
                gotDistance = Int32.TryParse(args[1], out maxDepth);
                if (gotDistance == false)
                {
                    ErrorMessageExit("Please Input a integer for distance!");
                }
            }
        }
        else
        {
            ErrorMessageExit("Please Input a URL string!");
        }
    }
    else
    {
        // No command-line args: let the user pick a local file as the root.
        OpenFileDialog dlg = new OpenFileDialog();
        dlg.Filter = "All files|*.*";
        if (dlg.ShowDialog() != DialogResult.OK || string.IsNullOrWhiteSpace(dlg.FileName))
        {
            dlg.Dispose();
            return; // dialog cancelled
        }
        rootAddress = new Uri(dlg.FileName.Trim());
        dlg.Dispose();
        maxDepth = 1;
    }
    if (gotDistance == false)
    {
        maxDepth = 1; // default crawl depth
    }
    if (maxDepth < 0)
    {
        ErrorMessageExit("Please Input a positive integer for distance!");
    }
    //INITIALLIZE EVERYTHING
    visitedLinks = new Dictionary <string, webLink>();
    deadLinks = new Dictionary <string, webLink>();
    Lock = new object();
    webLink rootLink = new webLink();
    Stopwatch sw = new Stopwatch();
    numDeadLinks = 0;
    currentWorkers = 0;
    fileNum = 0;
    rootLink.link = rootAddress;
    rootLink.origin = rootAddress;
    rootLink.depth = 0;
    setTimer();
    sw.Start();
    //start thread pool and wait till finish
    Console.WriteLine("Working...\n");
    visitedLinks.Add(rootAddress.AbsoluteUri, rootLink);
    startThreadTask(rootLink);
    // Block until a worker (or the cancel button) poisons the crawl.
    lock (Lock)
    {
        while (!poison)
        {
            Monitor.Wait(Lock);
        }
    }
    //get rid of the timer and stop the stopwatch
    sw.Stop();
    onTheClock.Stop();
    onTheClock.Dispose();
    //print out the bad links
    if (visitedLinks.Count > 0)
    {
        foreach (KeyValuePair <string, webLink> key in deadLinks)
        {
            Console.WriteLine("DeadLinkOrigin: {0}\nDeadLink: {1}\nDepth: {2}\nException: {3}\n", key.Value.origin, key.Key, key.Value.depth, key.Value.exception.Message);
        }
        Console.WriteLine("number of links visited: {0}\nnumber of deadLinks: {1}\nElapsedTime: {2}", visitedLinks.Count, numDeadLinks, sw.Elapsed);
    }
    else
    {
        Console.WriteLine("No Links Visited, RootAddressGiven: '{0}', DepthGiven: '{1}'", rootAddress, maxDepth);
    }
    Console.WriteLine("Done");
    Console.Read();
}
// Downloads one link with HttpClient and saves the body to a local file;
// below maxDepth the saved file is handed to Producer to harvest more
// links. Non-success status codes and thrown exceptions record the link in
// deadLinks exactly once; cancellation poisons the shared state so the
// crawl shuts down.
//
// Fixes: HttpClient and the output FileStream were never disposed — the
// stream leaked its file handle whenever Write threw — both now live in
// using blocks. The unused exception variable in the cancellation catch is
// dropped. (Note: .Result blocks the calling thread; the whole pipeline is
// synchronous by design, so that is preserved.)
public static void Consumer(webLink linkToTry)
{
    int num;
    string FileName;
    using (HttpClient Client = new HttpClient())
    {
        try
        {
            lock (Lock)
            {
                if (poison)
                {
                    return; // crawl is shutting down — do no work
                }
                num = fileNum++; //increment file name for link
            }
            var result = Client.GetAsync(linkToTry.link.AbsoluteUri, tokenSource.Token); //Try to download, throws exception if link is dead or doesnt work
            if (tokenSource.IsCancellationRequested)
            {
                tokenSource.Token.ThrowIfCancellationRequested();
            }
            if (result.Result.IsSuccessStatusCode) //Http connection success
            {
                // file:// roots keep their original name (suffixed) so repeated
                // runs are recognizable; web content just gets the number.
                if (linkToTry.link.IsFile)
                {
                    FileName = Path.GetFileName(linkToTry.link.LocalPath) + '_' + num.ToString();
                }
                else
                {
                    FileName = num.ToString();
                }
                byte[] webData = result.Result.Content.ReadAsByteArrayAsync().Result;
                using (FileStream newFile = new FileStream(FileName, FileMode.Create))
                {
                    newFile.Write(webData, 0, webData.Length);
                }
                if (linkToTry.depth < maxDepth) //start producing more consumer tasks if maxDepth not reached
                {
                    Producer(new Tuple <string, webLink>(num.ToString(), linkToTry));
                }
            }
            else //link is dead or doesnt respond, add to deadlinks and incriment death count.
            {
                lock (Lock)
                {
                    if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                    {
                        linkToTry.exception = result.Result.StatusCode.ToString();
                        deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                        numDeadLinks++;
                    }
                }
            }
        }
        catch (OperationCanceledException)
        {
            // User cancelled: make sure every other worker stops too.
            PoisonTheWater();
            return;
        }
        catch (Exception e)
        {
            lock (Lock)
            {
                if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                {
                    linkToTry.exception = e.Message.ToString();
                    deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                    numDeadLinks++;
                }
            }
        }
    }
}