Exemple #1
0
    // Builds the window (a single centered "Cancel" button), shows it, and then
    // kicks off the crawl at rootAddress via startThreadTask.
    Program()
    {
        this.Size = new Size(300, 200);

        var cancelButton = new Button();
        cancelButton.Parent = this;
        cancelButton.Text   = "Cancel";
        cancelButton.Anchor = AnchorStyles.None;

        // Cancel: signal the token, poison the workers under the shared lock,
        // wake anything waiting on that lock, then close the window.
        cancelButton.Click += (sender, e) =>
        {
            tokenSource.Cancel();
            lock (Lock)
            {
                PoisonTheWater();
                Monitor.PulseAll(Lock);
            }
            this.Close();
        };

        // Center the button inside its parent's client area.
        Size area = cancelButton.Parent.ClientSize;
        cancelButton.Left = area.Width / 2 - cancelButton.Width / 2;
        cancelButton.Top  = area.Height / 2 - cancelButton.Height / 2;
        this.Show();

        // The root link is its own origin and sits at depth 0.
        webLink rootLink = new webLink
        {
            link   = rootAddress,
            origin = rootAddress,
            depth  = 0
        };

        // Start the timer and the stopwatch-like `sw`, then hand the root link to a worker.
        Console.WriteLine("Working...\n");
        setTimer();
        sw.Start();
        startThreadTask(rootLink);
    }
Exemple #2
0
 // Counts one more active worker, then queues Consumer(link) on the thread pool.
 // When a worker finishes and the active count drops to zero, it calls PoisonTheWater().
 public static void startThreadTask(webLink link)
 {
     lock (Lock)
     {
         currentWorkers += 1;
     }

     WaitCallback work = state =>
     {
         Consumer(link);

         lock (Lock)
         {
             currentWorkers -= 1;
             if (currentWorkers != 0)
             {
                 return;     // other workers are still running
             }

             // last worker poisons everyone else
             PoisonTheWater();
         }
     };

     ThreadPool.QueueUserWorkItem(work);
 }
Exemple #3
0
 // Counts one more active worker, then runs Consumer(link) as a Task bound to
 // tokenSource.Token. When a worker finishes and the active count drops to zero,
 // it calls PoisonTheWater().
 public static void startThreadTask(webLink link)
 {
     lock (Lock)
     {
         currentWorkers++;
     }
     Task.Run(() =>
     {
         try
         {
             Consumer(link);
         }
         finally
         {
             // Always give the worker slot back: an exception escaping Consumer is
             // captured by the (unobserved) Task, and without this the count would
             // never reach zero and the last-worker shutdown would never fire.
             lock (Lock)
             {
                 currentWorkers--;
                 if (currentWorkers == 0)    //last worker poisons everyone else
                 {
                     PoisonTheWater();
                 }
             }
         }
     }, tokenSource.Token);
     // NOTE(review): if the token is cancelled before the task starts, the delegate
     // never runs and the increment above is not undone - confirm the cancel path
     // poisons the workers itself.
 }
Exemple #4
0
    // Downloads one link to a file named after the next file number and, if the
    // link is shallower than maxDepth, passes the downloaded file to Producer.
    // Any failure records the link in deadLinks (once) together with the exception.
    // Does nothing if the poison flag is already set.
    public static void Consumer(webLink linkToTry)
    {
        int num = 0;

        try
        {
            lock (Lock)
            {
                if (poison)
                {
                    return;
                }

                num = fileNum++;                                                            //increment file name for link
            }

            // WebClient is IDisposable: release it as soon as the download is done
            // instead of leaking one instance per link.
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(linkToTry.link.AbsoluteUri, num.ToString());            //Try to download, throws exception if link is dead or doesnt work
            }

            if (linkToTry.depth < maxDepth)                                                 //start producing more consumer tasks if maxDepth not reached
            {
                Producer(new Tuple <string, webLink>(num.ToString(), linkToTry));
            }
        }
        catch (Exception e)                                                                 //link is dead or doesnt respond, add to deadlinks and increment death count.
        {
            lock (Lock)
            {
                if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                {
                    linkToTry.exception = e;
                    deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                    numDeadLinks++;
                }
            }
        }
    }
Exemple #5
0
    // Producer thread body. Repeatedly takes a (fileName, webLink) pair from
    // producerLinks, marks the link as visited, scans the downloaded file for href
    // values and enqueues every not-yet-seen link on the same host into clientLinks.
    // Returns when the poison flag is set. An unexpected exception is logged, sets
    // the poison flag, wakes every waiter and ends this producer.
    public static void Producer()
    {
        //string regMatch = "<\\s*a\\s+[^>]*href\\s*=\\s*['\"][^'\"]*['\"]";
        string                  regMatch = "href\\s*=\\s*(?:[\"'](?<1>[^\"']*)[\"']|(?<1>\\S+))"; //this regex is provided on the microsoft C# documentation website
        string                  address  = "";
        Regex                   URLmatch = new Regex(regMatch);
        Tuple <string, webLink> link     = null;     //fileName, weblink struct

        while (true)
        {
            try
            {
                //Check the Queue for contents, else wait or die if poisoned
                lock (Lock)
                {
                    while (producerLinks.Count == 0 && !poison)
                    {
                        Monitor.Wait(Lock);
                    }

                    if (poison)
                    {
                        return;
                    }

                    link = producerLinks.Dequeue();

                    if (visitedLinks.ContainsKey(link.Item2.link.AbsoluteUri))
                    {
                        throw new InvalidOperationException("Error: Link already added to Dictionary!!! Why is it in Queue!?");
                    }
                    visitedLinks.Add(link.Item2.link.AbsoluteUri, link.Item2);
                    currentWorkers++;
                }

                //read the entire document and match all instances of an address
                string data = File.ReadAllText(link.Item1);

                foreach (Match m in URLmatch.Matches(data))
                {
                    // Group 1 of the regex is the href value itself, already without
                    // surrounding quotes; this also handles unquoted and very short
                    // values that manual quote stripping would mangle.
                    address = m.Groups[1].Value.Trim();
                    if (address.Length == 0)
                    {
                        continue;
                    }

                    // Resolves relative addresses against the current page and accepts
                    // absolute ones as-is; a malformed href is skipped instead of
                    // aborting the whole crawl.
                    Uri newAddress;
                    if (!Uri.TryCreate(link.Item2.link, address, out newAddress))
                    {
                        continue;
                    }

                    if (newAddress.Host != link.Item2.link.Host)    //only follow links on the same host
                    {
                        continue;
                    }

                    webLink newLink = new webLink();
                    newLink.link   = newAddress;
                    newLink.origin = link.Item2.link;
                    newLink.depth  = link.Item2.depth + 1;

                    lock (Lock)
                    {
                        if (!visitedLinks.ContainsKey(newLink.link.AbsoluteUri) && !deadLinks.ContainsKey(newLink.link.AbsoluteUri) && !quedLinks.ContainsKey(newLink.link.AbsoluteUri))
                        {
                            quedLinks.Add(newLink.link.AbsoluteUri, newLink);
                            clientLinks.Enqueue(newLink);
                            Monitor.PulseAll(Lock);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                lock (Lock)
                {
                    // link is still null if the failure happened before anything was dequeued.
                    string fileName = link != null ? link.Item1 : "";
                    object linkUri  = link != null ? (object)link.Item2.link.AbsoluteUri : "";
                    object origin   = link != null ? (object)link.Item2.origin : "";

                    Console.WriteLine("Error This shouldnt happen, Trying to Read from File:'{0}' for : '{1}'\nAddress:'{2}', Origin:'{3}'\nException: '{4}'\n"
                                      , fileName, linkUri, address, origin, e.Message);
                    poison = true;
                    Monitor.PulseAll(Lock);
                    return;
                }
            }
            lock (Lock)
            {
                currentWorkers--;
                // Wake waiting threads so they can re-evaluate their wait conditions
                // now that the active-worker count has dropped.
                Monitor.PulseAll(Lock);
            }
        }
    }
Exemple #6
0
    // Entry point of the thread-based crawler.
    //   args[0] - root URL to crawl (when no arguments are given, a file dialog asks for a local file)
    //   args[1] - optional maximum link depth; defaults to 1 when missing or not an integer
    // Starts 4 consumer and 4 producer threads, waits for all of them, then prints
    // the dead links that were found.
    static void Main(string[] args)
    {
        Uri  rootAddress = null;
        bool gotDistance = false;

        if (args.Length == 0)
        {
            // No command line: let the user pick a local file as the root.
            using (OpenFileDialog dlg = new OpenFileDialog())
            {
                dlg.Filter = "All files|*.*";
                if (dlg.ShowDialog() != DialogResult.OK || dlg.FileName.Trim().Length == 0)
                {
                    return;     // dialog cancelled, nothing to crawl
                }
                rootAddress = new Uri(dlg.FileName.Trim());
            }
        }
        else if (args[0].Length > 0)
        {
            try
            {
                rootAddress = new Uri(args[0]);
            }
            catch (UriFormatException e)
            {
                ErrorMessageExit("Issue with creating root Uri", e);
            }

            // The depth argument is optional; only parse it when it is really there.
            gotDistance = args.Length > 1 && Int32.TryParse(args[1], out maxDepth);
        }
        else
        {
            ErrorMessageExit("Please Input a URL string!");
        }

        if (rootAddress == null)
        {
            // Presumably ErrorMessageExit terminates the process; guard in case it returns.
            return;
        }

        if (gotDistance == false)
        {
            maxDepth = 1;
            //ErrorMessageExit("Please Input a integer for distance!");
        }
        if (maxDepth < 0)
        {
            ErrorMessageExit("Please Input a positive integer for distance!");
        }

        //INITIALIZE EVERYTHING
        visitedLinks  = new Dictionary <string, webLink>();
        deadLinks     = new Dictionary <string, webLink>();
        quedLinks     = new Dictionary <string, webLink>();
        clientLinks   = new Queue <webLink>();
        producerLinks = new Queue <Tuple <string, webLink> >();
        Threads       = new List <Thread>();
        Lock          = new object();
        webLink rootLink = new webLink();

        setTimer();

        numDeadLinks   = 0;
        currentWorkers = 0;
        fileNum        = 0;

        // The root link is its own origin and sits at depth 0.
        rootLink.link   = rootAddress;
        rootLink.origin = rootAddress;
        rootLink.depth  = 0;
        clientLinks.Enqueue(rootLink);

        Console.WriteLine("Working...\n");
        //create 4 producers and consumers
        for (int t = 0; t < 4; t++)
        {
            Thread newConsumer = new Thread(() => Consumer());
            Thread newProducer = new Thread(() => Producer());
            newConsumer.Start();
            newProducer.Start();

            Threads.Add(newConsumer);
            Threads.Add(newProducer);
        }
        //wait for the work to be done
        foreach (Thread t in Threads)
        {
            t.Join();
        }

        //get rid of timer
        onTheClock.Stop();
        onTheClock.Dispose();

        //print out the bad links
        if (visitedLinks.Count > 0)
        {
            foreach (KeyValuePair <string, webLink> key in deadLinks)
            {
                Console.WriteLine("DeadLinkOrigin: {0}\nDeadLink: {1}\nDepth: {2}\nException: {3}\n", key.Value.origin, key.Key, key.Value.depth, key.Value.exception.Message);
            }
            Console.WriteLine("number of links visited: {0}\nnumber of deadLinks: {1}\n", visitedLinks.Count, numDeadLinks);
        }
        Console.WriteLine("Done");
        Console.Read();
    }
Exemple #7
0
    // Consumer thread body. Repeatedly takes a link from clientLinks, downloads it
    // to a file named after the next file number and, when the link is shallower
    // than maxDepth and on the same host as its origin, queues the file in
    // producerLinks. Failed downloads are recorded in deadLinks. The consumer that
    // finds both queues empty with no active workers sets the poison flag and wakes
    // everyone; every consumer returns once the flag is set.
    public static void Consumer()
    {
        webLink linkToTry = new webLink();
        int     num       = 0;

        // One WebClient for the life of the thread, disposed when the thread ends.
        using (WebClient Client = new WebClient())
        {
            while (true)
            {
                try
                {
                    lock (Lock)
                    {
                        while (clientLinks.Count == 0 && !poison)                               //no work, no poison? wait
                        {
                            // Re-checked on every wake-up, not just once before waiting,
                            // so a consumer that was already asleep still notices that
                            // all work has finished.
                            if (producerLinks.Count == 0 && currentWorkers == 0)                //last worker poisons everyone else
                            {
                                poison = true;
                                Monitor.PulseAll(Lock);
                                return;
                            }
                            Monitor.Wait(Lock);
                        }

                        if (poison)                                                             //Is there poison to partake of and leave?
                        {
                            return;
                        }

                        linkToTry = clientLinks.Dequeue();                                      //get item to work on
                        currentWorkers++;                                                       //increment workers
                        num = fileNum++;                                                        //increment file name for link
                    }

                    Client.DownloadFile(linkToTry.link.AbsoluteUri, num.ToString());            //Try to download, throws exception if link is dead or doesnt work

                    //add links file to queue for more work, and wake everyone up
                    if (linkToTry.depth < maxDepth && linkToTry.link.Host == linkToTry.origin.Host)
                    {
                        Tuple <string, webLink> linkToAdd = new Tuple <string, webLink>(num.ToString(), linkToTry);
                        lock (Lock)
                        {
                            producerLinks.Enqueue(linkToAdd);
                            Monitor.PulseAll(Lock);
                        }
                    }
                }
                catch (Exception e)                                                             //link is dead or doesnt respond, add to deadlinks and increment death count.
                {
                    lock (Lock)
                    {
                        if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                        {
                            linkToTry.exception = e;
                            deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                            numDeadLinks++;
                        }
                    }
                }
                lock (Lock)
                {
                    currentWorkers--;
                    // Wake waiting threads so they re-evaluate the termination check.
                    // NOTE(review): any other code that decrements currentWorkers must
                    // pulse Lock as well, or sleeping consumers can miss the final drop to zero.
                    Monitor.PulseAll(Lock);
                }
            }
        }
    }
Exemple #8
0
    // Scans one downloaded file for href values and starts a new worker task for
    // every link on the same host that is neither visited nor known dead.
    //   link.Item1 - name of the downloaded file to read
    //   link.Item2 - the webLink the file was downloaded from
    // Does nothing if the poison flag is already set. An unexpected exception is
    // logged and followed by PoisonTheWater().
    public static void Producer(Tuple <string, webLink> link)
    {
        //string regMatch = "<\\s*a\\s+[^>]*href\\s*=\\s*['\"][^'\"]*['\"]";
        string regMatch = "href\\s*=\\s*(?:[\"'](?<1>[^\"']*)[\"']|(?<1>\\S+))"; //this regex is provided on the microsoft C# documentation website
        string address  = "";
        Regex  URLmatch = new Regex(regMatch);

        try
        {
            //die if poisoned
            lock (Lock)
            {
                if (poison)
                {
                    return;
                }
            }

            //read the entire document and match all instances of an address
            string data = File.ReadAllText(link.Item1);

            foreach (Match m in URLmatch.Matches(data))
            {
                // Group 1 of the regex is the href value itself, already without
                // surrounding quotes. Slicing the raw match by hand threw on values
                // shorter than two characters and cut characters off unquoted ones.
                address = m.Groups[1].Value.Trim();
                if (address.Length == 0)
                {
                    continue;
                }

                // Resolves relative addresses against the current page and accepts
                // absolute ones as-is; a malformed href is skipped instead of
                // poisoning the whole crawl.
                Uri newAddress;
                if (!Uri.TryCreate(link.Item2.link, address, out newAddress))
                {
                    continue;
                }

                if (newAddress.Host != link.Item2.link.Host)    //only follow links on the same host
                {
                    continue;
                }

                webLink newLink = new webLink();
                newLink.link   = newAddress;
                newLink.origin = link.Item2.link;
                newLink.depth  = link.Item2.depth + 1;

                lock (Lock)
                {
                    if (!visitedLinks.ContainsKey(newAddress.AbsoluteUri) && !deadLinks.ContainsKey(newLink.link.AbsoluteUri))
                    {
                        // Marked visited before the worker starts so the same
                        // address is never queued twice.
                        visitedLinks.Add(newAddress.AbsoluteUri, newLink);
                        startThreadTask(newLink);
                    }
                }
            }
        }
        catch (Exception e)
        {
            lock (Lock)
            {
                Console.WriteLine("Error!!! This shouldnt happen; Trying to Read from File:'{0}' for : '{1}'\nAddress:'{2}', Origin:'{3}'\nException: '{4}'\n"
                                  , link.Item1, link.Item2.link.AbsoluteUri, address, link.Item2.origin, e.Message);
                PoisonTheWater();
            }
        }
    }
Exemple #9
0
    // Entry point of the task-based crawler.
    //   args[0] - root URL to crawl (when no arguments are given, a file dialog asks
    //             for a local file and the depth is 1)
    //   args[1] - maximum link depth; must be an integer when a URL is given
    // Starts the first worker for the root link, blocks until the poison flag is
    // set, then prints the dead links and the elapsed time.
    static void Main(string[] args)
    {
        Uri  rootAddress = null;
        bool gotDistance = false;

        if (args.Length == 0)
        {
            // No command line: let the user pick a local file as the root.
            using (OpenFileDialog dlg = new OpenFileDialog())
            {
                dlg.Filter = "All files|*.*";
                if (dlg.ShowDialog() != DialogResult.OK || dlg.FileName.Trim().Length == 0)
                {
                    return;     // dialog cancelled, nothing to crawl
                }
                rootAddress = new Uri(dlg.FileName.Trim());
            }
        }
        else if (args[0].Length > 0)
        {
            try
            {
                rootAddress = new Uri(args[0]);
            }
            catch (UriFormatException e)
            {
                ErrorMessageExit("Issue with creating root Uri", e);
            }

            // A missing depth argument is reported the same way as an unparsable
            // one instead of indexing past the end of args.
            gotDistance = args.Length > 1 && Int32.TryParse(args[1], out maxDepth);
            if (gotDistance == false)
            {
                ErrorMessageExit("Please Input a integer for distance!");
            }
        }
        else
        {
            ErrorMessageExit("Please Input a URL string!");
        }

        if (rootAddress == null)
        {
            // Presumably ErrorMessageExit terminates the process; guard in case it returns.
            return;
        }

        if (gotDistance == false)
        {
            maxDepth = 1;
            //ErrorMessageExit("Please Input a integer for distance!");
        }
        if (maxDepth < 0)
        {
            ErrorMessageExit("Please Input a positive integer for distance!");
        }

        //INITIALIZE EVERYTHING
        visitedLinks = new Dictionary <string, webLink>();
        deadLinks    = new Dictionary <string, webLink>();
        Lock         = new object();
        webLink   rootLink = new webLink();
        Stopwatch sw       = new Stopwatch();

        numDeadLinks   = 0;
        currentWorkers = 0;
        fileNum        = 0;

        // The root link is its own origin and sits at depth 0.
        rootLink.link   = rootAddress;
        rootLink.origin = rootAddress;
        rootLink.depth  = 0;

        setTimer();
        sw.Start();

        //start the first worker and wait till the poison flag is set
        Console.WriteLine("Working...\n");
        visitedLinks.Add(rootAddress.AbsoluteUri, rootLink);
        startThreadTask(rootLink);
        lock (Lock)
        {
            // NOTE(review): relies on whoever sets poison also pulsing Lock - confirm in PoisonTheWater.
            while (!poison)
            {
                Monitor.Wait(Lock);
            }
        }

        //get rid of timer and stop stopwatch
        sw.Stop();
        onTheClock.Stop();
        onTheClock.Dispose();

        //print out the bad links
        if (visitedLinks.Count > 0)
        {
            foreach (KeyValuePair <string, webLink> key in deadLinks)
            {
                Console.WriteLine("DeadLinkOrigin: {0}\nDeadLink: {1}\nDepth: {2}\nException: {3}\n", key.Value.origin, key.Key, key.Value.depth, key.Value.exception.Message);
            }
            Console.WriteLine("number of links visited: {0}\nnumber of deadLinks: {1}\nElapsedTime: {2}", visitedLinks.Count, numDeadLinks, sw.Elapsed);
        }
        else
        {
            Console.WriteLine("No Links Visited, RootAddressGiven: '{0}', DepthGiven: '{1}'", rootAddress, maxDepth);
        }

        Console.WriteLine("Done");
        Console.Read();
    }
Exemple #10
0
    // Fetches one link over HTTP. On a success status the body is written to a
    // local file and, if the link is shallower than maxDepth, that file is passed to
    // Producer. A non-success status or any other failure records the link in
    // deadLinks (once) with a short description. Cancellation through tokenSource
    // calls PoisonTheWater(). Does nothing if the poison flag is already set.
    public static void Consumer(webLink linkToTry)
    {
        int    num;
        string FileName;

        try
        {
            lock (Lock)
            {
                if (poison)
                {
                    return;
                }

                num = fileNum++;                                                            //increment file name for link
            }

            // GetAwaiter().GetResult() rethrows the original exception. Task.Result would
            // wrap it in an AggregateException, which skips the OperationCanceledException
            // handler below and logs every failure as "One or more errors occurred.".
            using (HttpClient Client = new HttpClient())
            using (HttpResponseMessage response = Client.GetAsync(linkToTry.link.AbsoluteUri, tokenSource.Token).GetAwaiter().GetResult())
            {
                tokenSource.Token.ThrowIfCancellationRequested();

                if (response.IsSuccessStatusCode)           //Http connection success
                {
                    if (linkToTry.link.IsFile)
                    {
                        FileName = Path.GetFileName(linkToTry.link.LocalPath) + '_' + num.ToString();
                    }
                    else
                    {
                        FileName = num.ToString();
                    }

                    // Creates or overwrites the file and closes it even if the write fails.
                    byte[] webData = response.Content.ReadAsByteArrayAsync().GetAwaiter().GetResult();
                    File.WriteAllBytes(FileName, webData);

                    if (linkToTry.depth < maxDepth)                                         //start producing more consumer tasks if maxDepth not reached
                    {
                        // Pass the name the data was actually written under.
                        Producer(new Tuple <string, webLink>(FileName, linkToTry));
                    }
                }
                else                    //link is dead or doesnt respond, add to deadlinks and increment death count.
                {
                    lock (Lock)
                    {
                        if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                        {
                            linkToTry.exception = response.StatusCode.ToString();
                            deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                            numDeadLinks++;
                        }
                    }
                }
            }
        }
        catch (OperationCanceledException)
        {
            PoisonTheWater();
        }
        catch (Exception e)
        {
            lock (Lock)
            {
                if (!deadLinks.ContainsKey(linkToTry.link.AbsoluteUri))
                {
                    linkToTry.exception = e.Message.ToString();
                    deadLinks.Add(linkToTry.link.AbsoluteUri, linkToTry);
                    numDeadLinks++;
                }
            }
        }
    }