/// <summary>
/// Builds a <c>Page</c> for <paramref name="currentUrl"/> and lets the parser fill in its
/// HTML, body text and paths, after waiting until the back queue reports that enough time
/// has passed for the URL's host.
/// </summary>
/// <param name="backQueue">Queue whose <c>EnoughTimeHasPassed</c> check gates the fetch.</param>
/// <param name="currentUrl">URL the new page is created from.</param>
/// <returns>
/// The populated page, or <c>null</c> when the page's <c>Html</c> or <c>SiteText</c> is
/// still null/empty after the corresponding parser step.
/// </returns>
private Page fetchNextPage(BackQueue backQueue, Uri currentUrl)
        {
            String authority = currentUrl.GetLeftPart(UriPartial.Authority);

            // Busy-wait (yielding the thread each round) until the queue allows this host again.
            while (!backQueue.EnoughTimeHasPassed(authority, DateTime.Now))
            {
                Thread.Yield();
            }

            Page page = new Page(currentUrl);

            // Each parser step only runs if the previous one produced content.
            parser.AddHtmlToPage(page);
            if (!String.IsNullOrEmpty(page.Html))
            {
                parser.AddBodyToPage(page);
                if (!String.IsNullOrEmpty(page.SiteText))
                {
                    parser.AddPathsToPage(page);
                    return page;
                }
            }

            // Either the HTML or the extracted text was empty: nothing usable to return.
            return null;
        }
 /// <summary>
 /// Creates one back queue per entry in <c>initialSeeds</c>, puts the seed URL in it and
 /// registers the queue in <c>BackQueues</c> under the next value of the counter <c>i</c>.
 /// </summary>
 private void initialiseSeed()
 {
     foreach (Uri seedUrl in initialSeeds)
     {
         // The queue is identified by the scheme + authority part of the seed URL.
         var seedQueue = new BackQueue(seedUrl.GetLeftPart(UriPartial.Authority));
         seedQueue.Enqueue(seedUrl);

         // Keys are handed out by atomically incrementing the shared counter.
         var key = Interlocked.Increment(ref i);
         BackQueues.TryAdd(key, seedQueue);
     }
 }
        /// <summary>
        /// Crawl loop: repeatedly picks a random back queue, fetches the URL at its head,
        /// adds the resulting page to the web graph and hands the page's out-links to the
        /// frontier, until <c>_webGraph</c> holds <c>_numberOfPages</c> entries.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this method only peeks at the head URL and never removes it (step 5
        /// at the bottom is not implemented here). Unless one of the helpers dequeues it, the
        /// same URL is selected again on later iterations. The loop also spins without
        /// yielding whenever an iteration is skipped.
        /// </remarks>
        private void NewMethod()
        {
            var random = new Random();

            while (_webGraph.Count() < _numberOfPages)
            {
                int queueCount = BackQueues.Count;
                if (queueCount == 0)
                {
                    continue;
                }

                // 1. Pick a random back queue. Keys are produced by Interlocked.Increment(ref i),
                // so a random number in [0, Count) is not guaranteed to be an existing key;
                // TryGetValue skips the iteration where the indexer would throw.
                if (!BackQueues.TryGetValue(random.Next(0, queueCount), out BackQueue b))
                {
                    continue;
                }

                if (b.Count == 0 || !b.TryPeek(out Uri nextUrl))
                {
                    continue;
                }

                // Skip URLs that are already in the web graph. The ToList() snapshot is kept
                // from the original; presumably it guards against concurrent modification
                // (TODO confirm).
                if (_webGraph.ToList().Any(x => x.Url == nextUrl))
                {
                    continue;
                }

                // 2. Fetch next page from URL in queue
                Page newPage = fetchNextPage(b, nextUrl);
                if (newPage is null)
                {
                    continue;
                }

                addToWebGraph(newPage);

                // For each extracted URL:
                //  - obey robots.txt (freshness caveat)
                //  - check that it is not already in the frontier
                // The query is intentionally left deferred: the filter runs while the helper
                // below enumerates it, i.e. interleaved with the enqueueing.
                var paths = newPage.OutLinks.Where(x => b.RobotsAreObeyed(x) &&
                                                   !b.Contains(x));

                // Add to frontier if passing tests
                addPathToFrontierIfTestsPassed(b, paths);
            }
            // TODO: 5. Delete or re-prioritize current URL from queue
        }
        /// <summary>
        /// Routes every URL in <paramref name="paths"/> to the back queue whose domain matches
        /// the URL's scheme + authority: the current queue <paramref name="b"/>, another
        /// queue already registered in <c>BackQueues</c>, or a newly created queue that is
        /// then registered under the next value of the counter <c>i</c>.
        /// </summary>
        /// <param name="b">Back queue of the page the links were extracted from.</param>
        /// <param name="paths">
        /// Links to enqueue. No filtering is done here; the caller is expected to have applied
        /// its tests already. Relative URIs are skipped because no domain can be derived from them.
        /// </param>
        private void addPathToFrontierIfTestsPassed(BackQueue b, IEnumerable <Uri> paths)
        {
            foreach (Uri path in paths)
            {
                // Uri.GetLeftPart throws InvalidOperationException for a relative URI; skip
                // such links instead of aborting the whole crawl.
                if (!path.IsAbsoluteUri)
                {
                    continue;
                }

                String pathDomain = path.GetLeftPart(UriPartial.Authority);

                if (pathDomain.Equals(b.Domain))
                {
                    b.Enqueue(path);
                    continue;
                }

                // Single pass over the registered queues (the original scanned twice with
                // Any + First using the same predicate).
                BackQueue existingQueue = BackQueues.Select(x => x.Value)
                                                    .FirstOrDefault(q => q.Domain.Equals(pathDomain));

                if (existingQueue is null)
                {
                    // First link seen for this domain: give it its own queue.
                    BackQueue newQueue = new BackQueue(pathDomain);
                    newQueue.Enqueue(path);
                    BackQueues.TryAdd(Interlocked.Increment(ref i), newQueue);
                }
                else
                {
                    existingQueue.Enqueue(path);
                }
            }
        }