Example #1
0
        /// <summary>
        /// Loads the pages that have already been crawled with a 200 status code but have
        /// not yet been crawled by the browser crawler.
        /// </summary>
        /// <param name="hosts">Host names a page must belong to in order to be selected.</param>
        /// <returns>
        /// One <see cref="CrawlPlan"/> per matching page (only <c>AbsoluteUri</c> is populated),
        /// ordered by page id.
        /// </returns>
        private List<CrawlPlan> GetPagesToBrowserCrawl(string[] hosts)
        {
            // A fresh context is opened per call and disposed as soon as the result is
            // materialised, so the underlying connection is not leaked.
            // NOTE(review): assumes CrawlerContext is disposable (it is used like an EF DbContext) — confirm.
            using (var db = CrawlerContext.Create(_dbConStr))
            {
                Console.WriteLine("loading un-browser-crawled pages...");

                return db.Uri
                       .Where(o => o.CrawledAt.HasValue && !o.BrowserCrawledAt.HasValue && o.StatusCode == 200

                              // only web pages on the requested hosts, without a fragment
                              && (o.Scheme == "http" || o.Scheme == "https") &&
                              hosts.Contains(o.Host) &&
                              o.Fragment == ""

                              // pages with a query string are intentionally still included
                              //&& o.Query == ""

                              // skip URIs carrying the "c!", "i!", "a!" or "p!" markers
                              // (their meaning is not visible from this method)
                              && !o.AbsoluteUri.Contains("c!") &&
                              !o.AbsoluteUri.Contains("i!") &&
                              !o.AbsoluteUri.Contains("a!") &&
                              !o.AbsoluteUri.Contains("p!")

                              // skip common image resources
                              && !o.AbsoluteUri.EndsWith(".png") &&
                              !o.AbsoluteUri.EndsWith(".gif") &&
                              !o.AbsoluteUri.EndsWith(".jpg"))
                       .OrderBy(o => o.Id)
                       .Select(o => new CrawlPlan()
                       {
                           AbsoluteUri = o.AbsoluteUri
                       })
                       .ToList();
            }
        }
Example #2
0
        //private readonly Dictionary<int, string> _dicUriPages = new Dictionary<int, string>();

        /// <summary>
        /// Creates a crawler bound to the given database and start page.
        /// </summary>
        /// <param name="dbConStr">Connection string; stored and also used to open the crawler's database context.</param>
        /// <param name="startPage">Absolute URI string of the start page; parsed into a <see cref="System.Uri"/>.</param>
        /// <param name="hosts">Hosts to work on. When <c>null</c>, only the host of <paramref name="startPage"/> is used.</param>
        /// <param name="crawlerThreadCount">Stored as the crawler thread count.</param>
        /// <param name="browserCrawlerThreadCount">Stored as the browser-crawler thread count.</param>
        /// <param name="browserLoadWait">Stored as the browser load wait (unit not visible here — TODO confirm).</param>
        public LightningCrawler(string dbConStr, string startPage, string[] hosts = null, int crawlerThreadCount = 20, int browserCrawlerThreadCount = 5, int browserLoadWait = 0)
        {
            // Plain settings are copied straight into their backing fields.
            _dbConStr = dbConStr;
            _crawlerThreadCount = crawlerThreadCount;
            _browserCrawlerThreadCount = browserCrawlerThreadCount;
            _browserLoadWait = browserLoadWait;

            // Open the database context before parsing the start page, as the original did.
            _db = CrawlerContext.Create(dbConStr);

            var parsedStartPage = new System.Uri(startPage);
            _startPageUri = parsedStartPage;

            // Without an explicit host list, restrict the crawler to the start page's host.
            if (hosts == null)
            {
                hosts = new string[] { parsedStartPage.Host };
            }

            _hosts = hosts;
        }
Example #3
0
        //private void SaveCrawlResult(CrawlResult crawlResult)
        //{
        //    var stopWatch=new Stopwatch();
        //    stopWatch.Start();

        //    var page = _db.Uri.FirstOrDefault(o => o.AbsoluteUri == crawlResult.AbsoluteUri);

        //    if (page == null) Debugger.Break();

        //    if (page.FailedAt.HasValue)
        //    {
        //        page.FailedAt = crawlResult.FailedAt;
        //        page.FailedException = crawlResult.FailException;
        //        _db.SaveChanges();
        //        return;
        //    }

        //    if (crawlResult.StatusCode != 200)
        //    {
        //        if (crawlResult.LocationAbsoluteUri != null)
        //        {
        //            var destinationPage = _db.Uri.FirstOrDefault(o => o.AbsoluteUri == crawlResult.LocationAbsoluteUri);
        //            if (destinationPage == null)
        //            {
        //                //Console.WriteLine($"\tadding destination page to db...");
        //                destinationPage = NewUriDbModel(new System.Uri(crawlResult.LocationAbsoluteUri));
        //                _db.Uri.Add(destinationPage);
        //                _db.SaveChanges();
        //            }
        //            else
        //            {
        //                //Console.WriteLine($"\tdestination page already exists in db...");
        //            }

        //            //add redirect relations
        //            var redirectRelation = _db.RedirectRelation.FirstOrDefault(o => o.SourceId == page.Id && o.DestinationId == destinationPage.Id);
        //            if (redirectRelation == null)
        //            {
        //                //Console.WriteLine($"\tsaving redirect relation to db...");
        //                redirectRelation = new RedirectRelation()
        //                {
        //                    SourceId = page.Id,
        //                    DestinationId = destinationPage.Id,
        //                    CreatedAt = DateTime.UtcNow,
        //                };
        //                _db.RedirectRelation.Add(redirectRelation);
        //                _db.SaveChanges();
        //            }
        //            else
        //            {
        //                //Console.WriteLine($"\tredirect relation already exists in db...");
        //            }
        //        }
        //    }

        //    if (crawlResult.Doc != null)
        //    {
        //        //find links
        //        var links = crawlResult.Doc.DocumentNode.SelectNodes("//a[@href]");
        //        //Console.WriteLine($"\tfound {links.Count} child links.");
        //        var linksToSave = new List<System.Uri>();
        //        foreach (var link in links)
        //        {
        //            var href = link.Attributes["href"].Value;

        //            //if(href.Contains("fortnite-stats/")) Debugger.Break();

        //            if (href == "" || href.StartsWith("javascript:"))
        //                continue;

        //            var decoded = HttpUtility.HtmlDecode(href);
        //            var childUri = Util.GetUriObjectFromUriString(decoded, page.AbsoluteUri);

        //            //no duplicated links
        //            if (linksToSave.All(o => o.AbsoluteUri != childUri.AbsoluteUri))
        //                linksToSave.Add(childUri);
        //        }

        //        Console.WriteLine($"html links parsed {stopWatch.ElapsedMilliseconds}");

        //        //save links & relations to db
        //        if (linksToSave.Count > 0)
        //        {
        //            //Console.WriteLine($"\tthere are {linksToSave.Count} unique links");
        //            //Console.WriteLine($"\tchecking db for existence...");
        //            var lstUri = linksToSave.Select(o => o.AbsoluteUri).ToList();
        //            var pages = _db.Uri.Where(o => lstUri.Contains(o.AbsoluteUri)).ToList();
        //            //Console.WriteLine($"\t{pages.Count} of them are already in db");

        //            Console.WriteLine($"uri pages fetched {stopWatch.ElapsedMilliseconds}");

        //            //save pages
        //            //Console.WriteLine($"\tsaving pages...");
        //            foreach (var linkToSave in linksToSave)
        //            {
        //                var childPage = pages.FirstOrDefault(p => p.AbsoluteUri == linkToSave.AbsoluteUri);
        //                if (childPage == null)
        //                {
        //                    childPage = NewUriDbModel(linkToSave);
        //                    _db.Uri.Add(childPage);
        //                }
        //            }
        //            _db.SaveChanges();

        //            Console.WriteLine($"uri pages saved {stopWatch.ElapsedMilliseconds}");

        //            //save relations
        //            //Console.WriteLine($"\trefetching {lstUri.Count} pages from db...");
        //            pages = _db.Uri.Where(o => lstUri.Contains(o.AbsoluteUri)).ToList();
        //            //Console.WriteLine($"\tchecking existing relations...");
        //            var relations = _db.Relation.Where(o => o.ParentId == page.Id).ToList();
        //            //Console.WriteLine($"\t{relations.Count} of them are already in db");
        //            //Console.WriteLine($"\tsaving relations...");

        //            Console.WriteLine($"relations fetched {stopWatch.ElapsedMilliseconds}");

        //            foreach (var linkToSave in linksToSave)
        //            {
        //                var childPage = pages.FirstOrDefault(p => p.AbsoluteUri == linkToSave.AbsoluteUri);
        //                var relation = relations.FirstOrDefault(o => o.ChildId == childPage.Id);
        //                if (relation == null)
        //                {
        //                    relation = new Relation()
        //                    {
        //                        ParentId = page.Id,
        //                        ChildId = childPage.Id,
        //                        CreatedAt = DateTime.UtcNow,
        //                    };
        //                    _db.Relation.Add(relation);
        //                }
        //            }
        //            _db.SaveChanges();

        //            Console.WriteLine($"relations saved {stopWatch.ElapsedMilliseconds}");
        //        }

        //        //find canonical
        //        var canonicalLinks = crawlResult.Doc.DocumentNode.SelectNodes("//link[@rel='canonical']");
        //        if (canonicalLinks != null && canonicalLinks.Count > 0)
        //        {
        //            var canonicalLinkValue = canonicalLinks[0].Attributes["href"].Value;
        //            //Console.WriteLine($"\tfound canonical");
        //            page.Canonical = canonicalLinkValue;

        //            Console.WriteLine($"canonical extracted {stopWatch.ElapsedMilliseconds}");
        //        }

        //        page.ContentLength = crawlResult.Doc.ParsedText.Length;
        //    }

        //    page.CrawledAt = crawlResult.CrawledAt;
        //    page.StatusCode = crawlResult.StatusCode;
        //    page.StatusCodeString = crawlResult.StatusCodeStr;
        //    page.TimeTaken = crawlResult.TimeTaken;

        //    _db.SaveChanges();

        //    Console.WriteLine($"everything saved {stopWatch.ElapsedMilliseconds}");
        //}

        //private void CrawlPages(CrawlerContext db, string host)
        //{
        //    //var pagesToCrawl = GetPagesToCrawl(db, _hosts);
        //    var pagesToCrawl = new List<Uri>();

        //    Console.WriteLine($"Found {pagesToCrawl.Count} pages to crawl.");
        //    Console.WriteLine();

        //    var web = new HtmlWeb();
        //    web.CaptureRedirect = true;
        //    web.PostResponse = (request, response) => HtmlWeb_PostResponse(request, response);

        //    for (var i = 0; i < pagesToCrawl.Count; i++)
        //    {
        //        var page = pagesToCrawl[i];
        //        Console.WriteLine($"{i + 1}/{pagesToCrawl.Count} {page.AbsoluteUri}");

        //        var stopWatch = new Stopwatch();
        //        stopWatch.Start();

        //        HtmlDocument doc;
        //        try
        //        {
        //            doc = web.Load(page.AbsoluteUri);
        //        }
        //        catch (Exception e)
        //        {
        //            Console.WriteLine(e);
        //            page.FailedAt = DateTime.UtcNow;
        //            page.FailedException = e.ToString();
        //            db.SaveChanges();
        //            Console.WriteLine();
        //            continue;
        //        }

        //        stopWatch.Stop();

        //        var statusCode = (int)web.StatusCode;
        //        var statusCodeString = web.StatusCode.ToString();
        //        var timeTaken = stopWatch.Elapsed.TotalSeconds;

        //        //not 200 OK
        //        if (web.StatusCode != HttpStatusCode.OK)
        //        {
        //            Console.WriteLine($"\tstatus code = {statusCode}");

        //            //3xx redirect
        //            if (statusCode / 100 == 3)
        //            {
        //                //Console.WriteLine($"\tfound redirect {statusCode} {_redirectLocation}");
        //                var locationUri = GetUriObjectFromUriString(_redirectLocation, page.AbsoluteUri);
        //                Console.WriteLine($"\tfound new location {locationUri.AbsoluteUri}");

        //                //var linkKey = locationUri.AbsoluteUri.TruncateMax(MAX_URI_LEN);

        //                //add destination to db
        //                var destinationPage = db.Uri.FirstOrDefault(o => o.AbsoluteUri == locationUri.AbsoluteUri);
        //                if (destinationPage == null)
        //                {
        //                    Console.WriteLine($"\tadding destination page to db...");
        //                    destinationPage = NewUriDbModel(locationUri);
        //                    db.Uri.Add(destinationPage);
        //                    db.SaveChanges();
        //                }
        //                else
        //                    Console.WriteLine($"\tdestination page already exists in db...");

        //                //add redirect relations
        //                var redirectRelation = db.RedirectRelation.FirstOrDefault(o =>
        //                    o.SourceId == page.Id && o.DestinationId == destinationPage.Id);
        //                if (redirectRelation == null)
        //                {
        //                    Console.WriteLine($"\tsaving redirect relation to db...");
        //                    redirectRelation = new RedirectRelation()
        //                    {
        //                        SourceId = page.Id,
        //                        DestinationId = destinationPage.Id,
        //                        CreatedAt = DateTime.UtcNow,
        //                    };
        //                    db.RedirectRelation.Add(redirectRelation);
        //                    db.SaveChanges();
        //                }
        //                else
        //                    Console.WriteLine($"\tredirect relation already exists in db...");
        //            }

        //            //save page info
        //            page.CrawledAt = DateTime.UtcNow;
        //            page.StatusCodeString = statusCodeString;
        //            page.StatusCode = statusCode;
        //            page.TimeTaken = (decimal?)timeTaken;
        //            db.SaveChanges();
        //            Console.WriteLine();
        //            continue;
        //        }

        //        //not a document
        //        if (doc.ParsedText == null)
        //        {
        //            page.CrawledAt = DateTime.UtcNow;
        //            page.StatusCodeString = statusCodeString;
        //            page.StatusCode = statusCode;
        //            page.TimeTaken = (decimal?)timeTaken;
        //            db.SaveChanges();
        //            Console.WriteLine();
        //            continue;
        //        }

        //        var links = doc.DocumentNode.SelectNodes("//a[@href]");

        //        Console.WriteLine($"\tfound {links.Count} child links.");

        //        var linksToSave = new List<System.Uri>();
        //        foreach (var link in links)
        //        {
        //            var href = link.Attributes["href"].Value;

        //            if (href == "" || href.StartsWith("javascript:"))
        //                continue;

        //            var decoded = HttpUtility.HtmlDecode(href);
        //            //if (decoded!= href)
        //            //    Debugger.Break();

        //            var childUri = GetUriObjectFromUriString(decoded, page.AbsoluteUri);

        //            //Console.WriteLine($"\t{href}\r\n\t{childUri.AbsoluteUri}");
        //            //Console.WriteLine();

        //            //no duplicated links
        //            if (linksToSave.All(o => o.AbsoluteUri != childUri.AbsoluteUri))
        //                linksToSave.Add(childUri);
        //        }

        //        //save links & relations to db
        //        if (linksToSave.Count > 0)
        //        {
        //            //var uriKeyStrings = linksToSave.Select(o => o.AbsoluteUri.TruncateMax(MAX_URI_LEN)).Distinct().ToList();
        //            Console.WriteLine($"\tthere are {linksToSave.Count} unique links"
        //                              //+$", {uriKeyStrings.Count} unique uri keys"
        //                              );

        //            Console.WriteLine($"\tchecking db for existence...");
        //            var lstUri = linksToSave.Select(o => o.AbsoluteUri).ToList();
        //            var pages = db.Uri.Where(o => lstUri.Contains(o.AbsoluteUri)).ToList();
        //            //var childRelation = db.Relation.Where(o => o.ParentId == page.Id).ToList();
        //            Console.WriteLine($"\t{pages.Count} of them are already in db");

        //            //save pages
        //            Console.WriteLine($"\tsaving pages...");
        //            //var keysAdded = new List<string>();
        //            foreach (var linkToSave in linksToSave)
        //            {
        //                //var uriKey = linkToSave.AbsoluteUri.TruncateMax(MAX_URI_LEN).ToLower();

        //                ////not add uris that are different but with same unique keys
        //                //if (keysAdded.Contains(uriKey))
        //                //    continue;

        //                var childPage = pages.FirstOrDefault(p => p.AbsoluteUri == linkToSave.AbsoluteUri);

        //                if (childPage == null)
        //                {
        //                    childPage = NewUriDbModel(linkToSave);

        //                    //if (linkToSave.AbsoluteUri.Length > MAX_URI_LEN)
        //                    //    childPage.FullAbsoluteUri = linkToSave.AbsoluteUri;

        //                    db.Uri.Add(childPage);
        //                    //keysAdded.Add(uriKey);
        //                }
        //            }
        //            db.SaveChanges();

        //            //save relations
        //            Console.WriteLine($"\trefetching {lstUri.Count} pages from db...");
        //            pages = db.Uri.Where(o => lstUri.Contains(o.AbsoluteUri)).ToList();
        //            Console.WriteLine($"\tchecking existing relations...");
        //            var relations = db.Relation.Where(o => o.ParentId == page.Id).ToList();
        //            Console.WriteLine($"\t{relations.Count} of them are already in db");
        //            Console.WriteLine($"\tsaving relations...");
        //            foreach (var linkToSave in linksToSave)
        //            {
        //                //var uriKey = linkToSave.AbsoluteUri.TruncateMax(MAX_URI_LEN);

        //                var childPage = pages.FirstOrDefault(p => p.AbsoluteUri == linkToSave.AbsoluteUri);
        //                var relation = relations.FirstOrDefault(o => o.ChildId == childPage.Id);
        //                if (relation == null)
        //                {
        //                    relation = new Relation()
        //                    {
        //                        ParentId = page.Id,
        //                        ChildId = childPage.Id,
        //                        CreatedAt = DateTime.UtcNow,
        //                    };
        //                    db.Relation.Add(relation);
        //                }
        //            }

        //            db.SaveChanges();
        //        }

        //        //find canonical
        //        var canonicalLinks = doc.DocumentNode.SelectNodes("//link[@rel='canonical']");
        //        if (canonicalLinks != null && canonicalLinks.Count > 0)
        //        {
        //            var canonicalLinkValue = canonicalLinks[0].Attributes["href"].Value;
        //            Console.WriteLine($"\tfound canonical"
        //                              //+$": {canonicalLinkValue}"
        //                              );
        //            page.Canonical = canonicalLinkValue;
        //        }

        //        //save page info
        //        var contentLength = doc.ParsedText.Length;
        //        var content = doc.ParsedText;
        //        page.CrawledAt = DateTime.UtcNow;
        //        //page.Content = content;
        //        page.ContentLength = contentLength;
        //        page.StatusCodeString = statusCodeString;
        //        page.StatusCode = statusCode;
        //        page.TimeTaken = (decimal?)timeTaken;

        //        db.SaveChanges();

        //        Console.WriteLine();
        //    }
        //}

        /// <summary>
        /// Loads the pages that have not been crawled yet.
        /// </summary>
        /// <remarks>
        /// Earlier variants of this query (removed as dead code) additionally excluded URIs
        /// with a query string and URIs containing the "c!", "i!", "a!" or "p!" markers,
        /// and there was an alternative mode that re-selected already crawled pages whose
        /// status code starts with "5". None of those filters are applied here.
        /// </remarks>
        /// <param name="hosts">Host names a page must belong to in order to be selected.</param>
        /// <returns>
        /// One <see cref="CrawlPlan"/> per matching page (only <c>AbsoluteUri</c> is populated),
        /// ordered by page id.
        /// </returns>
        private List<CrawlPlan> GetPagesToCrawl(string[] hosts)
        {
            // A fresh context is opened per call and disposed as soon as the result is
            // materialised, so the underlying connection is not leaked.
            // NOTE(review): assumes CrawlerContext is disposable (it is used like an EF DbContext) — confirm.
            using (var db = CrawlerContext.Create(_dbConStr))
            {
                Console.WriteLine("loading uncrawled pages...");

                return db.Uri
                       .Where(o => !o.CrawledAt.HasValue &&

                              // only web pages on the requested hosts, without a fragment
                              (o.Scheme == "http" || o.Scheme == "https") &&
                              hosts.Contains(o.Host) &&
                              o.Fragment == ""

                              // skip common image resources
                              && !o.AbsoluteUri.EndsWith(".png") &&
                              !o.AbsoluteUri.EndsWith(".gif") &&
                              !o.AbsoluteUri.EndsWith(".jpg"))
                       .OrderBy(o => o.Id)
                       .Select(o => new CrawlPlan()
                       {
                           AbsoluteUri = o.AbsoluteUri
                       })
                       .ToList();
            }
        }
        //const int MAX_URI_LEN = 450;
        /// <summary>
        /// Program entry point. With <c>indexer</c> as the first argument it pushes the stored
        /// page contents to Elasticsearch and exits; otherwise it runs the crawler against the
        /// hard-coded start page and host list.
        /// </summary>
        /// <param name="args">Command-line arguments; only <c>args[0] == "indexer"</c> is inspected.</param>
        static void Main(string[] args)
        {
            // Raise the thread-pool minimums for worker and I/O completion threads.
            ThreadPool.SetMinThreads(1000, 1000);

            IConfiguration configuration = new ConfigurationBuilder()
                                           .AddJsonFile("appsettings.json", optional: true, reloadOnChange: true)
                                           .Build();

            if (args.Length > 0 && args[0] == "indexer")
            {
                RunIndexer(configuration);
                return;
            }

            const string startPage = "https://www.domain.com/";

            var crawler = new LightningCrawler(
                configuration.GetConnectionString("CrawlerDatabase"),
                startPage,
                new string[]
                {
                    "www.domain.com",
                    "subdomain1.domain.com",
                    "subdomain2.domain.com",
                },
                20,
                2,
                0);

            crawler.Run();
        }

        /// <summary>
        /// Reads every page with stored content from the crawler database, extracts its text
        /// and bulk-indexes the result into the Elasticsearch index <c>uri</c>.
        /// </summary>
        /// <param name="configuration">
        /// Configuration providing the <c>ESConnection</c> section (Host, Username, Password)
        /// and the <c>CrawlerDatabase</c> connection string.
        /// </param>
        private static void RunIndexer(IConfiguration configuration)
        {
            var esConfig = configuration.GetSection("ESConnection");
            var settings = new ConnectionSettings(new System.Uri(esConfig["Host"]))
                           .DefaultIndex("uri")
                           .BasicAuthentication(esConfig["Username"], esConfig["Password"]);
            var elasticClient = new ElasticClient(settings);

            List<UriDocument> uriDocuments;

            // The context is only needed for this one query, so it is disposed right after
            // the result has been materialised.
            // NOTE(review): assumes CrawlerContext is disposable (it is used like an EF DbContext) — confirm.
            using (var db = CrawlerContext.Create(configuration.GetConnectionString("CrawlerDatabase")))
            {
                Console.WriteLine("Fetching db data...");
                uriDocuments = db.Uri.Where(o => o.Content != null)
                               .OrderBy(o => o.Id)
                               .Select(o => new UriDocument()
                               {
                                   AbsoluteUri       = o.AbsoluteUri,
                                   // prefer the browser-rendered content when it exists
                                   BrowserHtml       = o.BrowserContent ?? o.Content,
                                   Id                = o.Id,
                                   OriginalUriString = o.OriginalString,
                               })
                               .ToList();
            }

            Console.WriteLine("Parsing html and generating doc text...");
            var htmlDoc = new HtmlDocument();
            foreach (var doc in uriDocuments)
            {
                htmlDoc.LoadHtml(doc.BrowserHtml);

                // SelectSingleNode returns null when the content has no <html> element
                // (fragments, non-HTML payloads); fall back to the whole document so one
                // such page does not abort the entire indexing run.
                var textRoot = htmlDoc.DocumentNode.SelectSingleNode("//html") ?? htmlDoc.DocumentNode;
                doc.BrowserText = textRoot.InnerText.Trim();
            }

            Console.WriteLine("Saving to ES...");
            elasticClient.BulkAll(uriDocuments, b => b
                                  .Index("uri")
                                  // how long to wait between retries
                                  .BackOffTime("30s")
                                  // how many retries are attempted if a failure occurs
                                  .BackOffRetries(2)
                                  // refresh the index once the bulk operation completes
                                  .RefreshOnCompleted()
                                  // how many concurrent bulk requests to make
                                  .MaxDegreeOfParallelism(Environment.ProcessorCount)
                                  // number of items per bulk request
                                  .Size(1000)
                                  .DroppedDocumentCallback((item, uri) =>
                                  {
                                      // if a document cannot be indexed this delegate is called
                                      Console.WriteLine($"Unable to index: {item} {uri}");
                                  }))
                         // Perform the indexing, waiting up to 15 minutes.
                         // Whilst the BulkAll calls are asynchronous this is a blocking operation
                         .Wait(TimeSpan.FromMinutes(15), next =>
                         {
                             // report the size of each batch and a per-result breakdown
                             Console.WriteLine($"ES Bulked Items: {next.Items.Count}");

                             // string.Join yields the same text as the former seedless Aggregate
                             // but does not throw when the batch contains no items.
                             var resultSummary = string.Join(
                                 " ",
                                 next.Items.GroupBy(o => o.Result).Select(o => o.Key + ":" + o.Count()));
                             Console.WriteLine(resultSummary);
                         });
        }