Example No. 1
    public static void Main(string[] args)
    {
        WebCrawler crawler = new WebCrawler();

        crawler.urlList.Add("http://data.kaohsiung.gov.tw/Opendata/List.aspx");
        crawler.craw();
    }
Example No. 2
        public void GetTextEmptyTest()
        {
            WebCrawler webpage  = new WebCrawler();
            string     pagetext = webpage.GetText("");

            Assert.AreEqual("", pagetext);
        }
Example No. 3
        public void GetTextNotEmptyTest()
        {
            WebCrawler webpage  = new WebCrawler();
            string     pagetext = webpage.GetText("google.com");

            Assert.IsFalse(String.IsNullOrEmpty(pagetext));
        }
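Examples No. 2 and No. 3 pin down the contract of GetText: an empty string in yields an empty string out, and a reachable host yields non-empty page text. A minimal sketch of a compatible implementation, assuming WebClient is acceptable (the class body below is hypothetical; only the method name and the tested behavior come from the examples):

    public class WebCrawler
    {
        // Returns the raw text of a page, or "" when the URL is blank or the request fails.
        public string GetText(string url)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                return "";
            }

            // Tolerate scheme-less input such as "google.com".
            if (!url.StartsWith("http://") && !url.StartsWith("https://"))
            {
                url = "http://" + url;
            }

            try
            {
                using (var client = new System.Net.WebClient())
                {
                    return client.DownloadString(url);
                }
            }
            catch (System.Net.WebException)
            {
                return "";
            }
        }
    }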
Example No. 4
        public void TestWebCrawl()
        {
            var req = new WebCrawlJob()
            {
                SeedUrl         = "http://stormwater.wef.org/2016/12/",
                Depth           = 1,
                SourceId        = 103,
                CrawlUrlPattern = new List <string>()
                {
                    //"http://stormwater.wef.org/\\d.*/\\d.*/"
                },
                IndexUrlPattern = new List <string>()
                {
                    "http://stormwater.wef.org/\\d.*/\\d.*/.*",
                },
                TitlePattern = new List <string>()
                {
                    "\\body\\h1"
                },
                SummaryPattern = "/meta[@property='og:description']",
                ContentPattern = new List <string>()
                {
                    "/div[@id='content']"
                },
            };

            var crawler = new WebCrawler <WebCrawlBasePage, WebCrawlerSearchDoc>(req);

            //var results = crawler.Run();
        }
Example No. 5
        private List <Product> ConvertSearchModelsToDomainModels(WebCrawler crawler)
        {
            var result = new List <Product>();

            foreach (var product in crawler.Products)
            {
                result.Add(new Product
                {
                    Name        = product.Name,
                    Description = product.Description,
                    Category    = new Category {
                        CategoryName = product.Category
                    },
                    Producer = new Producer {
                        ProducerName = product.Producer
                    },
                    Distributor = new Distributor {
                        DistributorName = product.Seller
                    },
                    SourceUrl = product.SourceUrl,
                    TimeStamp = product.TimeStamp,
                    Value     = product.Value,

                    OnSale          = product.OnSale,
                    SaleDeadline    = product.SaleDeadline,
                    SaleDescription = product.SaleDescription,
                    SaleValue       = product.SaleValue
                });
            }

            return(result);
        }
Example No. 6
        private async void procurar_Click(object sender, EventArgs e)
        {
            if (CheckTextBox())
            {
                string plat = "";
                if (pcRadio.Checked)
                {
                    plat = "pc";
                }
                else if (psRadio.Checked)
                {
                    plat = "psn";
                }
                else if (xboxRadio.Checked)
                {
                    plat = "xbox";
                }
                await WebCrawler.StartCrawlerAsync(plat, idTextBox.Text.Trim());


                if (!IdController.Existe)
                {
                    string message = "Player ID not found";
                    string caption = "Error";
                    alerts.Alert(message, caption);
                }
                else
                {
                    IdController.ContaName = idTextBox.Text.Trim();
                    TrocarForm();
                }
            }
        }
Example No. 7
        static void Memento(string path)
        {
            CareTaker  careTaker  = CareTaker.RestoreFromFile();
            Originator originator = null;
            WebCrawler webcrawler = WebCrawler.GetInstance();

            if (careTaker != null)
            {
                originator = careTaker.Originator;
            }
            if (careTaker == null)
            {
                var model = webcrawler.LoadFromFile(path);
                originator = new Originator(model);
                careTaker  = new CareTaker(originator);
            }

            //careTaker.Restore("nkbrrk");
            //Console.WriteLine($"Originator stateName {originator.StateName}");

            careTaker.Compare("citmxv");

            //originator.MakeChanges(webcrawler);
            //await careTaker.Save();

            //Console.WriteLine("History");
            //careTaker.ShowHistory();
        }
Example No. 8
 public async Task <CrawlResult> GetWebCrawlingResultAsync()
 {
     using (WebCrawler webCrawler = new WebCrawler())
     {
         return(await webCrawler.PerformCrawlingAsync(ConfigData.Depth, ConfigData.RootResources));
     }
 }
Example No. 9
        public ActionResult ReadAds(Parameters parameters)
        {
            if (!IsLogged())
            {
                return(Unauthorized());
            }

            var urlParameters = new Dictionary <string, string> {
                { "q", parameters.ProductName }
            };

            var baseUrl = string.IsNullOrEmpty(parameters.SearchRegion)
                ? "https://olx.com.br/brasil"
                : $"https://{parameters.SearchRegion}.olx.com.br";


            var webCrawler = new WebCrawler(baseUrl, urlParameters);

            var ads = webCrawler.Read();

            foreach (var ad in ads)
            {
                _adService.Create(ad);
            }

            return(NoContent());
        }
Example No. 10
        public CrawlerReport AnalyseSite(dynamic site)
        {
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
            var startUrl        = new Uri(site.Url);
            var crawlerSettings = BuildCrawlerSettings(startUrl);

            var crawler = new WebCrawler(crawlerSettings);

            try
            {
                crawler.Start();
                while (crawler.IsRunning)
                {
                    Thread.Sleep(500);
                }

                crawler.Report.Save(BuildReportPath());
            }
            catch (Exception ex)
            {
                // TODO: logging needs to be added here
                throw new ApplicationException(string.Format("Error Crawling {0}", startUrl), ex);
            }

            return(crawler.Report);
        }
Example No. 11
        private void downloadWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            BackgroundWorker backgroundWorker = (BackgroundWorker)sender;

            backgroundWorker.ReportProgress(0);

            SortedSet <ScriptureReference> allReferences = new SortedSet <ScriptureReference>();

            List <string> sitesToSearch = new List <string>(websitesListBox.CheckedItems.OfType <string>());

            int i = 0;

            foreach (string page in sitesToSearch)
            {
                List <ScriptureReference> found = WebCrawler.SearchPageForScriptures(page, out _);

                // Remove references to whole chapters
                found.RemoveAll(x => x.isChapter);

                allReferences.UnionWith(found);
                i++;

                backgroundWorker.ReportProgress((int)(100 * ((float)i / sitesToSearch.Count)));
            }

            e.Result = allReferences;
        }
Example No. 12
        public async Task <ApkName> GetOrCreateApkName(string packageName)
        {
            var a = _con.ApkNameCacheTable.Where(e => e.PackageName.Equals(packageName));

            if (await a.CountAsync() > 0)
            {
                return(await a.FirstOrDefaultAsync());
            }

            var webCrawler = new WebCrawler();
            var apkName    = await webCrawler.GetName(packageName);

            if (!string.IsNullOrEmpty(apkName))
            {
                var cacheTable = new ApkNameCacheTable
                {
                    Name        = apkName,
                    PackageName = packageName
                };

                _con.ApkNameCacheTable.Update(cacheTable);
                await _con.SaveChangesAsync();

                return(cacheTable);
            }

            return(null);
        }
Example No. 13
        public void CAN_EXTRACT_BASE_URL_FROM_URL_WITH_FILE_IN_SUBDIRECTORY()
        {
            var objectUnderTest = new WebCrawler(BASE_URL_WITH_FILE_IN_SUBDIRECTORY);
            var result          = objectUnderTest.ExtractBaseUrl(BASE_URL_WITH_FILE_IN_SUBDIRECTORY);

            Assert.AreEqual(BASE_URL, result);
        }
Example No. 14
        public void CAN_EXTRACT_NOT_CLOSED_BASE_URL()
        {
            var objectUnderTest = new WebCrawler(BASE_URL_NOT_CLOSED);
            var result          = objectUnderTest.ExtractBaseUrl(BASE_URL_NOT_CLOSED);

            Assert.AreEqual(BASE_URL, result);
        }
Example No. 15
        private void AddCover()
        {
            try
            {
                byte[] cover = WebCrawler.DownloadCover(lnParameters.urlCover);

                PdfImage pic = PdfImage.GetInstance(cover);

                if (pic.Height > pic.Width)
                {
                    // Portrait: scale so the height fits within 700 units.
                    float percentage = 700 / pic.Height;
                    pic.ScalePercent(percentage * 100);
                }
                else
                {
                    // Landscape: scale so the width fits within 540 units.
                    float percentage = 540 / pic.Width;
                    pic.ScalePercent(percentage * 100);
                }

                pic.Border      = Rectangle.BOX;
                pic.BorderColor = BaseColor.BLACK;
                pic.BorderWidth = 3f;
                pdf.NewPage();
                pdf.Add(pic);
            }
            catch (CoverException)
            {
                // No cover could be downloaded; the PDF is built without one.
            }
        }
Example No. 16
        public void RETURNS_READABLE_URL_IN_CASE_OF_INVALID_URL()
        {
            var objectUnderTest = new WebCrawler(INVALID_BASE_URL);
            var result          = objectUnderTest.ExtractBaseUrl(INVALID_BASE_URL);

            Assert.AreEqual(READABLE_INVALID_BASE_URL, result);
        }
Example No. 17
        public void RETURNS_VALID_URL_WITH_PORT()
        {
            var objectUnderTest = new WebCrawler(URL_WITH_PORT);
            var result          = objectUnderTest.ExtractBaseUrl(URL_WITH_PORT);

            Assert.AreEqual(URL_WITH_PORT, result);
        }
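The four ExtractBaseUrl tests (Examples No. 13, 14, 16, and 17) rely on constants such as BASE_URL and INVALID_BASE_URL that are defined elsewhere in the fixture and not shown here. Taken together they suggest a method that reduces any parseable URL to scheme, host, and explicit port, and falls back to a readable form of unparseable input. A sketch under those assumptions (hypothetical; the real implementation is not part of this listing):

    public string ExtractBaseUrl(string url)
    {
        Uri parsed;
        if (Uri.TryCreate(url, UriKind.Absolute, out parsed))
        {
            // GetLeftPart(UriPartial.Authority) keeps the scheme, host, and any
            // non-default port, and drops paths, file names, and query strings.
            return parsed.GetLeftPart(UriPartial.Authority);
        }

        // Unparseable input: return something readable rather than throwing.
        return url.Trim().TrimEnd('/');
    }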
Example No. 18
        public void When_HtmlHasSeveralHyperlinks_Then_ReturnSeveralChildrenNodes()
        {
            // Arrange
            ArrangeMocks(DomainBase, new List <Node>()
            {
                HomeNode, AboutNode, ContactNode
            });
            ArrangeMocks(HomeNode.Uri, new List <Node>()
            {
            });
            ArrangeMocks(AboutNode.Uri, new List <Node>()
            {
            });
            ArrangeMocks(ContactNode.Uri, new List <Node>()
            {
            });
            var target = new WebCrawler(_factoryMock.Object, _httpMock.Object);

            //Act
            var result = target.Crawl(DomainBase);

            // Assert
            Assert.AreEqual(3, result.Nodes.Count);
            Assert.AreEqual(HomeNode.Uri, result.Nodes[0].Uri);
            Assert.AreEqual(AboutNode.Uri, result.Nodes[1].Uri);
            Assert.AreEqual(ContactNode.Uri, result.Nodes[2].Uri);
        }
Example No. 19
        public void When_PageContainsVisitedNode_Then_DontRepeatVisitedNode()
        {
            // Arrange
            ArrangeMocks(DomainBase, new List <Node>()
            {
                HomeNode, AboutNode
            });
            ArrangeMocks(HomeNode.Uri, new List <Node>()
            {
                BaseNode
            });
            ArrangeMocks(AboutNode.Uri, new List <Node>()
            {
                BaseNode
            });
            var target = new WebCrawler(_factoryMock.Object, _httpMock.Object);

            //Act
            var result = target.Crawl(DomainBase);

            // Assert
            Assert.AreEqual(2, result.Nodes.Count);
            Assert.AreEqual(HomeNode.Uri, result.Nodes[0].Uri);
            Assert.AreEqual(AboutNode.Uri, result.Nodes[1].Uri);
            Assert.AreEqual(0, result.Nodes[0].Nodes[0].Nodes.Count);
            Assert.AreEqual(0, result.Nodes[1].Nodes[0].Nodes.Count);
        }
Example No. 20
        public void TestRemoveScriptSectionFromHTML()
        {
            var    crawler      = new WebCrawler();
            string inputHTML    = "<html><script>some javascript</script><body><script>some javascript</script></body></html>";
            string requiredHTML = "<html><body></body></html>";

            Assert.AreEqual(requiredHTML, crawler.RemoveTagsFromHTML(inputHTML, "script"));
        }
Example No. 21
        public void DoJobTest()
        {
            var crawler = new WebCrawler {
                InitLink = "http://dantri.com.vn"
            };

            crawler.DoJob();
        }
Example No. 22
        public void TestRemoveStyleSectionFromHTML()
        {
            var    crawler      = new WebCrawler();
            string inputHTML    = "<html><style>some style</style><body><style>some style</style></body></html>";
            string requiredHTML = "<html><body></body></html>";

            Assert.AreEqual(requiredHTML, crawler.RemoveTagsFromHTML(inputHTML, "style"));
        }
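Examples No. 20 and No. 22 expect RemoveTagsFromHTML to strip every occurrence of the named tag together with its inner content. A regex-based sketch that satisfies both tests (hypothetical; a regex is adequate for these fixed inputs, though real-world HTML is safer handled with a parser such as HtmlAgilityPack):

    using System.Text.RegularExpressions;

    public string RemoveTagsFromHTML(string html, string tagName)
    {
        // Remove <tag ...>...</tag> blocks, including their content,
        // case-insensitively and across line breaks.
        string pattern = string.Format(@"<{0}[^>]*>.*?</{0}\s*>", Regex.Escape(tagName));
        return Regex.Replace(html, pattern, string.Empty,
                             RegexOptions.IgnoreCase | RegexOptions.Singleline);
    }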
Example No. 23
 static void Main(string[] args)
 {
     ICrawlingFilterDetail crawlingFilterDetail = new CrawlingFilterDetail("jobdetail-iframe", "src", "/jobdetail");
     ICrawlingStats crawlingStats = new XingCrawlingStats(new[] { "jobdetail" }, crawlingFilterDetail); 
     IResultWriter resultWriter = new ResultWriter(crawlingStats);
     var walter = new WebCrawler(crawlingStats, resultWriter, new Clock());
     var result = walter.Crawl(new Uri("https://www.xn--jobbrse-d1a.com/list/jobtitle/"), @"c:\temp\WalterResult.csv");
 }
Example No. 24
        public void MakeChanges(WebCrawler webCrawler)
        {
            var model = webCrawler.LoadFromFile(MyHtmlModel.Name);

            MyHtmlModel = model;
            StateName   = GenerateHashSum();
            Console.WriteLine($"Originator StateName changed to {StateName}");
        }
Example No. 25
        private ConsoleCrawlerApp CreateTarget()
        {
            var crawler    = new WebCrawler(new NodeFactory(new LinkExtractor()), _httpMock.Object);
            var output     = new ConsoleOutputHandler(new TextOutputGenerator(), _fileMock.Object, _consoleMock.Object);
            var consoleApp = new ConsoleCrawlerApp(new ConsoleInputHandler(_clockMock.Object), output, crawler);

            return(consoleApp);
        }
Example No. 26
 public WebCrawlerVM()
 {
     ConfigReader.Instance.Read();
     maxNestity    = ConfigReader.Instance.MaxCrawlNestity;
     rootResources = ConfigReader.Instance.RootResources.ToArray();
     webCrawler    = new WebCrawler(maxNestity);
     Clicks        = 0;
 }
Example No. 27
        // GET: HomePage
        public ActionResult Index()
        {
            var webCrawler = new WebCrawler();

            webCrawler.SearchWeb();

            return(View());
        }
Example No. 28
        /// <summary>
        /// Creates a new instance of the <see cref="Buildit.Crawler.Console.ConsoleCrawlerApp"/> class.
        /// </summary>
        /// <returns>A new <see cref="Buildit.Crawler.Console.ConsoleCrawlerApp"/> object created.</returns>
        public static IConsoleApp Create()
        {
            var crawler    = new WebCrawler(new NodeFactory(new LinkExtractor()), new SystemHttp());
            var output     = new ConsoleOutputHandler(new TextOutputGenerator(), new SystemFile(), new SystemConsole());
            var consoleApp = new ConsoleCrawlerApp(new ConsoleInputHandler(new SystemClock()), output, crawler);

            return(consoleApp);
        }
Example No. 29
        static void Main(string[] args)
        {
            #region Debug harness
            //FansCrawler fansCrawler = new FansCrawler(1, 1);
            ////fansCrawler.GetHtmlFromWeiBo(1);
            //fansCrawler.ReadInHtmlContent();
            //List<Fan> fansList = new List<Fan>();
            //StreamReader reader = new StreamReader("content.html");
            //string content = reader.ReadToEnd();
            //fansCrawler.currentHtmlContent = content;
            //fansCrawler.GetInfoFromHtml(fansList);
            //OutputFansList(fansList);
            #endregion

            #region Production crawl: Weibo posts
            User     user      = new User();
            ICrawler crawler   = null;
            int      taskCount = 278;
            for (int i = 1; i <= taskCount; i = i + 25)
            {
                if (i + 24 <= taskCount)
                {
                    // Second parameter: 0 means crawl posts from the personal profile page, 1 means crawl posts from your own home feed
                    // 25 is a batch size found reliable in practice; a single crawler fetching more than 25 pages may be blocked by the server
                    crawler = new WebCrawler(user, PageType.PersonalPage, i, 25);
                }
                else
                {
                    crawler = new WebCrawler(user, PageType.PersonalPage, i, taskCount - i + 1);
                }
                crawler.RunCrawler(user.FeedList);
            }
            Output(user, crawler.Name);
            #endregion

            #region Production crawl: fans and follows

            //List<Fan> fansList = new List<Fan>();
            //FansAndFollowCrawler fansCrawler = null;
            //int taskCountForFansPage =58;
            //RelateType type = RelateType.Follow;

            //for (int i = 1; i <= taskCountForFansPage; i = i + 25)
            //{
            //    if (i + 24 <= taskCountForFansPage)
            //    {
            //        fansCrawler = new FansAndFollowCrawler(type, i, 25);
            //    }
            //    else
            //    {
            //        fansCrawler = new FansAndFollowCrawler(type, i, taskCountForFansPage - i + 1);
            //    }
            //    fansCrawler.RunCrawler(fansList);
            //}
            //Output(fansList, fansCrawler.Name);
            //Console.WriteLine("Total follows: " + fansList.Count);
            #endregion
        }
Example No. 30
        public void getPropertiesTest()
        {
            //Arrange
            WebCrawler crawler = new WebCrawler("http://www.bbc.co.uk/news", 2);

            //Assert
            Assert.AreEqual("http://www.bbc.co.uk/news", crawler.root);
            Assert.AreEqual(2, crawler.maxDepth);
        }
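This test implies a two-argument constructor that simply records the seed URL and the maximum crawl depth in public members. A minimal sketch under that assumption (hypothetical; only the member names and values come from the assertions):

    public class WebCrawler
    {
        public string root;
        public int    maxDepth;

        public WebCrawler(string root, int maxDepth)
        {
            this.root     = root;
            this.maxDepth = maxDepth;
        }
    }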
Example No. 31
        static void Main(string[] args)
        {
            Console.WriteLine("Welcome to WebCrawler Mangakakalot");
            Console.WriteLine("Do you want to collect data from web page?\nY|N");
            string ansr = Console.ReadLine().ToLower();

            if (ansr != "y")
            {
                return;
            }
            Console.WriteLine("With great waiting times, comes great loss of patience...");

            //Get data from web page into a list based off a model
            WebCrawler     spider = new WebCrawler();
            List <Popular> pops   = spider.startCrawler().GetAwaiter().GetResult();

            bool smartUser = false;

            while (!smartUser)
            {
                Console.WriteLine("======================");
                Console.WriteLine("What to do with all the collected info?");
                Console.WriteLine("1 - Upload into DataBase");
                Console.WriteLine("2 - Download From DataBase");
                int opt = Convert.ToInt32(Console.ReadLine());
                switch (opt)
                {
                case 1:
                    //Putting each Popular into the database
                    SpiderInsert spider1 = new SpiderInsert();
                    foreach (Popular pop in pops)
                    {
                        spider1.SetIntoDB(pop);
                    }
                    Console.WriteLine("Upload Action Completed");
                    spider1.CloseBD();
                    smartUser = true;
                    break;

                case 2:
                    Console.WriteLine("Not yet Implemented");
                    smartUser = true;
                    break;

                default:
                    Console.WriteLine("Option not calibrated by the system");
                    continue;
                }
            }
            Console.WriteLine("Press Enter to exit program...");
            ConsoleKeyInfo keyInfo = Console.ReadKey(true);

            if (keyInfo.Key == ConsoleKey.Enter)
            {
                System.Environment.Exit(0);
            }
        }
Example No. 32
        static void Main(string[] args)
        {
            var directoryName = Environment.ExpandEnvironmentVariables(@"%USERPROFILE%\personal\gazeta\krasev");
            //var directoryName = @"D:\gazeta\krasev";

            var webCrawler = new WebCrawler(directoryName);

            const String baseUrl = "http://www.booksite.ru/krassever/";

            var years = webCrawler.ExtractAll(baseUrl + "index.htm", @" href=""(\d\d\d\d\..+?)""");

            foreach (var year in years)
            {
                var yearUrl = baseUrl + year;

                var issues = webCrawler.ExtractAll(yearUrl, @"<a href=""(\d\d\d\d/(?:\w+/)?\d\d\d\d_\d+\.pdf)");

                foreach (var issue in issues)
                {
                    var pdfUrl = baseUrl + issue;

                    var directory = webCrawler.Parse(pdfUrl, @"/(\d\d\d\d)/");
                    var number = webCrawler.Parse(pdfUrl, @"_(\d+)\.").PadLeft(3, '0');

                    var name = webCrawler.Parse(pdfUrl, @"/\d\d\d\d/(?:(\w+)/)\d\d\d\d_");
                    if (!String.IsNullOrEmpty(name))
                    {
                        if (name.Equals("izvestya"))
                        {
                            name = "A_";
                        }
                        else if (name.Equals("krassever"))
                        {
                            name = "B_";
                        }
                        else
                        {
                            throw new Exception("Unexpected edition name: " + name);
                        }
                    }

                    var fileName = String.Format(@"{0}\{0}_{2}{1}.pdf", directory, number, name);

                    Console.WriteLine("{0} => {1}", pdfUrl, fileName);

                    webCrawler.AddFile(pdfUrl, fileName);
                }
            }
        }
Example No. 33
        static void Main(string[] args)
        {
            //var directoryName = Environment.ExpandEnvironmentVariables(@"%USERPROFILE%\personal\gazeta\sovsib");
            var directoryName = @"D:\gazeta\sovsib";

            var webCrawler = new WebCrawler(directoryName);

            const String baseUrl = "http://elib.ngonb.ru";

            var years = webCrawler.ExtractAll("http://elib.ngonb.ru/jspui/handle/NGONB/32", @"<option value=""NGONB/(\d+)"">\d\d\d\d</option>");

            foreach (var year in years)
            {
                var url = String.Format("http://elib.ngonb.ru/jspui/handle/NGONB/{0}/browse?type=dateissued&submit_browse=Issue+Date", year);

                while (true)
                {
                    var issues = webCrawler.ExtractAll(url, @"<a href=""/jspui/handle/NGONB/(\d+)"">(.*?)</a></td>");

                    foreach (var issue in issues)
                    {
                        var issueUrl = "http://elib.ngonb.ru/jspui/handle/NGONB/" + issue;
                        var pdfUrl = webCrawler.Extract(issueUrl, @"""(/jspui/bitstream/NGONB/" + issue + @"(?:.+?).pdf)""");
                        if (!String.IsNullOrEmpty(pdfUrl))
                        {
                            pdfUrl = baseUrl + pdfUrl;

                            var date = webCrawler.Extract(issueUrl, @">(\d\d\d\d-\d\d-\d\d)<");
                            var directory = date.Substring(0, 4);
                            var number = webCrawler.Extract(issueUrl, @">(\d+).pdf<");

                            var fileName = String.Format(@"{0}\{1}_{2}.pdf", directory, date.Replace('-', '_'), number);

                            Console.WriteLine("{0} => {1}", pdfUrl, fileName);

                            webCrawler.AddFile(pdfUrl, fileName);
                        }
                    }

                    // next page
                    url = webCrawler.Extract(url, @"href=""(.+?)"">next");
                    if (String.IsNullOrEmpty(url))
                    {
                        break;
                    }
                    url = baseUrl + url.XmlUnescape();
                }
            }
        }
Example No. 34
        private void Run(string[] args)
        {
            parseArguments (args);

            if (show_help || urls.Count <= 0) {
                showHelp();
                Environment.Exit(1);
            }

            // run the Crawler on all provided URLs
            foreach (string url in urls) {
                WebCrawler crawl = new WebCrawler (url, depth, cross_domain, debug);
                crawl.RunAsync (200); // run the crawler in async mode
            }
            Environment.Exit(0);
        }
Example No. 35
        public CrawlerReport AnalyseSite(dynamic site)
        {
            var startUrl = new Uri(site.Url);
            var crawlerSettings = BuildCrawlerSettings(startUrl);

            var crawler = new WebCrawler(crawlerSettings);
            try
            {
                crawler.Start();
                while (crawler.IsRunning)
                {
                    Thread.Sleep(500);
                }

                crawler.Report.Save(BuildReportPath());
            }
            catch (Exception ex)
            {
                // TODO: logging needs to be added here
                throw new ApplicationException(string.Format("Error Crawling {0}", startUrl), ex);
            }

            return crawler.Report;
        }
Example No. 36
 public void DoJobTest()
 {
     var crawler = new WebCrawler {InitLink = "http://dantri.com.vn"};
     crawler.DoJob();
 }
Example No. 37
        CrawlerReport RunAnalysis(Uri startUrl)
        {
            var settings = new CrawlerSettings(startUrl);
            settings.UseUserAgentForRobots = true;
            settings.ExternalLinkCriteria = ExternalLinkCriteria.SameFolderAndDeeper;
            // Generate a unique name
            var name = settings.Name = "SEOREPORT" + DateTime.Now.ToString("yy-MM-dd hh-mm-ss");

            // Use the same directory as the default used by the UI
            var path = Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments),
                "IIS SEO Reports");
            settings.IgnoreRobots = true;
            settings.IgnoreNoIndex = true;
            settings.IgnoreNoFollow = true;
            settings.Timeout = 200000;
            settings.MaximumLinkCount = MaxPages;
            settings.DirectoryCache = Path.Combine(path, settings.Name);

            // Create a new crawler and start running
            var crawler = new WebCrawler(settings);

            crawler.Start();

            while (crawler.IsRunning)
            {
                Thread.Sleep(2000);
                log.LogMessage("{0,9:N0} - {1,9:N0} - {2,9:N2} MB",
                               crawler.Report.GetUrlCount(),
                               crawler.RemainingUrls,
                               crawler.BytesDownloaded/1048576.0f);
            }

            crawler.Report.Save(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + "/IIS Seo Reports/");

            return crawler.Report;
        }
Example No. 38
 public void Init()
 {
     _clock.FormattedCurrentTime().Returns(ArbitraryFormattedCurrentTime);
     _crawler = new WebCrawler(_crawlingStats, _resultWriter, _clock);
 }
Example No. 39
 void crawler_CrawlStarted(object sender, WebCrawler.Event.CrawlStartedEventArgs e)
 {
     //throw new NotImplementedException();
 }
Example No. 40
 void crawler_CrawlAnnounced(object sender, WebCrawler.Event.CrawlAnnouncedEventArgs e)
 {
     this.Dispatcher.BeginInvoke(new UpdateUserInterface(OnCrawlAnnounced), e);
 }