Exemplo n.º 1
0
        /// <summary>
        /// Runs a <see cref="WebSpider"/> starting at <paramref name="firstUrl"/>, then walks the
        /// collected urls: urls that do not contain REQUIRED_URL_BODY_WITHOUT are skipped, urls that
        /// contain RECORD_PAGE_IDENTIFIER are passed to analyzeContent, and every accepted url is
        /// written to the log file at outputDirectory + LOG_NAME.
        /// </summary>
        /// <param name="firstUrl">Url handed to the spider as its starting point.</param>
        private void startSpider(string firstUrl)
        {
            WebSpider spider = new WebSpider(firstUrl, REQUIRED_URL_BODY, NUMBER_OF_PAGES_LIMIT);

            spider.Execute();

            StringBuilder builder = new StringBuilder();
            int           i       = 0;

            foreach (DictionaryEntry entry in spider.WebPages)
            {
                var page = ((System.Uri)entry.Key).ToString();
                if (!page.Contains(REQUIRED_URL_BODY_WITHOUT))
                {
                    continue;
                }

                if (page.Contains(RECORD_PAGE_IDENTIFIER))
                {
                    analyzeContent(page);
                }

                builder.AppendLine(page);

                // Report progress every 10 accepted urls. Skipped urls are not counted in i,
                // so the percentage is relative to the total number of crawled pages.
                i++;
                if (i % 10 == 0)
                {
                    // The digit count (2) belongs to Math.Round; it used to be passed to
                    // string.Format as an unused argument, which rounded to whole numbers.
                    double percent = Math.Round(i / (double)spider.WebPages.Count * 100, 2);
                    Console.Out.WriteLine(string.Format("Progress {0} %", percent));
                }
            }

            File.WriteAllText(outputDirectory + LOG_NAME, builder.ToString());

            // NOTE(review): collectionIndex is maintained outside this method (presumably by
            // analyzeContent) - confirm it is the intended counter rather than i.
            Console.Out.WriteLine("Processed urls: " + collectionIndex);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads a page definition from <paramref name="template"/>, sets its value to
        /// <paramref name="url"/>, fetches it with a new <see cref="WebSpider"/>, runs
        /// ValidatePages over the spider's pages and returns the page whose value equals the url.
        /// </summary>
        /// <param name="template">Argument passed to Page.GetFromFile.</param>
        /// <param name="url">Value assigned to the page and used to find the result.</param>
        /// <returns>The matching entry of the spider's page list, as returned by Find.</returns>
        public Page FetchPage(string template, string url)
        {
            var crawler = new WebSpider();

            var seed = Page.GetFromFile(template);
            seed.Value = url;

            crawler.FetchData(seed);
            ValidatePages(crawler.Pages);

            // Select the fetched page that corresponds to the requested url.
            return crawler.Pages.Find(candidate => candidate.Value.Equals(url));
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a <see cref="WebSpider"/>, seeds it with <paramref name="source"/> and steps it
        /// until nothing is left to visit or the spider's count reaches <paramref name="max_count"/>.
        /// </summary>
        /// <param name="source">Page passed to SetSourcePage as the starting point.</param>
        /// <param name="max_count">Upper bound compared against the spider's Count; defaults to 5000.</param>
        static void LaunchSpider(string source, int max_count = 5000)
        {
            WebSpider spider = new WebSpider();

            spider.SetSourcePage(source);

            while (spider.CountToVisit > 0 && spider.Count < max_count)
            {
                // The page returned by each step is not used here, so it is not stored.
                spider.SingleStep();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Validates the url, page count, output format and credentials, then executes a
        /// <see cref="WebSpider"/> over the url and saves the report when an output format is set.
        /// </summary>
        /// <returns>0 after the spider has run; -1 when any validation step fails.</returns>
        public override int Run()
        {
            // Default to http:// when the url carries no explicit http/https scheme.
            bool hasScheme = url.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                             || url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            if (!hasScheme)
            {
                url = string.Format(CultureInfo.InvariantCulture, "http://{0}", url);
            }

            Uri uri;
            bool uriCreated = Uri.TryCreate(url, UriKind.Absolute, out uri);
            if (!uriCreated)
            {
                Logger.Error(CultureInfo.CurrentUICulture, Resources.ParseUrlRunCantCreateUriError);
                return -1;
            }

            // -1 is passed on unchanged when no number was supplied.
            int count = -1;
            if (!string.IsNullOrEmpty(number))
            {
                bool parsed = int.TryParse(number, out count);
                if (!parsed || count < 1)
                {
                    Logger.Error(Resources.ParseUrlRunNotIntegerError, number);
                    return -1;
                }
            }

            bool formatRequested = !string.IsNullOrEmpty(outputFileFormat);
            if (formatRequested && FileFormat == Report.OutputFileFormat.None)
            {
                Logger.Error(Resources.ParseUrlRunUnsupportedFormatError, outputFileFormat);
                return -1;
            }

            // Credentials must be given as a pair: either both present or both absent.
            bool userMissing     = string.IsNullOrEmpty(username);
            bool passwordMissing = string.IsNullOrEmpty(password);
            if (userMissing != passwordMissing)
            {
                Logger.Error("Username and Password should be both set.");
                return -1;
            }

            var options = new WebSpiderOptions
            {
                UriProcessedCountMax = count,
                ShowSuccessUrls      = !errorsOnly,
                Username             = username,
                Password             = password,
                Domain               = domain
            };

            var    crawler = new WebSpider(uri, options);
            Report report  = crawler.Execute();

            if (FileFormat != Report.OutputFileFormat.None)
            {
                report.SaveReport(FileFormat, htmlTemplate);
            }

            return 0;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Shows the MDI child form identified by <paramref name="formName"/>. The work is done
        /// through <c>Invoke</c>: a new form is created and shown when none exists or the existing
        /// one is disposed, otherwise the existing form is activated. Unknown names do nothing.
        /// </summary>
        /// <param name="formName">Either "WebSpider" or "ScreenConsole".</param>
        public void StartForm(string formName)
        {
            if (formName == "WebSpider")
            {
                this.Invoke(new Action(() =>
                {
                    bool alive = _formWebSpider != null && !_formWebSpider.IsDisposed;
                    if (alive)
                    {
                        _formWebSpider.Activate();
                        return;
                    }

                    // No usable instance: create a new child form and show it.
                    _formWebSpider           = new WebSpider();
                    _formWebSpider.MdiParent = this;
                    _formWebSpider.Show();
                }));
            }
            else if (formName == "ScreenConsole")
            {
                this.Invoke(new Action(() =>
                {
                    bool alive = _formScreenConsole != null && !_formScreenConsole.IsDisposed;
                    if (alive)
                    {
                        _formScreenConsole.Activate();
                        return;
                    }

                    // No usable instance: create a new child form and show it.
                    _formScreenConsole           = new ScreenConsole();
                    _formScreenConsole.MdiParent = this;
                    _formScreenConsole.Show();
                }));
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a <see cref="WebSpider"/> from the given arguments, executes it and copies the
        /// values of its WebPages collection into the Pages array.
        /// </summary>
        /// <param name="uri">First argument forwarded to the WebSpider constructor.</param>
        /// <param name="baseUri">Second argument forwarded to the WebSpider constructor.</param>
        /// <param name="maxUri">Third argument forwarded to the WebSpider constructor.</param>
        public RunSpider(string uri, string baseUri, int maxUri)
        {
            Spider = new WebSpider(uri, baseUri, maxUri);
            Spider.Execute();

            ICollection crawled = Spider.WebPages.Values;
            Pages = new WebPageState[crawled.Count];

            // Copy each page state into the array in enumeration order.
            int slot = 0;
            foreach (WebPageState state in crawled)
            {
                Pages[slot] = state;
                slot++;
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Loads one page per entry of <paramref name="para"/> (the entry value is passed to
        /// Page.GetFromFile, the entry key becomes the page value), fetches each of them with a
        /// single <see cref="WebSpider"/>, runs ValidatePages and returns the spider's pages.
        /// </summary>
        /// <param name="para">Map whose keys become page values and whose values are passed to Page.GetFromFile.</param>
        /// <returns>The page list exposed by the spider after all fetches.</returns>
        public List<Page> FetchPages(Dictionary<string, string> para)
        {
            var crawler = new WebSpider();

            // Load every page first; fetching only starts once all of them are loaded.
            var seeds = para
                .Select(entry =>
                {
                    var seed = Page.GetFromFile(entry.Value);
                    seed.Value = entry.Key;
                    return seed;
                })
                .ToList();

            for (int n = 0; n < seeds.Count; n++)
            {
                crawler.FetchData(seeds[n]);
            }

            ValidatePages(crawler.Pages);
            return crawler.Pages;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Checks that a <see cref="WebSpider"/> built with the parameterless constructor
        /// exposes a non-null Pages member.
        /// </summary>
        public void WebSpider_Constructor_PagesNotNull()
        {
            // Arrange / Act
            var pages = new WebSpider().Pages;

            // Assert
            Assert.IsNotNull(pages);
        }