Esempio n. 1
0
        /// <summary>
        /// Builds a report for the site at <c>Url</c>: runs the robots.txt,
        /// 404 and mirror checks, analyzes the main page, then analyzes child
        /// pages from <c>_pages</c> until <c>MAX_CHILD_PAGE_IN_REPORT</c> of
        /// them have been added to the report.
        /// </summary>
        /// <param name="reportId">Identifier passed to the <c>Report</c> constructor.</param>
        /// <returns>The populated report.</returns>
        /// <remarks>
        /// The method blocks the calling thread with <c>Thread.Sleep(SLEEP_TIME)</c>
        /// between the individual checks and between child pages. A child page
        /// whose analysis throws is logged to the console and skipped; it does
        /// not count towards the child-page limit.
        /// </remarks>
        public Report Analyze(int reportId)
        {
            Report report = new Report(reportId);
            report.MainUrl = Url;
            report.RobotsTxt = CheckRobotsTxt(Url);
            Thread.Sleep(SLEEP_TIME);
            report.Error404 = CheckError404(Url);
            Thread.Sleep(SLEEP_TIME);
            report.Redirect = CheckMirror(Url);

            report.mainPageResult = this.AnalyzePage();

            int count = 0;
            foreach (string page in _pages) {
                try {
                    Analyzer analyzer = new Analyzer(page, false);
                    Features result = analyzer.AnalyzePage();
                    report.AddCheckedPage(result, page);
                    count++;
                    if (count == MAX_CHILD_PAGE_IN_REPORT) {
                        break;
                    }
                    Thread.Sleep(SLEEP_TIME);
                }
                catch (Exception ex) {
                    // Regular (non-verbatim) literal so that \n is a real line
                    // break; in the former @"..." literal it was printed as the
                    // two characters '\' and 'n'.
                    Console.WriteLine(
                        "method: Report Analyze(...)\n {0}\n stackTrace: {1}",
                        ex.Message, ex.StackTrace);
                }
            }
            return report;
        }
Esempio n. 2
0
        /// <summary>
        /// Example: analyze one url from database. Fetches one site row via
        /// <c>model.GetUrl()</c>, marks it as processed, runs
        /// <c>Analyzer.Analyze</c> on its url and stores the resulting report.
        /// </summary>
        /// <param name="model">
        /// Object used to fetch the site row, mark the site processed or
        /// failed, and persist the report.
        /// </param>
        /// <remarks>
        /// When no row is available the method prints a message, sleeps for
        /// <c>SLEEP_TIME</c> and returns. Any exception thrown while analyzing
        /// or storing the report is logged to the console and the site is
        /// marked as failed.
        /// </remarks>
        public static void Analyze(Model model)
        {
            string url = null;
            int siteId = -1;
            bool hasSite;

            // This part must be locked, otherwise threads will read
            // the same site.
            lock (urlLocker) {
                // The reader is disposed on every path; previously it was
                // never closed. Both columns are read before it goes away.
                using (NpgsqlDataReader urlRow = model.GetUrl()) {
                    hasSite = urlRow.Read();
                    if (hasSite) {
                        siteId = urlRow.GetInt32(0);
                        model.MarkSiteProcessed(siteId);
                        url = urlRow.GetString(1);
                    }
                }
            }

            // The database has no site left to analyze. The wait happens
            // outside the lock so that idle threads do not block each other.
            if (!hasSite) {
                Console.WriteLine("All sites are processed or "
                    + "processing now.");
                Thread.Sleep(SLEEP_TIME);
                return;
            }

            Console.WriteLine(url);

            try {
                Report report = new Report(model, siteId);
                report = Analyzer.Analyze(report.Id, url);
                report.PutIntoDB(model, siteId);
                Thread.Sleep(SLEEP_TIME);
            }
            catch (InvalidOperationException ex) {
                Console.WriteLine("Analyze Error: {0}", ex.Message);
                model.MarkSiteFailed(siteId);
            }
            catch (Exception ex) {
                Console.WriteLine("Unknown error: " + ex.Message);
                model.MarkSiteFailed(siteId);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Analyzes the site at <paramref name="url"/>: normalizes and
        /// validates the url, downloads the main page and extracts its child
        /// pages, runs the robots.txt, 404 and mirror checks, then analyzes
        /// the main page and child pages until
        /// <c>MAX_CHILD_PAGE_IN_REPORT</c> of them have been added.
        /// </summary>
        /// <param name="reportId">Identifier passed to the <c>Report</c> constructor.</param>
        /// <param name="url">Address of the site's main page.</param>
        /// <returns>The populated report.</returns>
        /// <exception cref="ArgumentException">
        /// The normalized url is rejected by <c>IsCorrectURL</c>.
        /// </exception>
        /// <remarks>
        /// The method blocks the calling thread for one second between the
        /// individual checks and between child pages. A child page whose
        /// download or analysis throws is logged to the console and skipped;
        /// it does not count towards the child-page limit.
        /// </remarks>
        public static Report Analyze(int reportId, string url)
        {
            // Pause between consecutive requests, in milliseconds.
            const int RequestPauseMs = 1000;

            Site site = new Site();
            site.mainUrl = NormalizeUrl(url);
            if (!IsCorrectURL(site.mainUrl)) {
                // More specific than a bare Exception; callers catching
                // Exception still see it, and the message is unchanged.
                throw new ArgumentException("Uncorrect url");
            }

            // NOTE(review): content is fetched from the raw url although the
            // normalized one was validated - confirm this is intended.
            site.content = GetContent(url);
            site.pages = GetPages(site.content, site.mainUrl);

            Report report = new Report(reportId);
            report.MainUrl = site.mainUrl;
            report.RobotsTxt = CheckRobotsTxt(site.mainUrl);
            Thread.Sleep(RequestPauseMs);
            report.Error404 = CheckError404(site.mainUrl);
            Thread.Sleep(RequestPauseMs);
            report.Redirect = CheckMirror(site.mainUrl);
            report.mainPageResult = AnalyzePage(site);

            int count = 0;
            foreach (string urlPage in site.pages) {
                try {
                    Features result = AnalyzePage(GetContent(urlPage));
                    report.AddCheckedPage(result, urlPage);
                    count++;
                    if (count == MAX_CHILD_PAGE_IN_REPORT) {
                        break;
                    }
                    Thread.Sleep(RequestPauseMs);
                }
                catch (Exception ex) {
                    // Regular (non-verbatim) literal so that \n is a real line
                    // break; in the former @"..." literal it was printed as the
                    // two characters '\' and 'n'.
                    Console.WriteLine("method: Analyzer.Analyze()\n {0}", ex.Message);
                }
            }
            return report;
        }