/// <summary>
/// Builds a report for the site at <c>Url</c>: stores the results of the
/// robots.txt, 404 and mirror checks, the analysis of the main page, and the
/// analysis of the pages listed in <c>_pages</c>.
/// </summary>
/// <param name="reportId">Identifier passed to the <c>Report</c> constructor.</param>
/// <returns>The populated report.</returns>
/// <remarks>
/// Child pages are processed in order; the loop stops as soon as
/// <c>MAX_CHILD_PAGE_IN_REPORT</c> pages have been added to the report.
/// A page whose analysis throws is logged to the console and skipped; it does
/// not count toward the limit.
/// </remarks>
public Report Analyze(int reportId)
{
    Report report = new Report(reportId);
    report.MainUrl = Url;

    // Site-level checks, separated by a pause
    // (presumably to throttle requests to the target host — TODO confirm).
    report.RobotsTxt = CheckRobotsTxt(Url);
    Thread.Sleep(SLEEP_TIME);
    report.Error404 = CheckError404(Url);
    Thread.Sleep(SLEEP_TIME);
    report.Redirect = CheckMirror(Url);

    report.mainPageResult = this.AnalyzePage();

    int count = 0;
    foreach (string page in _pages)
    {
        try
        {
            Analyzer analyzer = new Analyzer(page, false);
            Features result = analyzer.AnalyzePage();
            report.AddCheckedPage(result, page);

            count++;
            if (count == MAX_CHILD_PAGE_IN_REPORT)
            {
                break;
            }

            Thread.Sleep(SLEEP_TIME);
        }
        catch (Exception ex)
        {
            // Regular (non-verbatim) literal so that \n is an actual line break;
            // with the @ prefix the log contained a literal backslash-n.
            Console.WriteLine("method: Report Analyze(...)\n {0}\n, stackTrace{1} ",
                              ex.Message, ex.StackTrace);
        }
    }

    return report;
}
/// <summary>
/// Example: takes one site from the database, analyzes it and stores the
/// resulting report.
/// </summary>
/// <param name="model">
/// Data access object used to fetch the next site, mark it as processed or
/// failed, and persist the report.
/// </param>
/// <remarks>
/// If <c>model.GetUrl()</c> yields no row, a message is printed, the thread
/// sleeps for <c>SLEEP_TIME</c> and the method returns. Any exception thrown
/// while building or saving the report is logged to the console and the site
/// is marked as failed.
/// </remarks>
public static void Analyze(Model model)
{
    string url = "";
    int siteId = -1;

    // Picking a site and marking it as processed must happen under one lock,
    // otherwise several threads would read the same site.
    lock (urlLocker)
    {
        bool hasSite;

        // Read everything we need from the row and release the reader before
        // issuing the next command; the reader was previously never closed.
        using (NpgsqlDataReader urlRow = model.GetUrl())
        {
            hasSite = urlRow.Read();
            if (hasSite)
            {
                siteId = urlRow.GetInt32(0);
                url = urlRow.GetString(1);
            }
        }

        // The database has no site left to analyze.
        if (!hasSite)
        {
            Console.WriteLine("All sites are processed or " +
                              "processing now.");
            Thread.Sleep(SLEEP_TIME);
            return;
        }

        model.MarkSiteProcessed(siteId);
    }

    Console.WriteLine(url);

    try
    {
        Report report = new Report(model, siteId);
        report = Analyzer.Analyze(report.Id, url);
        report.PutIntoDB(model, siteId);
        Thread.Sleep(SLEEP_TIME);
    }
    catch (InvalidOperationException ex)
    {
        Console.WriteLine("Analyze Error: {0}", ex.Message);
        model.MarkSiteFailed(siteId);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Unknown error: " + ex.Message);
        model.MarkSiteFailed(siteId);
    }
}
/// <summary>
/// Analyzes the site at <paramref name="url"/> and returns a report holding
/// the results of the robots.txt, 404 and mirror checks, the analysis of the
/// main page, and the analysis of the pages found by <c>GetPages</c>.
/// </summary>
/// <param name="reportId">Identifier passed to the <c>Report</c> constructor.</param>
/// <param name="url">Site address; it is normalized with <c>NormalizeUrl</c> before validation.</param>
/// <returns>The populated report.</returns>
/// <exception cref="ArgumentException">
/// The normalized url is rejected by <c>IsCorrectURL</c>.
/// </exception>
/// <remarks>
/// Child pages are processed in order; the loop stops as soon as
/// <c>MAX_CHILD_PAGE_IN_REPORT</c> pages have been added to the report.
/// A page whose download or analysis throws is logged to the console and
/// skipped; it does not count toward the limit.
/// </remarks>
public static Report Analyze(int reportId, string url)
{
    Site site = new Site();
    site.mainUrl = NormalizeUrl(url);

    if (!IsCorrectURL(site.mainUrl))
    {
        // Specific exception type instead of the bare System.Exception;
        // the message is unchanged and callers catching Exception still see it.
        throw new ArgumentException("Uncorrect url");
    }

    // NOTE(review): the content is fetched from the raw url while validation and
    // all later checks use the normalized site.mainUrl — confirm this is intended.
    site.content = GetContent(url);
    site.pages = GetPages(site.content, site.mainUrl);

    Report report = new Report(reportId);
    report.MainUrl = site.mainUrl;

    // Site-level checks, separated by a one-second pause
    // (presumably to throttle requests to the target host — TODO confirm).
    report.RobotsTxt = CheckRobotsTxt(site.mainUrl);
    Thread.Sleep(1000);
    report.Error404 = CheckError404(site.mainUrl);
    Thread.Sleep(1000);
    report.Redirect = CheckMirror(site.mainUrl);

    report.mainPageResult = AnalyzePage(site);

    int count = 0;
    foreach (string urlPage in site.pages)
    {
        try
        {
            Features result = AnalyzePage(GetContent(urlPage));
            report.AddCheckedPage(result, urlPage);

            count++;
            if (count == MAX_CHILD_PAGE_IN_REPORT)
            {
                break;
            }

            Thread.Sleep(1000);
        }
        catch (Exception ex)
        {
            // Regular (non-verbatim) literal so that \n is an actual line break;
            // with the @ prefix the log contained a literal backslash-n.
            Console.WriteLine("method: Analyzer.Analyze()\n {0}\n,", ex.Message);
        }
    }

    return report;
}