// Builds a report for the site at Url.
//
// Runs the site-level checks (robots.txt, 404 handling, mirror/redirect),
// analyzes the main page via AnalyzePage(), then analyzes child pages from
// _pages until MAX_CHILD_PAGE_IN_REPORT of them have been added to the report.
//
// reportId: identifier passed to the Report constructor.
// Returns the populated Report.
//
public Report Analyze(int reportId)
{
    Report report = new Report(reportId);
    report.MainUrl = Url;

    // Site-level checks, separated by a pause of SLEEP_TIME
    // (presumably to avoid hammering the target server -- confirm).
    report.RobotsTxt = CheckRobotsTxt(Url);
    Thread.Sleep(SLEEP_TIME);
    report.Error404 = CheckError404(Url);
    Thread.Sleep(SLEEP_TIME);
    report.Redirect = CheckMirror(Url);

    report.mainPageResult = this.AnalyzePage();

    int count = 0;
    foreach (string page in _pages)
    {
        try
        {
            Analyzer analyzer = new Analyzer(page, false);
            Features result = analyzer.AnalyzePage();
            report.AddCheckedPage(result, page);

            // Only successfully analyzed pages count towards the limit;
            // a page that throws is logged below and skipped.
            count++;
            if (count == MAX_CHILD_PAGE_IN_REPORT)
            {
                break;
            }

            Thread.Sleep(SLEEP_TIME);
        }
        catch (Exception ex)
        {
            // A regular (non-verbatim) literal is required here so that
            // "\n" is emitted as a line break rather than the two
            // characters backslash + 'n'.
            Console.WriteLine("method: Report Analyze(...)\n {0}\n, stackTrace{1} ",
                ex.Message, ex.StackTrace);
        }
    }

    return report;
}
// Example: analyze one url from the database.
//
// Fetches one site row through model.GetUrl(), marks it as processed,
// runs the analyzer on its url and stores the resulting report.
// Any exception raised during analysis is logged and the site is
// marked as failed.
//
public static void Analyze(Model model)
{
    string url = "";
    int siteId = -1;

    // This part must be locked, otherwise threads will read
    // the same site.
    //
    lock (urlLocker)
    {
        // Dispose the reader on every path; the original never closed it.
        // NOTE(review): assumes closing this reader does not invalidate
        // the connection used by the later model calls -- confirm against
        // Model.GetUrl().
        using (NpgsqlDataReader urlRow = model.GetUrl())
        {
            // If the database has no site left to analyze.
            if (!urlRow.Read())
            {
                Console.WriteLine("All sites are processed or " +
                                  "processing now.");
                Thread.Sleep(SLEEP_TIME);
                return;
            }

            // Read every needed column before the reader is closed.
            siteId = urlRow.GetInt32(0);
            url = urlRow.GetString(1);
        }

        // Issued only after the reader is closed, so no other command runs
        // while the reader is still open.
        model.MarkSiteProcessed(siteId);
    }

    Console.WriteLine(url);

    try
    {
        Analyzer analyzer = new Analyzer(url);
        Report report = new Report(model, siteId);
        report = analyzer.Analyze(report.Id);
        report.PutIntoDB(model, siteId);
        Thread.Sleep(SLEEP_TIME);
    }
    catch (InvalidOperationException ex)
    {
        Console.WriteLine("Analyze Error: {0}", ex.Message);
        model.MarkSiteFailed(siteId);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Unknown error: " + ex.Message);
        model.MarkSiteFailed(siteId);
    }
}