/// <summary>
/// Builds one status report for every crawler and every service whose State is not DISABLED.
/// </summary>
/// <remarks>
/// For each source the method reads the most recent "started" message and the most recent
/// terminal message from the Messages table, plus the _LastSessionState column:
/// - no start message, or one older than RunTimeSpan seconds: "LONG WORK" (WARNING) when the
///   last session state is STARTED, otherwise "NO START" (ERROR);
/// - recent start but no terminal message at all: "RUNNING" (INFORM), or for crawlers
///   "KILLED" (ERROR) when the last session state is KILLED;
/// - otherwise the report is chosen by the mark the latest terminal message starts with.
/// </remarks>
/// <param name="dbc">Connection used for all queries.</param>
/// <returns>Reports for all crawlers followed by reports for all services.</returns>
static List<Report> get_reports(DbConnection dbc)
{
    List<Report> reports = new List<Report>();

    foreach (Record crawler in dbc.Get("SELECT * FROM Crawlers WHERE State<>" + (int)Crawler.State.DISABLED).GetRecordset())
    {
        Report report = new Report();
        reports.Add(report);
        report.Source = (string)crawler["Id"];
        report.SourceType = ReportSourceType.CRAWLER;

        //a start message older than this moment means the source did not start within its RunTimeSpan
        DateTime earliest_start_time = DateTime.Now.AddSeconds(-(int)crawler["RunTimeSpan"]);
        Record start_message = dbc["SELECT * FROM Messages WHERE Source=@Source AND Value LIKE '" + CrawlerApi.MessageMark.STARTED + "%' ORDER BY Time DESC"].GetFirstRecord("@Source", crawler["Id"]);
        Crawler.SessionState state = (Crawler.SessionState)(int)dbc["SELECT _LastSessionState FROM Crawlers WHERE Id=@Id"].GetSingleValue("@Id", crawler["Id"]);

        if (start_message == null || (DateTime)start_message["Time"] < earliest_start_time)
        {
            if (state == Crawler.SessionState.STARTED)
            {
                report.MessageType = Log.MessageType.WARNING;
                report.Value = "LONG WORK";
                report.Details = "Works longer than its RunTimeSpan";
                continue;
            }
            report.MessageType = Log.MessageType.ERROR;
            report.Value = "NO START";
            report.Details = "Not started within its RunTimeSpan";
            continue;
        }

        //NOTE(review): this is the latest terminal message of any session; it is not compared with
        //start_message["Time"], so it may belong to an earlier session - confirm this is intended.
        Record end_message = dbc["SELECT * FROM Messages WHERE Source=@Source AND (Value LIKE '" + CrawlerApi.MessageMark.ABORTED + "%' OR Value LIKE '" + CrawlerApi.MessageMark.UNCOMPLETED + "%' OR Value LIKE '" + CrawlerApi.MessageMark.COMPLETED + "%') ORDER BY Time DESC"].GetFirstRecord("@Source", crawler["Id"]);
        if (end_message == null)
        {
            if (state == Crawler.SessionState.KILLED)
            {
                report.MessageType = Log.MessageType.ERROR;
                report.Value = "KILLED";
                report.Details = "Killed by Manager.";
                continue;
            }
            report.MessageType = Log.MessageType.INFORM;
            report.Value = "RUNNING";
            report.Details = "Running";
            continue;
        }

        if (Regex.IsMatch((string)end_message["Value"], @"^" + CrawlerApi.MessageMark.ABORTED))
        {
            report.MessageType = Log.MessageType.ERROR;
            report.Value = "ABORTED";
            report.Details = "Last session is " + CrawlerApi.MessageMark.ABORTED;
            continue;
        }
        if (Regex.IsMatch((string)end_message["Value"], @"^" + CrawlerApi.MessageMark.UNCOMPLETED))
        {
            report.MessageType = Log.MessageType.WARNING;
            report.Value = "UNCOMPLETED";
            report.Details = "Last session is " + CrawlerApi.MessageMark.UNCOMPLETED;
            continue;
        }
        if (Regex.IsMatch((string)end_message["Value"], @"^" + CrawlerApi.MessageMark.COMPLETED))
        {
            report.MessageType = Log.MessageType.INFORM;
            report.Value = "COMPLETED";
            report.Details = "Completed";
            continue;
        }

        //the query above only selects the three marks tested, so this is not expected to be reached
        report.MessageType = Log.MessageType.ERROR;
        report.Value = "SYSTEM ERROR";
        report.Details = "Unknown MessageMark";
    }

    foreach (Record service in dbc.Get("SELECT * FROM Services WHERE State<>" + (int)Service.State.DISABLED).GetRecordset())
    {
        Report report = new Report();
        reports.Add(report);
        report.Source = (string)service["Id"];
        report.SourceType = ReportSourceType.SERVICE;

        DateTime earliest_start_time = DateTime.Now.AddSeconds(-(int)service["RunTimeSpan"]);
        Record start_message = dbc["SELECT * FROM Messages WHERE Source=@Source AND Value LIKE '" + Service.MessageMark.STARTED + "%' ORDER BY Time DESC"].GetFirstRecord("@Source", service["Id"]);

        if (start_message == null || (DateTime)start_message["Time"] < earliest_start_time)
        {
            //the session state is only needed in this branch, so it is read lazily
            Service.SessionState state = (Service.SessionState)(int)dbc["SELECT _LastSessionState FROM Services WHERE Id=@Id"].GetSingleValue("@Id", service["Id"]);
            if (state == Service.SessionState.STARTED)
            {
                report.MessageType = Log.MessageType.WARNING;
                report.Value = "LONG WORK";
                report.Details = "Works longer than its RunTimeSpan";
                continue;
            }
            report.MessageType = Log.MessageType.ERROR;
            report.Value = "NO START";
            report.Details = "Not started within its RunTimeSpan";
            continue;
        }

        //NOTE(review): as for crawlers, the terminal message is not checked to be newer than the start message.
        Record end_message = dbc["SELECT * FROM Messages WHERE Source=@Source AND (Value LIKE '" + Service.MessageMark.ABORTED + "%' OR Value LIKE '" + Service.MessageMark.ERROR + "%' OR Value LIKE '" + Service.MessageMark.COMPLETED + "%') ORDER BY Time DESC"].GetFirstRecord("@Source", service["Id"]);
        if (end_message == null)
        {
            report.MessageType = Log.MessageType.INFORM;
            report.Value = "RUNNING";
            report.Details = "Running";
            continue;
        }

        if (Regex.IsMatch((string)end_message["Value"], @"^" + Service.MessageMark.ABORTED))
        {
            report.MessageType = Log.MessageType.ERROR;
            report.Value = "ABORTED";
            report.Details = "Last session is " + Service.MessageMark.ABORTED;
            continue;
        }
        if (Regex.IsMatch((string)end_message["Value"], @"^" + Service.MessageMark.ERROR))
        {
            report.MessageType = Log.MessageType.ERROR;
            report.Value = "ERRORS";
            report.Details = "Last session has errors";
            continue;
        }
        if (Regex.IsMatch((string)end_message["Value"], @"^" + Service.MessageMark.COMPLETED))
        {
            report.MessageType = Log.MessageType.INFORM;
            report.Value = "COMPLETED";
            report.Details = "Completed";
            continue;
        }

        //the query above only selects the three marks tested, so this is not expected to be reached
        report.MessageType = Log.MessageType.ERROR;
        report.Value = "SYSTEM ERROR";
        report.Details = "Unknown MessageMark";
    }

    return reports;
}
/// <summary>
/// Performs one management pass over the Crawlers table, in four steps:
/// 1) kills processes of crawlers that are DISABLED but still marked STARTED;
/// 2) handles pending crawler commands (RESTART, STOP, FORCE, RESTART_WITH_CLEAR_SESSION);
/// 3) inspects sessions in state STARTED, _ERROR or _COMPLETED, sends notification mails,
///    updates the session state and kills crawlers that stopped crawling products;
/// 4) launches crawlers that are due or forced, up to CrawlerProcessMaxNumber running crawlers.
/// </summary>
/// <exception cref="Exception">A crawler row carries a command value that is not handled here.</exception>
public static void Run()
{
    ////////////////////////////////////////////////////////////
    //Killing disabled crawler processes
    ////////////////////////////////////////////////////////////
    Recordset rs = db[@"SELECT Id AS crawler_id, ISNULL(_LastStartTime, 0) AS _LastStartTime, ISNULL(_LastEndTime, 0) AS _LastEndTime, _LastProcessId, _LastLog, AdminEmails, _LastSessionState FROM Crawlers WHERE _LastSessionState=" + (int)Crawler.SessionState.STARTED + " AND State=" + (int)Crawler.State.DISABLED].GetRecordset();
    foreach (Record r in rs)
    {
        string crawler_id = (string)r["crawler_id"];
        Process p = ServiceManager.GetProcess((int?)r["_LastProcessId"], crawler_id);
        if (p == null)
        {
            continue;
        }

        Log.Main.Warning("Killing " + crawler_id + " as disabled");
        if (try_kill_crawler_process(p, (int?)r["_LastProcessId"], crawler_id))
        {
            db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.KILLED + ", _LastEndTime=GETDATE() WHERE Id=@Id"].Execute("@Id", crawler_id);
        }
        else
        {
            Log.Main.Error("Could not kill " + crawler_id);
        }
    }

    ////////////////////////////////////////////////////////////
    //Process crawler commands
    ////////////////////////////////////////////////////////////
    rs = db[@"SELECT Id AS crawler_id, ISNULL(_LastStartTime, 0) AS _LastStartTime, ISNULL(_LastEndTime, 0) AS _LastEndTime, _LastProcessId, _LastLog, AdminEmails, _LastSessionState, Command FROM Crawlers WHERE State<>" + (int)Crawler.State.DISABLED + " AND Command<>" + (int)Crawler.Command.EMPTY].GetRecordset();
    foreach (Record r in rs)
    {
        string crawler_id = (string)r["crawler_id"];
        Process p = ServiceManager.GetProcess((int?)r["_LastProcessId"], crawler_id);
        Crawler.Command command = (Crawler.Command)(int)r["Command"];
        switch (command)
        {
            case Crawler.Command.RESTART:
                if (p == null)
                {
                    //nothing to kill: clear the command and make the crawler due immediately
                    db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.EMPTY + ", _NextStartTime=DATEADD(ss, -1, GETDATE()) WHERE Id=@Id"].Execute("@Id", crawler_id);
                    break;
                }
                Log.Main.Warning("Killing " + crawler_id + " as marked " + command);
                if (try_kill_crawler_process(p, (int?)r["_LastProcessId"], crawler_id))
                {
                    //FORCE is picked up by the "Starting new crawlers" step below
                    db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.FORCE + " WHERE Id=@Id"].Execute("@Id", crawler_id);
                }
                else
                {
                    Log.Main.Error("Could not kill " + crawler_id);
                }
                break;

            case Crawler.Command.STOP:
                if (p == null)
                {
                    break;
                }
                Log.Main.Warning("Killing " + crawler_id + " as marked " + command);
                if (try_kill_crawler_process(p, (int?)r["_LastProcessId"], crawler_id))
                {
                    db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.KILLED + ", _LastEndTime=GETDATE() WHERE Id=@Id"].Execute("@Id", crawler_id);
                }
                else
                {
                    Log.Main.Error("Could not kill " + crawler_id);
                }
                break;

            case Crawler.Command.FORCE:
                //processed below
                break;

            case Crawler.Command.RESTART_WITH_CLEAR_SESSION:
                //only the process is killed here; no column is updated by this step
                if (p != null)
                {
                    Log.Main.Warning("Killing " + crawler_id + " as marked " + command);
                    if (!try_kill_crawler_process(p, (int?)r["_LastProcessId"], crawler_id))
                    {
                        Log.Main.Error("Could not kill " + crawler_id);
                    }
                }
                break;

            default:
                throw new Exception("Crawler command " + command + " is not defined.");
        }
    }

    ////////////////////////////////////////////////////////////
    //Checking previously started sessions
    ////////////////////////////////////////////////////////////
    List<string> running_crawler_ids = new List<string>();
    List<string> running_crawler_notifications = new List<string>();
    rs = db[@"SELECT DATEDIFF(ss, ISNULL(_LastStartTime, 0), GETDATE()) AS duration, Id AS crawler_id, State, ISNULL(_LastStartTime, 0) AS _LastStartTime, ISNULL(_LastEndTime, 0) AS _LastEndTime, _LastProcessId, _LastLog, AdminEmails, _LastSessionState, CrawlProductTimeout FROM Crawlers WHERE _LastSessionState IN (" + (int)Crawler.SessionState.STARTED + ", " + (int)Crawler.SessionState._ERROR + ", " + (int)Crawler.SessionState._COMPLETED + ")"].GetRecordset();
    foreach (Dictionary<string, object> r in rs)
    {
        string crawler_id = (string)r["crawler_id"];
        string m1 = "\nStarted: " + r["_LastStartTime"] + "\nLog: " + r["_LastLog"];
        Crawler.SessionState _LastSessionState = (Crawler.SessionState)(int)r["_LastSessionState"];
        int duration = (int)r["duration"];

        if (_LastSessionState == Crawler.SessionState._COMPLETED)
        {
            //report the completion once, then move the row from _COMPLETED to COMPLETED
            string m = "Crawler " + crawler_id + " completed successfully.\nTotal duration: " + (new TimeSpan(0, 0, duration)).ToString() + m1;
            Mailer.Send(db, m, ReportSourceType.CRAWLER, crawler_id, false);
            db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.COMPLETED + " WHERE Id=@Id"].Execute("@Id", crawler_id);
            continue;
        }

        if (_LastSessionState == Crawler.SessionState._ERROR)
        {
            //report the error once, then move the row from _ERROR to ERROR
            Mailer.Send(db, "Crawler " + crawler_id + " exited with error" + m1, ReportSourceType.CRAWLER, crawler_id);
            db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.ERROR + " WHERE Id=@Id"].Execute("@Id", crawler_id);
            continue;
        }

        //from here on the session state is STARTED
        if (!ServiceManager.IsProcessAlive((int?)r["_LastProcessId"], crawler_id))
        {
            Mailer.Send(db, "Crawler " + crawler_id + " was broken by unknown reason", ReportSourceType.CRAWLER, crawler_id);
            db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.BROKEN + ", _NextStartTime=DATEADD(ss, RestartDelayIfBroken, GETDATE()) WHERE Id=@Id"].Execute("@Id", crawler_id);
            continue;
        }

        if (duration >= (int)r["CrawlProductTimeout"])
        {
            //-1 means _LastProductTime is NULL
            int last_crawled_product_elapsed_time = (int)db["SELECT ISNULL(DATEDIFF(ss, _LastProductTime, GETDATE()), -1) AS duration FROM Crawlers WHERE Id=@Id"].GetSingleValue("@Id", crawler_id);
            if (last_crawled_product_elapsed_time < 0 || last_crawled_product_elapsed_time > (int)r["CrawlProductTimeout"])
            {
                Mailer.Send(db, "Crawler " + crawler_id + " is running but not crawling products during " + last_crawled_product_elapsed_time + " seconds. It will be killed. Total duration: " + (new TimeSpan(0, 0, duration)).ToString() + m1, ReportSourceType.CRAWLER, crawler_id);

                //the process may have exited since the liveness check above, so p can be null here;
                //try_kill_crawler_process tolerates that instead of dereferencing it
                Process p = ServiceManager.GetProcess((int?)r["_LastProcessId"], crawler_id);
                Log.Main.Warning("Killing " + crawler_id);
                if (try_kill_crawler_process(p, (int?)r["_LastProcessId"], crawler_id))
                {
                    db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.KILLED + ", _NextStartTime=DATEADD(ss, RestartDelayIfBroken, GETDATE()), _LastEndTime=GETDATE() WHERE Id=@Id"].Execute("@Id", crawler_id);
                }
                else
                {
                    Log.Main.Error("Could not kill " + crawler_id);
                }
                continue;
            }
        }

        running_crawler_ids.Add(crawler_id);
        running_crawler_notifications.Add(crawler_id + ", process id: " + r["_LastProcessId"]);
    }
    if (running_crawler_notifications.Count > 0)
    {
        Log.Main.Write("Already running: " + string.Join("\r\n", running_crawler_notifications));
    }

    ////////////////////////////////////////////////////////////
    //Starting new crawlers
    ////////////////////////////////////////////////////////////
    List<string> remaining_crawler_ids = new List<string>();
    rs = db[@"SELECT Id AS crawler_id, State, Command, AdminEmails FROM Crawlers WHERE (State<>" + (int)Crawler.State.DISABLED + " AND GETDATE()>=_NextStartTime AND Command<>" + (int)Crawler.Command.STOP + @") OR Command=" + (int)Crawler.Command.FORCE + " ORDER BY Command, _NextStartTime"].GetRecordset();
    foreach (Dictionary<string, object> r in rs)
    {
        string crawler_id = (string)r["crawler_id"];

        if ((int)r["Command"] == (int)Crawler.Command.FORCE)
        {
            //forced crawlers are launched regardless of CrawlerProcessMaxNumber
            Log.Main.Write("Forcing " + crawler_id);
            if ((int)r["State"] == (int)Crawler.State.DISABLED)
            {
                Log.Main.Error(crawler_id + " is disabled.");
                continue;
            }
            if (running_crawler_ids.Contains(crawler_id))
            {
                Log.Main.Warning(crawler_id + " is running already.");
                db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.EMPTY + " WHERE Id=@Id"].Execute("@Id", crawler_id);
                continue;
            }
            //the command is cleared only when the launch is reported as successful
            if (launch_crawler(crawler_id, running_crawler_ids))
            {
                db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.EMPTY + " WHERE Id=@Id"].Execute("@Id", crawler_id);
            }
            continue;
        }

        if (running_crawler_ids.Contains(crawler_id))
        {
            continue;
        }
        if (running_crawler_ids.Count >= Properties.Settings.Default.CrawlerProcessMaxNumber)
        {
            remaining_crawler_ids.Add(crawler_id);
            continue;
        }
        launch_crawler(crawler_id, running_crawler_ids);
    }

    if (remaining_crawler_ids.Count > 0)
    {
        Log.Main.Warning("crawler_process_number reached " + Properties.Settings.Default.CrawlerProcessMaxNumber + " so no more crawler will be started.\nCrawlers remaining to start:\n" + string.Join("\r\n", remaining_crawler_ids));
    }
    if (running_crawler_ids.Count > 0)
    {
        Log.Main.Write("Currently running Crawlers: " + running_crawler_ids.Count);
    }
    else
    {
        Log.Main.Write("Currently no crawler runs.");
    }
}

/// <summary>
/// Asks the given crawler process to terminate, waits two seconds and reports whether it is gone.
/// </summary>
/// <param name="p">Process to kill; when null no kill is attempted and only liveness is checked.</param>
/// <param name="process_id">Process id recorded for the crawler, passed to the liveness check.</param>
/// <param name="crawler_id">Id of the crawler that owns the process.</param>
/// <returns>True when ServiceManager no longer reports the process as alive.</returns>
static bool try_kill_crawler_process(Process p, int? process_id, string crawler_id)
{
    if (p != null)
    {
        try
        {
            p.Kill();
        }
        catch (InvalidOperationException)
        {
            //the process exited on its own before it could be killed; the check below confirms it
        }
        catch (System.ComponentModel.Win32Exception e)
        {
            //the kill request was refused or the process is already terminating; the check below decides the outcome
            Log.Main.Warning("Kill request for " + crawler_id + " failed: " + e.Message);
        }
        //give the operating system time to tear the process down before checking
        Thread.Sleep(2000);
    }
    return !ServiceManager.IsProcessAlive(process_id, crawler_id);
}