Ejemplo n.º 1
0
        /// <summary>
        /// Builds one status report for every crawler and every service whose State is not DISABLED.
        /// </summary>
        /// <param name="dbc">Connection used to query the Crawlers, Services and Messages tables.</param>
        /// <returns>Crawler reports followed by service reports, each group in the order its rows were returned.</returns>
        static List <Report> get_reports(DbConnection dbc)
        {
            List <Report> reports = new List <Report>();

            foreach (Record crawler in dbc.Get("SELECT * FROM Crawlers WHERE State<>" + (int)Crawler.State.DISABLED).GetRecordset())
            {
                reports.Add(get_crawler_report(dbc, crawler));
            }

            foreach (Record service in dbc.Get("SELECT * FROM Services WHERE State<>" + (int)Service.State.DISABLED).GetRecordset())
            {
                reports.Add(get_service_report(dbc, service));
            }

            return reports;
        }

        /// <summary>
        /// Classifies a single crawler row from its latest start/end messages and its _LastSessionState.
        /// </summary>
        /// <param name="dbc">Connection used to query the Messages and Crawlers tables.</param>
        /// <param name="crawler">Row of the Crawlers table; its Id and RunTimeSpan columns are read.</param>
        /// <returns>A report with Source, SourceType, MessageType, Value and Details filled in.</returns>
        private static Report get_crawler_report(DbConnection dbc, Record crawler)
        {
            Report report = new Report();
            report.Source     = (string)crawler["Id"];
            report.SourceType = ReportSourceType.CRAWLER;

            // A start message older than this moment does not count as a start within RunTimeSpan.
            DateTime             earliest_start_time = DateTime.Now.AddSeconds(-(int)crawler["RunTimeSpan"]);
            Record               start_message       = dbc["SELECT * FROM Messages WHERE Source=@Source AND Value LIKE '" + CrawlerApi.MessageMark.STARTED + "%' ORDER BY Time DESC"].GetFirstRecord("@Source", crawler["Id"]);
            Crawler.SessionState state               = (Crawler.SessionState)(int) dbc["SELECT _LastSessionState FROM Crawlers WHERE Id=@Id"].GetSingleValue("@Id", crawler["Id"]);

            if (start_message == null || (DateTime)start_message["Time"] < earliest_start_time)
            {
                if (state == Crawler.SessionState.STARTED)
                {
                    report.MessageType = Log.MessageType.WARNING;
                    report.Value       = "LONG WORK";
                    report.Details     = "Works longer than its RunTimeSpan";
                    return report;
                }
                report.MessageType = Log.MessageType.ERROR;
                report.Value       = "NO START";
                report.Details     = "Not started within its RunTimeSpan";
                return report;
            }

            // Latest message carrying any of the three end marks for this crawler.
            Record end_message = dbc["SELECT * FROM Messages WHERE Source=@Source AND (Value LIKE '" + CrawlerApi.MessageMark.ABORTED + "%' OR Value LIKE '" + CrawlerApi.MessageMark.UNCOMPLETED + "%' OR Value LIKE '" + CrawlerApi.MessageMark.COMPLETED + "%') ORDER BY Time DESC"].GetFirstRecord("@Source", crawler["Id"]);
            if (end_message == null)
            {
                if (state == Crawler.SessionState.KILLED)
                {
                    report.MessageType = Log.MessageType.ERROR;
                    report.Value       = "KILLED";
                    report.Details     = "Killed by Manager.";
                    return report;
                }
                report.MessageType = Log.MessageType.INFORM;
                report.Value       = "RUNNING";
                report.Details     = "Running";
                return report;
            }

            // NOTE(review): the marks are used as regex patterns without escaping; this is a plain
            // prefix test only as long as the marks contain no regex metacharacters - confirm.
            string end_value = (string)end_message["Value"];
            if (Regex.IsMatch(end_value, @"^" + CrawlerApi.MessageMark.ABORTED))
            {
                report.MessageType = Log.MessageType.ERROR;
                report.Value       = "ABORTED";
                report.Details     = "Last session is " + CrawlerApi.MessageMark.ABORTED;
                return report;
            }
            if (Regex.IsMatch(end_value, @"^" + CrawlerApi.MessageMark.UNCOMPLETED))
            {
                report.MessageType = Log.MessageType.WARNING;
                report.Value       = "UNCOMPLETED";
                report.Details     = "Last session is " + CrawlerApi.MessageMark.UNCOMPLETED;
                return report;
            }
            if (Regex.IsMatch(end_value, @"^" + CrawlerApi.MessageMark.COMPLETED))
            {
                report.MessageType = Log.MessageType.INFORM;
                report.Value       = "COMPLETED";
                report.Details     = "Completed";
                return report;
            }

            // The SQL LIKE filter matched but none of the regex prefix tests did.
            report.MessageType = Log.MessageType.ERROR;
            report.Value       = "SYSTEM ERROR";
            report.Details     = "Unknown MessageMark";
            return report;
        }

        /// <summary>
        /// Classifies a single service row from its latest start/end messages and, when no recent
        /// start message exists, its _LastSessionState.
        /// </summary>
        /// <param name="dbc">Connection used to query the Messages and Services tables.</param>
        /// <param name="service">Row of the Services table; its Id and RunTimeSpan columns are read.</param>
        /// <returns>A report with Source, SourceType, MessageType, Value and Details filled in.</returns>
        private static Report get_service_report(DbConnection dbc, Record service)
        {
            Report report = new Report();
            report.Source     = (string)service["Id"];
            report.SourceType = ReportSourceType.SERVICE;

            // A start message older than this moment does not count as a start within RunTimeSpan.
            DateTime earliest_start_time = DateTime.Now.AddSeconds(-(int)service["RunTimeSpan"]);
            Record   start_message       = dbc["SELECT * FROM Messages WHERE Source=@Source AND Value LIKE '" + Service.MessageMark.STARTED + "%' ORDER BY Time DESC"].GetFirstRecord("@Source", service["Id"]);

            if (start_message == null || (DateTime)start_message["Time"] < earliest_start_time)
            {
                // The session state is only needed (and only queried) on this path.
                Service.SessionState state = (Service.SessionState)(int) dbc["SELECT _LastSessionState FROM Services WHERE Id=@Id"].GetSingleValue("@Id", service["Id"]);
                if (state == Service.SessionState.STARTED)
                {
                    report.MessageType = Log.MessageType.WARNING;
                    report.Value       = "LONG WORK";
                    report.Details     = "Works longer than its RunTimeSpan";
                    return report;
                }
                report.MessageType = Log.MessageType.ERROR;
                report.Value       = "NO START";
                report.Details     = "Not started within its RunTimeSpan";
                return report;
            }

            // Latest message carrying any of the three end marks for this service.
            Record end_message = dbc["SELECT * FROM Messages WHERE Source=@Source AND (Value LIKE '" + Service.MessageMark.ABORTED + "%' OR Value LIKE '" + Service.MessageMark.ERROR + "%' OR Value LIKE '" + Service.MessageMark.COMPLETED + "%') ORDER BY Time DESC"].GetFirstRecord("@Source", service["Id"]);
            if (end_message == null)
            {
                report.MessageType = Log.MessageType.INFORM;
                report.Value       = "RUNNING";
                report.Details     = "Running";
                return report;
            }

            // NOTE(review): the marks are used as regex patterns without escaping; this is a plain
            // prefix test only as long as the marks contain no regex metacharacters - confirm.
            string end_value = (string)end_message["Value"];
            if (Regex.IsMatch(end_value, @"^" + Service.MessageMark.ABORTED))
            {
                report.MessageType = Log.MessageType.ERROR;
                report.Value       = "ABORTED";
                report.Details     = "Last session is " + Service.MessageMark.ABORTED;
                return report;
            }
            if (Regex.IsMatch(end_value, @"^" + Service.MessageMark.ERROR))
            {
                report.MessageType = Log.MessageType.ERROR;
                report.Value       = "ERRORS";
                report.Details     = "Last session has errors";
                return report;
            }
            if (Regex.IsMatch(end_value, @"^" + Service.MessageMark.COMPLETED))
            {
                report.MessageType = Log.MessageType.INFORM;
                report.Value       = "COMPLETED";
                report.Details     = "Completed";
                return report;
            }

            // The SQL LIKE filter matched but none of the regex prefix tests did.
            report.MessageType = Log.MessageType.ERROR;
            report.Value       = "SYSTEM ERROR";
            report.Details     = "Unknown MessageMark";
            return report;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Performs one management pass over the Crawlers table, in four phases:
        /// kills still-running processes of disabled crawlers, applies pending crawler commands,
        /// reviews sessions that were started earlier, and launches crawlers that are due or forced.
        /// Non-forced launches are skipped once the number of running crawlers reaches
        /// Properties.Settings.Default.CrawlerProcessMaxNumber.
        /// </summary>
        /// <exception cref="Exception">A crawler row has a Command value that the command switch does not handle.</exception>
        public static void Run()
        {
            ////////////////////////////////////////////////////////////
            //Killing disabled crawler processes
            ////////////////////////////////////////////////////////////
            Recordset rs = db[@"SELECT Id AS crawler_id, 
ISNULL(_LastStartTime, 0) AS _LastStartTime, ISNULL(_LastEndTime, 0) AS _LastEndTime, 
_LastProcessId, _LastLog, AdminEmails, _LastSessionState FROM Crawlers WHERE _LastSessionState=" + (int)Crawler.SessionState.STARTED + " AND State=" + (int)Crawler.State.DISABLED].GetRecordset();

            foreach (Record r in rs)
            {
                string  crawler_id = (string)r["crawler_id"];
                int?    process_id = (int?)r["_LastProcessId"];
                Process p          = ServiceManager.GetProcess(process_id, crawler_id);
                if (p == null)
                {
                    continue;
                }

                if (kill_crawler_process(p, process_id, crawler_id, "Killing " + crawler_id + " as disabled"))
                {
                    db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.KILLED + ", _LastEndTime=GETDATE() WHERE Id=@Id"].Execute("@Id", crawler_id);
                }
            }

            ////////////////////////////////////////////////////////////
            //Process crawler commands
            ////////////////////////////////////////////////////////////
            rs = db[@"SELECT Id AS crawler_id, 
ISNULL(_LastStartTime, 0) AS _LastStartTime, ISNULL(_LastEndTime, 0) AS _LastEndTime, 
    _LastProcessId, _LastLog, AdminEmails, _LastSessionState, Command FROM Crawlers WHERE State<>" + (int)Crawler.State.DISABLED + " AND Command<>" + (int)Crawler.Command.EMPTY].GetRecordset();
            foreach (Record r in rs)
            {
                string          crawler_id = (string)r["crawler_id"];
                int?            process_id = (int?)r["_LastProcessId"];
                Process         p          = ServiceManager.GetProcess(process_id, crawler_id);
                Crawler.Command command    = (Crawler.Command)(int) r["Command"];
                switch (command)
                {
                case Crawler.Command.RESTART:
                    if (p == null)
                    {
                        // Nothing to kill: clear the command and make the crawler due immediately.
                        db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.EMPTY + ", _NextStartTime=DATEADD(ss, -1, GETDATE()) WHERE Id=@Id"].Execute("@Id", crawler_id);
                        break;
                    }
                    if (kill_crawler_process(p, process_id, crawler_id, "Killing " + crawler_id + " as marked " + command))
                    {
                        // Turn the restart into a FORCE so the start phase below launches it.
                        db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.FORCE + " WHERE Id=@Id"].Execute("@Id", crawler_id);
                    }
                    break;

                case Crawler.Command.STOP:
                    if (p == null)
                    {
                        break;
                    }
                    if (kill_crawler_process(p, process_id, crawler_id, "Killing " + crawler_id + " as marked " + command))
                    {
                        db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.KILLED + ", _LastEndTime=GETDATE() WHERE Id=@Id"].Execute("@Id", crawler_id);
                    }
                    break;

                case Crawler.Command.FORCE:
                    //processed below
                    break;

                case Crawler.Command.RESTART_WITH_CLEAR_SESSION:
                    // NOTE(review): only the process is killed here; nothing in this method clears
                    // the session or resets Command for this case - presumably done elsewhere, confirm.
                    if (p != null)
                    {
                        kill_crawler_process(p, process_id, crawler_id, "Killing " + crawler_id + " as marked " + command);
                    }
                    break;

                default:
                    throw new Exception("Crawler command " + command + " is not defined.");
                }
            }

            ////////////////////////////////////////////////////////////
            //Checking previously started sessions
            ////////////////////////////////////////////////////////////
            List <string> running_crawler_ids           = new List <string>();
            List <string> running_crawler_notifications = new List <string>();

            rs = db[@"SELECT DATEDIFF(ss, ISNULL(_LastStartTime, 0), GETDATE()) AS duration, Id AS crawler_id, State, 
ISNULL(_LastStartTime, 0) AS _LastStartTime, ISNULL(_LastEndTime, 0) AS _LastEndTime, 
_LastProcessId, _LastLog, AdminEmails, _LastSessionState, CrawlProductTimeout 
FROM Crawlers 
WHERE _LastSessionState IN (" + (int)Crawler.SessionState.STARTED + ", " + (int)Crawler.SessionState._ERROR + ", " + (int)Crawler.SessionState._COMPLETED + ")"].GetRecordset();
            foreach (Dictionary <string, object> r in rs)
            {
                string crawler_id = (string)r["crawler_id"];
                string m1         = "\nStarted: " + r["_LastStartTime"] + "\nLog: " + r["_LastLog"];
                Crawler.SessionState _LastSessionState = (Crawler.SessionState)(int) r["_LastSessionState"];
                int duration = (int)r["duration"];

                // _COMPLETED and _ERROR are mailed once and then rewritten to COMPLETED / ERROR,
                // which the query above no longer selects.
                if (_LastSessionState == Crawler.SessionState._COMPLETED)
                {
                    string m = "Crawler " + crawler_id + " completed successfully.\nTotal duration: " + (new TimeSpan(0, 0, duration)).ToString() + m1;
                    Mailer.Send(db, m, ReportSourceType.CRAWLER, crawler_id, false);
                    db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.COMPLETED + " WHERE Id=@Id"].Execute("@Id", crawler_id);
                    continue;
                }

                if (_LastSessionState == Crawler.SessionState._ERROR)
                {
                    Mailer.Send(db, "Crawler " + crawler_id + " exited with error" + m1, ReportSourceType.CRAWLER, crawler_id);
                    db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.ERROR + " WHERE Id=@Id"].Execute("@Id", crawler_id);
                    continue;
                }

                // From here on the session state is STARTED.
                int? process_id = (int?)r["_LastProcessId"];
                if (!ServiceManager.IsProcessAlive(process_id, crawler_id))
                {
                    Mailer.Send(db, "Crawler " + crawler_id + " was broken by unknown reason", ReportSourceType.CRAWLER, crawler_id);
                    db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.BROKEN + ", _NextStartTime=DATEADD(ss, RestartDelayIfBroken, GETDATE()) WHERE Id=@Id"].Execute("@Id", crawler_id);
                    continue;
                }

                int crawl_product_timeout = (int)r["CrawlProductTimeout"];
                if (duration >= crawl_product_timeout)
                {
                    // -1 means _LastProductTime is NULL (ISNULL default in the query).
                    int last_crawled_product_elapsed_time = (int)db["SELECT ISNULL(DATEDIFF(ss, _LastProductTime, GETDATE()), -1) AS duration FROM Crawlers WHERE Id=@Id"].GetSingleValue("@Id", crawler_id);

                    if (last_crawled_product_elapsed_time < 0 || last_crawled_product_elapsed_time > crawl_product_timeout)
                    {
                        Mailer.Send(db, "Crawler " + crawler_id + " is running but not crawling products during " + last_crawled_product_elapsed_time + " seconds. It will be killed. Total duration: " + (new TimeSpan(0, 0, duration)).ToString() + m1, ReportSourceType.CRAWLER, crawler_id);

                        // The process may have exited since the liveness check above, in which case
                        // GetProcess can return null; the helper tolerates that.
                        Process p = ServiceManager.GetProcess(process_id, crawler_id);
                        if (kill_crawler_process(p, process_id, crawler_id, "Killing " + crawler_id))
                        {
                            db["UPDATE Crawlers SET _LastSessionState=" + (int)Crawler.SessionState.KILLED + ", _NextStartTime=DATEADD(ss, RestartDelayIfBroken, GETDATE()), _LastEndTime=GETDATE() WHERE Id=@Id"].Execute("@Id", crawler_id);
                        }
                        continue;
                    }
                }

                running_crawler_ids.Add(crawler_id);
                running_crawler_notifications.Add(crawler_id + ", process id: " + r["_LastProcessId"]);
            }
            if (running_crawler_notifications.Count > 0)
            {
                Log.Main.Write("Already running: " + string.Join("\r\n", running_crawler_notifications));
            }

            ////////////////////////////////////////////////////////////
            //Starting new crawlers
            ////////////////////////////////////////////////////////////
            List <string> remaining_crawler_ids = new List <string>();

            rs = db[@"SELECT Id AS crawler_id, State, Command, AdminEmails FROM Crawlers 
WHERE (State<>" + (int)Crawler.State.DISABLED + " AND GETDATE()>=_NextStartTime AND Command<>" + (int)Crawler.Command.STOP + @") 
            OR Command=" + (int)Crawler.Command.FORCE + " ORDER BY Command, _NextStartTime"].GetRecordset();
            foreach (Dictionary <string, object> r in rs)
            {
                string crawler_id = (string)r["crawler_id"];

                if ((int)r["Command"] == (int)Crawler.Command.FORCE)
                {
                    // Forced crawlers are launched without checking the process limit here.
                    Log.Main.Write("Forcing " + crawler_id);
                    if ((int)r["State"] == (int)Crawler.State.DISABLED)
                    {
                        Log.Main.Error(crawler_id + " is disabled.");
                        continue;
                    }
                    if (running_crawler_ids.Contains(crawler_id))
                    {
                        Log.Main.Warning(crawler_id + " is running already.");
                        db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.EMPTY + " WHERE Id=@Id"].Execute("@Id", crawler_id);
                        continue;
                    }
                    if (launch_crawler(crawler_id, running_crawler_ids))
                    {
                        db["UPDATE Crawlers SET Command=" + (int)Crawler.Command.EMPTY + " WHERE Id=@Id"].Execute("@Id", crawler_id);
                    }
                    continue;
                }

                if (running_crawler_ids.Contains(crawler_id))
                {
                    continue;
                }
                if (running_crawler_ids.Count >= Properties.Settings.Default.CrawlerProcessMaxNumber)
                {
                    remaining_crawler_ids.Add(crawler_id);
                    continue;
                }
                launch_crawler(crawler_id, running_crawler_ids);
            }

            if (remaining_crawler_ids.Count > 0)
            {
                Log.Main.Warning("crawler_process_number reached " + Properties.Settings.Default.CrawlerProcessMaxNumber + " so no more crawler will be started.\nCrawlers remaining to start:\n" + string.Join("\r\n", remaining_crawler_ids));
            }

            if (running_crawler_ids.Count > 0)
            {
                Log.Main.Write("Currently running Crawlers: " + running_crawler_ids.Count);
            }
            else
            {
                Log.Main.Write("Currently no crawler runs.");
            }
        }

        /// <summary>
        /// Logs the given warning, kills the crawler process, waits two seconds and then checks
        /// through ServiceManager whether the process is still alive.
        /// </summary>
        /// <param name="p">Process to kill; when null the kill and the wait are skipped and only the liveness check runs.</param>
        /// <param name="process_id">Value of the crawler's _LastProcessId column, passed to the liveness check.</param>
        /// <param name="crawler_id">Id of the crawler owning the process.</param>
        /// <param name="warning">Message written to the log before the kill attempt.</param>
        /// <returns>true when the process is no longer alive; false (after logging an error) when it survived.</returns>
        private static bool kill_crawler_process(Process p, int? process_id, string crawler_id, string warning)
        {
            Log.Main.Warning(warning);
            if (p != null)
            {
                p.Kill();
                // Give the process time to terminate before checking it.
                Thread.Sleep(2000);
            }

            if (ServiceManager.IsProcessAlive(process_id, crawler_id))
            {
                Log.Main.Error("Could not kill " + crawler_id);
                return false;
            }
            return true;
        }