Beispiel #1
0
        /// <summary>
        /// Evaluates the error count of an application for the current 5-minute slot and
        /// sends an alarm through the configured robot when it reaches the application's threshold.
        /// </summary>
        /// <param name="app">Application configuration. Nothing happens when it is null, disabled, or has no positive alarm threshold.</param>
        private void ProcessAppTracer(AppTracer app)
        {
            // Alarming must be switched on for this application
            if (app == null)
            {
                return;
            }
            if (!app.Enable || app.AlarmThreshold <= 0)
            {
                return;
            }

            var id = app.ID;

            if (!RobotHelper.CanAlarm(app.Category, app.AlarmRobot))
            {
                return;
            }

            using var span = _tracer?.NewSpan($"Alarm:{nameof(AppTracer)}");

            // Round the current time down to the start of its 5-minute slot and load that slot's statistics
            var now   = DateTime.Now;
            var slot  = now.Date.AddHours(now.Hour).AddMinutes(now.Minute / 5 * 5);
            var stats = AppMinuteStat.FindByAppIdAndTime(id, slot);

            if (stats == null)
            {
                return;
            }

            // Below the threshold: nothing to report
            if (stats.Errors < app.AlarmThreshold)
            {
                return;
            }

            // Do not repeat the alarm while a previous one is still cached, unless the error count has more than doubled
            var cacheKey = "alarm:AppTracer:" + id;
            var previous = _cache.Get<Int32>(cacheKey);
            if (previous != 0 && stats.Errors <= previous * 2)
            {
                return;
            }

            // Remember the reported count (expiry argument 5 * 60, presumably seconds)
            _cache.Set(cacheKey, stats.Errors, 5 * 60);

            var markdown = GetMarkdown(app, stats, true);
            RobotHelper.SendAlarm(app.Category, app.AlarmRobot, "系统告警", markdown);
        }
Beispiel #2
0
        /// <summary>
        /// Compares the latest sample of a Redis node against its configured thresholds
        /// (memory rate, connections, speed, inbound and outbound traffic) and sends one
        /// combined alarm message listing every threshold that was reached.
        /// </summary>
        /// <param name="node">Redis node to check. Nothing happens when it is null or when alarming is not possible for its category/webhook.</param>
        private void ProcessRedisData(RedisNode node)
        {
            if (node == null || !RobotHelper.CanAlarm(node.Category, node.WebHook))
            {
                return;
            }

            // Skip the node only when no threshold at all is configured. Every check below carries its own
            // "> 0" guard, so an unset memory or connection threshold must not silence the other alarms.
            if (node.AlarmMemoryRate <= 0 && node.AlarmConnections <= 0 && node.AlarmSpeed <= 0 &&
                node.AlarmInputKbps <= 0 && node.AlarmOutputKbps <= 0)
            {
                return;
            }

            // Latest sample
            var data = RedisData.FindLast(node.Id);

            if (data == null)
            {
                return;
            }

            using var span = _tracer?.NewSpan($"Alarm:{nameof(RedisNode)}");

            // Each triggered alarm contributes one line to the final message
            var actions = new List<Action<StringBuilder>>();

            // Memory alarm. MaxMemory must be positive, otherwise the percentage cannot be computed
            // (and the division would fail).
            if (node.AlarmMemoryRate > 0 && node.MaxMemory > 0)
            {
                var rate = data.UsedMemory * 100 / node.MaxMemory;

                if (rate >= node.AlarmMemoryRate)
                {
                    // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                    var error2 = _cache.Get<Int32>("alarm:RedisMemory:" + node.Id);
                    if (error2 == 0 || rate > error2 * 2)
                    {
                        _cache.Set("alarm:RedisMemory:" + node.Id, rate, 5 * 60);

                        // Divide by 100d: the "p0" format expects a fraction, and an integer division would truncate to 0
                        actions.Add(sb => sb.AppendLine($">**内存告警:**<font color=\"info\">{rate / 100d:p0} >= {node.AlarmMemoryRate / 100d:p0}</font>"));
                    }
                }
            }

            // Connection count alarm
            var cs = data.ConnectedClients;

            if (node.AlarmConnections > 0 && cs >= node.AlarmConnections)
            {
                // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                var error2 = _cache.Get<Int32>("alarm:RedisConnections:" + node.Id);
                if (error2 == 0 || cs > error2 * 2)
                {
                    _cache.Set("alarm:RedisConnections:" + node.Id, cs, 5 * 60);

                    actions.Add(sb => sb.AppendLine($">**连接数告警:**<font color=\"info\">{cs:n0} >= {node.AlarmConnections:n0}</font>"));
                }
            }

            // Speed alarm
            var speed = data.Speed;

            if (node.AlarmSpeed > 0 && speed >= node.AlarmSpeed)
            {
                // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                var error2 = _cache.Get<Int32>("alarm:RedisSpeed:" + node.Id);
                if (error2 == 0 || speed > error2 * 2)
                {
                    _cache.Set("alarm:RedisSpeed:" + node.Id, speed, 5 * 60);

                    actions.Add(sb => sb.AppendLine($">**速度告警:**<font color=\"info\">{speed:n0} >= {node.AlarmSpeed:n0}</font>"));
                }
            }

            // Inbound traffic alarm
            var input = data.InputKbps;

            if (node.AlarmInputKbps > 0 && input >= node.AlarmInputKbps)
            {
                // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                var error2 = _cache.Get<Int32>("alarm:RedisInputKbps:" + node.Id);
                if (error2 == 0 || input > error2 * 2)
                {
                    _cache.Set("alarm:RedisInputKbps:" + node.Id, input, 5 * 60);

                    actions.Add(sb => sb.AppendLine($">**入流量告警:**<font color=\"info\">{input:n0} >= {node.AlarmInputKbps:n0}</font>"));
                }
            }

            // Outbound traffic alarm
            var output = data.OutputKbps;

            if (node.AlarmOutputKbps > 0 && output >= node.AlarmOutputKbps)
            {
                // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                var error2 = _cache.Get<Int32>("alarm:RedisOutputKbps:" + node.Id);
                if (error2 == 0 || output > error2 * 2)
                {
                    _cache.Set("alarm:RedisOutputKbps:" + node.Id, output, 5 * 60);

                    actions.Add(sb => sb.AppendLine($">**出流量告警:**<font color=\"info\">{output:n0} >= {node.AlarmOutputKbps:n0}</font>"));
                }
            }

            // Send a single message covering everything that triggered
            if (actions.Count > 0)
            {
                var msg = GetMarkdown(node, data, "Redis告警", actions);
                RobotHelper.SendAlarm(node.Category, node.WebHook, "Redis告警", msg);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Walks through all message queues registered for a Redis node and sends an alarm
        /// for every enabled queue whose message count has reached its configured maximum.
        /// </summary>
        /// <param name="node">Redis node whose queues are checked; its category and webhook are used when a queue defines none.</param>
        private void ProcessRedisQueue(RedisNode node)
        {
            using var span = _tracer?.NewSpan($"Alarm:{nameof(RedisMessageQueue)}");

            // Every queue that belongs to this node
            var queues = RedisMessageQueue.FindAllByRedisId(node.Id);

            foreach (var mq in queues)
            {
                // Queue-level settings take precedence; fall back to the node's settings
                var category = mq.Category.IsNullOrEmpty() ? node.Category : mq.Category;
                var hook     = mq.WebHook.IsNullOrEmpty() ? node.WebHook : mq.WebHook;

                // Only enabled queues with a positive limit that has been reached are of interest
                if (!mq.Enable || mq.MaxMessages <= 0 || mq.Messages < mq.MaxMessages)
                {
                    continue;
                }
                if (!RobotHelper.CanAlarm(category, hook))
                {
                    continue;
                }

                // Do not repeat the alarm while a previous one is cached, unless the backlog has more than doubled
                var cacheKey = "alarm:RedisMessageQueue:" + mq.Id;
                var previous = _cache.Get<Int32>(cacheKey);
                if (previous != 0 && mq.Messages <= previous * 2)
                {
                    continue;
                }

                _cache.Set(cacheKey, mq.Messages, 5 * 60);

                var markdown = GetMarkdown(node, mq, true);
                RobotHelper.SendAlarm(category, hook, "消息队列告警", markdown);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Compares the latest sample of a node against its configured thresholds
        /// (CPU, memory, disk, TCP connections, watched processes) and sends a separate
        /// alarm for each threshold that was reached.
        /// </summary>
        /// <param name="node">Node to check. Nothing happens when it is null, disabled, or alarming is not possible for its category/webhook.</param>
        private void ProcessNode(Node node)
        {
            if (node == null || !node.Enable || !RobotHelper.CanAlarm(node.Category, node.WebHook))
            {
                return;
            }

            // Skip the node only when nothing at all is configured. AlarmTcp is part of this test so that
            // a node with only a TCP threshold still reaches the TCP check below.
            if (node.AlarmCpuRate <= 0 && node.AlarmMemoryRate <= 0 && node.AlarmDiskRate <= 0 &&
                node.AlarmTcp <= 0 && node.AlarmProcesses.IsNullOrEmpty())
            {
                return;
            }

            using var span = _tracer?.NewSpan($"Alarm:{nameof(Node)}");

            // Latest sample
            var data = NodeData.FindLast(node.ID);

            if (data == null)
            {
                return;
            }

            // CPU alarm
            if (node.AlarmCpuRate > 0)
            {
                // Scale to a percentage (presumably CpuRate is a 0..1 fraction - confirm against NodeData)
                var rate = data.CpuRate * 100;
                if (rate >= node.AlarmCpuRate)
                {
                    // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                    var error2 = _cache.Get<Int32>("alarm:CpuRate:" + node.ID);
                    if (error2 == 0 || rate > error2 * 2)
                    {
                        _cache.Set("alarm:CpuRate:" + node.ID, rate, 5 * 60);

                        SendAlarm("cpu", node, data, $"[{node.Name}]CPU告警");
                    }
                }
            }

            // Memory alarm; requires a known total so the used percentage can be computed
            if (node.AlarmMemoryRate > 0 && node.Memory > 0)
            {
                var rate = (node.Memory - data.AvailableMemory) * 100d / node.Memory;
                if (rate >= node.AlarmMemoryRate)
                {
                    // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                    var error2 = _cache.Get<Int32>("alarm:MemoryRate:" + node.ID);
                    if (error2 == 0 || rate > error2 * 2)
                    {
                        _cache.Set("alarm:MemoryRate:" + node.ID, rate, 5 * 60);

                        SendAlarm("memory", node, data, $"[{node.Name}]内存告警");
                    }
                }
            }

            // Disk alarm; requires a known total size so the used percentage can be computed
            if (node.AlarmDiskRate > 0 && node.TotalSize > 0)
            {
                var rate = (node.TotalSize - data.AvailableFreeSpace) * 100d / node.TotalSize;
                if (rate >= node.AlarmDiskRate)
                {
                    // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                    var error2 = _cache.Get<Int32>("alarm:DiskRate:" + node.ID);
                    if (error2 == 0 || rate > error2 * 2)
                    {
                        _cache.Set("alarm:DiskRate:" + node.ID, rate, 5 * 60);

                        SendAlarm("disk", node, data, $"[{node.Name}]磁盘告警");
                    }
                }
            }

            // TCP alarm
            if (node.AlarmTcp > 0)
            {
                // Use the largest of the three counters: connections, TIME_WAIT and CLOSE_WAIT
                var tcp = data.TcpConnections;
                if (tcp < data.TcpTimeWait)
                {
                    tcp = data.TcpTimeWait;
                }
                if (tcp < data.TcpCloseWait)
                {
                    tcp = data.TcpCloseWait;
                }
                if (tcp >= node.AlarmTcp)
                {
                    // Do not repeat the alarm while a previous one is cached, unless the value has more than doubled
                    var error2 = _cache.Get<Int32>("alarm:Tcp:" + node.ID);
                    if (error2 == 0 || tcp > error2 * 2)
                    {
                        _cache.Set("alarm:Tcp:" + node.ID, tcp, 5 * 60);

                        SendAlarm("tcp", node, data, $"[{node.Name}]Tcp告警");
                    }
                }
            }

            // Process alarm
            if (!node.AlarmProcesses.IsNullOrEmpty() && !data.Data.IsNullOrEmpty())
            {
                var alarms = node.AlarmProcesses.Split(",", StringSplitOptions.RemoveEmptyEntries);
                var dic    = JsonParser.Decode(data.Data);
                // The decoded payload may be null when the JSON cannot be parsed; treat that as "no process list"
                var ps     = (dic?["Processes"] as String)?.Split(",", StringSplitOptions.RemoveEmptyEntries);
                if (alarms.Length > 0 && ps != null && ps.Length > 0)
                {
                    // Watched processes that are missing from the reported list
                    var ps2 = alarms.Where(e => !ps.Contains(e)).ToList();
                    if (ps2.Count > 0)
                    {
                        // Do not repeat the alarm while a previous one is cached, unless more processes are missing now
                        var error2 = _cache.Get<Int32>("alarm:Process:" + node.ID);
                        if (error2 == 0 || ps2.Count > error2)
                        {
                            _cache.Set("alarm:Process:" + node.ID, ps2.Count, 5 * 60);

                            SendAlarm("process", node, data, $"[{node.Name}]进程守护告警", ps2.Join());
                        }
                    }
                }
            }
        }