/// <summary>Raises an error-count alarm for a traced application, based on the statistics row of the current 5-minute bucket.</summary>
/// <param name="app">Application to check. Ignored when null, disabled, or when its alarm threshold is not positive.</param>
private void ProcessAppTracer(AppTracer app)
{
    // Only applications that are enabled and have a positive threshold can alarm
    if (app == null || !app.Enable || app.AlarmThreshold <= 0)
    {
        return;
    }

    var appId = app.ID;
    if (!RobotHelper.CanAlarm(app.Category, app.AlarmRobot))
    {
        return;
    }

    using var span = _tracer?.NewSpan($"Alarm:{nameof(AppTracer)}");

    // Round the current local time down to the start of its 5-minute bucket
    var now = DateTime.Now;
    var bucketStart = now.Date.AddHours(now.Hour).AddMinutes(now.Minute - now.Minute % 5);

    var stat = AppMinuteStat.FindByAppIdAndTime(appId, bucketStart);
    if (stat == null || stat.Errors < app.AlarmThreshold)
    {
        return;
    }

    // Suppress repeats while a previous value is still cached, unless the error count has more than doubled
    var cacheKey = "alarm:AppTracer:" + appId;
    var lastReported = _cache.Get<Int32>(cacheKey);
    if (lastReported != 0 && stat.Errors <= lastReported * 2)
    {
        return;
    }

    // Remember the reported value; expiry argument is 5 * 60 (presumably seconds)
    _cache.Set(cacheKey, stat.Errors, 5 * 60);

    var markdown = GetMarkdown(app, stat, true);
    RobotHelper.SendAlarm(app.Category, app.AlarmRobot, "系统告警", markdown);
}
/// <summary>Compares the latest sample of a Redis node with the node's alarm thresholds and sends one combined alarm for everything that tripped.</summary>
/// <remarks>
/// Each threshold (memory rate, connections, speed, input Kbps, output Kbps) is optional and evaluated on its own;
/// a threshold that is not positive is skipped. A metric that alarmed recently is reported again only when its
/// value is more than twice the cached value.
/// </remarks>
/// <param name="node">Redis node to check. Ignored when null or when its category/webhook cannot alarm.</param>
private void ProcessRedisData(RedisNode node)
{
    if (node == null || !RobotHelper.CanAlarm(node.Category, node.WebHook))
    {
        return;
    }

    // The memory check divides by MaxMemory, so it needs both a threshold and a positive maximum
    var memoryEnabled = node.AlarmMemoryRate > 0 && node.MaxMemory > 0;

    // Bail out only when no threshold at all is configured. Requiring memory AND connections to be set
    // would silently disable the speed and traffic alarms for nodes that configure only those.
    if (!memoryEnabled &&
        node.AlarmConnections <= 0 &&
        node.AlarmSpeed <= 0 &&
        node.AlarmInputKbps <= 0 &&
        node.AlarmOutputKbps <= 0)
    {
        return;
    }

    // Latest sample
    var data = RedisData.FindLast(node.Id);
    if (data == null)
    {
        return;
    }

    using var span = _tracer?.NewSpan($"Alarm:{nameof(RedisNode)}");

    // Each tripped check contributes one line to the final message
    var actions = new List<Action<StringBuilder>>();

    // Memory alarm
    if (memoryEnabled)
    {
        var rate = data.UsedMemory * 100 / node.MaxMemory;
        if (rate >= node.AlarmMemoryRate)
        {
            // Do not repeat within the cache window unless the value has more than doubled
            var error2 = _cache.Get<Int32>("alarm:RedisMemory:" + node.Id);
            if (error2 == 0 || rate > error2 * 2)
            {
                _cache.Set("alarm:RedisMemory:" + node.Id, rate, 5 * 60);

                // The p0 format expects a fraction; dividing by 100d avoids truncation to 0 or 1
                // that integer division would cause if these values are integers.
                actions.Add(sb => sb.AppendLine($">**内存告警:**<font color=\"info\">{rate / 100d:p0} >= {node.AlarmMemoryRate / 100d:p0}</font>"));
            }
        }
    }

    // Connection-count alarm
    var cs = data.ConnectedClients;
    if (node.AlarmConnections > 0 && cs >= node.AlarmConnections)
    {
        var error2 = _cache.Get<Int32>("alarm:RedisConnections:" + node.Id);
        if (error2 == 0 || cs > error2 * 2)
        {
            _cache.Set("alarm:RedisConnections:" + node.Id, cs, 5 * 60);
            actions.Add(sb => sb.AppendLine($">**连接数告警:**<font color=\"info\">{cs:n0} >= {node.AlarmConnections:n0}</font>"));
        }
    }

    // Speed alarm
    var speed = data.Speed;
    if (node.AlarmSpeed > 0 && speed >= node.AlarmSpeed)
    {
        var error2 = _cache.Get<Int32>("alarm:RedisSpeed:" + node.Id);
        if (error2 == 0 || speed > error2 * 2)
        {
            _cache.Set("alarm:RedisSpeed:" + node.Id, speed, 5 * 60);
            actions.Add(sb => sb.AppendLine($">**速度告警:**<font color=\"info\">{speed:n0} >= {node.AlarmSpeed:n0}</font>"));
        }
    }

    // Inbound traffic alarm
    var input = data.InputKbps;
    if (node.AlarmInputKbps > 0 && input >= node.AlarmInputKbps)
    {
        var error2 = _cache.Get<Int32>("alarm:RedisInputKbps:" + node.Id);
        if (error2 == 0 || input > error2 * 2)
        {
            _cache.Set("alarm:RedisInputKbps:" + node.Id, input, 5 * 60);
            actions.Add(sb => sb.AppendLine($">**入流量告警:**<font color=\"info\">{input:n0} >= {node.AlarmInputKbps:n0}</font>"));
        }
    }

    // Outbound traffic alarm
    var output = data.OutputKbps;
    if (node.AlarmOutputKbps > 0 && output >= node.AlarmOutputKbps)
    {
        var error2 = _cache.Get<Int32>("alarm:RedisOutputKbps:" + node.Id);
        if (error2 == 0 || output > error2 * 2)
        {
            _cache.Set("alarm:RedisOutputKbps:" + node.Id, output, 5 * 60);
            actions.Add(sb => sb.AppendLine($">**出流量告警:**<font color=\"info\">{output:n0} >= {node.AlarmOutputKbps:n0}</font>"));
        }
    }

    // Send a single message containing every tripped check
    if (actions.Count > 0)
    {
        var msg = GetMarkdown(node, data, "Redis告警", actions);
        RobotHelper.SendAlarm(node.Category, node.WebHook, "Redis告警", msg);
    }
}
/// <summary>Sends a backlog alarm for every enabled message queue of a Redis node whose message count has reached its configured maximum.</summary>
/// <param name="node">Redis node whose queues are inspected; its category and webhook are the fallback when a queue defines none.</param>
private void ProcessRedisQueue(RedisNode node)
{
    using var span = _tracer?.NewSpan($"Alarm:{nameof(RedisMessageQueue)}");

    // Walk every queue registered for this node
    foreach (var mq in RedisMessageQueue.FindAllByRedisId(node.Id))
    {
        // Queue-level routing wins; otherwise fall back to the node's settings
        var category = mq.Category.IsNullOrEmpty() ? node.Category : mq.Category;
        var hook = mq.WebHook.IsNullOrEmpty() ? node.WebHook : mq.WebHook;

        // Skip queues that are disabled, have no limit, or are still below the limit
        if (!mq.Enable || mq.MaxMessages <= 0 || mq.Messages < mq.MaxMessages)
        {
            continue;
        }

        if (!RobotHelper.CanAlarm(category, hook))
        {
            continue;
        }

        // Suppress repeats while a previous value is cached, unless the backlog has more than doubled
        var cacheKey = "alarm:RedisMessageQueue:" + mq.Id;
        var lastReported = _cache.Get<Int32>(cacheKey);
        if (lastReported != 0 && mq.Messages <= lastReported * 2)
        {
            continue;
        }

        _cache.Set(cacheKey, mq.Messages, 5 * 60);

        var markdown = GetMarkdown(node, mq, true);
        RobotHelper.SendAlarm(category, hook, "消息队列告警", markdown);
    }
}
/// <summary>Checks the latest sample of a node against its CPU, memory, disk, TCP and process thresholds and sends one alarm per tripped check.</summary>
/// <remarks>
/// A check that alarmed recently is repeated only when its value is more than twice the cached value
/// (for the process check: when more processes are missing than last time).
/// </remarks>
/// <param name="node">Node to check. Ignored when null, disabled, or when its category/webhook cannot alarm.</param>
private void ProcessNode(Node node)
{
    if (node == null || !node.Enable || !RobotHelper.CanAlarm(node.Category, node.WebHook))
    {
        return;
    }

    // Nothing to do when no threshold is configured. AlarmTcp must be part of this test,
    // otherwise a node that only configures a TCP threshold never reaches the TCP check below.
    if (node.AlarmCpuRate <= 0 &&
        node.AlarmMemoryRate <= 0 &&
        node.AlarmDiskRate <= 0 &&
        node.AlarmTcp <= 0 &&
        node.AlarmProcesses.IsNullOrEmpty())
    {
        return;
    }

    using var span = _tracer?.NewSpan($"Alarm:{nameof(Node)}");

    // Latest sample
    var data = NodeData.FindLast(node.ID);
    if (data == null)
    {
        return;
    }

    // CPU alarm
    if (node.AlarmCpuRate > 0)
    {
        var rate = data.CpuRate * 100;
        if (rate >= node.AlarmCpuRate)
        {
            // Do not repeat within the cache window unless the value has more than doubled
            var error2 = _cache.Get<Int32>("alarm:CpuRate:" + node.ID);
            if (error2 == 0 || rate > error2 * 2)
            {
                _cache.Set("alarm:CpuRate:" + node.ID, rate, 5 * 60);
                SendAlarm("cpu", node, data, $"[{node.Name}]CPU告警");
            }
        }
    }

    // Memory alarm: used = total - available, as a percentage of total
    if (node.AlarmMemoryRate > 0 && node.Memory > 0)
    {
        var rate = (node.Memory - data.AvailableMemory) * 100d / node.Memory;
        if (rate >= node.AlarmMemoryRate)
        {
            var error2 = _cache.Get<Int32>("alarm:MemoryRate:" + node.ID);
            if (error2 == 0 || rate > error2 * 2)
            {
                _cache.Set("alarm:MemoryRate:" + node.ID, rate, 5 * 60);
                SendAlarm("memory", node, data, $"[{node.Name}]内存告警");
            }
        }
    }

    // Disk alarm: used = total - free, as a percentage of total
    if (node.AlarmDiskRate > 0 && node.TotalSize > 0)
    {
        var rate = (node.TotalSize - data.AvailableFreeSpace) * 100d / node.TotalSize;
        if (rate >= node.AlarmDiskRate)
        {
            var error2 = _cache.Get<Int32>("alarm:DiskRate:" + node.ID);
            if (error2 == 0 || rate > error2 * 2)
            {
                _cache.Set("alarm:DiskRate:" + node.ID, rate, 5 * 60);
                SendAlarm("disk", node, data, $"[{node.Name}]磁盘告警");
            }
        }
    }

    // TCP alarm: compare the largest of the three counters with the threshold
    if (node.AlarmTcp > 0)
    {
        var tcp = data.TcpConnections;
        if (tcp < data.TcpTimeWait)
        {
            tcp = data.TcpTimeWait;
        }

        if (tcp < data.TcpCloseWait)
        {
            tcp = data.TcpCloseWait;
        }

        if (tcp >= node.AlarmTcp)
        {
            var error2 = _cache.Get<Int32>("alarm:Tcp:" + node.ID);
            if (error2 == 0 || tcp > error2 * 2)
            {
                _cache.Set("alarm:Tcp:" + node.ID, tcp, 5 * 60);
                SendAlarm("tcp", node, data, $"[{node.Name}]Tcp告警");
            }
        }
    }

    // Process alarm: report watched processes that are absent from the sample
    if (!node.AlarmProcesses.IsNullOrEmpty() && !data.Data.IsNullOrEmpty())
    {
        var alarms = node.AlarmProcesses.Split(",", StringSplitOptions.RemoveEmptyEntries);

        // NOTE(review): defensive lookup in case Decode returns null or the sample has no
        // "Processes" entry - confirm against JsonParser.Decode's contract.
        var dic = JsonParser.Decode(data.Data);
        String[] ps = null;
        if (dic != null && dic.TryGetValue("Processes", out var raw))
        {
            ps = (raw as String)?.Split(",", StringSplitOptions.RemoveEmptyEntries);
        }

        // An empty process list in the sample is treated as "no information", not as "everything is missing"
        if (alarms.Length > 0 && ps != null && ps.Length > 0)
        {
            // Watched processes that are not running
            var ps2 = alarms.Where(e => !ps.Contains(e)).ToList();
            if (ps2.Count > 0)
            {
                // Do not repeat within the cache window unless more processes are missing than before
                var error2 = _cache.Get<Int32>("alarm:Process:" + node.ID);
                if (error2 == 0 || ps2.Count > error2)
                {
                    _cache.Set("alarm:Process:" + node.ID, ps2.Count, 5 * 60);
                    SendAlarm("process", node, data, $"[{node.Name}]进程守护告警", ps2.Join());
                }
            }
        }
    }
}