From 2d1544548236b1e1ca870d53cb4497449491af7e Mon Sep 17 00:00:00 2001 From: ning1875 <907974064@qq.com> Date: Thu, 22 Jul 2021 15:23:49 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9Eblackbox=5Fexporter=E6=94=AF?= =?UTF-8?q?=E6=8C=81=20(#740)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 1. notify.py 支持安装channel反射发送 2. 支持钉钉群发送 3. 生成告警模板信息 * 1. notify.py 支持安装channel反射发送 2. 支持钉钉群发送 3. 增加二开说明 * 1. notify.py 用户创建一个虚拟的用户保存上述im群 的机器人token信息 user的contacts map中 * 1. notify.py alerts目录改为原来的 * 1. notify.py dingtalk send continue匹配 * 1. push型告警支持多条件 任意一个触发就触发 * 1. prometheus查询接口 tag-keys tag-values支持 params为空的情况 * 1. prometheus查询接口 ident匹配全部改为精确匹配 2. tagKey 提示改为tag_key * 1. prometheus查询接口 支持instance_query 对外暴露 * 1. prometheus instance_query改名为instant-query 2. page group中去掉数据查询相关path * 1. prometheus range_query 时间戳改为秒级 2. 查询支持传入分辨率参数 * 1. 新增jmx_exporter内置大盘 * 1. 新增blackbox_exporter内置大盘 2. 新增blackbox_exporter内置告警策略 --- etc/alert_rule/blackbox_exporter | 191 ++++++++++++++++++++++++++ etc/dashboard/blackbox_exporter | 226 +++++++++++++++++++++++++++++++ 2 files changed, 417 insertions(+) create mode 100644 etc/alert_rule/blackbox_exporter create mode 100644 etc/dashboard/blackbox_exporter diff --git a/etc/alert_rule/blackbox_exporter b/etc/alert_rule/blackbox_exporter new file mode 100644 index 00000000..e5b7c9a7 --- /dev/null +++ b/etc/alert_rule/blackbox_exporter @@ -0,0 +1,191 @@ +[ + { + "name": "dns解析时间超过2秒", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "probe_dns_lookup_time_seconds>2" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 3, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626935980, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "https证书过期时间小于7天", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "(probe_ssl_earliest_cert_expiry - time()) / 3600 / 24 <7" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 1, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626935909, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "http响应数据传输占比超过70%", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "100 * avg(probe_http_duration_seconds{phase=\"transfer\"})by(instance) / sum(probe_http_duration_seconds) by(instance) >70" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 2, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626936324, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "http接口探测失败", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "probe_success{job=~\".*http.*\"}==0" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 1, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626935627, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "http接口探测耗时超过3秒", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "sum(probe_http_duration_seconds) by (instance) >3\n" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 3, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626936059, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "http接口返回状态码4xx/5xx错误", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "probe_http_status_code >=400" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 1, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626936145, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "icmp探测失败", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "probe_success{job=~\".*icmp.*\"}==0" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 1, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626935855, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "tcp端口探测失败", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "probe_success{job=~\".*tcp.*\"}==0" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 1, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626935874, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + }, + { + "name": "机器ssh探测失败", + "type": 1, + "expression": { + "evaluation_interval": 10, + "promql": "probe_success{job=~\".*ssh.*\"}==0\n" + }, + "status": 0, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": "1 2 3 4 5 6 7", + "recovery_notify": 0, + "priority": 1, + "notify_channels": "", + "runbook_url": "", + "note": "", + "create_at": 1626935827, + "alert_duration": 60, + "notify_users_detail": null, + "notify_groups_detail": null + } +] \ No newline at end of file diff --git a/etc/dashboard/blackbox_exporter b/etc/dashboard/blackbox_exporter new file mode 100644 index 00000000..84ff9a58 --- /dev/null +++ b/etc/dashboard/blackbox_exporter @@ -0,0 +1,226 @@ +[ + { + "id": 0, + "name": "blackbox_exporter", + "tags": "", + "configs": "{\"tags\":[{\"tagName\":\"http_probe_job\",\"key\":\"job\",\"value\":\"blackbox-http\",\"prefix\":false},{\"tagName\":\"http_probe_instance\",\"key\":\"instance\",\"value\":\"*\",\"prefix\":false}]}", + "chart_groups": [ + { + "id": 0, + "dashboard_id": 0, + "name": "http接口探测", + "weight": 0, + "charts": [ + { + "id": 440, + "group_id": 109, + "configs": "{\"name\":\"https的探测\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_ssl==1\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "id": 441, + "group_id": 109, + "configs": "{\"name\":\"http的探测\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_ssl==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "id": 442, + "group_id": 109, + "configs": "{\"name\":\"https探测目标个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_http_ssl==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "id": 443, + "group_id": 109, + "configs": "{\"name\":\"http探测目标个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_http_ssl==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + }, + { + "id": 446, + "group_id": 109, + "configs": "{\"name\":\"http探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*http.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"4\"}}", + "weight": 0 + }, + { + "id": 447, + "group_id": 109, + "configs": "{\"name\":\"http探测失败列表\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*http.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":2,\"i\":\"5\"}}", + "weight": 0 + }, + { + "id": 448, + "group_id": 109, + "configs": "{\"name\":\"http探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*http.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"6\"}}", + "weight": 0 + }, + { + "id": 449, + "group_id": 109, + "configs": "{\"name\":\"http探测总耗时 单位秒\",\"mode\":\"promethues\",\"prome_ql\":[\"sum(probe_http_duration_seconds) by (instance)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":2,\"i\":\"7\"}}", + "weight": 0 + } + ] + }, + { + "id": 0, + "dashboard_id": 0, + "name": "https接口探测汇总", + "weight": 1, + "charts": [ + { + "id": 444, + "group_id": 110, + "configs": "{\"name\":\"tls版本信息\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_tls_version_info\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "id": 445, + "group_id": 110, + "configs": "{\"name\":\"tls证书过期时间 单位:天\",\"mode\":\"promethues\",\"prome_ql\":[\"(probe_ssl_earliest_cert_expiry - time()) / 3600 / 24\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + } + ] + }, + { + "id": 0, + "dashboard_id": 0, + "name": "http接口各阶段耗时详情", + "weight": 2, + "charts": [ + { + "id": 450, + "group_id": 111, + "configs": "{\"name\":\"单个目标的各阶段耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "id": 451, + "group_id": 111, + "configs": "{\"name\":\"[阶段1] dns解析时间\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"resolve\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "id": 452, + "group_id": 111, + "configs": "{\"name\":\"[可无]tls握手时间\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"tls\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "id": 453, + "group_id": 111, + "configs": "{\"name\":\"[阶段2] tcp连接耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"connect\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + }, + { + "id": 454, + "group_id": 111, + "configs": "{\"name\":\"[阶段3] 服务端处理耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"processing\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"4\"}}", + "weight": 0 + }, + { + "id": 455, + "group_id": 111, + "configs": "{\"name\":\"[阶段4] 传输响应耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"transfer\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"5\"}}", + "weight": 0 + } + ] + }, + { + "id": 0, + "dashboard_id": 0, + "name": "ssh存活探测(配置了ssh探测job才有)", + "weight": 3, + "charts": [ + { + "id": 456, + "group_id": 112, + "configs": "{\"name\":\"ssh探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*ssh.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "id": 457, + "group_id": 112, + "configs": "{\"name\":\"ssh探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*ssh.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "id": 458, + "group_id": 112, + "configs": "{\"name\":\"ssh探测失败详情\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*ssh.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "id": 459, + "group_id": 112, + "configs": "{\"name\":\"ssh探测耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_duration_seconds{job=~\\\".*ssh.*\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + } + ] + }, + { + "id": 0, + "dashboard_id": 0, + "name": "icmp探测(配置了icmp探测job才有)", + "weight": 4, + "charts": [ + { + "id": 460, + "group_id": 113, + "configs": "{\"name\":\"icmp探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*icmp.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "id": 461, + "group_id": 113, + "configs": "{\"name\":\"icmp探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*icmp.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "id": 462, + "group_id": 113, + "configs": "{\"name\":\"icmp探测失败详情\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*icmp.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "id": 463, + "group_id": 113, + "configs": "{\"name\":\"icmp探测总耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_duration_seconds{job=~\\\".*icmp.*\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + } + ] + }, + { + "id": 0, + "dashboard_id": 0, + "name": "tcp端口探测(配置了tcp探测job才有)", + "weight": 5, + "charts": [ + { + "id": 464, + "group_id": 114, + "configs": "{\"name\":\"tcp端口探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*tcp.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}", + "weight": 0 + }, + { + "id": 465, + "group_id": 114, + "configs": "{\"name\":\"tcp端口探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*tcp.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}", + "weight": 0 + }, + { + "id": 466, + "group_id": 114, + "configs": "{\"name\":\"tcp端口探测失败列表\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*tcp.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}", + "weight": 0 + }, + { + "id": 467, + "group_id": 114, + "configs": "{\"name\":\"tcp端口探测耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_duration_seconds{job=~\\\".*tcp.*\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}", + "weight": 0 + } + ] + } + ] + } +] \ No newline at end of file