运维开发网

k8s-prometheus报警规则及alertmanager报警配置(二)

运维开发网 https://www.qedev.com 2020-12-08 13:39 出处:51CTO 作者:wangyang_2008
接上一个部署文档,此篇文档是在上篇基础上增加报警时候显示当前值,及报警图形连接功能,点击链接就可查看具体的图形了只要在我上篇文档中更改app.py那个报警脚本,然后重新编译镜像即可。下面是完整代码:#!/usr/bin/envpythonimporttime,io,sys,arrow,ossys.stdout=io.TextIOWrapper(sys.stdout.detach(),encodin

接上一篇部署文档,本篇在上篇的基础上增加了两项功能:报警时显示当前值,以及在报警消息中附带报警图形链接,点击链接即可查看具体的图形。

只要在我上篇文档中更改app.py那个报警脚本,然后重新编译镜像即可。下面是完整代码:

#!/usr/bin/env python

import time,io, sys,arrow,os

# Detach the underlying byte buffers of stdout/stderr and wrap them again as
# text streams that always encode as UTF-8, independent of the locale default.
# NOTE(review): presumably needed because the log/alert text contains Chinese
# characters — confirm against the container's locale.
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8')

sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8')

from flask import Flask, Response

from flask import request

import requests

import logging

import json

import locale

#locale.setlocale(locale.LC_ALL,"en_US.UTF-8")

# Flask takes the import name of the defining module; `__name__` supplies it.
app = Flask(__name__)

# NOTE(review): not referenced by any code visible in this file; purpose is
# not evident from here.
EXCLUDE_LIST = ['prometheus', 'endpoint']

# Record layout: timestamp - file:line - logger name - message.
fmt = '%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(message)s'
formatter = logging.Formatter(fmt)

# Stream handler that renders records with the layout above.
console = logging.StreamHandler()
console.setFormatter(formatter)

# Logger used by the webhook handlers; passes DEBUG and above to the console
# handler.
log = logging.getLogger("flask_webhook_dingtalk")
log.setLevel(logging.DEBUG)
log.addHandler(console)

@app.route('/')
def index():
    """Serve a static banner string at the root URL.

    The readiness probe of the Deployment shown later in this document issues
    an HTTP GET against this path.
    """
    return 'Webhook Dingtalk by Billy https://blog.51cto.com/billy98'
@app.route('/dingtalk/k8s/', methods=['POST'])
def hander_session():
    """Relay the first alert of an Alertmanager webhook POST to DingTalk.

    The DingTalk robot URL is read from the first command-line argument
    (``sys.argv[1]``). The markdown message carries the alert state, its start
    time (plus the end time once it is no longer firing) converted to
    Asia/Shanghai, the ``severity`` and ``alertname`` labels, the ``message``
    annotation and a link to the alert's graph.

    Returns:
        The text returned by ``alert_data``; it is also written to the log.
    """
    profile_url = sys.argv[1]
    body = json.loads(request.get_data().decode("utf-8"))
    # Only the first alert of the notification is reported.
    alert = body['alerts'][0]

    # Environment-specific base URL that replaces the scheme/host/port part of
    # generatorURL in the link sent to DingTalk.
    host = 'http://10.1.1.1:30100'
    generator_url = alert['generatorURL']
    _, port_marker, path_and_query = generator_url.partition(':9090')
    if port_marker:
        # Keep everything after ':9090' and switch the panel from tab=1 to
        # tab=0 so the link opens the graph view.
        graph_link = host + path_and_query.replace('tab=1', 'tab=0')
    else:
        # A URL without ':9090' cannot be rewritten; link to it unchanged
        # rather than failing the whole notification.
        graph_link = generator_url

    def local_time(stamp):
        # Render an alert timestamp in the Asia/Shanghai time zone.
        return arrow.get(stamp).to('Asia/Shanghai').format('YYYY-MM-DD HH:mm:ss ZZ')

    firing = alert['status'].upper() == "FIRING"
    messa_list = [
        '### 报警名称: Prometheus-alert',
        '**报警状态: 异常**' if firing else '**报警状态: 恢复**',
        '**报警时间: %s**' % local_time(alert['startsAt']),
    ]
    if not firing:
        messa_list.append('**恢复时间: %s**' % local_time(alert['endsAt']))
    messa_list.extend([
        '**报警级别: %s**' % alert['labels']['severity'],
        '**报警类型: %s**' % alert['labels']['alertname'],
        '**报警详情: %s**' % alert['annotations']['message'],
        '**报警图形: %s**' % graph_link,
    ])
    # alert_data expects line breaks written as the two-character sequence
    # backslash + "n", hence the doubled backslashes in the separator.
    messa = ' \\n\\n > '.join(messa_list)

    status = alert_data(messa, alert['labels']['alertname'], profile_url)
    log.info(status)
    return status

def alert_data(data, title, profile_url):
    """POST a DingTalk markdown message and return the response body.

    Args:
        data: Markdown text of the message. Line breaks are expected as the
            two-character sequence backslash + ``n``, which is the form the
            webhook handlers in this file produce.
        title: Title of the markdown message.
        profile_url: DingTalk robot webhook URL to post to.

    Returns:
        The body of the HTTP response as text.

    Raises:
        requests.exceptions.RequestException: if the request fails or does not
            complete within the timeout.
    """
    headers = {'Content-Type': 'application/json'}

    # Turn the callers' backslash-n markers into real newlines and let
    # json.dumps do all escaping, so quotes, backslashes or control characters
    # in the alert text can no longer produce an invalid JSON document.
    text = data.replace('\\n', '\n')
    send_data = json.dumps(
        {"msgtype": "markdown", "markdown": {"title": title, "text": text}},
        ensure_ascii=False,
    ).encode('utf-8')

    # Without a timeout a stalled connection would block the worker forever.
    reps = requests.post(url=profile_url, data=send_data, headers=headers, timeout=10)
    return reps.text

@app.route('/dingtalk/w/', methods=['POST'])
def hander_session_w():
    """Relay the first alert of an Alertmanager webhook POST to DingTalk.

    The DingTalk robot URL is read from the second command-line argument
    (``sys.argv[2]``). The markdown message carries the alert state, its start
    time (plus the end time once it is no longer firing) converted to
    Asia/Shanghai, the ``severity`` and ``alertname`` labels and the
    ``message`` annotation.

    Returns:
        The text returned by ``alert_data_w``; it is also written to the log.
    """
    profile_url_w = sys.argv[2]
    body_w = json.loads(request.get_data().decode("utf-8"))
    # Only the first alert of the notification is reported.
    alert_w = body_w['alerts'][0]

    def local_time(stamp):
        # Render an alert timestamp in the Asia/Shanghai time zone.
        return arrow.get(stamp).to('Asia/Shanghai').format('YYYY-MM-DD HH:mm:ss ZZ')

    firing = alert_w['status'].upper() == "FIRING"
    messa_list_w = [
        '### 报警名称: Prometheus-alert',
        '**报警状态: 异常**' if firing else '**报警状态: 恢复**',
        '**报警时间: %s**' % local_time(alert_w['startsAt']),
    ]
    if not firing:
        messa_list_w.append('**恢复时间: %s**' % local_time(alert_w['endsAt']))
    messa_list_w.extend([
        '**报警级别: %s**' % alert_w['labels']['severity'],
        '**报警类型: %s**' % alert_w['labels']['alertname'],
        '**报警详情: %s**' % alert_w['annotations']['message'],
    ])
    # alert_data_w expects line breaks written as the two-character sequence
    # backslash + "n", hence the doubled backslashes in the separator.
    messa_w = ' \\n\\n > '.join(messa_list_w)

    status_w = alert_data_w(messa_w, alert_w['labels']['alertname'], profile_url_w)
    log.info(status_w)
    return status_w

def alert_data_w(data_w, title_w, profile_url_w):
    """POST a DingTalk markdown message for the /dingtalk/w/ route.

    Sending is identical for every route, so this delegates to ``alert_data``
    instead of keeping a second copy of the same request code.

    Args:
        data_w: Markdown text of the message, in the form ``alert_data``
            accepts.
        title_w: Title of the markdown message.
        profile_url_w: DingTalk robot webhook URL to post to.

    Returns:
        The body of the HTTP response as text.
    """
    return alert_data(data_w, title_w, profile_url_w)

if __name__ == '__main__':
    app.debug = False
    # Listen on all interfaces; 8080 is the containerPort declared in the
    # Deployment shown later in this document.
    app.run(host='0.0.0.0', port=8080)

重新编译镜像即可。

注:此次还增加了对多个钉钉机器人报警的支持。我这里只配置了两个路由:/dingtalk/k8s/ 和 /dingtalk/w/,大家可根据需要增减或修改。

k8s的alertmanager配置文件如下:

# Secret holding the Alertmanager configuration (key: alertmanager.yaml).
apiVersion: v1
data: {}
kind: Secret
metadata:
  name: alertmanager-main
  namespace: monitoring
stringData:
  alertmanager.yaml: |-
    global:
      resolve_timeout: 5m #超时,默认5min
      #邮箱smtp服务
      smtp_smarthost: 'smtp.qiye.aliyun.com:465'
      smtp_from: 'monitor-admin@abc.net'
      smtp_auth_username: 'monitor-admin@abc.net'
      smtp_auth_password: 'abc@2015'
      smtp_require_tls: false
      smtp_hello: 'abc.net'
    #路由
    route:
      receiver: mail
      group_by: ["instance"] # 分组名
      group_wait: 30s # 当收到告警的时候,等待三十秒看是否还有告警,如果有就一起发出去
      group_interval: 1m # 发送警告间隔时间
      repeat_interval: 1m # 重复报警的间隔时间
      routes:
      - receiver: dingding # 全局报警组,这个参数是必选的,和下面报警组名要相同
        group_wait: 10s
        match:
          namespace: wy
    receivers:
    - name: 'mail' # 报警组名
      email_configs:
      - to: 'abc@abc.net' # 发送给谁
    - name: 'dingding'
      webhook_configs:
      - url: 'http://webhook-dingtalk/dingtalk/k8s/' #这里也可以选择/w/那个,这样就可以发送给不同的接收钉钉机器人了。
        send_resolved: true
type: Opaque

    dingding.yaml文件如下:

# Deployment running the Flask webhook (app.py) that relays alerts to DingTalk.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: webhook-dingtalk
  name: webhook-dingtalk
  # Must be in the same namespace as Alertmanager.
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: webhook-dingtalk
  template:
    metadata:
      labels:
        app: webhook-dingtalk
    spec:
      containers:
      - image: di:v10
        name: webhook-dingtalk
        # IfNotPresent is an image pull policy, not the name of a pull Secret,
        # so it belongs here rather than under imagePullSecrets.
        imagePullPolicy: IfNotPresent
        # Positional arguments read by app.py (the DingTalk robot webhooks
        # created earlier).
        args:
        # First argument: webhook used by the /dingtalk/k8s/ route.
        - "https://oapi.dingtalk.com/robot/send?access_token=xxxx"
        # Second argument: webhook used by the /dingtalk/w/ route.
        - "https://oapi.dingtalk.com/robot/send?access_token=yyyy"
        ports:
        - containerPort: 8080
          protocol: TCP
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
          limits:
            cpu: 500m
            memory: 500Mi
        livenessProbe:
          failureThreshold: 3
          initialDelaySeconds: 30
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
          tcpSocket:
            port: 8080
        readinessProbe:
          failureThreshold: 3
          initialDelaySeconds: 30
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
          httpGet:
            port: 8080
            path: /
---
# Service exposing the webhook inside the cluster as http://webhook-dingtalk/.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: webhook-dingtalk
  name: webhook-dingtalk
  # Must be in the same namespace as Alertmanager.
  namespace: monitoring
spec:
  ports:
  - name: http
    port: 80
    protocol: TCP
    targetPort: 8080
  selector:
    app: webhook-dingtalk
  type: ClusterIP

更多kubernetes相关知识请移步[https://www.wangshuying.cn]

0

精彩评论

暂无评论...
验证码 换一张
取 消