You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

120 lines
2.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package judge
import (
"fmt"
"os"
"sync"
"time"
"github.com/didi/nightingale/v5/models"
"github.com/toolkits/pkg/logger"
)
// rule_id -> hash_id -> *models.AlertEvent
type SafeEventMap struct {
sync.RWMutex
M map[int64]map[string]*models.AlertEvent
}
var (
LastEvents = &SafeEventMap{M: make(map[int64]map[string]*models.AlertEvent)}
)
func (s *SafeEventMap) Get(ruleId int64, hashId string) (*models.AlertEvent, bool) {
s.RLock()
defer s.RUnlock()
m, has := s.M[ruleId]
if !has {
return nil, false
}
event, has := m[hashId]
return event, has
}
func (s *SafeEventMap) Set(event *models.AlertEvent) {
s.Lock()
defer s.Unlock()
_, has := s.M[event.RuleId]
if !has {
m := make(map[string]*models.AlertEvent)
m[event.HashId] = event
s.M[event.RuleId] = m
} else {
s.M[event.RuleId][event.HashId] = event
}
}
func (s *SafeEventMap) Init() {
aes, err := models.AlertEventGetAll()
if err != nil {
fmt.Println("load all alert_event fail:", err)
os.Exit(1)
}
if len(aes) == 0 {
return
}
data := make(map[int64]map[string]*models.AlertEvent)
for i := 0; i < len(aes); i++ {
event := aes[i]
_, has := data[event.RuleId]
if !has {
m := make(map[string]*models.AlertEvent)
m[event.HashId] = event
data[event.RuleId] = m
} else {
data[event.RuleId][event.HashId] = event
}
}
s.Lock()
s.M = data
s.Unlock()
}
func (s *SafeEventMap) Del(ruleId int64, hashId string) {
s.Lock()
defer s.Unlock()
_, has := s.M[ruleId]
if !has {
return
}
delete(s.M[ruleId], hashId)
}
func (s *SafeEventMap) DeleteOrSendRecovery(ruleId int64, toKeepKeys map[string]struct{}) {
s.Lock()
defer s.Unlock()
m, has := s.M[ruleId]
if !has {
return
}
for k, ev := range m {
if _, loaded := toKeepKeys[k]; loaded {
continue
}
// 如果因为promql修改导致本来是告警状态变成了恢复也接受
logger.Debugf("[to_del][ev.IsRecovery:%+v][ev.LastSend:%+v]", ev.IsRecovery, ev.LastSend)
// promql 没查询到结果,需要将告警标记为已恢复并发送
// 同时需要满足 已经发送过触发信息,并且时间差满足 大于AlertDuration
// 为了避免 发送告警后 一个点 断点了就立即发送恢复信息的case
now := time.Now().Unix()
if ev.IsAlert() && ev.LastSend && now-ev.TriggerTime > ev.AlertDuration {
logger.Debugf("[prom.alert.MarkRecov][ev.RuleName:%v]", ev.RuleName)
ev.MarkRecov()
EventQueue.PushFront(ev)
delete(s.M[ruleId], k)
}
}
}