Files
new_lottery_bot/monitoring/alerts.yml
2025-11-16 12:36:02 +09:00

67 lines
2.3 KiB
YAML

# Alerting rules for Lottery Bot monitoring.
groups:
# Availability and memory alerts for the Lottery Bot process itself.
- name: lottery_bot_alerts
  rules:
  # Raised once `up` for the `lottery-bot` scrape job has been 0 for 1 minute.
  - alert: LotteryBotDown
    expr: up{job="lottery-bot"} == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: 'Lottery Bot is down'
      description: 'Lottery Bot has been down for more than 1 minute.'
  # Raised when resident memory exceeds 90% of the virtual-memory limit for
  # 5 minutes. Both operands are scoped to the `lottery-bot` job so that other
  # scraped processes cannot trigger this bot-specific alert.
  # NOTE(review): the denominator is the process's virtual address-space
  # limit, not a RAM or container limit. If that limit is unset, or the bot's
  # client library does not export the metric, this rule can never fire.
  # TODO confirm which memory-limit metric is actually intended.
  - alert: HighMemoryUsage
    expr: >-
      (process_resident_memory_bytes{job="lottery-bot"}
      / process_virtual_memory_max_bytes{job="lottery-bot"}) > 0.9
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'High memory usage detected'
      description: 'Memory usage is above 90% for more than 5 minutes.'
# Availability alerts for the PostgreSQL and Redis scrape jobs.
- name: database_alerts
  rules:
  # Raised once `up` for the `postgres` scrape job has been 0 for 2 minutes.
  - alert: PostgreSQLDown
    expr: up{job="postgres"} == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'PostgreSQL is down'
      description: 'PostgreSQL database has been down for more than 2 minutes.'
  # Raised once `up` for the `redis` scrape job has been 0 for 2 minutes.
  # Lower severity than PostgreSQLDown (warning rather than critical).
  - alert: RedisDown
    expr: up{job="redis"} == 0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: 'Redis is down'
      description: 'Redis cache has been down for more than 2 minutes.'
# CPU and application error-rate alerts.
- name: performance_alerts
  rules:
  # Raised when the 5-minute per-second rate of process_cpu_seconds_total,
  # scaled by 100, stays above 80 for 10 minutes.
  # NOTE(review): the selector has no job matcher, so the rule is evaluated
  # for every target exposing this metric. Confirm that is intended.
  - alert: HighCPUUsage
    expr: rate(process_cpu_seconds_total[5m]) * 100 > 80
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'High CPU usage detected'
      description: 'CPU usage is above 80% for more than 10 minutes.'
  # Raised when the 5-minute per-second rate of lottery_bot_errors_total
  # stays above 0.1 for 3 minutes.
  - alert: HighErrorRate
    expr: rate(lottery_bot_errors_total[5m]) > 0.1
    for: 3m
    labels:
      severity: warning
    annotations:
      summary: 'High error rate detected'
      description: 'Error rate is above 0.1 errors per second for more than 3 minutes.'