This commit is contained in:
2025-11-16 12:36:02 +09:00
parent 3a25e6a4cb
commit eb3f3807fd
61 changed files with 1438 additions and 1139 deletions

67
monitoring/alerts.yml Normal file
View File

@@ -0,0 +1,67 @@
# Alerting rules for monitoring the Lottery Bot
groups:
- name: lottery_bot_alerts
  rules:
  # Service availability: fires once the `up` series for the "lottery-bot"
  # scrape job has stayed at 0 for a full minute.
  - alert: LotteryBotDown
    expr: 'up{job="lottery-bot"} == 0'
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: 'Lottery Bot is down'
      description: 'Lottery Bot has been down for more than 1 minute.'
  # Memory pressure: fires when resident memory divided by the maximum
  # virtual memory stays above 0.9 for five minutes.
  # NOTE(review): process_virtual_memory_max_bytes looks like the virtual
  # address-space limit rather than a RAM limit, and no job selector is
  # applied, so every scraped process is evaluated - confirm both are
  # intended.
  - alert: HighMemoryUsage
    expr: '(process_resident_memory_bytes / process_virtual_memory_max_bytes) > 0.9'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'High memory usage detected'
      description: 'Memory usage is above 90% for more than 5 minutes.'
- name: database_alerts
  rules:
  # PostgreSQL availability: fires once the `up` series for the "postgres"
  # scrape job has stayed at 0 for two minutes.
  - alert: PostgreSQLDown
    expr: 'up{job="postgres"} == 0'
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'PostgreSQL is down'
      description: 'PostgreSQL database has been down for more than 2 minutes.'
  # Redis availability: same check against the "redis" scrape job, raised
  # at warning severity instead of critical.
  - alert: RedisDown
    expr: 'up{job="redis"} == 0'
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: 'Redis is down'
      description: 'Redis cache has been down for more than 2 minutes.'
- name: performance_alerts
  rules:
  # CPU load: the 5-minute rate of process_cpu_seconds_total is CPU-seconds
  # consumed per second, so multiplying by 100 expresses it as a percentage
  # of one core. Must hold above 80 for ten minutes.
  # NOTE(review): no job selector, so every scraped process is evaluated -
  # confirm this is intended.
  - alert: HighCPUUsage
    expr: 'rate(process_cpu_seconds_total[5m]) * 100 > 80'
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'High CPU usage detected'
      description: 'CPU usage is above 80% for more than 10 minutes.'
  # Error volume: fires when the 5-minute per-second rate of
  # lottery_bot_errors_total stays above 0.1 for three minutes.
  - alert: HighErrorRate
    expr: 'rate(lottery_bot_errors_total[5m]) > 0.1'
    for: 3m
    labels:
      severity: warning
    annotations:
      summary: 'High error rate detected'
      description: 'Error rate is above 0.1 errors per second for more than 3 minutes.'