---
# Alerting rules for monitoring Lottery Bot.
#
# Three rule groups are defined:
#   * lottery_bot_alerts - bot availability and memory usage
#   * database_alerts    - PostgreSQL and Redis availability
#   * performance_alerts - CPU usage and application error rate

groups:
  - name: lottery_bot_alerts
    rules:
      # Fires when the "lottery-bot" scrape target has reported up == 0
      # continuously for 1 minute.
      # NOTE(review): `up == 0` returns no series when the target is missing
      # from the scrape configuration altogether; consider pairing it with
      # absent(up{job="lottery-bot"}) if that case must also alert.
      - alert: LotteryBotDown
        expr: up{job="lottery-bot"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Lottery Bot is down"
          description: "Lottery Bot has been down for more than 1 minute."

      # Fires when resident memory exceeds 90% of the virtual memory limit
      # for 5 minutes. The expression has no job selector, so it is evaluated
      # for every series that exposes both metrics.
      # NOTE(review): process_virtual_memory_max_bytes is the address-space
      # limit, which is often unlimited (or not exported by some client
      # libraries); confirm this ratio can actually reach 0.9 here.
      - alert: HighMemoryUsage
        expr: (process_resident_memory_bytes / process_virtual_memory_max_bytes) > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage detected"
          description: "Memory usage is above 90% for more than 5 minutes."

  - name: database_alerts
    rules:
      # Fires when the "postgres" scrape target has reported up == 0
      # continuously for 2 minutes.
      - alert: PostgreSQLDown
        expr: up{job="postgres"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "PostgreSQL is down"
          description: "PostgreSQL database has been down for more than 2 minutes."

      # Fires when the "redis" scrape target has reported up == 0
      # continuously for 2 minutes. Severity is warning, not critical.
      - alert: RedisDown
        expr: up{job="redis"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Redis is down"
          description: "Redis cache has been down for more than 2 minutes."

  - name: performance_alerts
    rules:
      # Fires when the 5-minute per-second rate of process CPU seconds,
      # scaled by 100, stays above 80 for 10 minutes. The expression has no
      # job selector, so it applies to every process exposing this counter.
      - alert: HighCPUUsage
        expr: rate(process_cpu_seconds_total[5m]) * 100 > 80
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage detected"
          description: "CPU usage is above 80% for more than 10 minutes."

      # Fires when the 5-minute rate of lottery_bot_errors_total stays above
      # 0.1 errors per second for 3 minutes.
      - alert: HighErrorRate
        expr: rate(lottery_bot_errors_total[5m]) > 0.1
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: "High error rate detected"
          description: "Error rate is above 0.1 errors per second for more than 3 minutes."