This commit is contained in:
2025-11-16 12:36:02 +09:00
parent 3a25e6a4cb
commit eb3f3807fd
61 changed files with 1438 additions and 1139 deletions

67
monitoring/alerts.yml Normal file
View File

@@ -0,0 +1,67 @@
# Alerting rules for Lottery Bot monitoring
groups:
- name: lottery_bot_alerts
  rules:
  # Service-down alert: the lottery-bot scrape target has reported up == 0
  # continuously for 1 minute.
  - alert: LotteryBotDown
    expr: 'up{job="lottery-bot"} == 0'
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: 'Lottery Bot is down'
      description: 'Lottery Bot has been down for more than 1 minute.'
  # High-memory alert: resident memory divided by the process virtual-memory
  # limit stays above 0.9 for 5 minutes.
  # NOTE(review): process_virtual_memory_max_bytes is the address-space limit,
  # which is often unlimited or not exported at all, so this ratio may never
  # exceed 0.9 - confirm which limit metric is intended. The expression also
  # has no job selector, so it is evaluated for every scraped process.
  - alert: HighMemoryUsage
    expr: '(process_resident_memory_bytes / process_virtual_memory_max_bytes) > 0.9'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'High memory usage detected'
      description: 'Memory usage is above 90% for more than 5 minutes.'
- name: database_alerts
  rules:
  # PostgreSQL unavailable: the `postgres` scrape job has reported up == 0
  # continuously for 2 minutes.
  - alert: PostgreSQLDown
    expr: 'up{job="postgres"} == 0'
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'PostgreSQL is down'
      description: 'PostgreSQL database has been down for more than 2 minutes.'
  # Redis unavailable: the `redis` scrape job has reported up == 0
  # continuously for 2 minutes. Lower severity than the PostgreSQL alert.
  - alert: RedisDown
    expr: 'up{job="redis"} == 0'
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: 'Redis is down'
      description: 'Redis cache has been down for more than 2 minutes.'
- name: performance_alerts
  rules:
  # High CPU load: the per-second rate of consumed CPU seconds, scaled by 100,
  # is a percentage of one core; alert when it stays above 80 for 10 minutes.
  # There is no job selector, so every series of this metric is evaluated.
  - alert: HighCPUUsage
    expr: 'rate(process_cpu_seconds_total[5m]) * 100 > 80'
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'High CPU usage detected'
      description: 'CPU usage is above 80% for more than 10 minutes.'
  # High error count: the 5-minute rate of lottery_bot_errors_total stays above
  # 0.1 per second for 3 minutes.
  # NOTE(review): presumably a counter exported by the bot itself - confirm the
  # metric name against the application code.
  - alert: HighErrorRate
    expr: 'rate(lottery_bot_errors_total[5m]) > 0.1'
    for: 3m
    labels:
      severity: warning
    annotations:
      summary: 'High error rate detected'
      description: 'Error rate is above 0.1 errors per second for more than 3 minutes.'

48
monitoring/prometheus.yml Normal file
View File

@@ -0,0 +1,48 @@
# Prometheus configuration for monitoring Lottery Bot
global:
  # How often rule expressions are evaluated.
  evaluation_interval: 15s
  # Default scrape period; individual jobs may set their own scrape_interval.
  scrape_interval: 15s
# Alerting rule files loaded by Prometheus
rule_files:
- 'alerts.yml'
# Alertmanager settings.
# No Alertmanager target is configured: the only entry below is commented out,
# so the target list is empty. Uncomment it to point at an Alertmanager.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093
# Scrape targets
scrape_configs:
# Prometheus self-monitoring; sets its own 5s scrape interval.
- job_name: "prometheus"
  scrape_interval: 5s
  static_configs:
  - targets: ["localhost:9090"]
# Lottery Bot monitoring (only useful once the bot exposes metrics).
# The job name must stay `lottery-bot`: alert expressions select on it.
- job_name: "lottery-bot"
  metrics_path: /metrics
  scrape_interval: 10s
  static_configs:
  - targets: ["lottery-bot:8000"]
# PostgreSQL monitoring.
# Prometheus scrapes HTTP metrics endpoints, but port 5432 speaks the
# PostgreSQL wire protocol, so scraping the database directly always fails and
# leaves up{job="postgres"} at 0. Scrape a postgres_exporter instead
# (9187 is its default port).
# NOTE(review): assumes the exporter is deployed and reachable as
# `postgres-exporter` - adjust the hostname to match your deployment.
- job_name: 'postgres'
  static_configs:
  - targets: ['postgres-exporter:9187']
  scrape_interval: 30s
# Redis monitoring.
# Prometheus scrapes HTTP metrics endpoints, but port 6379 speaks the Redis
# protocol, so scraping the server directly always fails and leaves
# up{job="redis"} at 0. Scrape a redis_exporter instead
# (9121 is its default port).
# NOTE(review): assumes the exporter is deployed and reachable as
# `redis-exporter` - adjust the hostname to match your deployment.
- job_name: 'redis'
  static_configs:
  - targets: ['redis-exporter:9121']
  scrape_interval: 30s
# Node exporter (only useful once a node-exporter service is added).
- job_name: "node-exporter"
  scrape_interval: 15s
  static_configs:
  - targets: ["node-exporter:9100"]