refactor
This commit is contained in:
67
monitoring/alerts.yml
Normal file
67
monitoring/alerts.yml
Normal file
@@ -0,0 +1,67 @@
|
||||
# Alert rules for Lottery Bot monitoring.
groups:
- name: lottery_bot_alerts
  rules:
  # Fires when the lottery-bot scrape target has been unreachable
  # (up == 0) for a full minute.
  - alert: LotteryBotDown
    expr: up{job="lottery-bot"} == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: 'Lottery Bot is down'
      description: 'Lottery Bot has been down for more than 1 minute.'

  # Fires when host memory utilisation stays above 90% for five minutes.
  #
  # The earlier expression divided process_resident_memory_bytes by
  # process_virtual_memory_max_bytes. That denominator is the process
  # address-space limit, which is normally unlimited, so the ratio could
  # never approach 0.9 and the alert could not fire. Host-level memory from
  # node-exporter gives a real "percent used" figure.
  # NOTE(review): requires the node-exporter scrape job to be deployed.
  - alert: HighMemoryUsage
    expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) > 0.9
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'High memory usage detected'
      description: 'Memory usage is above 90% for more than 5 minutes.'
|
||||
|
||||
- name: database_alerts
  rules:
  # Fires when the postgres scrape target has reported up == 0 for
  # two minutes.
  - alert: PostgreSQLDown
    expr: 'up{job="postgres"} == 0'
    for: 2m
    labels: {severity: critical}
    annotations:
      summary: 'PostgreSQL is down'
      description: 'PostgreSQL database has been down for more than 2 minutes.'

  # Fires when the redis scrape target has reported up == 0 for
  # two minutes.
  - alert: RedisDown
    expr: 'up{job="redis"} == 0'
    for: 2m
    labels: {severity: warning}
    annotations:
      summary: 'Redis is down'
      description: 'Redis cache has been down for more than 2 minutes.'
|
||||
|
||||
- name: performance_alerts
  rules:
  # Fires when a process's CPU-seconds-per-second rate (5m window),
  # scaled by 100, stays above 80 for ten minutes. The expression has no
  # job selector, so it is evaluated for every series of this metric.
  - alert: HighCPUUsage
    expr: 'rate(process_cpu_seconds_total[5m]) * 100 > 80'
    for: 10m
    labels: {severity: warning}
    annotations:
      summary: 'High CPU usage detected'
      description: 'CPU usage is above 80% for more than 10 minutes.'

  # Fires when the bot's error counter grows faster than 0.1 per second
  # (5m window) for three minutes.
  - alert: HighErrorRate
    expr: 'rate(lottery_bot_errors_total[5m]) > 0.1'
    for: 3m
    labels: {severity: warning}
    annotations:
      summary: 'High error rate detected'
      description: 'Error rate is above 0.1 errors per second for more than 3 minutes.'
|
||||
48
monitoring/prometheus.yml
Normal file
48
monitoring/prometheus.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
# Prometheus configuration for monitoring the Lottery Bot.
global:
  # How often rule expressions are evaluated.
  evaluation_interval: 15s
  # Default interval between scrapes of each target.
  scrape_interval: 15s
|
||||
|
||||
# Alert rule files to load.
rule_files:
- 'alerts.yml'
|
||||
|
||||
# Alertmanager settings.
alerting:
  alertmanagers:
  - static_configs:
    # No Alertmanager is wired up yet. The target list is written as an
    # explicit empty list: a bare `targets:` key followed only by a comment
    # parses as null, which hides the intent. To enable alert delivery,
    # replace [] with a list containing the commented entry below.
    - targets: []
      # - alertmanager:9093
|
||||
|
||||
# Scrape targets.
scrape_configs:
# Prometheus self-monitoring.
- job_name: prometheus
  scrape_interval: 5s
  static_configs:
  - targets:
    - 'localhost:9090'
|
||||
|
||||
# Lottery Bot monitoring (applies once the bot exposes metrics).
- job_name: lottery-bot
  metrics_path: /metrics
  scrape_interval: 10s
  static_configs:
  - targets:
    - 'lottery-bot:8000'
|
||||
|
||||
# PostgreSQL monitoring.
# Port 5432 is the database wire protocol, not an HTTP metrics endpoint, so
# scraping postgres:5432 always fails and up{job="postgres"} stays at 0.
# Scrape postgres_exporter instead (default port 9187).
# NOTE(review): assumes the exporter service is named "postgres-exporter";
# confirm against the deployment.
- job_name: 'postgres'
  static_configs:
  - targets: ['postgres-exporter:9187']
  scrape_interval: 30s
|
||||
|
||||
# Redis monitoring.
# Port 6379 is the Redis protocol port, not an HTTP metrics endpoint, so
# scraping redis:6379 always fails and up{job="redis"} stays at 0.
# Scrape redis_exporter instead (default port 9121).
# NOTE(review): assumes the exporter service is named "redis-exporter";
# confirm against the deployment.
- job_name: 'redis'
  static_configs:
  - targets: ['redis-exporter:9121']
  scrape_interval: 30s
|
||||
|
||||
# Node exporter (applies once the service is added).
- job_name: node-exporter
  scrape_interval: 15s
  static_configs:
  - targets:
    - 'node-exporter:9100'
|
||||
Reference in New Issue
Block a user