Files
drivers_bot/app/services/calculations.py
2026-05-14 19:33:25 +09:00

393 lines
16 KiB
Python

import calendar
from datetime import date, timedelta
from decimal import Decimal
import pandas as pd
from sqlalchemy import Select, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.car import Car
from app.models.expense import ExpenseCategory, ExpenseEntry, FuelEntry, ServiceEntry
from app.schemas.expense import OdometerPrediction, OwnershipStats
async def get_ownership_stats(
    session: AsyncSession, car_id: int, date_from: date, date_to: date
) -> OwnershipStats:
    """Build the cost-of-ownership summary for one car over an inclusive date range.

    Combines fuel and service totals, expenses allocated to the period and,
    when the ``Car`` row exists, depreciation. From those it derives the
    distance driven, cost per kilometre, cost per month, the recurring vs.
    one-time split and a next-month forecast.

    Args:
        session: Async SQLAlchemy session used for every query.
        car_id: Identifier of the car to report on.
        date_from: First day of the reporting period (inclusive).
        date_to: Last day of the reporting period (inclusive).

    Returns:
        An ``OwnershipStats`` instance populated with the aggregated figures.
    """
    fuel_row = await session.execute(
        select(
            func.coalesce(func.sum(FuelEntry.total_cost), 0),
            func.coalesce(func.sum(FuelEntry.liters), 0),
            func.count(FuelEntry.id),
            func.min(FuelEntry.odometer),
            func.max(FuelEntry.odometer),
        ).where(
            FuelEntry.car_id == car_id,
            FuelEntry.entry_date >= date_from,
            FuelEntry.entry_date <= date_to,
        )
    )
    fuel_cost, liters, fuel_count, fuel_min_odo, fuel_max_odo = fuel_row.one()

    # SQL MIN/MAX ignore NULLs, so the odometer bounds can share the query
    # that computes the service totals instead of needing a second round trip.
    service_row = await session.execute(
        select(
            func.coalesce(func.sum(ServiceEntry.total_cost), 0),
            func.count(ServiceEntry.id),
            func.min(ServiceEntry.odometer),
            func.max(ServiceEntry.odometer),
        ).where(
            ServiceEntry.car_id == car_id,
            ServiceEntry.entry_date >= date_from,
            ServiceEntry.entry_date <= date_to,
        )
    )
    service_cost, service_count, service_min_odo, service_max_odo = service_row.one()

    expense_odo_row = await session.execute(
        select(func.min(ExpenseEntry.odometer), func.max(ExpenseEntry.odometer)).where(
            ExpenseEntry.car_id == car_id,
            ExpenseEntry.odometer.is_not(None),
            ExpenseEntry.entry_date >= date_from,
            ExpenseEntry.entry_date <= date_to,
        )
    )
    expense_min_odo, expense_max_odo = expense_odo_row.one()

    # Distance is the spread between the lowest and highest odometer reading
    # recorded by any entry type inside the period.
    odometer_values = [
        value
        for value in (
            fuel_min_odo,
            fuel_max_odo,
            service_min_odo,
            service_max_odo,
            expense_min_odo,
            expense_max_odo,
        )
        if value is not None
    ]
    distance_km = (
        int(max(odometer_values) - min(odometer_values)) if len(odometer_values) >= 2 else 0
    )

    expense_cost, recurring_cost, _expense_count, expense_categories = await expense_period_totals(
        session, car_id, date_from, date_to
    )
    car = await session.get(Car, car_id)
    depreciation_cost = calculate_depreciation(car, date_from, date_to) if car else Decimal("0")

    total_cost = Decimal(fuel_cost) + Decimal(service_cost) + expense_cost + depreciation_cost
    avg_consumption = await full_tank_consumption(session, car_id, date_from, date_to)
    cost_per_km = float(total_cost / distance_km) if distance_km else None

    # Average month length in days; the floor keeps one-day periods from
    # producing a divisor smaller than roughly one day.
    months = max(Decimal(period_days(date_from, date_to)) / Decimal("30.4375"), Decimal("0.033"))
    cent = Decimal("0.01")
    cost_per_month = (total_cost / months).quantize(cent)
    recurring_total = (recurring_cost + depreciation_cost).quantize(cent)
    one_time_costs = max(total_cost - recurring_total, Decimal("0")).quantize(cent)
    recurring_monthly = (recurring_total / months).quantize(cent)
    forecast_next_month = max(cost_per_month, recurring_monthly).quantize(cent)

    # Merge additively so an expense category that happens to share a key with
    # "fuel", "service" or "depreciation" adds to that bucket instead of
    # replacing it.
    cost_by_category: dict[str, Decimal] = {
        "fuel": Decimal(fuel_cost),
        "service": Decimal(service_cost),
    }
    for category, amount in expense_categories.items():
        cost_by_category[category] = cost_by_category.get(category, Decimal("0")) + amount
    if depreciation_cost:
        cost_by_category["depreciation"] = (
            cost_by_category.get("depreciation", Decimal("0")) + depreciation_cost
        )

    # Entry counts are known for fuel and service; expense_period_totals only
    # reports an overall count, so expense categories keep the previous 0.
    known_counts = {"fuel": fuel_count, "service": service_count}
    categories = [
        {"category": key, "total_cost": value, "entries_count": known_counts.get(key, 0)}
        for key, value in sorted(cost_by_category.items())
        if value
    ]

    return OwnershipStats(
        car_id=car_id,
        date_from=date_from,
        date_to=date_to,
        fuel_cost=fuel_cost,
        service_cost=service_cost,
        expenses_cost=expense_cost,
        total_cost=total_cost,
        recurring_costs=recurring_total,
        one_time_costs=one_time_costs,
        forecast_next_month=forecast_next_month,
        depreciation_cost=depreciation_cost,
        cost_per_month=cost_per_month,
        cost_by_category=cost_by_category,
        categories=categories,
        liters=liters,
        distance_km=distance_km,
        avg_consumption_l_per_100km=avg_consumption,
        cost_per_km=cost_per_km,
        fuel_entries_count=fuel_count,
        service_entries_count=service_count,
    )
def period_days(date_from: date, date_to: date) -> int:
    """Return the inclusive number of days from *date_from* to *date_to*.

    The result is never below 1, including when *date_to* precedes *date_from*.
    """
    inclusive_span = (date_to - date_from).days + 1
    return inclusive_span if inclusive_span > 1 else 1
def add_months(value: date, months: int) -> date:
    """Shift *value* by *months* calendar months (negative values go back).

    The day of month is clamped to the last day of the target month, so
    31 January plus one month yields the last day of February.
    """
    # Work on a zero-based month index so divmod yields the year carry directly.
    year_shift, month_index = divmod(value.month - 1 + months, 12)
    target_year = value.year + year_shift
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return date(target_year, target_month, min(value.day, last_day))
def overlap_days(left_start: date, left_end: date, right_start: date, right_end: date) -> int:
    """Return how many days two inclusive date ranges share (0 when disjoint)."""
    latest_start = max(left_start, right_start)
    earliest_end = min(left_end, right_end)
    # The ranges intersect only when the later start does not pass the earlier end.
    return period_days(latest_start, earliest_end) if latest_start <= earliest_end else 0
def expense_window(entry: ExpenseEntry) -> tuple[date, date]:
    """Return the inclusive (start, end) dates over which an expense is spread.

    Precedence: an explicit ``period_start``/``period_end`` pair wins; otherwise
    ``period_months`` is counted from ``period_start`` (or from ``entry_date``
    when no start is set); with neither, the window is the entry date itself.
    """
    if entry.period_start and entry.period_end:
        return entry.period_start, entry.period_end
    if not entry.period_months:
        return entry.entry_date, entry.entry_date
    window_start = entry.period_start or entry.entry_date
    # The window ends the day before the same day-of-month N months later.
    window_end = add_months(window_start, entry.period_months) - timedelta(days=1)
    return window_start, window_end
def allocated_expense_cost(entry: ExpenseEntry, date_from: date, date_to: date) -> Decimal:
    """Return the share of an expense that falls inside the given period.

    The cost is prorated by days: overlap between the expense window and the
    period, divided by the window length. A single-day window that starts on
    the entry date is returned in full (unrounded); prorated amounts are
    rounded to cents. Returns ``Decimal("0")`` when there is no overlap.
    """
    window_start, window_end = expense_window(entry)
    days_in_period = overlap_days(window_start, window_end, date_from, date_to)
    if days_in_period <= 0:
        return Decimal("0")

    full_cost = Decimal(entry.total_cost)
    days_in_window = period_days(window_start, window_end)
    if days_in_window <= 1 and window_start == entry.entry_date:
        return full_cost

    prorated = full_cost * Decimal(days_in_period) / Decimal(days_in_window)
    return prorated.quantize(Decimal("0.01"))
async def expense_period_totals(
    session: AsyncSession, car_id: int, date_from: date, date_to: date
) -> tuple[Decimal, Decimal, int, dict[str, Decimal]]:
    """Sum the expenses of one car that are allocated to an inclusive period.

    Candidate rows are pre-filtered in SQL; the exact per-entry share is then
    computed with ``allocated_expense_cost`` and entries whose share is not
    positive are skipped.

    Args:
        session: Async SQLAlchemy session used for the query.
        car_id: Identifier of the car.
        date_from: First day of the period (inclusive).
        date_to: Last day of the period (inclusive).

    Returns:
        A tuple ``(total, recurring, count, categories)`` where ``total`` and
        ``recurring`` are rounded to cents, ``count`` is the number of entries
        that contributed a positive amount, and ``categories`` maps category
        name to its allocated amount.
    """
    # For entries spread via period_months the window end is not stored, so it
    # cannot be range-checked in SQL. Select every such entry whose window
    # starts on or before the period end; allocated_expense_cost() returns 0
    # for the ones that do not actually overlap, and those are skipped below.
    months_window_start = func.coalesce(ExpenseEntry.period_start, ExpenseEntry.entry_date)
    result = await session.execute(
        select(ExpenseEntry)
        .where(
            ExpenseEntry.car_id == car_id,
            or_(
                ExpenseEntry.entry_date.between(date_from, date_to),
                ExpenseEntry.period_start.between(date_from, date_to),
                ExpenseEntry.period_end.between(date_from, date_to),
                (ExpenseEntry.period_start <= date_from) & (ExpenseEntry.period_end >= date_to),
                ExpenseEntry.period_months.is_not(None) & (months_window_start <= date_to),
            ),
        )
        .order_by(ExpenseEntry.entry_date.asc(), ExpenseEntry.id.asc())
    )

    # Categories counted as recurring even when the entry is not flagged.
    always_recurring = {
        ExpenseCategory.insurance,
        ExpenseCategory.loan_payment,
        ExpenseCategory.loan_interest,
    }
    total = Decimal("0")
    recurring = Decimal("0")
    categories: dict[str, Decimal] = {}
    count = 0
    for entry in result.scalars():
        amount = allocated_expense_cost(entry, date_from, date_to)
        if amount <= 0:
            continue
        count += 1
        total += amount
        category = (
            entry.category.value
            if isinstance(entry.category, ExpenseCategory)
            else str(entry.category)
        )
        categories[category] = categories.get(category, Decimal("0")) + amount
        if entry.is_recurring or entry.category in always_recurring:
            recurring += amount
    return total.quantize(Decimal("0.01")), recurring.quantize(Decimal("0.01")), count, categories
def calculate_depreciation(
    car: Car, date_from: date, date_to: date, useful_life_months: int = 60
) -> Decimal:
    """Return the straight-line depreciation of *car* that falls inside a period.

    The purchase price is spread evenly, per day, over ``useful_life_months``
    starting at the purchase date; the result is the daily cost multiplied by
    the number of days the period overlaps that depreciation window.

    Args:
        car: Car whose ``include_depreciation``, ``purchase_price`` and
            ``purchase_date`` attributes are read.
        date_from: First day of the period (inclusive).
        date_to: Last day of the period (inclusive).
        useful_life_months: Length of the depreciation window in months.
            Defaults to 60, the value previously hard-coded here.

    Returns:
        The depreciation amount rounded to cents, or ``Decimal("0")`` when
        depreciation is disabled, purchase data is missing, the useful life is
        not positive, or the period does not overlap the depreciation window.
    """
    if not car.include_depreciation or not car.purchase_price or not car.purchase_date:
        return Decimal("0")
    if useful_life_months <= 0:
        return Decimal("0")

    depreciation_start = car.purchase_date
    depreciation_end = add_months(car.purchase_date, useful_life_months) - timedelta(days=1)
    matched_days = overlap_days(depreciation_start, depreciation_end, date_from, date_to)
    if matched_days <= 0:
        return Decimal("0")

    daily_cost = Decimal(car.purchase_price) / Decimal(
        period_days(depreciation_start, depreciation_end)
    )
    return (daily_cost * Decimal(matched_days)).quantize(Decimal("0.01"))
async def full_tank_consumption(
    session: AsyncSession, car_id: int, date_from: date, date_to: date
) -> float | None:
    """Estimate average consumption (litres per 100 km) using full-tank fill-ups.

    Each pair of consecutive full-tank entries forms a leg: the distance is
    the odometer difference and the fuel is every fill-up after the first
    full tank up to and including the second. Legs that close before
    *date_from*, cover no positive distance, or contain no fuel are ignored.

    Returns:
        The litres-per-100-km figure, or ``None`` when fewer than two
        full-tank entries exist or no leg qualifies.
    """
    query = (
        select(FuelEntry)
        .where(
            FuelEntry.car_id == car_id,
            FuelEntry.entry_date <= date_to,
        )
        .order_by(FuelEntry.entry_date.asc(), FuelEntry.odometer.asc(), FuelEntry.id.asc())
    )
    fills = list((await session.execute(query)).scalars())

    full_positions = [position for position, fill in enumerate(fills) if fill.is_full_tank]
    if len(full_positions) < 2:
        return None

    liters_burned = Decimal("0")
    km_driven = 0
    # Walk consecutive full-tank pairs; every leg starts where the previous one ended.
    for start_pos, end_pos in zip(full_positions, full_positions[1:]):
        opening = fills[start_pos]
        closing = fills[end_pos]
        if closing.entry_date < date_from:
            continue
        leg_km = closing.odometer - opening.odometer
        if leg_km <= 0:
            continue
        leg_liters = sum(Decimal(fill.liters) for fill in fills[start_pos + 1 : end_pos + 1])
        if leg_liters <= 0:
            continue
        liters_burned += leg_liters
        km_driven += leg_km

    if km_driven <= 0 or liters_burned <= 0:
        return None
    return float(liters_burned * Decimal(100) / Decimal(km_driven))
async def dataframe_from_query(session: AsyncSession, stmt: Select) -> pd.DataFrame:
    """Execute *stmt* on *session* and load the mapped result rows into a DataFrame."""
    executed = await session.execute(stmt)
    return pd.DataFrame(executed.mappings().all())
async def predict_odometer(session: AsyncSession, car_id: int) -> OdometerPrediction:
    """Forecast the odometer reading of a car from its fuel and service history.

    Odometer points from fuel and service entries are merged, cleaned
    (non-numeric, negative and decreasing readings dropped, one reading kept
    per date) and turned into per-interval km/day rates. The daily rate is a
    70/30 blend of a recency-weighted mean and the median of the last six
    intervals. The fuel-price prediction from ``predict_fuel_price`` is
    embedded in the returned object.

    Args:
        session: Async SQLAlchemy session used for the queries.
        car_id: Identifier of the car.

    Returns:
        An ``OdometerPrediction``. With no usable points all numeric fields
        are ``None``; with a single point only the current reading is set.
    """
    price_prediction = await predict_fuel_price(session, car_id)
    fuel = await dataframe_from_query(
        session,
        select(FuelEntry.entry_date.label("date"), FuelEntry.odometer.label("odometer")).where(
            FuelEntry.car_id == car_id
        ),
    )
    service = await dataframe_from_query(
        session,
        select(ServiceEntry.entry_date.label("date"), ServiceEntry.odometer.label("odometer")).where(
            ServiceEntry.car_id == car_id, ServiceEntry.odometer.is_not(None)
        ),
    )

    # Only concatenate frames that actually hold rows.
    frames = [frame for frame in (fuel, service) if not frame.empty]
    if frames:
        df = pd.concat(frames, ignore_index=True)
        # Coerce to numeric so non-numeric values become NaN and are dropped.
        df["odometer"] = pd.to_numeric(df["odometer"], errors="coerce")
        df = df.dropna().drop_duplicates().sort_values("date")
        df["date"] = pd.to_datetime(df["date"])
        df = df[df["odometer"] >= 0]
        # Keep the highest reading per date, then drop readings that go backwards.
        df = df.sort_values(["date", "odometer"]).drop_duplicates(subset=["date"], keep="last")
        df = df[df["odometer"].diff().fillna(0) >= 0].copy()
    else:
        df = pd.DataFrame(columns=["date", "odometer"])

    # Cleaning can remove every row (e.g. all readings null or negative), so
    # check the cleaned frame rather than the raw query results before
    # touching df.iloc[-1].
    if df.empty:
        return OdometerPrediction(
            car_id=car_id,
            samples=0,
            current_odometer=None,
            predicted_today=None,
            predicted_30_days=None,
            avg_km_per_day=None,
            avg_km_per_month=None,
            **price_prediction,
            confidence=0,
            insight="Недостаточно данных: добавь одометр в заправках или сервисных записях.",
        )

    if len(df) < 2:
        current = int(df.iloc[-1]["odometer"])
        return OdometerPrediction(
            car_id=car_id,
            samples=len(df),
            current_odometer=current,
            predicted_today=current,
            predicted_30_days=None,
            avg_km_per_day=None,
            avg_km_per_month=None,
            **price_prediction,
            confidence=0.2,
            insight="Есть только одна точка пробега. Для прогноза нужны минимум две записи.",
        )

    last = df.iloc[-1]
    df["days_delta"] = df["date"].diff().dt.days
    df["km_delta"] = df["odometer"].diff()
    intervals = df[(df["days_delta"] > 0) & (df["km_delta"] >= 0)].copy()
    intervals["km_per_day"] = intervals["km_delta"] / intervals["days_delta"]
    # Discard intervals with more than 500 km/day.
    intervals = intervals[(intervals["km_per_day"] >= 0) & (intervals["km_per_day"] <= 500)]

    if intervals.empty:
        km_per_day = 0
    else:
        recent = intervals.tail(6).copy()
        # Linearly increasing weights favour the most recent intervals.
        recent["weight"] = range(1, len(recent) + 1)
        weighted = (recent["km_per_day"] * recent["weight"]).sum() / recent["weight"].sum()
        median = recent["km_per_day"].median()
        km_per_day = float((weighted * 0.7) + (median * 0.3))

    # Naive UTC midnight; Timestamp.utcnow() is deprecated in recent pandas.
    today = pd.Timestamp.now(tz="UTC").tz_localize(None).normalize()
    days_since_last = max((today - last["date"]).days, 0)
    predicted_today = int(last["odometer"] + km_per_day * days_since_last)
    predicted_30 = int(predicted_today + km_per_day * 30)

    span_days = max((last["date"] - df.iloc[0]["date"]).days, 1)
    interval_count = len(intervals)
    variability = 0 if interval_count < 3 or km_per_day == 0 else min(
        float(intervals["km_per_day"].std() / max(km_per_day, 1)),
        1,
    )
    # Confidence grows with interval count and history span, shrinks with
    # variability, and is clamped to [0.25, 0.95].
    confidence = min(
        0.95,
        max(0.25, 0.3 + interval_count * 0.055 + min(span_days, 365) / 900 - variability * 0.18),
    )
    insight = (
        "Пробег стабилен, прогноз надежный."
        if confidence >= 0.75
        else "Прогноз предварительный: точность вырастет после регулярных записей одометра."
    )
    return OdometerPrediction(
        car_id=car_id,
        samples=len(df),
        current_odometer=int(last["odometer"]),
        predicted_today=predicted_today,
        predicted_30_days=predicted_30,
        avg_km_per_day=round(km_per_day, 1),
        avg_km_per_month=round(km_per_day * 30.4, 1),
        **price_prediction,
        confidence=round(confidence, 2),
        insight=insight,
    )
async def predict_fuel_price(session: AsyncSession, car_id: int) -> dict[str, float | int | None]:
    """Estimate the fuel price 30 days ahead from a car's fuel entries.

    Uses the eight most recent entries with a price strictly between 0 and
    10000. The prediction extrapolates the first-to-last price change over
    30 days and blends it 65/35 with the mean of those entries.

    Returns:
        A dict with ``current_price_per_liter``,
        ``predicted_price_per_liter_30_days``, ``avg_price_per_liter``,
        ``price_samples`` (all valid rows, not only the recent ones) and
        ``price_confidence``. With no usable rows the prices are ``None`` and
        the counters are 0.
    """
    no_data = {
        "current_price_per_liter": None,
        "predicted_price_per_liter_30_days": None,
        "avg_price_per_liter": None,
        "price_samples": 0,
        "price_confidence": 0,
    }
    raw = await dataframe_from_query(
        session,
        select(
            FuelEntry.entry_date.label("date"),
            FuelEntry.price_per_liter.label("price"),
        ).where(FuelEntry.car_id == car_id),
    )
    if raw.empty:
        return no_data
    prices = raw.dropna().copy()
    if prices.empty:
        return no_data

    prices["date"] = pd.to_datetime(prices["date"])
    prices["price"] = pd.to_numeric(prices["price"], errors="coerce")
    in_range = (prices["price"] > 0) & (prices["price"] < 10000)
    prices = prices[in_range].sort_values("date")
    if prices.empty:
        return no_data

    window = prices.tail(8).copy()
    newest = window.iloc[-1]
    latest_price = float(newest["price"])
    mean_price = float(window["price"].mean())
    forecast = latest_price
    score = min(0.72, 0.22 + len(window) * 0.055)

    if len(window) >= 2:
        oldest = window.iloc[0]
        span_days = max((newest["date"] - oldest["date"]).days, 1)
        daily_change = float((newest["price"] - oldest["price"]) / span_days)
        forecast = latest_price + daily_change * 30
        # Pull the extrapolation back towards the recent mean.
        forecast = (forecast * 0.65) + (mean_price * 0.35)
        if len(window) >= 3:
            volatility = float(window["price"].std() / max(mean_price, 1))
        else:
            volatility = 0
        score = min(0.9, max(0.3, score + min(span_days, 180) / 600 - volatility))

    return {
        "current_price_per_liter": round(latest_price, 2),
        "predicted_price_per_liter_30_days": round(max(forecast, 0), 2),
        "avg_price_per_liter": round(mean_price, 2),
        "price_samples": int(len(prices)),
        "price_confidence": round(score, 2),
    }