393 lines
16 KiB
Python
393 lines
16 KiB
Python
import calendar
|
|
from datetime import date, timedelta
|
|
from decimal import Decimal
|
|
|
|
import pandas as pd
|
|
from sqlalchemy import Select, func, or_, select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.models.car import Car
|
|
from app.models.expense import ExpenseCategory, ExpenseEntry, FuelEntry, ServiceEntry
|
|
from app.schemas.expense import OdometerPrediction, OwnershipStats
|
|
|
|
|
|
async def get_ownership_stats(
    session: AsyncSession, car_id: int, date_from: date, date_to: date
) -> OwnershipStats:
    """Aggregate ownership costs and usage figures for one car over a date range.

    Combines fuel and service totals, allocated expense costs, depreciation,
    the distance covered (from the lowest and highest odometer readings found
    in fuel, service and expense entries dated inside the range) and derived
    per-month / per-km figures into an ``OwnershipStats`` object.

    Args:
        session: Async database session used for every query.
        car_id: Identifier of the car to report on.
        date_from: First day of the reporting range (inclusive).
        date_to: Last day of the reporting range (inclusive).

    Returns:
        The populated ``OwnershipStats`` for the requested range.
    """
    cents = Decimal("0.01")
    zero = Decimal("0")

    # Fuel: summed cost and liters, entry count and odometer extremes.
    fuel_stmt = select(
        func.coalesce(func.sum(FuelEntry.total_cost), 0),
        func.coalesce(func.sum(FuelEntry.liters), 0),
        func.count(FuelEntry.id),
        func.min(FuelEntry.odometer),
        func.max(FuelEntry.odometer),
    ).where(
        FuelEntry.car_id == car_id,
        FuelEntry.entry_date >= date_from,
        FuelEntry.entry_date <= date_to,
    )
    fuel_row = (await session.execute(fuel_stmt)).one()
    fuel_cost, liters, fuel_count, fuel_min_odo, fuel_max_odo = fuel_row

    # Service: summed cost and entry count.
    service_stmt = select(
        func.coalesce(func.sum(ServiceEntry.total_cost), 0),
        func.count(ServiceEntry.id),
    ).where(
        ServiceEntry.car_id == car_id,
        ServiceEntry.entry_date >= date_from,
        ServiceEntry.entry_date <= date_to,
    )
    service_cost, service_count = (await session.execute(service_stmt)).one()

    # Odometer extremes from service and expense entries that carry a reading.
    service_odo_stmt = select(
        func.min(ServiceEntry.odometer), func.max(ServiceEntry.odometer)
    ).where(
        ServiceEntry.car_id == car_id,
        ServiceEntry.odometer.is_not(None),
        ServiceEntry.entry_date >= date_from,
        ServiceEntry.entry_date <= date_to,
    )
    expense_odo_stmt = select(
        func.min(ExpenseEntry.odometer), func.max(ExpenseEntry.odometer)
    ).where(
        ExpenseEntry.car_id == car_id,
        ExpenseEntry.odometer.is_not(None),
        ExpenseEntry.entry_date >= date_from,
        ExpenseEntry.entry_date <= date_to,
    )
    service_odo_result = await session.execute(service_odo_stmt)
    expense_odo_result = await session.execute(expense_odo_stmt)
    candidate_readings = (
        fuel_min_odo,
        fuel_max_odo,
        *service_odo_result.one(),
        *expense_odo_result.one(),
    )
    readings = [reading for reading in candidate_readings if reading is not None]
    # Distance is the spread between the extreme readings; needs at least two values.
    distance_km = int(max(readings) - min(readings)) if len(readings) >= 2 else 0

    expense_cost, recurring_cost, _expense_count, expense_categories = await expense_period_totals(
        session, car_id, date_from, date_to
    )
    car = await session.get(Car, car_id)
    if car:
        depreciation_cost = calculate_depreciation(car, date_from, date_to)
    else:
        depreciation_cost = zero

    fuel_total = Decimal(fuel_cost)
    service_total = Decimal(service_cost)
    total_cost = fuel_total + service_total + expense_cost + depreciation_cost
    avg_consumption = await full_tank_consumption(session, car_id, date_from, date_to)
    cost_per_km = float(total_cost / distance_km) if distance_km else None

    # Period length in months (30.4375 days = 365.25 / 12), floored at 0.033
    # so the divisor never gets smaller than that for very short ranges.
    months = max(Decimal(period_days(date_from, date_to)) / Decimal("30.4375"), Decimal("0.033"))
    cost_per_month = (total_cost / months).quantize(cents)
    recurring_total = (recurring_cost + depreciation_cost).quantize(cents)
    one_time_costs = max(total_cost - recurring_total, zero).quantize(cents)
    recurring_monthly = (recurring_total / months).quantize(cents)
    # The forecast is the larger of the overall and the recurring monthly cost.
    forecast_next_month = max(cost_per_month, recurring_monthly).quantize(cents)

    cost_by_category = {
        "fuel": fuel_total,
        "service": service_total,
        **expense_categories,
    }
    if depreciation_cost:
        cost_by_category["depreciation"] = depreciation_cost
    # Alphabetical list of the categories that have a non-zero total.
    categories = [
        {"category": name, "total_cost": cost_by_category[name], "entries_count": 0}
        for name in sorted(cost_by_category)
        if cost_by_category[name]
    ]

    return OwnershipStats(
        car_id=car_id,
        date_from=date_from,
        date_to=date_to,
        fuel_cost=fuel_cost,
        service_cost=service_cost,
        expenses_cost=expense_cost,
        total_cost=total_cost,
        recurring_costs=recurring_total,
        one_time_costs=one_time_costs,
        forecast_next_month=forecast_next_month,
        depreciation_cost=depreciation_cost,
        cost_per_month=cost_per_month,
        cost_by_category=cost_by_category,
        categories=categories,
        liters=liters,
        distance_km=distance_km,
        avg_consumption_l_per_100km=avg_consumption,
        cost_per_km=cost_per_km,
        fuel_entries_count=fuel_count,
        service_entries_count=service_count,
    )
|
|
|
|
|
|
def period_days(date_from: date, date_to: date) -> int:
    """Return the inclusive number of days from ``date_from`` to ``date_to``.

    The result is never less than 1: a reversed range is clamped to a single
    day instead of producing zero or a negative count.
    """
    inclusive_span = (date_to - date_from).days + 1
    return inclusive_span if inclusive_span > 1 else 1
|
|
|
|
|
|
def add_months(value: date, months: int) -> date:
    """Shift ``value`` by ``months`` calendar months (negative values go back).

    The day of month is preserved when it exists in the target month and is
    otherwise clamped to that month's last day, e.g. 31 January plus one
    month gives the last day of February.
    """
    # A zero-based month index lets divmod yield the year carry directly,
    # including for negative offsets (floor semantics).
    year_carry, month_index = divmod(value.month - 1 + months, 12)
    target_year = value.year + year_carry
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return date(target_year, target_month, min(value.day, last_day))
|
|
|
|
|
|
def overlap_days(left_start: date, left_end: date, right_start: date, right_end: date) -> int:
    """Count the days shared by two inclusive date ranges.

    Returns 0 when the ranges do not intersect; otherwise the inclusive
    length of the intersection as computed by ``period_days``.
    """
    latest_start = left_start if left_start >= right_start else right_start
    earliest_end = left_end if left_end <= right_end else right_end
    if latest_start > earliest_end:
        return 0
    return period_days(latest_start, earliest_end)
|
|
|
|
|
|
def expense_window(entry: ExpenseEntry) -> tuple[date, date]:
    """Return the inclusive ``(start, end)`` dates an expense is spread over.

    Resolution order:
      1. explicit ``period_start`` and ``period_end``;
      2. ``period_start`` extended by ``period_months``;
      3. ``entry_date`` extended by ``period_months``;
      4. otherwise the single day ``entry_date``.

    For the month-based cases the end is the day before the same day-of-month
    ``period_months`` later.
    """
    if entry.period_start and entry.period_end:
        return entry.period_start, entry.period_end
    if not entry.period_months:
        # No usable period information: the expense belongs to its own day.
        return entry.entry_date, entry.entry_date
    window_start = entry.period_start if entry.period_start else entry.entry_date
    window_end = add_months(window_start, entry.period_months) - timedelta(days=1)
    return window_start, window_end
|
|
|
|
|
|
def allocated_expense_cost(entry: ExpenseEntry, date_from: date, date_to: date) -> Decimal:
    """Return the part of an expense's cost that falls inside ``[date_from, date_to]``.

    The expense is spread evenly over the days of its window (see
    ``expense_window``) and the share matching the requested range is
    returned, rounded to two decimals. An expense whose window is a single
    day starting on its ``entry_date`` is returned in full without rounding.
    Returns ``Decimal("0")`` when the window and the range do not overlap.
    """
    window_start, window_end = expense_window(entry)
    days_in_range = overlap_days(window_start, window_end, date_from, date_to)
    if days_in_range <= 0:
        return Decimal("0")

    full_cost = Decimal(entry.total_cost)
    days_in_window = period_days(window_start, window_end)
    if days_in_window <= 1 and window_start == entry.entry_date:
        # One-off expense on its own day: nothing to prorate.
        return full_cost
    share = full_cost * Decimal(days_in_range) / Decimal(days_in_window)
    return share.quantize(Decimal("0.01"))
|
|
|
|
|
|
async def expense_period_totals(
    session: AsyncSession, car_id: int, date_from: date, date_to: date
) -> tuple[Decimal, Decimal, int, dict[str, Decimal]]:
    """Sum the expense costs allocated to ``[date_from, date_to]`` for one car.

    The SQL query only pre-selects candidate rows; the exact amount per entry
    is computed by ``allocated_expense_cost`` and entries that contribute
    nothing are skipped.

    Args:
        session: Async database session used for the query.
        car_id: Identifier of the car whose expenses are summed.
        date_from: First day of the range (inclusive).
        date_to: Last day of the range (inclusive).

    Returns:
        A tuple ``(total, recurring, count, categories)`` where ``total`` and
        ``recurring`` are rounded to two decimals, ``count`` is the number of
        entries with a positive allocated amount and ``categories`` maps each
        category name to its allocated sum.
    """
    # Window start used by ``expense_window`` for entries spread via
    # ``period_months``: ``period_start`` when set, otherwise ``entry_date``.
    window_start = func.coalesce(ExpenseEntry.period_start, ExpenseEntry.entry_date)
    result = await session.execute(
        select(ExpenseEntry)
        .where(
            ExpenseEntry.car_id == car_id,
            or_(
                ExpenseEntry.entry_date.between(date_from, date_to),
                ExpenseEntry.period_start.between(date_from, date_to),
                ExpenseEntry.period_end.between(date_from, date_to),
                (ExpenseEntry.period_start <= date_from) & (ExpenseEntry.period_end >= date_to),
                # Entries spread by ``period_months`` have no stored end date,
                # so none of the conditions above match once their start lies
                # before ``date_from`` (e.g. a 12-month insurance paid in
                # January queried for March). Fetch every such entry that has
                # started by ``date_to``; non-overlapping ones allocate zero
                # below and are skipped.
                ExpenseEntry.period_months.is_not(None) & (window_start <= date_to),
            ),
        )
        .order_by(ExpenseEntry.entry_date.asc(), ExpenseEntry.id.asc())
    )

    # Categories counted as recurring even when the entry is not flagged.
    recurring_categories = {
        ExpenseCategory.insurance,
        ExpenseCategory.loan_payment,
        ExpenseCategory.loan_interest,
    }
    total = Decimal("0")
    recurring = Decimal("0")
    categories: dict[str, Decimal] = {}
    count = 0
    for entry in result.scalars():
        amount = allocated_expense_cost(entry, date_from, date_to)
        if amount <= 0:
            continue
        count += 1
        total += amount
        category = entry.category.value if isinstance(entry.category, ExpenseCategory) else str(entry.category)
        categories[category] = categories.get(category, Decimal("0")) + amount
        if entry.is_recurring or entry.category in recurring_categories:
            recurring += amount
    return total.quantize(Decimal("0.01")), recurring.quantize(Decimal("0.01")), count, categories
|
|
|
|
|
|
def calculate_depreciation(car: Car, date_from: date, date_to: date) -> Decimal:
    """Return the depreciation attributed to ``[date_from, date_to]`` for a car.

    The purchase price is spread evenly per day over the 60 months that
    follow the purchase date, and the share for the days overlapping the
    requested range is returned rounded to two decimals. Returns
    ``Decimal("0")`` when depreciation is disabled for the car, when the
    purchase price or date is missing, or when the range lies outside the
    60-month schedule.
    """
    depreciable = car.include_depreciation and car.purchase_price and car.purchase_date
    if not depreciable:
        return Decimal("0")

    first_day = car.purchase_date
    last_day = add_months(first_day, 60) - timedelta(days=1)
    days_in_range = overlap_days(first_day, last_day, date_from, date_to)
    if days_in_range <= 0:
        return Decimal("0")

    schedule_days = period_days(first_day, last_day)
    per_day = Decimal(car.purchase_price) / Decimal(schedule_days)
    return (per_day * Decimal(days_in_range)).quantize(Decimal("0.01"))
|
|
|
|
|
|
async def full_tank_consumption(
    session: AsyncSession, car_id: int, date_from: date, date_to: date
) -> float | None:
    """Compute average fuel consumption in liters per 100 km from full-tank fills.

    Fuel entries up to ``date_to`` are ordered by date, odometer and id. Each
    pair of consecutive full-tank entries forms a leg: its distance is the
    odometer difference and its fuel is the sum of liters of every entry
    after the opening fill up to and including the closing fill. A leg is
    counted only when its closing fill is dated on or after ``date_from`` and
    both its distance and its liters are positive.

    Returns:
        Liters per 100 km over all counted legs, or ``None`` when there are
        fewer than two full-tank entries or no leg qualifies.
    """
    stmt = (
        select(FuelEntry)
        .where(
            FuelEntry.car_id == car_id,
            FuelEntry.entry_date <= date_to,
        )
        .order_by(FuelEntry.entry_date.asc(), FuelEntry.odometer.asc(), FuelEntry.id.asc())
    )
    fills = list((await session.execute(stmt)).scalars())
    full_positions = [position for position, fill in enumerate(fills) if fill.is_full_tank]
    if len(full_positions) < 2:
        return None

    liters_used = Decimal("0")
    km_driven = 0
    # Walk consecutive full-tank fills pairwise; every leg starts at the previous full fill.
    for start_pos, end_pos in zip(full_positions, full_positions[1:]):
        opening = fills[start_pos]
        closing = fills[end_pos]
        if closing.entry_date < date_from:
            continue
        leg_km = closing.odometer - opening.odometer
        if leg_km <= 0:
            continue
        leg_liters = sum(Decimal(fill.liters) for fill in fills[start_pos + 1 : end_pos + 1])
        if leg_liters > 0:
            liters_used += leg_liters
            km_driven += leg_km

    if km_driven <= 0 or liters_used <= 0:
        return None
    return float(liters_used * Decimal(100) / Decimal(km_driven))
|
|
|
|
|
|
async def dataframe_from_query(session: AsyncSession, stmt: Select) -> pd.DataFrame:
    """Execute ``stmt`` and return its rows as a DataFrame built from the row mappings."""
    executed = await session.execute(stmt)
    return pd.DataFrame(executed.mappings().all())
|
|
|
|
|
|
def _no_odometer_data(
    car_id: int, price_prediction: dict[str, float | int | None]
) -> OdometerPrediction:
    """Build the response returned when no usable odometer reading exists."""
    return OdometerPrediction(
        car_id=car_id,
        samples=0,
        current_odometer=None,
        predicted_today=None,
        predicted_30_days=None,
        avg_km_per_day=None,
        avg_km_per_month=None,
        **price_prediction,
        confidence=0,
        insight="Недостаточно данных: добавь одометр в заправках или сервисных записях.",
    )


async def predict_odometer(session: AsyncSession, car_id: int) -> OdometerPrediction:
    """Estimate the car's current and 30-day-ahead odometer from recorded readings.

    Readings come from fuel entries and from service entries that carry an
    odometer value. After cleaning (nulls, duplicates, negative values, one
    reading per date, drops versus the previous row) the daily rate is a
    blend of the weighted mean (70%) and the median (30%) of the last six
    intervals, and the prediction extrapolates from the latest reading. The
    fuel price forecast from ``predict_fuel_price`` is merged into the result.

    Args:
        session: Async database session used for the queries.
        car_id: Identifier of the car to predict for.

    Returns:
        An ``OdometerPrediction``; with no usable reading all odometer fields
        are ``None``, and with a single reading only the current value is set.
    """
    price_prediction = await predict_fuel_price(session, car_id)
    fuel = await dataframe_from_query(
        session,
        select(FuelEntry.entry_date.label("date"), FuelEntry.odometer.label("odometer")).where(
            FuelEntry.car_id == car_id
        ),
    )
    service = await dataframe_from_query(
        session,
        select(ServiceEntry.entry_date.label("date"), ServiceEntry.odometer.label("odometer")).where(
            ServiceEntry.car_id == car_id, ServiceEntry.odometer.is_not(None)
        ),
    )
    if fuel.empty and service.empty:
        return _no_odometer_data(car_id, price_prediction)

    df = pd.concat([fuel, service]).dropna().drop_duplicates().sort_values("date")
    df["date"] = pd.to_datetime(df["date"])
    df = df[df["odometer"] >= 0]
    # Keep one reading per date: the highest odometer recorded that day.
    df = df.sort_values(["date", "odometer"]).drop_duplicates(subset=["date"], keep="last")
    # Drop readings lower than the previous row; copy so the columns added
    # further down are written to an independent frame, not a filtered view.
    df = df[df["odometer"].diff().fillna(0) >= 0].copy()
    if df.empty:
        # Cleaning removed every row (e.g. only null or negative odometers);
        # indexing the last row below would raise IndexError.
        return _no_odometer_data(car_id, price_prediction)
    if len(df) < 2:
        current = int(df.iloc[-1]["odometer"])
        return OdometerPrediction(
            car_id=car_id,
            samples=len(df),
            current_odometer=current,
            predicted_today=current,
            predicted_30_days=None,
            avg_km_per_day=None,
            avg_km_per_month=None,
            **price_prediction,
            confidence=0.2,
            insight="Есть только одна точка пробега. Для прогноза нужны минимум две записи.",
        )

    last = df.iloc[-1]
    df["days_delta"] = df["date"].diff().dt.days
    df["km_delta"] = df["odometer"].diff()
    intervals = df[(df["days_delta"] > 0) & (df["km_delta"] >= 0)].copy()
    intervals["km_per_day"] = intervals["km_delta"] / intervals["days_delta"]
    # Intervals implying more than 500 km per day are discarded.
    intervals = intervals[(intervals["km_per_day"] >= 0) & (intervals["km_per_day"] <= 500)]
    if intervals.empty:
        km_per_day = 0
    else:
        recent = intervals.tail(6).copy()
        # Linearly increasing weights: the newest interval counts the most.
        recent["weight"] = range(1, len(recent) + 1)
        weighted = (recent["km_per_day"] * recent["weight"]).sum() / recent["weight"].sum()
        median = recent["km_per_day"].median()
        km_per_day = float((weighted * 0.7) + (median * 0.3))

    # Naive UTC midnight of today; ``Timestamp.utcnow`` is deprecated in
    # recent pandas, ``Timestamp.now(tz="UTC")`` is the equivalent call.
    today = pd.Timestamp.now(tz="UTC").tz_localize(None).normalize()
    days_since_last = max((today - last["date"]).days, 0)
    predicted_today = int(last["odometer"] + km_per_day * days_since_last)
    predicted_30 = int(predicted_today + km_per_day * 30)
    span_days = max((last["date"] - df.iloc[0]["date"]).days, 1)
    interval_count = len(intervals)
    # Relative spread of the daily rate, capped at 1; ignored with fewer than three intervals.
    variability = 0 if interval_count < 3 or km_per_day == 0 else min(
        float(intervals["km_per_day"].std() / max(km_per_day, 1)),
        1,
    )
    # Confidence grows with interval count and covered time span, shrinks
    # with variability, and is clamped to the range 0.25..0.95.
    confidence = min(
        0.95,
        max(0.25, 0.3 + interval_count * 0.055 + min(span_days, 365) / 900 - variability * 0.18),
    )
    insight = (
        "Пробег стабилен, прогноз надежный."
        if confidence >= 0.75
        else "Прогноз предварительный: точность вырастет после регулярных записей одометра."
    )
    return OdometerPrediction(
        car_id=car_id,
        samples=len(df),
        current_odometer=int(last["odometer"]),
        predicted_today=predicted_today,
        predicted_30_days=predicted_30,
        avg_km_per_day=round(km_per_day, 1),
        avg_km_per_month=round(km_per_day * 30.4, 1),
        **price_prediction,
        confidence=round(confidence, 2),
        insight=insight,
    )
|
|
|
|
|
|
async def predict_fuel_price(session: AsyncSession, car_id: int) -> dict[str, float | int | None]:
    """Forecast the fuel price per liter 30 days ahead from the car's fuel entries.

    Prices outside the open interval (0, 10000) are ignored. The forecast
    uses the last eight remaining entries by date: the linear trend between
    the oldest and newest of them is extrapolated 30 days and blended with
    their mean (65% trend, 35% mean). With a single entry the forecast equals
    the current price.

    Returns:
        A dict with the keys ``current_price_per_liter``,
        ``predicted_price_per_liter_30_days``, ``avg_price_per_liter``,
        ``price_samples`` and ``price_confidence``. When no usable price
        exists the three price values are ``None`` and the other two are 0.
    """
    prices = await dataframe_from_query(
        session,
        select(
            FuelEntry.entry_date.label("date"),
            FuelEntry.price_per_liter.label("price"),
        ).where(FuelEntry.car_id == car_id),
    )
    no_data = {
        "current_price_per_liter": None,
        "predicted_price_per_liter_30_days": None,
        "avg_price_per_liter": None,
        "price_samples": 0,
        "price_confidence": 0,
    }
    if prices.empty:
        return no_data

    prices = prices.dropna().copy()
    if prices.empty:
        return no_data
    prices["date"] = pd.to_datetime(prices["date"])
    prices["price"] = pd.to_numeric(prices["price"], errors="coerce")
    plausible = (prices["price"] > 0) & (prices["price"] < 10000)
    prices = prices[plausible].sort_values("date")
    if prices.empty:
        return no_data

    window = prices.tail(8).copy()
    sample_size = len(window)
    oldest = window.iloc[0]
    newest = window.iloc[-1]
    latest_price = float(newest["price"])
    mean_price = float(window["price"].mean())
    forecast = latest_price
    confidence = min(0.72, 0.22 + sample_size * 0.055)

    if sample_size >= 2:
        span_days = max((newest["date"] - oldest["date"]).days, 1)
        daily_change = float((newest["price"] - oldest["price"]) / span_days)
        trend_forecast = latest_price + daily_change * 30
        forecast = (trend_forecast * 0.65) + (mean_price * 0.35)
        # Relative spread of the recent prices; only computed with three or more samples.
        if sample_size >= 3:
            volatility = float(window["price"].std() / max(mean_price, 1))
        else:
            volatility = 0
        confidence = min(0.9, max(0.3, confidence + min(span_days, 180) / 600 - volatility))

    return {
        "current_price_per_liter": round(latest_price, 2),
        "predicted_price_per_liter_30_days": round(max(forecast, 0), 2),
        "avg_price_per_liter": round(mean_price, 2),
        # Counts every usable price, not just the eight used for the forecast.
        "price_samples": int(len(prices)),
        "price_confidence": round(confidence, 2),
    }
|