Complete CarPass product flows
This commit is contained in:
193
app/services/record_parser.py
Normal file
193
app/services/record_parser.py
Normal file
@@ -0,0 +1,193 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.services.vehicle_identity import normalize_license_plate, validate_vin
|
||||
|
||||
# Phrases (Russian and English) that mark a fill-up as "to a full tank"; matched case-insensitively.
FULL_TANK_RE = re.compile(r"(до\s+полного|полный\s+бак|залил\s+полный|full\s+tank)", re.I)
# Any run of digits with an optional fractional part; both "." and "," are accepted as the separator.
NUMBER_RE = re.compile(r"(\d+(?:[.,]\d+)?)")
|
||||
|
||||
|
||||
class ParsedRecord(BaseModel):
    # Structured result of parsing one free-text note.

    # Detected record kind; values produced in this module are "fuel",
    # "vehicle_purchase", "insurance", "fine", "tax", "service" and "unknown".
    event_type: str
    # Parser confidence, validated to lie within [0, 1].
    confidence: float = Field(ge=0, le=1)
    # Names of fields the parser could not extract from the text.
    missing_fields: list[str] = Field(default_factory=list)
    # Human-readable warnings for the user.
    warnings: list[str] = Field(default_factory=list)
    # Extracted payload; its keys depend on event_type.
    data: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
def decimal_from_match(value: str | None) -> Decimal | None:
    """Convert a matched numeric string to ``Decimal``.

    A comma is accepted as the decimal separator. ``None`` and the empty
    string yield ``None``.
    """
    if value:
        normalized = value.replace(",", ".")
        return Decimal(normalized)
    return None
|
||||
|
||||
|
||||
# Single-token markers must be matched as whole tokens. As plain substrings,
# "л " fires on any word ending in "л" (e.g. "оплатил штраф") and "то" occurs
# inside a large share of ordinary Russian words ("это", "авто", "что-то").
_LITER_TOKEN_RE = re.compile(r"(?:\b|(?<=\d))л\b")
_MAINTENANCE_TOKEN_RE = re.compile(r"(?<![\w-])то(?![\w-])")


def parse_record_text(text: str) -> ParsedRecord:
    """Classify a free-text note and delegate to the matching parser.

    Whitespace is collapsed first. The record type is chosen by the first
    keyword group that matches the lower-cased text, checked in this order:
    purchase, fuel, insurance, fine, tax, service.

    Args:
        text: Raw user note (Russian and/or English keywords are recognised).

    Returns:
        The result of the type-specific parser. For empty text, an ``unknown``
        record with confidence 0 and ``text`` listed as missing. When no
        keyword group matches, an ``unknown`` record with confidence 0.2, a
        warning, and whatever VIN / plate could be extracted.
    """
    source = " ".join(text.split())
    if not source:
        return ParsedRecord(event_type="unknown", confidence=0, missing_fields=["text"])
    lower = source.lower()

    vin = extract_vin(source)
    plate = extract_license_plate(source)

    def mentions(*stems: str) -> bool:
        # Stems are matched as substrings so inflected forms are covered.
        return any(stem in lower for stem in stems)

    if mentions("купил", "покупка", "кредит", "loan", "lease"):
        return parse_purchase(source, vin, plate)
    # "залил" is listed explicitly so fill-up notes that used to be caught
    # only by the accidental "л " substring are still recognised as fuel.
    if mentions("заправ", "литр", "залил", "full tank", "бак") or _LITER_TOKEN_RE.search(lower):
        return parse_fuel(source, vin, plate)
    if mentions("страхов", "полис", "osago", "каско"):
        return parse_expense(source, "insurance", vin, plate)
    if mentions("штраф", "fine"):
        return parse_expense(source, "fine", vin, plate)
    if mentions("налог", "tax"):
        return parse_expense(source, "tax", vin, plate)
    if mentions("сервис", "ремонт", "масл", "diagnostics", "repair") or _MAINTENANCE_TOKEN_RE.search(lower):
        return parse_service(source, vin, plate)

    return ParsedRecord(
        event_type="unknown",
        confidence=0.2,
        warnings=["Не удалось надежно определить тип записи. Откройте ручной ввод."],
        data=identity_payload(vin, plate),
    )
|
||||
|
||||
|
||||
def parse_fuel(source: str, vin: str | None, plate: str | None) -> ParsedRecord:
    """Extract fuel fill-up details from a whitespace-normalised note.

    Args:
        source: Note text with whitespace already collapsed.
        vin: VIN extracted by the caller, if any.
        plate: License plate extracted by the caller, if any.

    Returns:
        A ``fuel`` record. Confidence is 0.9 when liters, amount and odometer
        were all found, otherwise 0.55 with the absent ones listed in
        ``missing_fields``. ``price_per_liter`` is derived (rounded to 0.01)
        only when both liters and amount are non-zero.
    """
    # Inflected unit words ("литров", "литра", "liters", "litres") must match
    # too; a bare "литр" followed by \b rejects every inflected form.
    liters = find_decimal(
        r"(\d+(?:[.,]\d+)?)\s*(?:литр\w*|lit(?:er|re)s?|л|l)\b", source
    )
    odometer = find_int(r"(?:пробег|одометр|odo|km|км)\s*(\d{2,7})", source)
    # \b keeps "на" from matching as the tail of another word ("бензина 95").
    amount = find_decimal(r"\b(?:на|сумма|total|amount)\s*(\d+(?:[.,]\d+)?)", source)
    if amount is None:
        # The fallback takes the largest number in the text, so numbers that
        # are already known to be liters or the odometer reading are excluded.
        known = {liters, Decimal(odometer) if odometer is not None else None}
        amount = largest_money_like_number(source, exclude=known)

    price_per_liter = None
    if liters and amount:
        price_per_liter = (amount / liters).quantize(Decimal("0.01"))

    extracted = (
        ("fuel_liters", liters),
        ("amount", amount),
        ("odometer_km", odometer),
    )
    missing = [field for field, value in extracted if value is None]

    return ParsedRecord(
        event_type="fuel",
        confidence=0.9 if not missing else 0.55,
        missing_fields=missing,
        data={
            **identity_payload(vin, plate),
            "is_full_tank": bool(FULL_TANK_RE.search(source)),
            "fuel_liters": float(liters) if liters is not None else None,
            "amount": float(amount) if amount is not None else None,
            "price_per_liter": float(price_per_liter) if price_per_liter is not None else None,
            "odometer_km": odometer,
        },
    )
|
||||
|
||||
|
||||
def parse_purchase(source: str, vin: str | None, plate: str | None) -> ParsedRecord:
    """Extract vehicle purchase details from a whitespace-normalised note.

    Args:
        source: Note text with whitespace already collapsed.
        vin: VIN extracted by the caller, if any.
        plate: License plate extracted by the caller, if any.

    Returns:
        A ``vehicle_purchase`` record. Confidence is 0.86 when a purchase
        price was found, otherwise 0.45 with ``purchase_price`` listed as
        missing. ``purchase_type`` is ``"credit"`` when a loan principal was
        parsed or the text mentions a loan, otherwise ``"cash"``.
    """
    # \b keeps the short keywords "за" / "на" from matching as the tail of
    # another word (e.g. "Импреза 2008" would otherwise become the price).
    purchase_price = find_decimal(r"\b(?:за|стоимость|цена)\s*(\d+(?:[.,]\d+)?)", source)
    loan_principal = find_decimal(r"(?:кредит|loan)\s*(\d+(?:[.,]\d+)?)", source)
    term = find_int(r"\b(?:на|срок)\s*(\d{1,3})\s*(?:мес|месяц|months)", source)
    rate = find_decimal(r"(?:под|ставк[аи]|rate)\s*(\d+(?:[.,]\d+)?)\s*%?", source)
    currency = detect_currency(source)

    # A note such as "купил в кредит за 2000000" names a loan without giving
    # its principal; it must not be reported as a cash purchase.
    mentions_loan = re.search(r"кредит|loan", source, re.I) is not None
    purchase_type = "credit" if loan_principal or mentions_loan else "cash"

    missing = [] if purchase_price is not None else ["purchase_price"]

    return ParsedRecord(
        event_type="vehicle_purchase",
        confidence=0.86 if purchase_price is not None else 0.45,
        missing_fields=missing,
        data={
            **identity_payload(vin, plate),
            "purchase_price": float(purchase_price) if purchase_price is not None else None,
            "purchase_currency": currency,
            "purchase_type": purchase_type,
            "loan_principal": float(loan_principal) if loan_principal is not None else None,
            "loan_term_months": term,
            "annual_interest_rate": float(rate) if rate is not None else None,
        },
    )
|
||||
|
||||
|
||||
def parse_expense(source: str, category: str, vin: str | None, plate: str | None) -> ParsedRecord:
    """Extract a simple expense (insurance, fine, tax, ...) from a note.

    Args:
        source: Note text with whitespace already collapsed.
        category: Expense category; used both as the record's ``event_type``
            and as ``data["category"]``.
        vin: VIN extracted by the caller, if any.
        plate: License plate extracted by the caller, if any.

    Returns:
        A record with confidence 0.75 when an amount was found, otherwise 0.5
        with ``amount`` listed as missing.
    """
    # \b keeps "на" from matching as the tail of another word: in
    # "машина 2020 сумма 500" the unanchored pattern picks 2020 as the amount.
    amount = find_decimal(
        r"\b(?:на|сумма|оплатил|total|amount)\s*(\d+(?:[.,]\d+)?)", source
    )
    if not amount:
        # A keyword match of zero is treated like no match: fall back to the
        # largest number found anywhere in the text.
        amount = largest_money_like_number(source)

    has_amount = amount is not None
    return ParsedRecord(
        event_type=category,
        confidence=0.75 if has_amount else 0.5,
        missing_fields=[] if has_amount else ["amount"],
        data={
            **identity_payload(vin, plate),
            "category": category,
            "amount": float(amount) if has_amount else None,
            "currency": detect_currency(source),
        },
    )
|
||||
|
||||
|
||||
def parse_service(source: str, vin: str | None, plate: str | None) -> ParsedRecord:
    """Extract a service / repair entry from a whitespace-normalised note.

    Args:
        source: Note text with whitespace already collapsed.
        vin: VIN extracted by the caller, if any.
        plate: License plate extracted by the caller, if any.

    Returns:
        A ``service`` record with fixed confidence 0.72. Only the odometer is
        reported in ``missing_fields``; a missing amount is emitted as ``0``
        rather than ``None``.
    """
    # \b keeps "на" from matching as the tail of another word
    # ("машина 2020" would otherwise be read as an amount of 2020).
    amount = find_decimal(r"\b(?:на|сумма|стоимость|total|amount)\s*(\d+(?:[.,]\d+)?)", source)
    odometer = find_int(r"(?:пробег|одометр|odo|km|км)\s*(\d{2,7})", source)

    # Any mention of oil ("масл...") makes this an oil-change maintenance
    # entry; everything else is recorded as a generic repair.
    is_oil_change = re.search(r"масл", source, re.I) is not None
    title = "Замена масла" if is_oil_change else "Сервисная запись"

    return ParsedRecord(
        event_type="service",
        confidence=0.72,
        missing_fields=[] if odometer is not None else ["odometer_km"],
        data={
            **identity_payload(vin, plate),
            "title": title,
            "amount": float(amount) if amount is not None else 0,
            "odometer_km": odometer,
            "service_type": "maintenance" if is_oil_change else "repair",
        },
    )
|
||||
|
||||
|
||||
def identity_payload(vin: str | None, plate: str | None) -> dict[str, str | None]:
    """Build the vehicle-identity part shared by every record payload."""
    payload: dict[str, str | None] = {}
    payload["vin"] = vin
    payload["license_plate"] = plate
    return payload
|
||||
|
||||
|
||||
def extract_vin(source: str) -> str | None:
    """Return the first VIN-like fragment of ``source`` accepted by ``validate_vin``.

    The text is upper-cased and scanned for runs of VIN characters (I, O and
    Q excluded) that may contain spaces or hyphens. Candidates for which
    ``validate_vin`` raises ``ValueError`` are skipped. Returns ``None`` when
    no candidate is accepted.
    """
    vin_like = r"[A-HJ-NPR-Z0-9][A-HJ-NPR-Z0-9\s-]{15,25}[A-HJ-NPR-Z0-9]"
    for hit in re.finditer(vin_like, source.upper()):
        try:
            return validate_vin(hit.group(0))
        except ValueError:
            # Not a valid VIN; try the next candidate.
            pass
    return None
|
||||
|
||||
|
||||
def extract_license_plate(source: str) -> str | None:
    """Return the normalised license plate announced by a keyword, if any.

    Looks for "номер", "госномер" or "plate" (case-insensitive), optionally
    followed by ":" or "#", and passes the following 4-14 plate-like
    characters to ``normalize_license_plate``. Returns ``None`` when no
    keyword match is found.
    """
    found = re.search(
        r"(?:номер|госномер|plate)\s*[:#]?\s*([A-ZА-Я0-9가-힣\-\s]{4,14})",
        source,
        re.I,
    )
    if found is None:
        return None
    return normalize_license_plate(found.group(1))
|
||||
|
||||
|
||||
def find_decimal(pattern: str, source: str) -> Decimal | None:
    """Search ``source`` case-insensitively and return group 1 as a ``Decimal``.

    Returns ``None`` when the pattern does not match.
    """
    found = re.search(pattern, source, flags=re.I)
    if found is None:
        return None
    return decimal_from_match(found.group(1))
|
||||
|
||||
|
||||
def find_int(pattern: str, source: str) -> int | None:
    """Search ``source`` case-insensitively and return group 1 as an ``int``.

    Returns ``None`` when the pattern does not match.
    """
    found = re.search(pattern, source, flags=re.I)
    if found is None:
        return None
    return int(found.group(1))
|
||||
|
||||
|
||||
def largest_money_like_number(source: str, exclude: set[Decimal | None] | None = None) -> Decimal | None:
    """Return the largest number in ``source`` that is not in ``exclude``.

    ``None`` entries in ``exclude`` are ignored. Returns ``None`` when no
    number remains after exclusion.
    """
    skipped = set(exclude or ()) - {None}
    best: Decimal | None = None
    for hit in NUMBER_RE.finditer(source):
        number = decimal_from_match(hit.group(1))
        if number is None or number in skipped:
            continue
        # Strict comparison keeps the first of several equal maxima.
        if best is None or number > best:
            best = number
    return best
|
||||
|
||||
|
||||
# Currency markers, checked in order against the lower-cased text. Words and
# ISO codes are guarded against neighbouring letters only, so "5000krw" still
# matches while "позвонил" (contains "вон") and "chauffeur" (contains "eur")
# do not.
_CURRENCY_MARKERS = (
    (
        "KRW",
        re.compile(
            r"₩|(?<![a-z])krw(?![a-z])"
            r"|(?<![а-яё])вон(?:а|у|ы|е|ой|ам|ами|ах)?(?![а-яё])"
        ),
    ),
    ("USD", re.compile(r"\$|(?<![a-z])usd(?![a-z])|(?<![а-яё])доллар")),
    ("EUR", re.compile(r"€|(?<![a-z])eur(?:os?)?(?![a-z])|(?<![а-яё])евро(?![а-яё])")),
)


def detect_currency(source: str) -> str:
    """Guess the currency mentioned in ``source``.

    Recognises currency symbols, ISO codes and the Russian words for won,
    dollar and euro.

    Args:
        source: Free-text note.

    Returns:
        ``"KRW"``, ``"USD"`` or ``"EUR"`` for the first marker group that
        matches, in that order; ``"RUB"`` when none matches.
    """
    lower = source.lower()
    for code, marker in _CURRENCY_MARKERS:
        if marker.search(lower):
            return code
    return "RUB"
|
||||
Reference in New Issue
Block a user