"""Heuristic extraction of fuel-receipt fields from an uploaded file's text."""

import re
from decimal import Decimal, InvalidOperation

from fastapi import APIRouter, File, UploadFile
from pydantic import BaseModel

router = APIRouter(prefix="/ocr", tags=["ocr"])

# Quantization step used when a missing value is derived from the other two.
_CENT = Decimal("0.01")

# Lower-cased substring -> canonical station name. Checked in insertion order;
# the first needle found in the text wins.
_STATIONS = {
    "shell": "Shell",
    "lukoil": "Lukoil",
    "лукойл": "Lukoil",
    "gazprom": "Gazprom",
    "газпром": "Gazprom",
    "rosneft": "Rosneft",
    "роснефть": "Rosneft",
    "neste": "Neste",
}

# Explicit patterns, tried in order before falling back to range heuristics.
# Each pattern captures the numeric value in group 1.
_LITERS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"(\d+(?:\.\d+)?)\s*(?:l|литр|литра|литров|л)\b",
        r"(?:volume|qty|кол-?во|количество|объем)\D{0,12}(\d+(?:\.\d+)?)",
    )
)
_PRICE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"(\d+(?:\.\d+)?)\s*(?:/|за)\s*(?:l|литр|л)\b",
        r"(?:price|цена|ppu|руб/л|₽/л)\D{0,12}(\d+(?:\.\d+)?)",
    )
)
_TOTAL_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"(?:total|sum|amount|итого|сумма|к\s*оплате)\D{0,16}(\d+(?:\.\d+)?)",
        r"(\d+(?:\.\d+)?)\s*(?:rub|₽|руб|krw|₩)",
    )
)


class ReceiptSuggestion(BaseModel):
    """Suggested form values extracted from a receipt, plus a confidence score."""

    # Extracted or derived values; None when nothing suitable was found.
    total_cost: Decimal | None = None
    liters: Decimal | None = None
    price_per_liter: Decimal | None = None
    station: str | None = None
    # Heuristic score; the endpoint clamps it to the range [0, 0.95].
    confidence: float
    # User-facing status text.
    message: str


def _quantize_cents(value: Decimal) -> Decimal | None:
    """Round *value* to two decimal places, or return None if that is impossible.

    ``Decimal.quantize`` raises ``InvalidOperation`` when the rounded result
    would need more digits than the context precision allows. Arbitrarily long
    digit runs can appear in uploaded content, so that case is treated as
    "no usable value" instead of letting the request fail.
    """
    try:
        return value.quantize(_CENT)
    except InvalidOperation:
        return None


@router.post("/fuel-receipt", response_model=ReceiptSuggestion)
async def scan_fuel_receipt(file: UploadFile = File(...)) -> ReceiptSuggestion:
    """Suggest fuel-receipt form values from the text of an uploaded file.

    The upload bytes are decoded as UTF-8 (undecodable bytes are dropped) and
    joined with the filename. Commas are turned into dots and whitespace is
    collapsed, then every number in the text is collected. Station, liters,
    price per liter and total are picked by the helper functions; when exactly
    one of price/total is missing it is derived from the other two values.

    Args:
        file: The uploaded receipt file.

    Returns:
        A ``ReceiptSuggestion``. ``confidence`` is 0 and the message reports a
        failure when no number at all was found in the text.
    """
    # NOTE(review): the whole upload is read into memory with no size cap.
    content = await file.read()
    text = " ".join(
        [
            file.filename or "",
            content.decode("utf-8", errors="ignore"),
        ]
    )
    # Treat non-breaking spaces as spaces and decimal commas as dots.
    normalized = text.replace("\xa0", " ").replace(",", ".")
    compact = re.sub(r"\s+", " ", normalized).strip()
    numbers = [Decimal(item) for item in re.findall(r"\d+(?:\.\d+)?", compact)]

    station = detect_station(compact)
    liters = find_liters(compact, numbers)
    # Pass liters so the price fallback does not simply re-select that number.
    price = find_price_per_liter(compact, numbers, liters)
    total = find_total(compact, numbers, liters, price)

    # Fill in whichever of price/total is missing from the other two values.
    if total and liters and not price and liters > 0:
        price = _quantize_cents(total / liters)
    if liters and price and not total:
        total = _quantize_cents(liters * price)

    # Base score: one step per detected field plus a small bonus per number
    # found (capped at 12 numbers), limited to 0.88 before the cross-check.
    signals = sum(value is not None for value in (total, liters, price, station))
    confidence = min(0.88, 0.18 + signals * 0.17 + min(len(numbers), 12) * 0.015)

    # Cross-check: reward totals within 8% of liters * price, penalise others.
    if liters and price and total:
        expected = liters * price
        if expected:
            delta = abs((total - expected) / expected)
            confidence += 0.1 if delta <= Decimal("0.08") else -0.08
    confidence = max(0, min(float(confidence), 0.95))

    return ReceiptSuggestion(
        total_cost=total,
        liters=liters,
        price_per_liter=price,
        station=station,
        confidence=round(confidence, 2) if numbers else 0,
        message=(
            "Распознал данные чека и заполнил форму. Проверь значения перед сохранением."
            if numbers
            else "Не удалось прочитать данные чека. Попробуй фото крупнее или заполни поля вручную."
        ),
    )


def detect_station(text: str) -> str | None:
    """Return the canonical name of the first known station found in *text*.

    Matching is a case-insensitive substring test against the known station
    spellings, in table order. Returns None when none of them occurs.
    """
    lower = text.lower()
    for needle, name in _STATIONS.items():
        if needle in lower:
            return name
    return None


def decimal_from_match(match: re.Match[str] | None) -> Decimal | None:
    """Convert capture group 1 of *match* to a Decimal; None if there is no match."""
    if not match:
        return None
    return Decimal(match.group(1))


def find_liters(text: str, numbers: list[Decimal]) -> Decimal | None:
    """Pick the fuel volume from *text*.

    An explicit pattern (a number followed by a liter unit, or a volume or
    quantity keyword followed by a number) is accepted when its value lies in
    3..160. Otherwise the first entry of *numbers* in 5..120 is used.

    Returns:
        The chosen volume, or None when nothing qualifies.
    """
    for pattern in _LITERS_PATTERNS:
        value = decimal_from_match(pattern.search(text))
        if value is not None and Decimal("3") <= value <= Decimal("160"):
            return value
    return next(
        (item for item in numbers if Decimal("5") <= item <= Decimal("120")),
        None,
    )


def find_price_per_liter(
    text: str,
    numbers: list[Decimal],
    liters: Decimal | None = None,
) -> Decimal | None:
    """Pick the price per liter from *text*.

    An explicit pattern (a number followed by a per-liter marker, or a price
    keyword followed by a number) is accepted when its value lies in 10..500.
    Otherwise the last entry of *numbers* in 10..500 is used.

    Args:
        text: Normalized receipt text.
        numbers: All numbers found in the text, in order of appearance.
        liters: Volume already chosen for this receipt, if any. One occurrence
            of this value is dropped from the fallback candidates so the same
            number is not reported as both volume and price. A price that
            genuinely equals the volume still works when the number appears
            twice, or when it is matched by an explicit price pattern.

    Returns:
        The chosen price, or None when nothing qualifies.
    """
    for pattern in _PRICE_PATTERNS:
        value = decimal_from_match(pattern.search(text))
        if value is not None and Decimal("10") <= value <= Decimal("500"):
            return value
    candidates = [
        item for item in numbers if Decimal("10") <= item <= Decimal("500")
    ]
    if liters is not None and liters in candidates:
        # list.remove drops only the first equal entry; a later duplicate stays.
        candidates.remove(liters)
    return candidates[-1] if candidates else None


def find_total(
    text: str,
    numbers: list[Decimal],
    liters: Decimal | None,
    price: Decimal | None,
) -> Decimal | None:
    """Pick the receipt total from *text*.

    An explicit pattern (a total keyword followed by a number, or a number
    followed by a currency marker) is accepted when its value exceeds 50.
    Otherwise the largest entry of *numbers* above 50 is used, ignoring values
    equal to *liters* or *price*.

    Returns:
        The chosen total, or None when nothing qualifies.
    """
    for pattern in _TOTAL_PATTERNS:
        value = decimal_from_match(pattern.search(text))
        if value is not None and value > Decimal("50"):
            return value
    ignored = {value for value in (liters, price) if value is not None}
    candidates = [
        item for item in numbers if item > Decimal("50") and item not in ignored
    ]
    return max(candidates) if candidates else None