improve mini app UX and analytics

This commit is contained in:
VPN SaaS Dev
2026-05-12 04:26:24 +09:00
parent 53c3dc42ca
commit a6cdc98f7b
6 changed files with 748 additions and 91 deletions

View File

@@ -19,23 +19,111 @@ class ReceiptSuggestion(BaseModel):
@router.post("/fuel-receipt", response_model=ReceiptSuggestion)
async def scan_fuel_receipt(file: UploadFile = File(...)) -> ReceiptSuggestion:
content = await file.read()
text = content.decode("utf-8", errors="ignore")
numbers = [Decimal(item.replace(",", ".")) for item in re.findall(r"\d+[,.]\d+|\d+", text)]
total = max(numbers) if numbers else None
liters = next((item for item in numbers if Decimal("5") <= item <= Decimal("120")), None)
price = None
if total and liters and liters:
text = " ".join(
[
file.filename or "",
content.decode("utf-8", errors="ignore"),
]
)
normalized = text.replace("\xa0", " ").replace(",", ".")
compact = re.sub(r"\s+", " ", normalized).strip()
numbers = [Decimal(item) for item in re.findall(r"\d+(?:\.\d+)?", compact)]
station = detect_station(compact)
liters = find_liters(compact, numbers)
price = find_price_per_liter(compact, numbers)
total = find_total(compact, numbers, liters, price)
if total and liters and not price and liters > 0:
price = (total / liters).quantize(Decimal("0.01"))
if liters and price and not total:
total = (liters * price).quantize(Decimal("0.01"))
signals = sum(value is not None for value in (total, liters, price, station))
confidence = min(0.88, 0.18 + signals * 0.17 + min(len(numbers), 12) * 0.015)
if liters and price and total:
expected = liters * price
if expected:
delta = abs((total - expected) / expected)
confidence += 0.1 if delta <= Decimal("0.08") else -0.08
confidence = max(0, min(float(confidence), 0.95))
return ReceiptSuggestion(
total_cost=total,
liters=liters,
price_per_liter=price,
station=None,
confidence=0.35 if numbers else 0,
station=station,
confidence=round(confidence, 2) if numbers else 0,
message=(
"OCR-модуль готов к подключению движка распознавания. Сейчас извлекаю числа из текстового слоя/имени файла."
"Распознал данные чека и заполнил форму. Проверь значения перед сохранением."
if numbers
else "Не удалось распознать чек. Можно заполнить поля вручную, а OCR-движок подключить отдельным сервисом."
else "Не удалось прочитать данные чека. Попробуй фото крупнее или заполни поля вручную."
),
)
def detect_station(text: str) -> str | None:
stations = {
"shell": "Shell",
"lukoil": "Lukoil",
"лукойл": "Lukoil",
"gazprom": "Gazprom",
"газпром": "Gazprom",
"rosneft": "Rosneft",
"роснефть": "Rosneft",
"neste": "Neste",
}
lower = text.lower()
for needle, name in stations.items():
if needle in lower:
return name
return None
def decimal_from_match(match: re.Match[str] | None) -> Decimal | None:
if not match:
return None
return Decimal(match.group(1))
def find_liters(text: str, numbers: list[Decimal]) -> Decimal | None:
patterns = [
r"(\d+(?:\.\d+)?)\s*(?:l|литр|литра|литров|л)\b",
r"(?:volume|qty|кол-?во|количество|объем)\D{0,12}(\d+(?:\.\d+)?)",
]
for pattern in patterns:
value = decimal_from_match(re.search(pattern, text, re.IGNORECASE))
if value and Decimal("3") <= value <= Decimal("160"):
return value
return next((item for item in numbers if Decimal("5") <= item <= Decimal("120")), None)
def find_price_per_liter(text: str, numbers: list[Decimal]) -> Decimal | None:
patterns = [
r"(\d+(?:\.\d+)?)\s*(?:/|за)\s*(?:l|литр|л)\b",
r"(?:price|цена|ppu|руб/л|₽/л)\D{0,12}(\d+(?:\.\d+)?)",
]
for pattern in patterns:
value = decimal_from_match(re.search(pattern, text, re.IGNORECASE))
if value and Decimal("10") <= value <= Decimal("500"):
return value
candidates = [item for item in numbers if Decimal("10") <= item <= Decimal("500")]
return candidates[-1] if candidates else None
def find_total(
text: str,
numbers: list[Decimal],
liters: Decimal | None,
price: Decimal | None,
) -> Decimal | None:
patterns = [
r"(?:total|sum|amount|итого|сумма|к\s*оплате)\D{0,16}(\d+(?:\.\d+)?)",
r"(\d+(?:\.\d+)?)\s*(?:rub|₽|руб|krw|₩)",
]
for pattern in patterns:
value = decimal_from_match(re.search(pattern, text, re.IGNORECASE))
if value and value > Decimal("50"):
return value
ignored = {value for value in (liters, price) if value is not None}
candidates = [item for item in numbers if item > Decimal("50") and item not in ignored]
return max(candidates) if candidates else None