This commit is contained in:
128
app/api/ocr.py
128
app/api/ocr.py
@@ -9,9 +9,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.api.deps import get_current_telegram_user
|
||||
from app.db.session import get_session
|
||||
from app.models.car import OCRResult
|
||||
from app.models.user import User
|
||||
from app.services.admin_notifications import create_admin_notification
|
||||
from app.services.ocr_provider import get_ocr_provider
|
||||
from app.services.ocr_provider import OcrResult, get_ocr_provider
|
||||
from app.services.rate_limit import check_rate_limit
|
||||
from app.services.uploads import SAFE_IMAGE_TYPES, SAFE_TEXT_TYPES, validate_upload
|
||||
|
||||
@@ -42,6 +43,51 @@ class OCRResultRead(BaseModel):
|
||||
provider: str = "heuristic"
|
||||
|
||||
|
||||
def ocr_candidates_json(result: OcrResult | None) -> list[dict] | None:
    """Convert the candidates of an OCR result into plain dictionaries.

    Args:
        result: The OCR result whose ``candidates`` are serialized, or
            ``None`` when no recognition result is available.

    Returns:
        ``None`` when ``result`` is ``None``; otherwise a list with one
        dictionary per candidate holding its ``type``, ``value`` and
        ``confidence`` attributes, in the original candidate order.
    """
    if result is None:
        return None

    serialized: list[dict] = []
    for item in result.candidates:
        serialized.append(
            {
                "type": item.type,
                "value": item.value,
                "confidence": item.confidence,
            }
        )
    return serialized
|
||||
|
||||
|
||||
def ocr_confidence(result: OcrResult | None) -> Decimal | None:
    """Return the highest candidate confidence of an OCR result.

    Args:
        result: The OCR result to inspect, or ``None``.

    Returns:
        ``None`` when ``result`` is ``None`` or has no candidates;
        otherwise the maximum candidate ``confidence`` rounded to four
        decimal places and converted to ``Decimal``.
    """
    if result is None:
        return None
    if not result.candidates:
        return None

    best = max(item.confidence for item in result.candidates)
    # Go through str() so the Decimal carries the rounded decimal text,
    # not the binary float expansion.
    return Decimal(str(round(best, 4)))
|
||||
|
||||
|
||||
async def save_ocr_result(
    session: AsyncSession,
    *,
    current_user: User,
    scope: str,
    filename: str | None,
    content_type: str | None,
    status: str,
    result: OcrResult | None = None,
    recognized_text: str | None = None,
    provider: str | None = None,
    error: str | None = None,
) -> OCRResult:
    """Create an ``OCRResult`` row, add it to the session and flush.

    When ``result`` is given, its ``provider`` and ``recognized_text``
    take precedence over the ``provider`` / ``recognized_text`` keyword
    arguments; those are used only when ``result`` is ``None``.

    This function does not commit.

    Args:
        session: Session the record is added to and flushed on.
        current_user: User whose ``id`` is stored as ``user_id``.
        scope: Value stored in the record's ``scope`` field.
        filename: Uploaded file name, if any.
        content_type: Uploaded content type, if any.
        status: Value stored in the record's ``status`` field.
        result: OCR provider result, if recognition produced one.
        recognized_text: Fallback text used when ``result`` is ``None``.
        provider: Fallback provider name used when ``result`` is ``None``.
        error: Error description to store, if any.

    Returns:
        The newly created record after the flush.
    """
    if result is not None:
        provider_name = result.provider
        text = result.recognized_text
    else:
        provider_name = provider
        text = recognized_text

    record = OCRResult(
        user_id=current_user.id,
        scope=scope,
        filename=filename,
        content_type=content_type,
        status=status,
        provider=provider_name,
        confidence=ocr_confidence(result),
        recognized_text=text,
        candidates_json=ocr_candidates_json(result),
        error=error,
    )
    session.add(record)
    await session.flush()
    return record
|
||||
|
||||
|
||||
async def validate_ocr_upload(
|
||||
*,
|
||||
session: AsyncSession,
|
||||
@@ -59,6 +105,15 @@ async def validate_ocr_upload(
|
||||
allowed_types=SAFE_IMAGE_TYPES | SAFE_TEXT_TYPES,
|
||||
)
|
||||
except HTTPException as exc:
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope="upload_validation",
|
||||
filename=filename,
|
||||
content_type=content_type,
|
||||
status="blocked",
|
||||
error=str(exc.detail),
|
||||
)
|
||||
await create_admin_notification(
|
||||
session,
|
||||
event_type="upload_blocked",
|
||||
@@ -93,6 +148,15 @@ async def recognize_with_alert(
|
||||
try:
|
||||
return await get_ocr_provider().recognize(content, filename)
|
||||
except Exception as exc: # noqa: BLE001 - OCR must fail gracefully and alert admins
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope=scope,
|
||||
filename=filename,
|
||||
content_type=None,
|
||||
status="failed",
|
||||
error=type(exc).__name__,
|
||||
)
|
||||
await create_admin_notification(
|
||||
session,
|
||||
event_type="ocr_failed",
|
||||
@@ -134,10 +198,31 @@ async def parse_text_receipt(
|
||||
scope="parse_text_receipt",
|
||||
)
|
||||
if not result or not result.recognized_text:
|
||||
if result is not None:
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope="parse_text_receipt",
|
||||
filename=file.filename,
|
||||
content_type=file.content_type,
|
||||
status="preview",
|
||||
result=result,
|
||||
)
|
||||
await session.commit()
|
||||
return ReceiptSuggestion(
|
||||
confidence=0,
|
||||
message="Не удалось уверенно распознать чек. Открылся ручной ввод: проверьте дату, сумму, литры и цену.",
|
||||
)
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope="parse_text_receipt",
|
||||
filename=file.filename,
|
||||
content_type=file.content_type,
|
||||
status="preview",
|
||||
result=result,
|
||||
)
|
||||
await session.commit()
|
||||
return parse_receipt_text(result.recognized_text)
|
||||
text = " ".join(
|
||||
[
|
||||
@@ -145,6 +230,17 @@ async def parse_text_receipt(
|
||||
content.decode("utf-8", errors="ignore"),
|
||||
]
|
||||
)
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope="parse_text_receipt",
|
||||
filename=file.filename,
|
||||
content_type=file.content_type,
|
||||
status="preview",
|
||||
recognized_text=text,
|
||||
provider="text",
|
||||
)
|
||||
await session.commit()
|
||||
return parse_receipt_text(text)
|
||||
|
||||
|
||||
@@ -223,6 +319,16 @@ async def recognize_license_plate(
|
||||
)
|
||||
if result is None:
|
||||
return OCRResultRead(recognized_text="", candidates=[], provider="error")
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope="license_plate",
|
||||
filename=file.filename,
|
||||
content_type=file.content_type,
|
||||
status="preview",
|
||||
result=result,
|
||||
)
|
||||
await session.commit()
|
||||
return OCRResultRead(
|
||||
recognized_text=result.recognized_text,
|
||||
candidates=[OCRCandidateRead(**item.__dict__) for item in result.candidates if item.type == "license_plate"],
|
||||
@@ -255,6 +361,16 @@ async def recognize_vin(
|
||||
)
|
||||
if result is None:
|
||||
return OCRResultRead(recognized_text="", candidates=[], provider="error")
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope="vin",
|
||||
filename=file.filename,
|
||||
content_type=file.content_type,
|
||||
status="preview",
|
||||
result=result,
|
||||
)
|
||||
await session.commit()
|
||||
return OCRResultRead(
|
||||
recognized_text=result.recognized_text,
|
||||
candidates=[OCRCandidateRead(**item.__dict__) for item in result.candidates if item.type == "vin"],
|
||||
@@ -287,6 +403,16 @@ async def recognize_service_document(
|
||||
)
|
||||
if result is None:
|
||||
return OCRResultRead(recognized_text="", candidates=[], provider="error")
|
||||
await save_ocr_result(
|
||||
session,
|
||||
current_user=current_user,
|
||||
scope="service_document",
|
||||
filename=file.filename,
|
||||
content_type=file.content_type,
|
||||
status="preview",
|
||||
result=result,
|
||||
)
|
||||
await session.commit()
|
||||
return OCRResultRead(
|
||||
recognized_text=result.recognized_text,
|
||||
candidates=[OCRCandidateRead(**item.__dict__) for item in result.candidates],
|
||||
|
||||
Reference in New Issue
Block a user