import re
from dataclasses import dataclass

from app.services.vehicle_identity import normalize_license_plate, validate_vin

# Exactly 17 characters from the VIN alphabet (the letters I, O and Q are excluded).
_VIN_PATTERN = re.compile(r"\b[A-HJ-NPR-Z0-9]{17}\b")
# Loose plate shape: one alphanumeric (Latin, Cyrillic or Hangul) followed by
# 4-10 alphanumerics, hyphens or whitespace, delimited by word boundaries.
_PLATE_PATTERN = re.compile(r"\b[0-9A-ZА-Я가-힣][0-9A-ZА-Я가-힣\-\s]{4,10}\b")
_WHITESPACE_RUN = re.compile(r"\s+")

# Fixed scores the stub assigns to each kind of candidate.
_VIN_CONFIDENCE = 0.84
_PLATE_CONFIDENCE = 0.62
# Upper bound on the number of candidates returned per recognition call.
_MAX_CANDIDATES = 8


@dataclass
class OcrCandidate:
    """A single value extracted from recognized text."""

    type: str  # the stub emits "vin" or "license_plate"
    value: str  # validated / normalized candidate text
    confidence: float  # score attached by the provider


@dataclass
class OcrResult:
    """Outcome of one recognition call."""

    recognized_text: str  # full text the candidates were extracted from
    candidates: list[OcrCandidate]


class OCRProvider:
    """Base interface for OCR backends; subclasses must override ``recognize``."""

    async def recognize(self, content: bytes, filename: str | None = None) -> OcrResult:
        """Recognize text in *content* and return the extracted candidates.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class StubOCRProvider(OCRProvider):
    """Provider that performs no real OCR.

    It treats the filename and the UTF-8-decodable part of the payload as the
    "recognized" text and pattern-matches VINs and license plates in it.
    """

    async def recognize(self, content: bytes, filename: str | None = None) -> OcrResult:
        """Extract VIN and license-plate candidates from *filename* and *content*.

        Args:
            content: Raw payload; bytes that are not valid UTF-8 are dropped.
            filename: Optional file name, searched together with the content.

        Returns:
            An ``OcrResult`` whose ``recognized_text`` is the whitespace-collapsed
            text and whose ``candidates`` holds at most ``_MAX_CANDIDATES`` unique
            entries, VINs first, in order of appearance.
        """
        text = " ".join([filename or "", content.decode("utf-8", errors="ignore")])
        compact = _WHITESPACE_RUN.sub(" ", text).strip()
        haystack = compact.upper()

        candidates: list[OcrCandidate] = []
        # Values already emitted. Shared by both passes so that neither VINs nor
        # plates are reported twice (e.g. when the same VIN appears in both the
        # filename and the content) and duplicates do not eat into the cap.
        seen: set[str] = set()

        for raw in _VIN_PATTERN.findall(haystack):
            try:
                vin = validate_vin(raw) or raw
            except ValueError:
                # Not a valid VIN despite matching the shape; skip it.
                continue
            if vin in seen:
                continue
            seen.add(vin)
            candidates.append(
                OcrCandidate(type="vin", value=vin, confidence=_VIN_CONFIDENCE)
            )

        for raw in _PLATE_PATTERN.findall(haystack):
            normalized = normalize_license_plate(raw)
            if not normalized or not 5 <= len(normalized) <= 10:
                continue
            if normalized in seen:
                continue
            seen.add(normalized)
            candidates.append(
                OcrCandidate(
                    type="license_plate",
                    value=normalized,
                    confidence=_PLATE_CONFIDENCE,
                )
            )

        return OcrResult(
            recognized_text=compact,
            candidates=candidates[:_MAX_CANDIDATES],
        )


def get_ocr_provider() -> OCRProvider:
    """Return the OCR provider to use; currently always the stub."""
    return StubOCRProvider()