49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
import re
|
||
from dataclasses import dataclass
|
||
|
||
from app.services.vehicle_identity import normalize_license_plate, validate_vin
|
||
|
||
|
||
@dataclass
class OcrCandidate:
    """One identifier candidate extracted from recognized text.

    Attributes:
        type: Kind of identifier. The stub provider in this module emits
            ``"vin"`` and ``"license_plate"``.
        value: The candidate identifier string.
        confidence: Score assigned by the provider; the stub provider uses
            fixed values per candidate kind.
    """

    type: str
    value: str
    confidence: float
|
||
|
||
|
||
@dataclass
class OcrResult:
    """Outcome of a single OCR pass.

    Attributes:
        recognized_text: The text the provider recognized.
        candidates: Identifier candidates extracted from that text.
    """

    recognized_text: str
    candidates: list[OcrCandidate]
|
||
|
||
|
||
class OCRProvider:
    """Base class for OCR back-ends; subclasses override :meth:`recognize`."""

    async def recognize(
        self,
        content: bytes,
        filename: str | None = None,
    ) -> OcrResult:
        """Recognize text in an uploaded file.

        Args:
            content: Raw bytes of the file to process.
            filename: Optional name of the file.

        Returns:
            The recognized text together with extracted candidates.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
|
||
|
||
|
||
class StubOCRProvider(OCRProvider):
    """Placeholder provider that treats the filename and raw bytes as text.

    No image recognition happens here: the payload is decoded as UTF-8
    (undecodable bytes are dropped), joined with the filename, and scanned
    with regular expressions for VIN-like and license-plate-like tokens.
    """

    # Exactly 17 characters drawn from digits and Latin letters except I, O, Q.
    _VIN_PATTERN = re.compile(r"\b[A-HJ-NPR-Z0-9]{17}\b")
    # 5-11 characters: a digit or Latin/Cyrillic/Hangul letter first, then the
    # same set plus hyphens and whitespace.
    _PLATE_PATTERN = re.compile(r"\b[0-9A-ZА-Я가-힣][0-9A-ZА-Я가-힣\-\s]{4,10}\b")
    _MAX_CANDIDATES = 8
    _VIN_CONFIDENCE = 0.84
    _PLATE_CONFIDENCE = 0.62

    async def recognize(self, content: bytes, filename: str | None = None) -> OcrResult:
        """Extract VIN and license-plate candidates from the filename and content.

        Args:
            content: Raw file bytes; decoded as UTF-8 with invalid bytes ignored.
            filename: Optional file name, searched together with the content.

        Returns:
            An ``OcrResult`` whose ``recognized_text`` is the whitespace-collapsed
            filename plus decoded content, and whose ``candidates`` holds at most
            eight entries with unique values, VINs first, then license plates.
        """
        decoded = content.decode("utf-8", errors="ignore")
        compact = re.sub(r"\s+", " ", " ".join([filename or "", decoded])).strip()
        # Matching is done on an upper-cased copy; recognized_text keeps its case.
        searchable = compact.upper()

        candidates: list[OcrCandidate] = []
        # Values already emitted, shared by both passes so that neither a
        # repeated VIN nor a plate equal to an emitted VIN is reported twice.
        seen: set[str] = set()

        for raw in self._VIN_PATTERN.findall(searchable):
            try:
                # Fall back to the raw match when the validator returns a falsy value.
                vin = validate_vin(raw) or raw
            except ValueError:
                # The validator rejected this token; it is not a candidate.
                continue
            if vin in seen:
                continue
            seen.add(vin)
            candidates.append(
                OcrCandidate(type="vin", value=vin, confidence=self._VIN_CONFIDENCE)
            )

        for raw in self._PLATE_PATTERN.findall(searchable):
            plate = normalize_license_plate(raw)
            if not plate or not 5 <= len(plate) <= 10 or plate in seen:
                continue
            seen.add(plate)
            candidates.append(
                OcrCandidate(type="license_plate", value=plate, confidence=self._PLATE_CONFIDENCE)
            )

        return OcrResult(
            recognized_text=compact,
            candidates=candidates[: self._MAX_CANDIDATES],
        )
|
||
|
||
|
||
def get_ocr_provider() -> OCRProvider:
    """Return the OCR provider to use; currently always a new ``StubOCRProvider``."""
    provider: OCRProvider = StubOCRProvider()
    return provider
|