This commit is contained in:
@@ -50,17 +50,35 @@ class TesseractOCRProvider:
|
||||
def _recognize_sync(self, content: bytes) -> str:
    """Run Tesseract OCR over an encoded image and return the recognized text.

    The original image plus several preprocessed variants (grayscale,
    upscaled with boosted contrast, and hard-thresholded) are each passed to
    Tesseract under two page-segmentation configs. Every non-blank result is
    kept, and the results are joined with newlines.

    Args:
        content: Raw bytes of an encoded image.

    Returns:
        The newline-joined non-blank OCR results, or ``""`` when
        pytesseract/Pillow cannot be imported, the bytes cannot be opened as
        an image, or no pass produced any text.
    """
    # Imported lazily so a missing optional OCR dependency yields an empty
    # result instead of an import-time failure.
    try:
        import pytesseract
        from PIL import Image, ImageEnhance, ImageOps
    except ImportError:
        return ""

    try:
        image = Image.open(BytesIO(content))
    except Exception:
        return ""

    # Always try the untouched image; add preprocessed variants when possible.
    candidates = [image]
    try:
        grayscale = ImageOps.grayscale(image)
        resized = grayscale.resize((grayscale.width * 2, grayscale.height * 2))
        contrast = ImageEnhance.Contrast(resized).enhance(1.8)
        threshold = contrast.point(lambda pixel: 255 if pixel > 165 else 0)
        candidates.extend([grayscale, contrast, threshold])
    except Exception:
        # Preprocessing is best-effort: fall back to the original image only.
        candidates = [image]

    recognized: list[str] = []
    for candidate in candidates:
        # Two Tesseract page-segmentation modes are tried per candidate.
        for config in ("--psm 6", "--psm 11"):
            try:
                text = pytesseract.image_to_string(
                    candidate, lang=settings.ocr_languages, config=config
                )
            except Exception:
                # Retry without the configured languages before giving up on
                # this candidate/config pair.
                try:
                    text = pytesseract.image_to_string(candidate, config=config)
                except Exception:
                    text = ""
            if text.strip():
                recognized.append(text)
    return "\n".join(recognized)
|
||||
|
||||
|
||||
class CompositeOCRProvider:
|
||||
|
||||
Reference in New Issue
Block a user