This commit is contained in:
2026-05-16 14:21:57 +02:00
parent 161246e7c0
commit 27783e7efb

45
main.py Normal file
View File

@@ -0,0 +1,45 @@
from pathlib import Path
import base64
import requests
# URL that OCR requests are POSTed to.
OLLAMA_URL = "http://localhost:11434/api/chat"
# Model name placed in every request payload.
MODEL = "llama3.2-vision"
# File suffixes (compared lower-cased) that mark a file as an image.
IMAGE_EXTS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".webp",
    ".bmp",
    ".gif",
}
# Instruction text sent alongside each image. Written as adjacent literals so
# the value carries no leading or trailing whitespace.
PROMPT = (
    "Extract all readable text from this image.\n"
    "Return only the extracted text.\n"
    "Do not describe the image.\n"
    "If nothing is readable, return an empty string."
)
def img_to_b64(path: Path) -> str:
    """Read the file at *path* and return its bytes as base64 text."""
    raw_bytes = path.read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def ocr_image(path: Path) -> str:
    """Send one image to the Ollama chat endpoint and return the text it reports.

    The image is base64-encoded and posted together with ``PROMPT`` as a
    single, non-streaming user message.

    Args:
        path: Location of the image file to read.

    Returns:
        The reply's ``message.content`` with surrounding whitespace removed,
        or an empty string when the reply carries no textual content.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For other request failures, such as a
            connection error or the 300-second timeout expiring.
    """
    payload = {
        "model": MODEL,
        "stream": False,
        "messages": [
            {
                "role": "user",
                "content": PROMPT,
                "images": [img_to_b64(path)],
            }
        ],
    }
    r = requests.post(OLLAMA_URL, json=payload, timeout=300)
    r.raise_for_status()
    data = r.json()

    # A JSON ``null`` decodes to ``None``, which bypasses ``dict.get``
    # defaults, so every level is type-checked instead of chaining ``.get``.
    message = data.get("message") if isinstance(data, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content.strip() if isinstance(content, str) else ""
def process_folder(folder: str) -> None:
    """OCR every image directly inside *folder* and save the text beside it.

    Only regular files whose lower-cased suffix is in ``IMAGE_EXTS`` are
    processed; subdirectories are not descended into. For each image the
    recognised text is written as UTF-8 to a file with the same name but a
    ``.txt`` suffix, replacing any existing file of that name.

    Args:
        folder: Path of the directory to scan.

    Raises:
        OSError: If the directory cannot be listed, or a file cannot be read
            or written.

    Any exception raised by ``ocr_image`` propagates and stops the run.
    """
    folder_path = Path(folder)
    # ``iterdir`` yields entries in arbitrary order; sorting makes each run
    # reproducible. It also fixes which image wins when several share a stem
    # (``a.jpg`` and ``a.png`` both map to ``a.txt``): the last one in sorted
    # order.
    for img in sorted(folder_path.iterdir()):
        if not img.is_file() or img.suffix.lower() not in IMAGE_EXTS:
            continue
        print(f"Processing: {img.name}")
        text = ocr_image(img)
        img.with_suffix(".txt").write_text(text, encoding="utf-8")
# Script entry point: when this file is run directly (not imported), OCR the
# images in the "kepek" directory. The path is relative, so it is resolved
# against the current working directory.
if __name__ == "__main__":
    process_folder("kepek")