"""Batch OCR: extract text from every image in a folder with a vision model.

Each supported image is base64-encoded and sent to the chat endpoint at
``OLLAMA_URL``; the text returned by the model is written next to the image
in a ``.txt`` file that has the same stem.
"""

from pathlib import Path
import base64

import requests

OLLAMA_URL = "http://localhost:11434/api/chat"
MODEL = "llama3.2-vision"
IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}

# Upper bound, in seconds, for a single OCR request.
REQUEST_TIMEOUT_S = 300

PROMPT = """
Extract all readable text from this image.
Return only the extracted text.
Do not describe the image.
If nothing is readable, return an empty string.
""".strip()


def img_to_b64(path: Path) -> str:
    """Return the raw bytes of the file at *path* as a base64 ASCII string."""
    return base64.b64encode(path.read_bytes()).decode("utf-8")


def ocr_image(path: Path) -> str:
    """Ask the model for the readable text in one image.

    Args:
        path: Image file to send.

    Returns:
        The message content from the response with surrounding whitespace
        stripped, or ``""`` when the response carries no textual content.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an error status (via ``raise_for_status``).
    """
    payload = {
        "model": MODEL,
        "stream": False,
        "messages": [
            {
                "role": "user",
                "content": PROMPT,
                "images": [img_to_b64(path)],
            },
        ],
    }
    response = requests.post(OLLAMA_URL, json=payload, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    data = response.json()

    # ``dict.get`` defaults only cover *missing* keys; a JSON ``null`` for
    # "message" or "content" would otherwise raise AttributeError, so each
    # level is type-checked before it is used.
    message = data.get("message") if isinstance(data, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content.strip() if isinstance(content, str) else ""


def process_folder(folder: str) -> None:
    """Run OCR on every supported image directly inside *folder*.

    For each regular file whose lower-cased suffix is in ``IMAGE_EXTS``, the
    extracted text is written as UTF-8 to a sibling file with the suffix
    replaced by ``.txt``. Sub-directories are not descended into.

    Args:
        folder: Path of the directory to scan.

    Raises:
        OSError: If *folder* cannot be listed or an output file cannot be
            written.
        requests.RequestException: Propagated from ``ocr_image``; processing
            stops at the first failing image.
    """
    folder_path = Path(folder)

    # Snapshot and sort the listing up front: output files are created in the
    # same directory, and iterating a directory while it is modified has
    # unspecified results. Sorting also makes the processing order (and which
    # image wins when two share a stem, e.g. a.jpg / a.png) deterministic.
    for img in sorted(folder_path.iterdir()):
        if not img.is_file() or img.suffix.lower() not in IMAGE_EXTS:
            continue
        print(f"Processing: {img.name}")
        text = ocr_image(img)
        img.with_suffix(".txt").write_text(text, encoding="utf-8")


if __name__ == "__main__":
    process_folder("kepek")