Begin
This commit is contained in:
45
main.py
Normal file
45
main.py
Normal file
@ -0,0 +1,45 @@
|
||||
from pathlib import Path
|
||||
import base64
|
||||
import requests
|
||||
|
||||
# Chat endpoint of the Ollama server on this machine; ocr_image POSTs to it.
OLLAMA_URL = "http://localhost:11434/api/chat"
# Model name sent in the "model" field of every request.
MODEL = "llama3.2-vision"
# File suffixes (lower-case, with leading dot) that process_folder treats as images.
IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}

# Instruction sent as the user message alongside each image.  The literal is
# written across several lines for readability; .strip() drops the leading
# and trailing newline so the request carries exactly the four sentences.
PROMPT = """
Extract all readable text from this image.
Return only the extracted text.
Do not describe the image.
If nothing is readable, return an empty string.
""".strip()
|
||||
|
||||
def img_to_b64(path: "str | Path") -> str:
    """Return the contents of the file at *path* as a base64 string.

    Args:
        path: File to encode, given as a ``Path`` or a plain path string.

    Returns:
        The standard base64 encoding of the file's raw bytes, as text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Coerce so string paths work too; an existing Path is accepted unchanged.
    raw = Path(path).read_bytes()
    # base64 output is pure ASCII, so this decode cannot fail.
    return base64.b64encode(raw).decode("ascii")
|
||||
|
||||
def ocr_image(path: Path) -> str:
    """Ask the Ollama chat endpoint to transcribe the text in one image.

    The image is base64-encoded via ``img_to_b64`` and attached to a single
    user message whose content is ``PROMPT``.  Streaming is disabled, so the
    reply is parsed as one JSON document.

    Args:
        path: Image file to send.

    Returns:
        The reply's ``message.content`` with surrounding whitespace removed,
        or an empty string when the reply has no textual content (key
        missing, ``null``, or not a string).

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection failure or timeout.
        OSError: If the image file cannot be read.
    """
    payload = {
        "model": MODEL,
        "stream": False,
        "messages": [
            {
                "role": "user",
                "content": PROMPT,
                "images": [img_to_b64(path)],
            },
        ],
    }
    # Long timeout (seconds) — presumably because a local vision model can
    # take a while to answer; confirm before lowering.
    response = requests.post(OLLAMA_URL, json=payload, timeout=300)
    response.raise_for_status()
    data = response.json()

    # Walk the reply defensively: a chained .get(..., {}) only covers a
    # *missing* key, so an explicit null for "message" or "content" would
    # otherwise raise AttributeError instead of yielding an empty result.
    message = data.get("message") if isinstance(data, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content.strip() if isinstance(content, str) else ""
|
||||
|
||||
def process_folder(folder: str) -> None:
    """OCR every image directly inside *folder* and save the text beside it.

    Only regular files whose suffix (case-insensitive) is in ``IMAGE_EXTS``
    are processed; subdirectories are not descended into.  For each image the
    extracted text is written as UTF-8 to a file with the same name but a
    ``.txt`` suffix, replacing any existing file of that name.  Images that
    share a stem (``a.jpg`` and ``a.png``) therefore share one output file;
    the one processed last wins.

    Args:
        folder: Path of the directory to scan.

    Raises:
        OSError: If *folder* cannot be listed or an output file cannot be
            written.  Errors raised by ``ocr_image`` propagate unchanged and
            stop the run at the failing image.
    """
    folder_path = Path(folder)
    # Snapshot and sort the listing before doing any work: the loop creates
    # new files in this same directory, and a sorted list gives a stable,
    # repeatable processing order regardless of the filesystem.
    for img in sorted(folder_path.iterdir()):
        if not (img.is_file() and img.suffix.lower() in IMAGE_EXTS):
            continue
        print(f"Processing: {img.name}")
        text = ocr_image(img)
        img.with_suffix(".txt").write_text(text, encoding="utf-8")
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # An optional first command-line argument names the folder to process;
    # with no argument the script keeps its original default, "kepek".
    process_folder(sys.argv[1] if len(sys.argv) > 1 else "kepek")
|
||||
Reference in New Issue
Block a user