In [None]:
import fitz  # PyMuPDF
import pandas as pd
import re

In [None]:
def extract_tables_from_pdf(pdf_path):
    """
    Extract the text blocks of a PDF as lists of (text, highlighted) rows.

    Every text block on every page is treated as one "table". Each line of a
    block becomes one row whose text is the line's span texts joined by single
    spaces and stripped of surrounding whitespace.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        list: One entry per non-empty text block, in page order. Each entry is
        a list of ``(row_text, is_highlighted)`` tuples, one tuple per line.
    """
    tables = []

    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            for block in page.get_text("dict")["blocks"]:
                # Blocks without a 'lines' entry (or with no lines) yield no rows.
                lines = block.get("lines")
                if not lines:
                    continue

                # NOTE(review): 'bgcolor' does not appear among the span keys
                # PyMuPDF documents for get_text("dict"); .get() avoids a
                # KeyError on spans lacking it and treats them as not
                # highlighted. Confirm how highlighting is encoded in the
                # target PDFs.
                table_data = [
                    (
                        " ".join(span["text"] for span in line["spans"]).strip(),
                        any(
                            span.get("bgcolor", 0) != 0
                            for span in line["spans"]
                        ),
                    )
                    for line in lines
                ]
                tables.append(table_data)

    return tables

In [None]:
def generate_quiz_from_tables(tables):
    """
    Build a multiple-choice quiz from extracted table rows.

    The first row of each table supplies the question text; every following
    row is an answer choice. Highlighted choices are flagged as correct and
    placed at the front of the choice list, the rest are appended in order.
    A question text that occurs again replaces the earlier entry.

    Args:
        tables (list): Tables, each a sequence of
            ``(row_text, is_highlighted)`` pairs.

    Returns:
        dict: Maps each question text to a list of
        ``{"choice": str, "correct": bool}`` dictionaries.
    """
    quiz = {}

    for table in tables:
        rows = iter(table)
        header = next(rows, None)
        if header is None:
            # Empty table: nothing to turn into a question.
            continue

        # The highlight flag of the question row itself is ignored.
        question, _ = header
        options = []
        quiz[question] = options

        for text, highlighted in rows:
            if not highlighted:
                options.append({"choice": text, "correct": False})
                continue
            # Correct answers go to the front of the list.
            options.insert(0, {"choice": text, "correct": True})

    return quiz

In [None]:
def main(pdf_path="pelda_2.pdf", output_json="quiz.json"):
    """
    Convert a PDF into a quiz and save it as a JSON file.

    Args:
        pdf_path (str): Path of the PDF to read. Defaults to "pelda_2.pdf".
        output_json (str): Path of the JSON file to write. Defaults to
            "quiz.json".
    """
    # Imported locally because json is not among the file-level imports.
    import json

    print("Extracting tables from the PDF...")
    tables = extract_tables_from_pdf(pdf_path)

    print("Generating the quiz...")
    quiz = generate_quiz_from_tables(tables)

    print("Saving the quiz to JSON file...")
    with open(output_json, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII characters readable in the output.
        json.dump(quiz, f, ensure_ascii=False, indent=4)

    print(f"Quiz saved to {output_json}")

# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
