Code

PDF to Images

Converts each page of a PDF to a PNG image

Source Code

#!/usr/bin/env python3
"""Converts each page of a PDF to a PNG image."""

# Descriptor for this script: identity, required packages, and the two
# positional command-line arguments it accepts.
metadata = {
    "id": "code:document.pdf.to_images",
    "name": "PDF to Images",
    "description": "Converts each page of a PDF to a PNG image",
    "language": "python",
    "packages": [
        "pdf2image",
    ],
    "args": [
        {
            "name": "pdf_path",
            "type": "string",
            "description": "Path to PDF file",
            "position": 0,
        },
        {
            "name": "output_dir",
            "type": "string",
            "description": "Output directory for PNG images",
            "position": 1,
        },
    ],
}

import os
import sys

from pdf2image import convert_from_path


def convert(pdf_path, output_dir, max_dim=1000):
    """Render every page of a PDF to a PNG file inside ``output_dir``.

    Pages are rasterized at 200 DPI. Any page whose width or height exceeds
    ``max_dim`` pixels is scaled down, preserving aspect ratio, so that both
    sides fit within ``max_dim``. Files are named ``page_<n>.png`` with ``n``
    starting at 1. One progress line is printed per page, followed by a
    summary line with the page count.

    Args:
        pdf_path: Path to the input PDF file.
        output_dir: Directory that receives the PNG files; it is created
            (including parents) if it does not already exist.
        max_dim: Maximum width and height, in pixels, of each saved image.
    """
    # Saving into a missing directory fails, so make sure it exists before
    # spending time on rendering.
    os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path, dpi=200)

    for i, image in enumerate(images):
        # Scale image if needed to keep width/height under `max_dim`
        width, height = image.size
        if width > max_dim or height > max_dim:
            scale_factor = min(max_dim / width, max_dim / height)
            # int() truncates, so a very elongated page could end up with a
            # 0-pixel side; clamp to 1 to keep the image non-empty.
            new_width = max(1, int(width * scale_factor))
            new_height = max(1, int(height * scale_factor))
            image = image.resize((new_width, new_height))

        image_path = os.path.join(output_dir, f"page_{i+1}.png")
        image.save(image_path)
        print(f"Saved page {i+1} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")


if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name:
    # the input PDF and the output directory.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)
    source_pdf, target_dir = cli_args
    convert(source_pdf, target_dir)