Comment

Comment on What's the best free version of word?

nutsack@lemmy.dbzer0.com ⁨1⁩ ⁨month⁩ ago

I compose documents in Markdown and then use a Python script to produce a PDF:

#!/usr/bin/env python3
"""Convert Markdown files to PDF."""

import argparse
import sys
from pathlib import Path

try:
    import markdown
    from weasyprint import HTML, CSS
except ImportError:
    print("Missing dependencies. Install with:")
    print("  pip install markdown weasyprint")
    sys.exit(1)


# Stylesheet handed to WeasyPrint for every generated PDF: US Letter pages
# with narrow margins, 10pt monospace body text and compact spacing around
# headings, lists and paragraphs.
CSS_STYLES = """
@page {
    margin: 0.5in 0.6in;
    size: letter;
}
body {
    font-family: "Courier New", Courier, "Liberation Mono", monospace;
    font-size: 10pt;
    line-height: 1.4;
    color: #222;
    max-width: 100%;
}
h1, h2, h3 {
    margin-top: 1em;
    margin-bottom: 0.3em;
    padding-bottom: 0.2em;
}
h1 { font-size: 16pt; }
h2 { font-size: 13pt; }
h3 { font-size: 11pt; }
h4 { font-size: 10pt; font-weight: normal; margin-bottom: 0.5em;}
ul {
    margin: 0.3em 0;
    padding-left: 1.2em;
}
li {
    margin-bottom: 0.2em;
}
p {
    margin: 0.4em 0;
}
p + p {
    margin-top: 0.2em;
}
strong {
    font-weight: bold;
}
"""


# Marker that authors place in their Markdown source to request a new page.
PAGE_BREAK_MARKER = "<!-- pagebreak -->"
# Empty element whose inline style starts a new page before it.
PAGE_BREAK_HTML = '<div style="page-break-before: always;"></div>'


def process_page_breaks(html_content: str) -> str:
    """Return *html_content* with each page-break marker turned into HTML.

    Every occurrence of ``PAGE_BREAK_MARKER`` is substituted with
    ``PAGE_BREAK_HTML``; text without a marker is returned unchanged.
    """
    # Splitting on the marker and re-joining with the replacement swaps
    # every occurrence in a single pass.
    segments = html_content.split(PAGE_BREAK_MARKER)
    return PAGE_BREAK_HTML.join(segments)


def md_to_html(input_path: Path) -> str:
    """Render the Markdown file at *input_path* to an HTML fragment.

    The file is read as UTF-8, converted with ``markdown.markdown`` and then
    run through ``process_page_breaks`` so page-break markers take effect.
    """
    with input_path.open(encoding="utf-8") as handle:
        source_text = handle.read()
    rendered = markdown.markdown(source_text)
    return process_page_breaks(rendered)


def convert_md_to_pdf(input_paths: list[Path], output_path: Path) -> None:
    """Convert one or more Markdown files to a single PDF.

    Each input file is rendered to HTML and the results are concatenated,
    with a page break between consecutive files. The combined document is
    styled with ``CSS_STYLES`` and written to *output_path*.

    Relative URLs in the Markdown (for example image paths) are resolved
    against the location of the first input file.

    Args:
        input_paths: Markdown files, in the order they should appear.
        output_path: Destination of the generated PDF.
    """
    # Joining with the page-break element starts every file after the first
    # on a new page.
    body_html = PAGE_BREAK_HTML.join(md_to_html(path) for path in input_paths)

    full_html = f"""
    <!DOCTYPE html>
    <html>
    <head><meta charset="utf-8"></head>
    <body>{body_html}</body>
    </html>
    """

    # An HTML string has no location of its own, so without an explicit base
    # URL relative references such as <img src="figure.png"> cannot be
    # resolved. Anchor them to the first input file.
    base_url = input_paths[0].resolve().as_uri() if input_paths else None

    HTML(string=full_html, base_url=base_url).write_pdf(
        output_path, stylesheets=[CSS(string=CSS_STYLES)]
    )
    print(f"Created: {output_path}")


def main(argv: "list[str] | None" = None) -> None:
    """Command-line entry point: convert Markdown files to PDF.

    Without ``--merge`` each input file becomes its own PDF next to the
    source (or at ``--output`` when there is exactly one input). With
    ``--merge`` all inputs are combined into the single PDF named by
    ``--output``. When no files are given, every ``*.md`` file in the
    current directory is used.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Exits with status 1 on usage errors, when no input files are found, or
    when any input file does not exist.
    """
    parser = argparse.ArgumentParser(description="Convert Markdown files to PDF")
    parser.add_argument("files", nargs="*", type=Path, help="Markdown files to convert")
    parser.add_argument("-o", "--output", type=Path, help="Output PDF path")
    parser.add_argument("-m", "--merge", action="store_true", help="Merge all input files into a single PDF")
    args = parser.parse_args(argv)

    # Default to all .md files in the current directory. glob() yields them
    # in arbitrary order, so sort to keep the merged page order stable.
    files = args.files or sorted(Path(".").glob("*.md"))

    if not files:
        print("No Markdown files found", file=sys.stderr)
        sys.exit(1)

    if args.merge:
        if not args.output:
            print("Error: --output is required when using --merge", file=sys.stderr)
            sys.exit(1)
        # A merged document with a missing part would be silently incomplete,
        # so refuse to build it and report every missing file at once.
        missing = [md_file for md_file in files if not md_file.exists()]
        if missing:
            for md_file in missing:
                print(f"File not found: {md_file}", file=sys.stderr)
            sys.exit(1)
        convert_md_to_pdf(files, args.output)
        return

    if args.output and len(files) > 1:
        print(
            "Error: --output can only be used with a single input file (or use --merge)",
            file=sys.stderr,
        )
        sys.exit(1)

    any_missing = False
    for md_file in files:
        if not md_file.exists():
            # Keep converting the remaining files, but remember the failure
            # so the exit status reflects it.
            print(f"File not found: {md_file}", file=sys.stderr)
            any_missing = True
            continue
        output_path = args.output or md_file.with_suffix(".pdf")
        convert_md_to_pdf([md_file], output_path)

    if any_missing:
        sys.exit(1)


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

source

Sort: hot new top

mr_pip@discuss.tchncs.de ⁨1⁩ ⁨month⁩ ago
why this instead of pandoc?

source
- nutsack@lemmy.dbzer0.com ⁨1⁩ ⁨month⁩ ago
  I could not get pandoc to work on my system. After a few attempts going in circles with dependency packages, I gave up.
  
  source