# mud/scripts/import_books.py

#!/usr/bin/env python3
"""Import .txt story files to TOML thing templates for readable books."""

from pathlib import Path


def parse_txt_file(path: Path) -> tuple[str, str]:
    """
    Parse a .txt file with title on line 1, blank line, then content.

    The file is always decoded as UTF-8 so the result does not depend on
    the platform's locale encoding. The separator line only has to be
    visually blank: a line holding nothing but whitespace is accepted.

    Args:
        path: Location of the story file.

    Returns:
        (title, text) tuple; text is "" when nothing follows the
        separator line.

    Raises:
        ValueError: If the file has fewer than two lines or line 2 is
            not blank. A file that is not valid UTF-8 raises
            UnicodeDecodeError, which is a ValueError subclass.
    """
    lines = path.read_text(encoding="utf-8").splitlines()

    if len(lines) < 2:
        raise ValueError(f"File too short: {path}")

    title, separator, *body = lines

    # Trailing spaces or tabs on the separator are invisible in an
    # editor, so a whitespace-only line counts as blank.
    if separator.strip():
        raise ValueError(f"Expected blank line after title in {path}")

    # Joining an empty body yields "", matching a file with no content.
    return title, "\n".join(body)


def generate_slug(filename: str) -> str:
    """Derive a slug from a filename by dropping one trailing ".txt"."""
    suffix = ".txt"
    if filename.endswith(suffix):
        return filename[: -len(suffix)]
    # Names without the extension are passed through untouched.
    return filename


def extract_alias_words(title: str) -> list[str]:
    """Return the lowercased title words that are not stopwords.

    Commas are deleted before the title is split on whitespace. Hyphens,
    apostrophes and any other punctuation stay attached to their word.
    """
    # Articles, conjunctions and prepositions that make poor aliases.
    ignored = frozenset(
        ("the", "a", "an", "in", "on", "or", "and", "of", "to", "our")
    )

    kept = []
    for token in title.replace(",", "").lower().split():
        if token in ignored:
            continue
        kept.append(token)
    return kept


def generate_aliases(title: str) -> list[str]:
    """Build the alias list for a book title.

    The list starts with the stopword-free title as one phrase, followed
    by each remaining word on its own. Duplicates are dropped, keeping
    the first occurrence. A title with no meaningful words yields [].
    """
    keywords = extract_alias_words(title)
    phrase = " ".join(keywords)

    # The phrase is empty only when there are no keywords at all.
    candidates = [phrase, *keywords] if phrase else list(keywords)

    # dict keys keep insertion order, giving an order-preserving dedupe.
    return list(dict.fromkeys(candidates))


def _escape_toml_basic(value: str) -> str:
    """Escape *value* for use inside a single-line TOML basic string."""
    replacements = (
        ("\\", "\\\\"),  # must come first so later escapes are not doubled
        ('"', '\\"'),
        ("\n", "\\n"),
        ("\r", "\\r"),
        ("\t", "\\t"),
    )
    for raw, escaped in replacements:
        value = value.replace(raw, escaped)
    return value


def txt_to_toml(title: str, text: str) -> str:
    """
    Generate TOML string for a thing template.

    The title and every alias are escaped as single-line basic strings,
    so quotes or backslashes in a title cannot break the document. The
    story text goes into a multi-line basic string with backslashes and
    triple quotes escaped, so it reads back unchanged.

    Args:
        title: Book title (becomes name field)
        text: Story content (becomes readable_text field)

    Returns:
        TOML-formatted string
    """
    aliases = generate_aliases(title)

    # Build aliases list for TOML
    aliases_str = ", ".join(f'"{_escape_toml_basic(a)}"' for a in aliases)

    escaped_title = _escape_toml_basic(title)

    # Backslashes are escaped first: otherwise they would be read as
    # TOML escape sequences, and a trailing one would swallow the
    # closing delimiter. Then break up any triple quotes in the text.
    escaped_text = text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')

    # The newline right after the opening delimiter is trimmed by TOML
    # parsers, so the text itself starts on the following line.
    return (
        f'name = "{escaped_title}"\n'
        'description = "a leather-bound story book"\n'
        "portable = true\n"
        f"aliases = [{aliases_str}]\n"
        'readable_text = """\n'
        f'{escaped_text}"""\n'
    )


def import_books(input_dir: Path, output_dir: Path) -> dict[str, str]:
    """
    Import all .txt files from input_dir to .toml files in output_dir.

    Files are processed in sorted name order. The output directory is
    created if missing, and an existing .toml with the same slug is
    overwritten.

    Args:
        input_dir: Directory scanned (non-recursively) for *.txt files.
        output_dir: Directory that receives one <slug>.toml per book.

    Returns:
        Dict mapping slug -> title for all imported books

    Raises:
        ValueError: Propagated from parse_txt_file for a malformed
            story file. Books written before the failure stay on disk.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    output_dir.mkdir(parents=True, exist_ok=True)

    imported: dict[str, str] = {}

    for txt_path in sorted(input_dir.glob("*.txt")):
        slug = generate_slug(txt_path.name)
        toml_path = output_dir / f"{slug}.toml"

        title, text = parse_txt_file(txt_path)
        toml_content = txt_to_toml(title, text)

        # TOML documents must be UTF-8; never fall back to the locale
        # encoding, which varies by platform.
        toml_path.write_text(toml_content, encoding="utf-8")
        imported[slug] = title

    return imported


def main():
    """Command-line entry point: import_books.py INPUT_DIR OUTPUT_DIR.

    Exits with status 1 when the argument count is wrong or INPUT_DIR is
    not a directory; otherwise runs the import and prints a summary.
    """
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: import_books.py INPUT_DIR OUTPUT_DIR")
        sys.exit(1)

    input_dir, output_dir = (Path(arg) for arg in cli_args)

    if not input_dir.is_dir():
        print(f"Error: {input_dir} is not a directory")
        sys.exit(1)

    print(f"Importing books from {input_dir} to {output_dir}...")
    imported = import_books(input_dir, output_dir)

    # One summary line per generated template.
    print(f"\nImported {len(imported)} books:")
    for book_slug, book_title in imported.items():
        print(f"  {book_slug}.toml <- {book_title}")


# Run the importer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()