mud/scripts/import_books.py

152 lines
3.7 KiB
Python
Executable file

#!/usr/bin/env python3
"""Import .txt story files to TOML thing templates for readable books."""
from pathlib import Path
def parse_txt_file(path: Path) -> tuple[str, str]:
    """Parse a story file laid out as: title line, blank line, then content.

    The file is always decoded as UTF-8 so the result does not depend on the
    platform's default locale encoding.

    Args:
        path: Location of the .txt file to read.

    Returns:
        A (title, text) tuple. ``text`` is every line after the blank
        separator joined with newlines, or "" when nothing follows it.

    Raises:
        ValueError: If the file has fewer than two lines, or if the second
            line is not empty. (A file that is not valid UTF-8 raises
            UnicodeDecodeError, which is itself a ValueError subclass.)
    """
    lines = path.read_text(encoding="utf-8").splitlines()
    if len(lines) < 2:
        raise ValueError(f"File too short: {path}")

    # At least two lines are guaranteed here, so the unpacking cannot fail.
    title, separator, *body = lines
    if separator != "":
        raise ValueError(f"Expected blank line after title in {path}")

    # Joining an empty body yields "", covering the "title + blank only" case.
    return title, "\n".join(body)
def generate_slug(filename: str) -> str:
    """Return the book slug: ``filename`` with one trailing ".txt" stripped.

    Names that do not end in ".txt" are returned unchanged.
    """
    extension = ".txt"
    if filename.endswith(extension):
        return filename[: -len(extension)]
    return filename
def extract_alias_words(title: str) -> list[str]:
    """Return the lowercased words of ``title`` that are not stopwords.

    Commas are removed before splitting on whitespace; hyphens and
    apostrophes are left in place, so "well-known" and "knight's" each stay
    a single word. Word order (and any repeats) is preserved.
    """
    # Articles, conjunctions and prepositions that make poor aliases.
    ignored = {"the", "a", "an", "in", "on", "or", "and", "of", "to", "our"}

    meaningful = []
    for word in title.lower().replace(",", "").split():
        if word in ignored:
            continue
        meaningful.append(word)
    return meaningful
def generate_aliases(title: str) -> list[str]:
    """Build the alias list for a book from its title.

    The first alias is the title's meaningful words joined by spaces
    (omitted when there are none), followed by each meaningful word on its
    own. Duplicates are dropped, keeping the first occurrence.
    """
    keywords = extract_alias_words(title)
    phrase = " ".join(keywords)

    candidates = [phrase] if phrase else []
    candidates += keywords

    # dict keys keep insertion order, so this de-duplicates while preserving
    # the position of each alias's first appearance.
    return list(dict.fromkeys(candidates))
def _escape_toml_basic(value: str) -> str:
    """Escape backslashes and double quotes for a one-line TOML basic string."""
    # Backslashes must be doubled first so the ones added for quotes survive.
    return value.replace("\\", "\\\\").replace('"', '\\"')


def txt_to_toml(title: str, text: str) -> str:
    """Generate the TOML source for a readable-book thing template.

    Args:
        title: Book title; written to the ``name`` field and used to derive
            the ``aliases`` list.
        text: Story content; written to the ``readable_text`` field as a
            multi-line basic string.

    Returns:
        TOML-formatted string ending with a newline.
    """
    aliases_str = ", ".join(
        f'"{_escape_toml_basic(alias)}"' for alias in generate_aliases(title)
    )

    # In a multi-line basic string a backslash starts an escape sequence, so
    # literal backslashes are doubled before anything else. Runs of three
    # quotes would terminate the string early and are escaped afterwards;
    # lone quotes and newlines are legal as-is.
    escaped_text = text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')

    return (
        f'name = "{_escape_toml_basic(title)}"\n'
        'description = "a leather-bound story book"\n'
        "portable = true\n"
        f"aliases = [{aliases_str}]\n"
        # The newline right after the opening delimiter is trimmed by TOML.
        'readable_text = """\n'
        f'{escaped_text}"""\n'
    )
def import_books(input_dir: Path, output_dir: Path) -> dict[str, str]:
    """Convert every .txt file in ``input_dir`` to a .toml file in ``output_dir``.

    ``output_dir`` is created (including parents) when it does not exist.
    Files are processed in sorted path order, and each output file is named
    after the input's slug. Output is always written as UTF-8, which is the
    encoding TOML documents are required to use.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.txt`` files.
        output_dir: Directory that receives the generated ``.toml`` files.

    Returns:
        Dict mapping slug -> title for all imported books.

    Raises:
        ValueError: Propagated from parse_txt_file for a malformed input
            file; files converted before the failure remain on disk.
    """
    # Accept plain strings as well as Path objects.
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    imported: dict[str, str] = {}
    for txt_path in sorted(input_dir.glob("*.txt")):
        slug = generate_slug(txt_path.name)
        title, text = parse_txt_file(txt_path)
        toml_path = output_dir / f"{slug}.toml"
        # Explicit encoding: the locale default may not be UTF-8.
        toml_path.write_text(txt_to_toml(title, text), encoding="utf-8")
        imported[slug] = title
    return imported
def main():
    """Command-line entry point: import_books.py INPUT_DIR OUTPUT_DIR.

    Prints a usage or error message and exits with status 1 when the
    argument count is wrong or INPUT_DIR is not a directory; otherwise runs
    the import and lists every generated file with its title.
    """
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: import_books.py INPUT_DIR OUTPUT_DIR")
        sys.exit(1)

    input_dir, output_dir = map(Path, cli_args)
    if not input_dir.is_dir():
        print(f"Error: {input_dir} is not a directory")
        sys.exit(1)

    print(f"Importing books from {input_dir} to {output_dir}...")
    imported = import_books(input_dir, output_dir)

    print(f"\nImported {len(imported)} books:")
    for slug, title in imported.items():
        print(f" {slug}.toml <- {title}")


if __name__ == "__main__":
    main()