# mud/tests/test_import_books.py

"""Tests for bulk book import script."""

import tempfile
import tomllib
from pathlib import Path

import pytest
from scripts.import_books import (
    extract_alias_words,
    generate_aliases,
    generate_slug,
    parse_txt_file,
    txt_to_toml,
)


def test_parse_txt_file_with_title_and_content(tmp_path):
    """Parse a basic .txt file with title and content.

    The input is written under pytest's ``tmp_path`` directory so that no
    file is left behind in the system temp directory, and so the file is
    fully written and closed before ``parse_txt_file`` reads it.
    """
    source = tmp_path / "story.txt"
    source.write_text(
        "The Frog King\n\nOnce upon a time...\nThere was a princess.",
        encoding="utf-8",
    )

    title, text = parse_txt_file(source)

    assert title == "The Frog King"
    assert text == "Once upon a time...\nThere was a princess."


def test_parse_txt_file_empty_content(tmp_path):
    """Parse file with only title and blank line.

    The input is written under pytest's ``tmp_path`` directory so that no
    file is left behind in the system temp directory, and so the file is
    fully written and closed before ``parse_txt_file`` reads it.
    """
    source = tmp_path / "title_only.txt"
    source.write_text("Title Only\n\n", encoding="utf-8")

    title, text = parse_txt_file(source)

    assert title == "Title Only"
    assert text == ""


def test_parse_txt_file_no_blank_line(tmp_path):
    """Parse file where second line is not blank.

    ``parse_txt_file`` is expected to raise ``ValueError`` with a message
    matching "Expected blank line". The input is written under pytest's
    ``tmp_path`` directory so that no file is left behind in the system temp
    directory, and so the file is closed before it is parsed.
    """
    source = tmp_path / "no_blank_line.txt"
    source.write_text("Title\nImmediate content", encoding="utf-8")

    with pytest.raises(ValueError, match="Expected blank line"):
        parse_txt_file(source)


def test_generate_slug_from_filename():
    """Check the slug produced for each sample ``.txt`` filename."""
    # Maps input filename -> slug expected for the TOML output.
    expected_slugs = {
        "001_the_frog_king_or_iron_henry.txt": "001_the_frog_king_or_iron_henry",
        "002_cat_and_mouse.txt": "002_cat_and_mouse",
        "simple.txt": "simple",
    }
    for filename, slug in expected_slugs.items():
        assert generate_slug(filename) == slug


def test_extract_alias_words():
    """Check the alias words extracted from several sample titles."""
    # Each case pairs a title with the exact, ordered word list expected.
    cases = [
        ("The Frog King", ["frog", "king"]),
        ("Cat and Mouse in Partnership", ["cat", "mouse", "partnership"]),
        ("The Frog-King, or Iron Henry", ["frog-king", "iron", "henry"]),
        ("Our Lady's Child", ["lady's", "child"]),
    ]
    for title, expected_words in cases:
        assert extract_alias_words(title) == expected_words


def test_generate_aliases():
    """Check which aliases are generated for sample titles."""
    # Basic title: the joined phrase and each individual word are present.
    basic = generate_aliases("The Frog King")
    for alias in ("frog king", "frog", "king"):
        assert alias in basic

    # Punctuated title: full phrase plus individual words.
    punctuated = generate_aliases("The Frog-King, or Iron Henry")
    for alias in ("frog-king iron henry", "frog-king", "iron", "henry"):
        assert alias in punctuated

    # A single-word title yields exactly one alias, nothing extra.
    assert generate_aliases("Single") == ["single"]


def test_txt_to_toml_basic():
    """The generated TOML parses and carries the expected book fields."""
    # Round-trip through tomllib: a parse failure means the output is invalid.
    book = tomllib.loads(txt_to_toml("The Frog King", "Once upon a time..."))

    assert book["name"] == "The Frog King"
    assert book["description"] == "a leather-bound story book"
    assert book["portable"] is True
    assert "frog king" in book["aliases"]
    assert book["readable_text"] == "Once upon a time..."


def test_txt_to_toml_multiline_text():
    """Multi-line story text survives the TOML round trip unchanged."""
    story = "Line 1\nLine 2\nLine 3"

    parsed = tomllib.loads(txt_to_toml("Test Story", story))

    assert parsed["readable_text"] == "Line 1\nLine 2\nLine 3"


def test_txt_to_toml_empty_text():
    """Empty story text round-trips as an empty ``readable_text``."""
    parsed = tomllib.loads(txt_to_toml("Empty Story", ""))

    assert parsed["readable_text"] == ""


def test_full_pipeline_single_file(tmp_path):
    """Run ``import_books`` on one .txt file and inspect the resulting .toml."""
    from scripts.import_books import import_books

    # Lay out separate input and output directories under tmp_path.
    source_dir = tmp_path / "input"
    target_dir = tmp_path / "output"
    source_dir.mkdir()
    target_dir.mkdir()
    (source_dir / "001_the_frog_king.txt").write_text(
        "The Frog King\n\nOnce upon a time..."
    )

    import_books(source_dir, target_dir)

    # The output keeps the input's stem with a .toml extension.
    generated = target_dir / "001_the_frog_king.toml"
    assert generated.exists()

    with generated.open("rb") as handle:
        book = tomllib.load(handle)

    assert book["name"] == "The Frog King"
    assert book["readable_text"] == "Once upon a time..."


def test_full_pipeline_multiple_files(tmp_path):
    """Run ``import_books`` on three .txt files and expect three .toml files."""
    from scripts.import_books import import_books

    source_dir = tmp_path / "input"
    source_dir.mkdir()

    # Maps file stem -> raw .txt content (title, blank line, body).
    stories = {
        "001_story_one": "Story One\n\nText one",
        "002_story_two": "Story Two\n\nText two",
        "003_story_three": "Story Three\n\nText three",
    }
    for stem, raw in stories.items():
        (source_dir / f"{stem}.txt").write_text(raw)

    target_dir = tmp_path / "output"
    target_dir.mkdir()

    import_books(source_dir, target_dir)

    # Every input must have a matching output file.
    for stem in stories:
        assert (target_dir / f"{stem}.toml").exists()


def test_full_pipeline_skips_non_txt(tmp_path):
    """Only the .txt input should produce a .toml output."""
    from scripts.import_books import import_books

    source_dir = tmp_path / "input"
    source_dir.mkdir()

    # One real story plus two files with other extensions.
    mixed_inputs = (
        ("story.txt", "Title\n\nContent"),
        ("README.md", "# Not a story"),
        ("data.json", "{}"),
    )
    for filename, raw in mixed_inputs:
        (source_dir / filename).write_text(raw)

    target_dir = tmp_path / "output"
    target_dir.mkdir()

    import_books(source_dir, target_dir)

    assert (target_dir / "story.toml").exists()
    # Non-.txt inputs must not have produced output files.
    for skipped in ("README.toml", "data.toml"):
        assert not (target_dir / skipped).exists()