"""Tests for bulk book import script.""" import tempfile import tomllib from pathlib import Path import pytest from scripts.import_books import ( extract_alias_words, generate_aliases, generate_slug, parse_txt_file, txt_to_toml, ) def test_parse_txt_file_with_title_and_content(): """Parse a basic .txt file with title and content.""" content = "The Frog King\n\nOnce upon a time...\nThere was a princess." with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: f.write(content) f.flush() title, text = parse_txt_file(Path(f.name)) assert title == "The Frog King" assert text == "Once upon a time...\nThere was a princess." def test_parse_txt_file_empty_content(): """Parse file with only title and blank line.""" content = "Title Only\n\n" with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: f.write(content) f.flush() title, text = parse_txt_file(Path(f.name)) assert title == "Title Only" assert text == "" def test_parse_txt_file_no_blank_line(): """Parse file where second line is not blank.""" content = "Title\nImmediate content" with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: f.write(content) f.flush() with pytest.raises(ValueError, match="Expected blank line"): parse_txt_file(Path(f.name)) def test_generate_slug_from_filename(): """Convert filename to slug for TOML output.""" result = generate_slug("001_the_frog_king_or_iron_henry.txt") assert result == "001_the_frog_king_or_iron_henry" assert generate_slug("002_cat_and_mouse.txt") == "002_cat_and_mouse" assert generate_slug("simple.txt") == "simple" def test_extract_alias_words(): """Extract meaningful words from title for aliases.""" assert extract_alias_words("The Frog King") == ["frog", "king"] result = extract_alias_words("Cat and Mouse in Partnership") assert result == ["cat", "mouse", "partnership"] result = extract_alias_words("The Frog-King, or Iron Henry") assert result == ["frog-king", "iron", "henry"] assert extract_alias_words("Our 
Lady's Child") == ["lady's", "child"] def test_generate_aliases(): """Generate aliases from title.""" # Basic title aliases = generate_aliases("The Frog King") assert "frog king" in aliases assert "frog" in aliases assert "king" in aliases # With punctuation - gets full phrase plus individual words aliases = generate_aliases("The Frog-King, or Iron Henry") assert "frog-king iron henry" in aliases assert "frog-king" in aliases assert "iron" in aliases assert "henry" in aliases # Single word title should not generate meaningless aliases aliases = generate_aliases("Single") assert aliases == ["single"] def test_txt_to_toml_basic(): """Generate valid TOML from title and text.""" title = "The Frog King" text = "Once upon a time..." toml_str = txt_to_toml(title, text) # Parse the generated TOML to verify it's valid data = tomllib.loads(toml_str) assert data["name"] == "The Frog King" assert data["description"] == "a leather-bound story book" assert data["portable"] is True assert "frog king" in data["aliases"] assert data["readable_text"] == "Once upon a time..." 
def test_txt_to_toml_multiline_text():
    """Multi-line readable_text survives a round trip through TOML."""
    rendered = txt_to_toml("Test Story", "Line 1\nLine 2\nLine 3")

    parsed = tomllib.loads(rendered)

    assert parsed["readable_text"] == "Line 1\nLine 2\nLine 3"


def test_txt_to_toml_empty_text():
    """An empty body is emitted as an empty readable_text value."""
    rendered = txt_to_toml("Empty Story", "")

    parsed = tomllib.loads(rendered)

    assert parsed["readable_text"] == ""


def test_full_pipeline_single_file(tmp_path):
    """Run import_books on one .txt file and check the .toml it writes."""
    from scripts.import_books import import_books

    # Lay out separate input and output directories under the pytest sandbox.
    source_dir = tmp_path / "input"
    target_dir = tmp_path / "output"
    source_dir.mkdir()
    target_dir.mkdir()

    story_path = source_dir / "001_the_frog_king.txt"
    story_path.write_text("The Frog King\n\nOnce upon a time...")

    import_books(source_dir, target_dir)

    # The output keeps the input's stem and swaps the extension.
    generated = target_dir / "001_the_frog_king.toml"
    assert generated.exists()

    with generated.open("rb") as handle:
        parsed = tomllib.load(handle)

    assert parsed["name"] == "The Frog King"
    assert parsed["readable_text"] == "Once upon a time..."
def test_full_pipeline_multiple_files(tmp_path):
    """Every .txt file in the input directory yields a matching .toml file."""
    from scripts.import_books import import_books

    source_dir = tmp_path / "input"
    source_dir.mkdir()

    # (filename, file contents) for each story placed in the input directory.
    stories = (
        ("001_story_one.txt", "Story One\n\nText one"),
        ("002_story_two.txt", "Story Two\n\nText two"),
        ("003_story_three.txt", "Story Three\n\nText three"),
    )
    for filename, body in stories:
        (source_dir / filename).write_text(body)

    target_dir = tmp_path / "output"
    target_dir.mkdir()

    import_books(source_dir, target_dir)

    expected_outputs = (
        "001_story_one.toml",
        "002_story_two.toml",
        "003_story_three.toml",
    )
    for filename in expected_outputs:
        assert (target_dir / filename).exists()


def test_full_pipeline_skips_non_txt(tmp_path):
    """Files without a .txt extension produce no output."""
    from scripts.import_books import import_books

    source_dir = tmp_path / "input"
    source_dir.mkdir()

    # One real story plus two files the importer is expected to ignore.
    fixtures = (
        ("story.txt", "Title\n\nContent"),
        ("README.md", "# Not a story"),
        ("data.json", "{}"),
    )
    for filename, body in fixtures:
        (source_dir / filename).write_text(body)

    target_dir = tmp_path / "output"
    target_dir.mkdir()

    import_books(source_dir, target_dir)

    assert (target_dir / "story.toml").exists()
    for ignored in ("README.toml", "data.toml"):
        assert not (target_dir / ignored).exists()