From 8992b0cc7b998edfd77a5e2c0c4b60eff35fffa4 Mon Sep 17 00:00:00 2001 From: Jared Miller Date: Wed, 28 Jan 2026 12:38:58 -0500 Subject: [PATCH] Add phase 2 design document --- docs/phase2-design.txt | 645 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 645 insertions(+) create mode 100644 docs/phase2-design.txt diff --git a/docs/phase2-design.txt b/docs/phase2-design.txt new file mode 100644 index 0000000..20b00fc --- /dev/null +++ b/docs/phase2-design.txt @@ -0,0 +1,645 @@ +PHASE 2 DESIGN: RICH PROMPTS AND OUTPUT ENHANCEMENTS +====================================================== + +Overview +-------- + +Phase 1 delivered basic remote control: Claude Code session wrapping, SSE streaming +to mobile dashboard, binary prompt approval (y/n only). Phase 2 addresses the most +critical gaps between claude-remote and the crabigator reference implementation: + +1. Rich prompt system - multi-option choices, text input, tab instructions +2. ANSI color parsing in output +3. Session statistics extraction and display +4. Git status widget +5. UI polish - style controls, filtering, auto-scroll + +This phase transforms claude-remote from a proof-of-concept into a genuinely useful +remote approval tool. Priority is on prompt system (the blocker for real usage) +followed by output quality and UX improvements. + + +PROMPT SYSTEM REDESIGN +====================== + +The Current Problem +------------------- + +Claude Code presents three types of interactive prompts: + +1. Permission prompts - tool access requests with options like: + - Yes (allow once) + - Yes, and allow all future requests for this tool + - No (deny) + - Tab to add additional instructions + +2. Question prompts - AskUserQuestion with multiple choice: + - Numbered options (1, 2, 3...) + - Optional "Other" text input + - Sometimes multiple questions in sequence + +3. 
Plan mode exit prompts - after plan generation: + - Continue with plan + - Modify plan + - Cancel + +Current implementation only sends "y\n" or "n\n" - unusable for real workflows. + + +Message Types Claude Code Emits +-------------------------------- + +Based on crabigator's events.rs, Claude Code hooks provide structured prompt data: + +Permission prompt: +{ + "type": "prompt", + "prompt": { + "prompt_type": "permission", + "tool_name": "Write", + "tool_input": { "file_path": "/path/to/file.txt", "content": "..." }, + "options": [ + { "label": "Yes", "value": "1", "description": "Allow once" }, + { "label": "Yes, always", "value": "2", "description": "Allow all future" }, + { "label": "No", "value": "3", "description": "Deny" } + ], + "allows_tab_instructions": true, + "selected_option": 1 + } +} + +Question prompt: +{ + "type": "prompt", + "prompt": { + "prompt_type": "question", + "questions": [ + { + "question": "Which approach should I take?", + "header": "Implementation choice", + "options": [ + { "label": "Use SQLite", "value": "1" }, + { "label": "Use JSON files", "value": "2" } + ], + "multi_select": false, + "allows_other": true + } + ] + } +} + +Exit plan prompt: +{ + "type": "prompt", + "prompt": { + "prompt_type": "exit_plan", + "options": [ + { "label": "Continue with plan", "value": "1" }, + { "label": "Modify plan", "value": "2" }, + { "label": "Cancel", "value": "3" } + ] + } +} + +Prompt cleared: +{ + "type": "prompt", + "prompt": null +} + + +UI for Each Prompt Type +------------------------ + +Permission prompts: +- Show tool name and relevant input (file path, command, etc.) 
+- Radio button list of options (Yes / Yes always / No) +- Show which option is currently selected (from selected_option field) +- If allows_tab_instructions is true, show "Add instructions" button +- Instructions UI: textarea + submit button, sends key sequence: + - Navigate to correct option if needed (up/down arrow keys) + - Press Tab + - Type instruction text + - Press Enter + +Question prompts: +- Show question header if present +- Show question text +- Radio button list for single-select, checkboxes for multi-select +- If allows_other is true, show "Other" option with text input +- Submit button sends option number(s) or text input + +Exit plan prompts: +- Show plan context (last few lines of output mentioning the plan) +- Radio button list of options +- Submit button sends option number + +General prompt UI patterns: +- Large touch targets for mobile (min 44px height) +- Clear visual distinction from regular output +- Show prompt at top of session card (sticky while scrolling output) +- Disable interaction if desktop disconnected +- Show loading state after submission +- Clear prompt UI when prompt clears (state change from permission/question to ready) + + +Response Protocol Changes +-------------------------- + +Current: POST /api/sessions/:id/approve with { approved: boolean } +Sends "y\n" or "n\n" to PTY + +New: POST /api/sessions/:id/answer with { response: AnswerResponse } + +Where AnswerResponse is one of: + +Simple option selection: +{ + "type": "option", + "value": "2" // option number as string +} + +Text input (for "Other" in questions): +{ + "type": "text", + "value": "User's custom answer here" +} + +Tab instructions (for permission prompts): +{ + "type": "tab_instructions", + "selected_option": 1, // which option to select first + "instruction": "Only process files in the src/ directory" +} + +Server processing: + +- Option selection: write "${value}\n" to PTY (e.g., "2\n") +- Text input: write "${value}\n" to PTY +- Tab instructions: execute 
key sequence: + 1. Navigate to selected_option (send up/down arrows) + 2. Wait 100ms + 3. Send Tab key + 4. Wait 100ms + 5. Write instruction text + 6. Send Enter key + +Key sequence execution needs pty.write() for text and special key codes: +- Up arrow: "\x1b[A" +- Down arrow: "\x1b[B" +- Tab: "\t" +- Enter: "\n" + + +OUTPUT IMPROVEMENTS +=================== + +ANSI Color Parsing +------------------ + +Current: plain text output, no colors +Needed: parse ANSI escape codes and render as HTML with colored spans + +Crabigator uses ansi-to-html conversion in dashboard/js/ansi.ts. For Bun stack, +options: + +1. Use ansi-to-html npm package (simple, proven) +2. Implement minimal parser (only need basic colors, not full xterm) +3. Use strip-ansi to strip codes, add classes for common patterns + +Recommendation: ansi-to-html package for phase 2, optimization later if needed + +Implementation: +- Parse ANSI in server before sending via SSE +- Send HTML with spans: <span class="ansi-red">text</span> +- CSS classes map to terminal color palette +- Store raw ANSI in database, parse on read + +CSS terminal colors: +.ansi-black { color: #000000; } +.ansi-red { color: #cd3131; } +.ansi-green { color: #0dbc79; } +.ansi-yellow { color: #e5e510; } +.ansi-blue { color: #2472c8; } +.ansi-magenta { color: #bc3fbc; } +.ansi-cyan { color: #11a8cd; } +.ansi-white { color: #e5e5e5; } +.ansi-bright-black { color: #666666; } +(etc. for full 16-color palette plus bold/italic/underline) + + +Session Stats Extraction +------------------------- + +Claude Code emits stats via hooks. Need to parse and display: + +- Prompts count (user interactions) +- Completions count (LLM responses) +- Tools count (tool invocations) +- Compressions count (context compaction events) +- Thinking time (cumulative seconds in thinking state) +- Work time (cumulative seconds in ready/thinking/busy states) +- Idle time (time since last activity) +- Current mode (normal / auto_accept / plan) +- Current model name (claude-opus-4-5-20251101, etc.)
+ +Stats arrive via "stats" event type (see crabigator events.rs lines 199-275): + +{ + "type": "stats", + "prompts": 5, + "completions": 3, + "tools": 12, + "compressions": 1, + "thinking_seconds": 45, + "work_seconds": 180, + "mode": "normal", + "model": "claude-opus-4-5-20251101", + "prompts_changed_at": 1706472000.0, + "completions_changed_at": 1706471980.0, + "tool_timestamps": [1706471900.0, 1706471920.0, ...], + "session_start": 1706471800.0, + "idle_since": 1706472100.0 +} + +Stats UI (widget in session card): +- Single line: "5 prompts | 3 completions | 12 tools | 1 compression" +- Timing line: "45s thinking | 3m idle" +- Mode badge: "PLAN MODE" or "AUTO-ACCEPT" if not normal +- Model name: "opus-4-5" (shortened) +- Sparkline for tool activity using tool_timestamps (optional phase 2.5) + +Stats storage: +- Add columns to sessions table: prompts, completions, tools, compressions, + thinking_seconds, work_seconds, mode, model, idle_since +- Update on stats event (high frequency, throttle to every 5s) +- Show in dashboard session list (for quick scanning) + + +Git Status Extraction +---------------------- + +Git status widget shows current branch and file changes. Crabigator extracts this +by parsing git status output in the PTY stream. 
For claude-remote, two approaches: + +Approach A (simple): Git hook in Claude Code +- Claude Code has git hooks that emit structured git events +- Listen for git events, store in session state +- Display in widget + +Approach B (parse output): Pattern matching +- Watch PTY output for git commands (git status, git diff) +- Parse output to extract branch and file list +- Fragile but works without hook support + +Recommendation: Approach A if hooks available, Approach B as fallback + +Git event structure (from crabigator events.rs lines 95-121): +{ + "type": "git", + "branch": "main", + "files": [ + { "path": "src/index.ts", "status": "M ", "additions": 5, "deletions": 2 }, + { "path": "README.md", "status": "??", "additions": 0, "deletions": 0 } + ] +} + +Git widget UI: +- Collapsible card below stats, above output +- Header: branch name with git icon +- File list: path + status indicator (M/A/D/??) + diff count +- Color code: M=yellow, A=green, D=red, ??=gray +- Max 10 files shown, "and N more" if truncated +- No git data: hide widget entirely + + +SESSION STATE BADGES +==================== + +Claude Code sessions have explicit states (see crabigator events.rs lines 13-20): +- ready: idle, waiting for user input +- thinking: LLM processing request +- permission: waiting for tool permission +- question: waiting for question answer +- complete: session finished +- interrupted: user cancelled + +Current: implicit state inference from prompt presence +Needed: explicit state tracking and visual badges + +State event: +{ + "type": "state", + "state": "thinking", + "timestamp": 1706472000000 +} + +State badge UI (in session card header): +- ready: gray "READY" +- thinking: blue "THINKING" with animated dots +- permission: yellow "PERMISSION REQUIRED" +- question: yellow "QUESTION" +- complete: green "COMPLETE" +- interrupted: red "INTERRUPTED" + +Badge behavior: +- Replace connection status indicator +- Connection status moves to header icon (filled=connected, 
outline=disconnected) +- State badge is primary status indicator +- Animate state transitions (fade between states) + + +UI ENHANCEMENTS +=============== + +Style Controls +-------------- + +Crabigator has comprehensive style popover (see dashboard.ts lines 69-132). +Priority features for phase 2: + +High priority: +- Column layout: 1 / 2 / 3 / Fit (auto-columns based on viewport width) +- Text size: +/- buttons to scale font size (90% to 140% in 10% steps) +- Terminal height: +/- buttons to adjust output area (250px to 550px in 50px steps) +- Text wrap: toggle between wrap and horizontal scroll + +Medium priority: +- Widget collapse: toggle for collapsing stats/git widgets +- Grouping: all sessions vs. by project (group by cwd) + +Implementation: +- Store preferences in localStorage (key: claude-remote-prefs) +- Apply via CSS classes on container element +- Layout: data-layout="1" | "2" | "3" | "fit" +- Font size: CSS variable --font-scale +- Terminal height: CSS variable --terminal-height +- Wrap: class="terminal-wrap" vs. "terminal-scroll" + + +Session Filtering +----------------- + +Focus mode: view single session, hide others. Useful for multi-session workspaces. + +UI: +- Session count button in header (e.g., "3 sessions") +- Click to open popover with session list +- Each session: command + cwd + state badge +- Click session to filter to that session only +- Filter indicator shows in header: "Viewing 1 session" with X to clear +- Clear filter button returns to all sessions + +Implementation: +- Store filter in component state (not localStorage - session-specific) +- Filter by session ID in render loop +- Popover positioned below sessions button + + +Auto-scroll Control +------------------- + +Output auto-scrolls to bottom on new content. User scrolling up should pause +auto-scroll, with button to re-enable. 
+ +UI: +- Pin/unpin button in output footer +- Unpinned (auto-scroll on): pin icon, tooltip "Pin scroll position" +- Pinned (auto-scroll off): unpin icon, tooltip "Follow output" +- Auto-pin when user scrolls up +- Auto-unpin when user scrolls to bottom + +Implementation: +- Track scroll position in output container +- On scroll event: if scrollTop + clientHeight < scrollHeight - 50, pin; otherwise unpin +- On new content: if unpinned, scroll to bottom +- Pin button click: toggle pinned state; when unpinning, scroll to bottom + + +DATA MODEL CHANGES +================== + +Sessions Table Schema +--------------------- + +Current columns: +- id (primary key) +- command +- cwd +- started_at +- ended_at +- is_active + +New columns for phase 2: + +Stats: +- prompts INTEGER DEFAULT 0 +- completions INTEGER DEFAULT 0 +- tools INTEGER DEFAULT 0 +- compressions INTEGER DEFAULT 0 +- thinking_seconds INTEGER DEFAULT 0 +- work_seconds INTEGER DEFAULT 0 +- mode TEXT DEFAULT 'normal' +- model TEXT +- idle_since INTEGER -- unix timestamp + +State: +- state TEXT DEFAULT 'ready' -- ready|thinking|permission|question|complete|interrupted + +Git: +- git_branch TEXT +- git_files_json TEXT -- JSON array of file objects + +Prompt: +- current_prompt_json TEXT -- serialized CloudPromptData or null + +Migration: +- Add columns with ALTER TABLE +- Default values for existing rows +- No data loss for old sessions + + +Stats Storage Approach +---------------------- + +Stats events arrive frequently (every time a tool runs, every state change). +Two storage strategies: + +Strategy A: Update on every event +- Simple: just UPDATE sessions SET ... WHERE id = ?
+- High write volume, may cause SQLite lock contention + +Strategy B: Throttle updates +- Update in-memory session state immediately (for SSE streaming) +- Persist to SQLite every 5 seconds if changed +- Trade-off: 5s data loss window on server crash +- Much lower write volume + +Recommendation: Strategy B (throttled updates) for phase 2 + +Implementation: +- Session manager holds in-memory state map: Map<sessionId, SessionState> +- On stats event: update in-memory state, mark dirty +- Background interval (5s): persist all dirty sessions +- On session end: persist immediately + + +IMPLEMENTATION PHASES +===================== + +Break implementation into atomic deliverables. Each phase should be independently +testable and useful. + +Phase 2.1: Rich Prompt Foundation +---------------------------------- + +Goal: Support multi-option prompts without tab instructions yet + +Tasks: +1. Define prompt types and response types (TypeScript interfaces) +2. Update session state to track current_prompt_json +3. Extend PTY wrapper to emit structured prompt events +4. Change /approve endpoint to /answer endpoint with response types +5. Render permission prompts with option list (no tab instructions yet) +6. Render question prompts with option list (no "Other" yet) +7. Test: permission prompt with 3 options (yes/yes always/no) + +Deliverable: Can approve tool permissions by selecting specific option + +Phase 2.2: Text Input and Tab Instructions +------------------------------------------- + +Goal: Complete prompt system with all input types + +Tasks: +1. Add "Other" text input for question prompts +2. Add "Add instructions" button for permission prompts +3. Implement key sequence generation for tab instructions +4. Implement key sequence execution in PTY wrapper +5. Test: question prompt with "Other" option +6.
Test: permission prompt with tab instructions + +Deliverable: Full prompt interactivity matches desktop Claude Code + +Phase 2.3: Session States and Stats +------------------------------------ + +Goal: Track and display session state and statistics + +Tasks: +1. Add state/stats columns to sessions table (migration) +2. Parse and emit state events from PTY output +3. Parse and emit stats events from PTY output +4. Update session state in database (throttled writes) +5. Add state badges to session card headers +6. Add stats widget to session cards +7. Test: state transitions visible in UI +8. Test: stats update as session progresses + +Deliverable: Rich session metadata visible in dashboard + +Phase 2.4: ANSI Colors +---------------------- + +Goal: Colored terminal output + +Tasks: +1. Add ansi-to-html dependency +2. Parse ANSI codes in output before SSE transmission +3. Add terminal color CSS classes +4. Update output rendering to preserve HTML spans +5. Test: colored tool output (git diff, test results) + +Deliverable: Output looks like terminal, not plain text + +Phase 2.5: Git Widget +--------------------- + +Goal: Show git status for sessions in git repositories + +Tasks: +1. Add git columns to sessions table (migration) +2. Parse git events from PTY output (pattern matching or hooks) +3. Add git widget component to session cards +4. Update widget on git events +5. Test: git status visible after git commands + +Deliverable: Git branch and file changes visible in dashboard + +Phase 2.6: UI Polish +-------------------- + +Goal: Style controls and UX improvements + +Tasks: +1. Add style popover with column layout options +2. Add text size +/- controls +3. Add terminal height +/- controls +4. Add text wrap toggle +5. Add session filtering (focus mode) +6. Add auto-scroll control (pin/unpin) +7. Persist preferences to localStorage +8. Test: style changes apply correctly +9. 
Test: filter to single session works + +Deliverable: Dashboard matches crabigator's UX quality + + +TESTING STRATEGY +================ + +Each phase needs: +- Unit tests for new functions (prompt parsing, key sequence generation) +- Integration tests for API endpoints (answer endpoint with different response types) +- Manual testing with real Claude Code sessions + +Key test scenarios: +- Permission prompt: approve with each option (yes / yes always / no) +- Permission prompt: add tab instructions +- Question prompt: select numbered option +- Question prompt: enter "Other" text +- State transitions: ready -> thinking -> permission -> ready +- Stats updates: watch counts increment during session +- Git status: run git commands, verify widget updates +- ANSI colors: run command with colored output (git status, test runner) +- Style controls: change layout, font size, terminal height +- Session filter: filter to one session, clear filter +- Auto-scroll: scroll up to pause, scroll down to resume + + +OPEN QUESTIONS +============== + +1. Hook availability: Does Claude Code expose hooks for git/stats/prompts? + - If yes: use structured events + - If no: parse PTY output (fragile but workable) + +2. Key sequence timing: What delays needed between key presses? + - Start with 100ms, adjust based on testing + - May need longer delays for slow terminal responses + +3. Prompt detection: How to reliably detect prompt boundaries in output? + - Look for specific patterns in PTY output + - May need escape sequence markers + - Fallback: timeout-based detection (no output for 500ms = prompt) + +4. Stats persistence frequency: 5s throttle good enough? 
+ - Depends on write volume and SQLite performance + - Monitor DB file size growth + - May need adaptive throttling (1s when active, 10s when idle) + + +REFERENCES +========== + +Crabigator patterns to steal from: +- workers/crabigator-api/src/durable-objects/SessionDO.ts (lines 330-419) + State management, event handling, persistence strategy +- src/cloud/events.rs (lines 1-741) + Event type definitions, prompt structures, key sequences +- workers/crabigator-api/src/dashboard.ts (lines 69-132) + Style controls UI patterns + +Claude Code documentation: +- Prompt hooks: check official docs for hook event structures +- PTY escape codes: standard ANSI/VT100 sequences