# local-llm-stack commands

# start everything
up:
    docker compose up -d

# stop everything
down:
    docker compose down

# view logs (all services)
logs:
    docker compose logs -f

# view logs for a specific service
log service:
    docker compose logs -f {{service}}

# pull a model (default: tinyllama - small and fast on cpu)
pull model="tinyllama":
    docker exec ollama ollama pull {{model}}

# list downloaded models
models:
    docker exec ollama ollama list

# run a quick test prompt
test model="tinyllama":
    docker exec ollama ollama run {{model}} "Say hello in exactly 5 words"

# open the web ui (xdg-open is linux; use `open` on macos)
open:
    xdg-open http://localhost:3001

# check status of all services
status:
    docker compose ps

# restart a service
restart service:
    docker compose restart {{service}}

# nuke everything (volumes too)
nuke:
    docker compose down -v

# show resource usage
stats:
    docker stats ollama open-webui chroma --no-stream

# recommended small models for cpu
recommend:
    @echo "models that won't melt your cpu:"
    @echo "  tinyllama  - 1.1B params, very fast"
    @echo "  phi3:mini  - 3.8B params, smart for size"
    @echo "  qwen2:0.5b - 0.5B params, tiny"
    @echo "  gemma2:2b  - 2B params, decent"
    @echo ""
    @echo "pull with: just pull tinyllama"
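
# a hedged addition, not part of the original recipe set: a quick health check
# against ollama's http api. this sketch assumes the compose file maps ollama's
# default port 11434 to the host; /api/tags returns the installed models as
# json, so a non-empty response means the api container is up and reachable.
health:
    curl -s http://localhost:11434/api/tags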