59 lines
1.3 KiB
justfile
59 lines
1.3 KiB
justfile
# local-llm-stack commands

# start all services in detached mode
up:
    docker compose up -d
# stop all services (named volumes are left in place; see `nuke` to remove them)
down:
    docker compose down
# follow the combined log stream of every service
logs:
    docker compose logs -f
# follow the logs of a single service, e.g. `just log ollama`
log service:
    docker compose logs -f {{service}}
# pull a model inside the ollama container (default: tinyllama - small and fast on cpu)
pull model="tinyllama":
    docker exec ollama ollama pull {{model}}
# list the models already downloaded into the ollama container
models:
    docker exec ollama ollama list
# smoke-test a model with a tiny fixed prompt (default: tinyllama)
test model="tinyllama":
    docker exec ollama ollama run {{model}} "Say hello in exactly 5 words"
# open the web ui in the default browser
# xdg-open exists only on Linux desktops; fall back to the macOS `open`
# command so the recipe works on both platforms
open:
    xdg-open http://localhost:3001 2>/dev/null || open http://localhost:3001
# show the current state of all compose services
status:
    docker compose ps
# restart a single service, e.g. `just restart ollama`
restart service:
    docker compose restart {{service}}
# tear everything down INCLUDING volumes (-v) - all data stored in them is destroyed
nuke:
    docker compose down -v
# one-shot resource usage snapshot for the three stack containers
stats:
    docker stats ollama open-webui chroma --no-stream
# print small models recommended for cpu-only machines
# `@` suppresses command echo so only the message text is printed
recommend:
    @echo "models that won't melt your cpu:"
    @echo " tinyllama - 1.1B params, very fast"
    @echo " phi3:mini - 3.8B params, smart for size"
    @echo " qwen2:0.5b - 0.5B params, tiny"
    @echo " gemma2:2b - 2B params, decent"
    @echo ""
    @echo "pull with: just pull tinyllama"