local-llm-stack/compose.yml

services:
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    volumes:
      - ollama_data:/root/.ollama
    ports:
      - "11434:11434"
    restart: unless-stopped
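    # rough usage sketch (assumes docker compose v2; "llama3.2" is just an
    # example model tag, pull whatever you actually want):
    #   docker compose up -d
    #   docker exec -it ollama ollama pull llama3.2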
    # no gpu, cpu only - it'll be slow but it works
    # add this if you ever get a gpu:
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]
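    # (the deploy block above assumes the nvidia container toolkit is
    # installed on the host; without it compose can't pass the gpu through)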

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    volumes:
      - open_webui_data:/app/backend/data
    ports:
      - "3001:8080"
    environment:
      - OLLAMA_BASE_URL=http://ollama:11434
      - WEBUI_AUTH=false
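      # ^ no login screen at all - fine for localhost, don't expose 3001 publicly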
      # use external chroma for RAG
      - CHROMA_HTTP_HOST=chroma
      - CHROMA_HTTP_PORT=8000
      - RAG_EMBEDDING_MODEL=all-MiniLM-L6-v2
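      # (newer open-webui versions may also want VECTOR_DB=chroma set
      # explicitly - check the docs for the tag you're running)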
    depends_on:
      - ollama
      - chroma
    restart: unless-stopped

  chroma:
    image: chromadb/chroma:latest
    container_name: chroma
    volumes:
      - chroma_data:/chroma/chroma
    ports:
      - "8007:8000"
    environment:
      - IS_PERSISTENT=TRUE
      - ANONYMIZED_TELEMETRY=FALSE
    restart: unless-stopped
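    # quick sanity check once it's up (path assumes the v1 API; newer
    # chroma builds moved to /api/v2/heartbeat):
    #   curl http://localhost:8007/api/v1/heartbeat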

volumes:
  ollama_data:
  open_webui_data:
  chroma_data:
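
# note: everything stateful lives in the named volumes above, so
# `docker compose down -v` wipes models, chats, and embeddings in one go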