# ═══════════════════════════════════════════════════════════
#  MAC — MBM AI Cloud  |  Local Server Configuration
# ═══════════════════════════════════════════════════════════
#  Copy this to .env:  cp .env.example .env
#  Then run:  docker compose up -d
# ═══════════════════════════════════════════════════════════

# ── App ───────────────────────────────────────────────────
MAC_ENV=development
# Bind address; 0.0.0.0 listens on all network interfaces.
MAC_HOST=0.0.0.0
MAC_PORT=8000
MAC_DEBUG=false
# Placeholder — replace with a long random value before deploying,
# e.g. the output of:  openssl rand -hex 32
MAC_SECRET_KEY=change-me-to-random-string
# JSON array of allowed origins. ["*"] allows every origin; list explicit
# origins for anything beyond local development.
MAC_CORS_ORIGINS=["*"]
# Uvicorn worker processes.
# NOTE: comments are kept on their own line. Not every .env reader strips
# trailing "# ..." text, so an inline comment can end up inside the value.
MAC_WORKERS=4

# ── Database (PostgreSQL — persistent storage) ────────────
# Format: postgresql+asyncpg://USER:PASSWORD@HOST:PORT/DBNAME
# The mac / mac_password pair is a sample credential — change it for any
# deployment that is reachable by others.
DATABASE_URL=postgresql+asyncpg://mac:mac_password@localhost:5432/mac_db
PGADMIN_PORT=5050
PGADMIN_DEFAULT_EMAIL=admin@mbm.local
# Placeholder — replace before starting pgAdmin.
PGADMIN_DEFAULT_PASSWORD=ChangeThisStrongPassword!

# ── Redis (rate limiting & caching) ──────────────────────
# Format: redis://HOST:PORT/DB_INDEX
REDIS_URL=redis://localhost:6379/0

# ── JWT Auth ──────────────────────────────────────────────
# Placeholder — replace with a long random value before deploying,
# e.g. the output of:  openssl rand -hex 32
# HS256 is a symmetric algorithm: this one secret both signs and verifies tokens.
JWT_SECRET_KEY=change-me-jwt-secret-random-string
JWT_ALGORITHM=HS256
# Access-token lifetime in minutes (1440 = 24 hours).
JWT_ACCESS_TOKEN_EXPIRE_MINUTES=1440

# ── vLLM Local GPU Inference ─────────────────────────────
# Each model runs its own vLLM instance on a separate port.
# Docker Compose sets these automatically via service names.
# By default VLLM_BASE_URL and VLLM_SPEED_URL point at the same instance (port 8001).
VLLM_BASE_URL=http://localhost:8001
VLLM_SPEED_URL=http://localhost:8001
VLLM_CODE_URL=http://localhost:8002
VLLM_REASONING_URL=http://localhost:8003
VLLM_INTELLIGENCE_URL=http://localhost:8004
# Optional. Presumably left empty when the vLLM servers run without an
# API key — confirm against the client code.
VLLM_API_KEY=
# HTTP timeout (seconds) for LLM requests.
# NOTE: comments are kept on their own line. Not every .env reader strips
# trailing "# ..." text, so an inline comment can end up inside the value.
VLLM_TIMEOUT=120
# Timeout for model health checks (presumably also in seconds — confirm).
VLLM_HEALTH_TIMEOUT=5

# ── Model Registry ────────────────────────────────────────
# Override the entire model list with a JSON array (leave empty for defaults)
# Each object needs: id, name, served_name, url_key, category,
#   parameters, context_length, capabilities (list), specialty.
# Keep the JSON on a single line.
MAC_MODELS_JSON=

# Only enable specific models from the built-in list (comma-separated IDs)
# Example: MAC_ENABLED_MODELS=qwen2.5:7b,qwen2.5-coder:7b
# Presumably empty means every built-in model stays enabled — confirm.
MAC_ENABLED_MODELS=

# Which model ID the "auto" keyword falls back to (empty = first code model)
MAC_AUTO_FALLBACK=

# Default max_tokens when the client doesn't specify
MAC_DEFAULT_MAX_TOKENS=2048

# ── Docker Compose vLLM Tuning ────────────────────────────
# Adjust these to match your GPU VRAM.  24GB GPU example:
#   Speed (7B) ≈ 5GB, Code (7B) ≈ 5GB, Reason (14B) ≈ 9GB → 19GB total
# Each *_GPU_MEM value is the fraction of total VRAM given to that instance
# (e.g. 0.22 × 24GB ≈ 5GB). The three defaults below sum to 0.79.
# Each *_MAX_LEN is presumably the maximum context length in tokens — confirm
# against docker-compose.yml.
VLLM_SPEED_MODEL=Qwen/Qwen2.5-7B-Instruct
VLLM_SPEED_PORT=8001
VLLM_SPEED_GPU_MEM=0.22
VLLM_SPEED_MAX_LEN=8192

VLLM_CODE_MODEL=Qwen/Qwen2.5-Coder-7B-Instruct
VLLM_CODE_PORT=8002
VLLM_CODE_GPU_MEM=0.22
VLLM_CODE_MAX_LEN=8192

VLLM_REASON_MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
VLLM_REASON_PORT=8003
VLLM_REASON_GPU_MEM=0.35
VLLM_REASON_MAX_LEN=8192

# Allowed values: auto | float16 | bfloat16
# NOTE: comments are kept on their own line. Not every .env reader strips
# trailing "# ..." text, so an inline comment can end up inside the value.
VLLM_DTYPE=auto

# Intelligence slot (uncomment vllm-intel in docker-compose.yml first)
# With the defaults above, adding 0.45 brings the total to 1.24. If all
# instances share one GPU, lower the other *_GPU_MEM values first.
# VLLM_INTEL_MODEL=google/gemma-3-27b-it
# VLLM_INTEL_PORT=8004
# VLLM_INTEL_GPU_MEM=0.45
# VLLM_INTEL_MAX_LEN=4096

# ── Whisper / Speech-to-Text ─────────────────────────────
# Uncomment the whisper service in docker-compose.yml first.
# Uses OpenAI-compatible /v1/audio/transcriptions endpoint.
WHISPER_URL=http://localhost:8005
WHISPER_MODEL=Systran/faster-whisper-small
# Request timeout, presumably in seconds — confirm.
WHISPER_TIMEOUT=300

# ── Text-to-Speech ───────────────────────────────────────
# Uncomment the tts service in docker-compose.yml first.
# Uses OpenAI-compatible /v1/audio/speech endpoint.
TTS_URL=http://localhost:8006
TTS_MODEL=default
# Request timeout, presumably in seconds — confirm.
TTS_TIMEOUT=120

# ── Embeddings ────────────────────────────────────────────
# Optional separate embedding server. Leave empty to use VLLM_BASE_URL.
EMBEDDING_URL=
EMBEDDING_MODEL=nomic-embed-text
# Request timeout, presumably in seconds — confirm.
EMBEDDING_TIMEOUT=60

# ── Rate Limits ───────────────────────────────────────────
# Request quota per hour and token quota per day.
# NOTE(review): presumably applied per user or per API key — confirm the scope.
RATE_LIMIT_REQUESTS_PER_HOUR=100
RATE_LIMIT_TOKENS_PER_DAY=50000

# ── Qdrant (Vector DB for RAG) ───────────────────────────
# Base URL of the Qdrant HTTP API.
QDRANT_URL=http://localhost:6333

# ── SearXNG (Web Search) ─────────────────────────────────
# Base URL of the SearXNG instance used for web search.
SEARXNG_URL=http://localhost:8888