# ── API Keys ──────────────────────────────────────────────────────────────────
# The values below are placeholders: replace them with your real keys, and keep
# the filled-in file out of version control.
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...

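# ── Upstream base URLs (leave as-is for production) ──────────────────────────
# "As-is" means commented out; uncomment a line only to point the proxy at a
# different upstream endpoint.
# OPENAI_BASE_URL=https://api.openai.com
# ANTHROPIC_BASE_URL=https://api.anthropic.com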

# ── Proxy server ──────────────────────────────────────────────────────────────
# Bind address and port for the proxy server.
# 0.0.0.0 accepts connections on every network interface; use 127.0.0.1 to
# restrict access to the local machine.
PROXY_HOST=0.0.0.0
PROXY_PORT=8000

# ── Feature flags ─────────────────────────────────────────────────────────────

# Inject cache_control breakpoints into Claude requests; for OpenAI, only log
# whether a request is eligible for automatic caching (true | false)
ENABLE_PROMPT_CACHING=true
# Minimum tokens in a block before injecting a Claude cache breakpoint.
# NOTE(review): presumably ignored when ENABLE_PROMPT_CACHING=false — confirm.
CACHE_MIN_TOKENS=1024

# Rewrite verbose tool/function descriptions as compact signatures
# (true | false)
ENABLE_TOOL_COMPRESSION=true

# Summarise large RAG/document chunks before sending to the main model
# (true | false)
ENABLE_CONTEXT_COMPRESSION=true
# Token count threshold; blocks above this are summarised
CONTEXT_CHUNK_TOKEN_THRESHOLD=400
# Cheap model used for preprocessing / summarisation.
# NOTE(review): gpt-4o-mini is an OpenAI model, so this presumably needs a
# valid OPENAI_API_KEY — confirm.
PREPROCESSOR_MODEL=gpt-4o-mini

# Replace full conversation history with a compact structured state object.
# Disabled by default; enable for long-running agent sessions.
ENABLE_STATE_COMPRESSION=false
# Number of most recent turns kept verbatim; older turns are folded into a
# state JSON.
STATE_KEEP_LAST_N_TURNS=3

# Inject the "ponytail" lazy-senior-dev ruleset into the system prompt so the
# model writes the smallest correct solution (less code, lower cost, faster)
# without per-agent plugin installs.
# Ruleset source: https://github.com/DietrichGebert/ponytail
ENABLE_PONYTAIL=false
# Ruleset intensity; one of: lite | full | ultra
PONYTAIL_MODE=full

# Print per-request token savings to stdout (true | false)
LOG_TOKEN_SAVINGS=true

# ── Database (FADB) ───────────────────────────────────────────────────────────
# Path to the SQLite database file. Despite the _URL suffix, the example value
# is a plain filesystem path, not a sqlite:// URL.
# NOTE(review): confirm whether URL-style values are also accepted.
# Leave unset to disable DB writes and run in memory-only mode.
DATABASE_URL=./aisaver.db

# ── Claude Desktop / claude.ai capture ────────────────────────────────────────
# MITM-decrypt these hosts in transparent-relay (recon) mode so Claude Desktop
# and claude.ai chat completions are observed and recorded to the dashboard.
# Bytes are relayed verbatim (no rewrite), so sign-in/session bootstrap keep
# working. Without this, Claude Desktop chat data only tunnels upstream and is
# never counted by the proxy.
# Format: comma-separated hostnames, as in the example. Leave blank to disable.
MITM_RECON_HOSTS=claude.ai,a.claude.ai
