diff --git a/.env.example b/.env.example index 7c3d6c7..8bf7eb0 100644 --- a/.env.example +++ b/.env.example @@ -16,3 +16,8 @@ DATABASE_URL=sqlite:///./test.db OLLAMA_URL=http://localhost:11434 DEFAULT_MODEL=llama3 APP_TZ=Europe/Berlin + +# Standard-Modell für den Anthropic-kompatiblen Endpunkt (/v1/messages) +# Wird verwendet, wenn der Client kein Modell angibt oder ein Anthropic-Modellname +# (z.B. claude-opus-4-7) auf kein lokales Modell passt. +ANTHROPIC_DEFAULT_MODEL=llama3 diff --git a/backend/database.py b/backend/database.py index df9ad23..6884df0 100644 --- a/backend/database.py +++ b/backend/database.py @@ -1,12 +1,20 @@ import os +from pathlib import Path from dotenv import load_dotenv from sqlalchemy import create_engine -load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), '..', '.env')) +load_dotenv(dotenv_path=Path(__file__).resolve().parent.parent / ".env") from sqlalchemy.orm import sessionmaker, declarative_base DATABASE_URL = os.environ.get("DATABASE_URL", "sqlite:///./test.db") +# Relative SQLite-Pfade immer relativ zu dieser Datei auflösen, nicht zum cwd +if DATABASE_URL.startswith("sqlite:///") and not DATABASE_URL.startswith("sqlite:////"): + db_path = DATABASE_URL[len("sqlite:///"):] + if not os.path.isabs(db_path): + db_path = str(Path(__file__).resolve().parent / db_path) + DATABASE_URL = f"sqlite:///{db_path}" + if "sqlite" in DATABASE_URL: engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False}) else: diff --git a/backend/main.py b/backend/main.py index 8be2865..9136011 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1,6 +1,7 @@ import json import logging import os +import secrets import time from logging.handlers import RotatingFileHandler from pathlib import Path @@ -51,10 +52,16 @@ def _last_user_msg(messages: list, max_len: int = 120) -> str: async def require_api_key(request: Request, db: Session = Depends(get_db)): auth_header = request.headers.get("Authorization", "") + x_api_key = 
request.headers.get("x-api-key", "") + auth_token = request.headers.get("anthropic-auth-token", "") if auth_header.startswith("Bearer "): api_key = auth_header[7:] elif auth_header.startswith("sk-"): api_key = auth_header + elif x_api_key: + api_key = x_api_key + elif auth_token: + api_key = auth_token else: raise HTTPException(status_code=401, detail="Invalid or missing API key") db_key = crud.verify_api_key(db, api_key) @@ -82,9 +89,14 @@ async def unhandled_exception_handler(request: Request, exc: Exception): request.method, request.url.path, type(exc).__name__, exc, exc_info=exc) return JSONResponse(status_code=500, content={"error": {"message": "Internal server error", "type": "server_error"}}) +def _backend_headers() -> dict: + key = os.getenv("BACKEND_API_KEY") + return {"Authorization": f"Bearer {key}"} if key else {} + + async def proxy_request(url: str, method: str = "GET", json_data: dict = None): async with httpx.AsyncClient(timeout=300.0) as client: - response = await client.request(method=method, url=url, json=json_data) + response = await client.request(method=method, url=url, json=json_data, headers=_backend_headers()) return response @app.post("/api/generate") @@ -171,6 +183,210 @@ async def versions(db: Session = Depends(get_db)): response = await proxy_request(f"{ollama_url}/api/versions", method="GET") return JSONResponse(content=response.json(), status_code=response.status_code) + +# --- Anthropic Messages API compatibility layer --- + +def _anthropic_content_to_str(content) -> str: + """Flatten Anthropic content (string or block array) to a plain string.""" + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for block in content: + if not isinstance(block, dict): + continue + if block.get("type") == "text": + parts.append(block.get("text", "")) + elif block.get("type") == "tool_result": + raw = block.get("content", "") + if isinstance(raw, list): + raw = " ".join(r.get("text", "") for r in raw if 
isinstance(r, dict) and r.get("type") == "text") + parts.append(str(raw)) + return " ".join(parts) + return str(content) if content else "" + + +def _anthropic_messages_to_ollama(messages: list, system: str = None) -> list: + """Transform Anthropic messages array to Ollama /api/chat format.""" + result = [] + if system: + result.append({"role": "system", "content": system}) + for msg in messages: + role = msg.get("role") + content = msg.get("content") + if role == "assistant" and isinstance(content, list): + text = " ".join(b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text") + tool_calls = [ + {"function": {"name": b["name"], "arguments": b.get("input", {})}} + for b in content if isinstance(b, dict) and b.get("type") == "tool_use" + ] + entry = {"role": "assistant", "content": text} + if tool_calls: + entry["tool_calls"] = tool_calls + result.append(entry) + elif role == "user" and isinstance(content, list): + text_parts = [] + for block in content: + if not isinstance(block, dict): + continue + if block.get("type") == "tool_result": + if text_parts: + result.append({"role": "user", "content": " ".join(text_parts)}) + text_parts = [] + raw = block.get("content", "") + if isinstance(raw, list): + raw = " ".join(r.get("text", "") for r in raw if isinstance(r, dict) and r.get("type") == "text") + result.append({"role": "tool", "content": str(raw)}) + elif block.get("type") == "text": + text_parts.append(block.get("text", "")) + if text_parts: + result.append({"role": "user", "content": " ".join(text_parts)}) + else: + result.append({"role": role, "content": _anthropic_content_to_str(content)}) + return result + + +def _anthropic_tools_to_ollama(tools: list) -> list: + """Transform Anthropic tools to Ollama/OpenAI function format.""" + return [ + { + "type": "function", + "function": { + "name": t["name"], + "description": t.get("description", ""), + "parameters": t.get("input_schema", {}), + }, + } + for t in tools + ] + + +def 
_ollama_to_anthropic_response(ollama_resp: dict, model_name: str, msg_id: str) -> dict: + """Transform an Ollama /api/chat response to Anthropic Messages API format.""" + msg = ollama_resp.get("message", {}) + text = msg.get("content", "") + tool_calls = msg.get("tool_calls") or [] + + content_blocks = [] + if text: + content_blocks.append({"type": "text", "text": text}) + + stop_reason = "end_turn" + for i, tc in enumerate(tool_calls): + stop_reason = "tool_use" + fn = tc.get("function", {}) + args = fn.get("arguments", {}) + if isinstance(args, str): + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {} + content_blocks.append({ + "type": "tool_use", + "id": f"toolu_{msg_id}_{i}", + "name": fn.get("name", ""), + "input": args, + }) + + return { + "id": f"msg_{msg_id}", + "type": "message", + "role": "assistant", + "content": content_blocks, + "model": model_name, + "stop_reason": stop_reason, + "stop_sequence": None, + "usage": { + "input_tokens": ollama_resp.get("prompt_eval_count", 0), + "output_tokens": ollama_resp.get("eval_count", 0), + }, + } + + +@app.post("/v1/messages") +async def anthropic_messages(request: Request, db: Session = Depends(get_db)): + ollama_url = crud.get_setting(db, "ollama_url", os.getenv("OLLAMA_URL", "http://localhost:11434")) + body = await request.json() + + force_model = crud.get_setting(db, "force_model") or None + model_name = force_model or os.getenv("ANTHROPIC_DEFAULT_MODEL") or body.get("model") + if not model_name: + raise HTTPException(status_code=422, detail="Field 'model' is required") + + anthropic_msgs = body.get("messages", []) + system = body.get("system") + + system_str = _anthropic_content_to_str(system) if system else "" + all_text = system_str + " ".join(_anthropic_content_to_str(m.get("content")) for m in anthropic_msgs) + prompt_tokens = crud.count_tokens(all_text) + + if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1): + raise 
def _sse_event(event: str, fields: dict) -> str:
    """Serialise one Server-Sent Event; the payload's ``type`` mirrors the event name."""
    return f"event: {event}\ndata: {json.dumps({'type': event, **fields})}\n\n"


def _backend_error_message(payload, status_code: int) -> str:
    """Extract a human-readable message from a backend error body."""
    if isinstance(payload, dict) and payload.get("error"):
        err = payload["error"]
        return err if isinstance(err, str) else json.dumps(err)
    return f"Backend returned HTTP {status_code}"


def _anthropic_stream_events(message: dict):
    """Yield the SSE events that replay a complete Anthropic message.

    Every content block of *message* is emitted as start/delta/stop; text
    blocks use ``text_delta`` and tool-use blocks use ``input_json_delta``.
    A message without content still emits one empty text block. The final
    ``message_delta`` carries the message's real ``stop_reason``.
    """
    usage = message.get("usage") or {}
    opening = {
        **message,
        "content": [],
        "stop_reason": None,
        "stop_sequence": None,
        "usage": {"input_tokens": usage.get("input_tokens", 0), "output_tokens": 0},
    }
    yield _sse_event("message_start", {"message": opening})

    blocks = message.get("content") or [{"type": "text", "text": ""}]
    for index, block in enumerate(blocks):
        if block.get("type") == "tool_use":
            start_block = {
                "type": "tool_use",
                "id": block.get("id", ""),
                "name": block.get("name", ""),
                "input": {},
            }
            delta = {
                "type": "input_json_delta",
                "partial_json": json.dumps(block.get("input", {})),
            }
        else:
            start_block = {"type": "text", "text": ""}
            text = block.get("text", "")
            delta = {"type": "text_delta", "text": text} if text else None

        yield _sse_event("content_block_start", {"index": index, "content_block": start_block})
        if index == 0:
            yield _sse_event("ping", {})
        if delta is not None:
            yield _sse_event("content_block_delta", {"index": index, "delta": delta})
        yield _sse_event("content_block_stop", {"index": index})

    yield _sse_event("message_delta", {
        "delta": {"stop_reason": message.get("stop_reason"), "stop_sequence": None},
        "usage": {"output_tokens": usage.get("output_tokens", 0)},
    })
    yield _sse_event("message_stop", {})


@app.post("/v1/messages")
async def anthropic_messages(request: Request, db: Session = Depends(get_db)):
    """Anthropic Messages API endpoint backed by Ollama's /api/chat.

    The request is translated to an Ollama chat call, quota is charged on an
    estimated prompt token count, and the backend reply is translated back.
    With ``"stream": true`` the backend is still called non-streaming and the
    complete reply is replayed as Anthropic SSE events, including tool-use
    blocks and the real stop reason.

    Raises:
        HTTPException: 422 if the body is not a JSON object or no model can
            be determined; 429 if the quota is exceeded.
    """
    ollama_url = crud.get_setting(db, "ollama_url", os.getenv("OLLAMA_URL", "http://localhost:11434"))
    body = await request.json()
    if not isinstance(body, dict):
        raise HTTPException(status_code=422, detail="Request body must be a JSON object")

    # NOTE(review): ANTHROPIC_DEFAULT_MODEL wins over the client's "model"
    # whenever it is set. The .env.example comment describes it as a fallback
    # for missing/unknown models only -- confirm which behavior is intended.
    force_model = crud.get_setting(db, "force_model") or None
    model_name = force_model or os.getenv("ANTHROPIC_DEFAULT_MODEL") or body.get("model")
    if not model_name:
        raise HTTPException(status_code=422, detail="Field 'model' is required")

    anthropic_msgs = body.get("messages", [])
    system = body.get("system")

    system_str = _anthropic_content_to_str(system) if system else ""
    all_text = system_str + " ".join(_anthropic_content_to_str(m.get("content")) for m in anthropic_msgs)
    prompt_tokens = crud.count_tokens(all_text)

    if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1):
        raise HTTPException(status_code=429, detail="Quota exceeded")

    ollama_messages = _anthropic_messages_to_ollama(anthropic_msgs, system=system_str)
    ollama_body: dict = {"model": model_name, "messages": ollama_messages, "stream": body.get("stream", False)}
    if tools := body.get("tools"):
        ollama_body["tools"] = _anthropic_tools_to_ollama(tools)

    msg_id = secrets.token_hex(12)
    target = f"{ollama_url}/api/chat"

    usage_log.info('%s | /v1/messages | %s | ~%d tokens | "%s"',
                   request.state.api_key_name, model_name, prompt_tokens, _last_user_msg(ollama_messages))
    start = time.monotonic()

    if body.get("stream"):
        # The backend is always called non-streaming; this proxy builds the SSE
        # stream itself. That is necessary because upstream proxies (e.g. the
        # production proxy) only expose /api/chat in non-streaming mode.
        non_stream_body = {**ollama_body, "stream": False}

        async def generate():
            # The 200 response has already started once this generator runs, so
            # failures are reported in-band as an SSE `error` event rather than
            # by raising (which would just truncate the stream).
            try:
                response = await proxy_request(target, method="POST", json_data=non_stream_body)
                ollama_resp = response.json()
            except Exception as exc:
                error_log.error("Stream error | %s | /v1/messages | %s | %s: %s",
                                request.state.api_key_name, model_name, type(exc).__name__, exc, exc_info=exc)
                yield _sse_event("error", {"error": {"type": "api_error", "message": "Backend request failed"}})
                return

            if response.status_code >= 400:
                message = _backend_error_message(ollama_resp, response.status_code)
                error_log.error("Backend error | %s | /v1/messages | %s | HTTP %s: %s",
                                request.state.api_key_name, model_name, response.status_code, message)
                yield _sse_event("error", {"error": {"type": "api_error", "message": message}})
                return

            result = _ollama_to_anthropic_response(ollama_resp, model_name, msg_id)
            for event in _anthropic_stream_events(result):
                yield event
            usage_log.info('%s | /v1/messages | %s | actual ↑%d ↓%d tokens | %.1fs',
                           request.state.api_key_name, model_name,
                           result["usage"]["input_tokens"], result["usage"]["output_tokens"],
                           time.monotonic() - start)

        return StreamingResponse(
            generate(),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
        )

    try:
        response = await proxy_request(target, method="POST", json_data=ollama_body)
        ollama_resp = response.json()
        if response.status_code >= 400:
            # Pass backend failures on as an Anthropic-style error instead of
            # an empty "successful" message.
            message = _backend_error_message(ollama_resp, response.status_code)
            error_log.error("Backend error | %s | /v1/messages | %s | HTTP %s: %s",
                            request.state.api_key_name, model_name, response.status_code, message)
            return JSONResponse(
                status_code=response.status_code,
                content={"type": "error", "error": {"type": "api_error", "message": message}},
            )
        result = _ollama_to_anthropic_response(ollama_resp, model_name, msg_id)
        usage_log.info('%s | /v1/messages | %s | actual ↑%d ↓%d tokens | %.1fs',
                       request.state.api_key_name, model_name,
                       result["usage"]["input_tokens"], result["usage"]["output_tokens"],
                       time.monotonic() - start)
        return JSONResponse(content=result, status_code=response.status_code)
    except Exception as exc:
        error_log.error("Proxy error | %s | /v1/messages | %s | %s: %s",
                        request.state.api_key_name, model_name, type(exc).__name__, exc, exc_info=exc)
        raise
try: for line in chunk.decode("utf-8", errors="ignore").splitlines(): diff --git a/backend/tests/test_anthropic_messages.py b/backend/tests/test_anthropic_messages.py new file mode 100644 index 0000000..3607a04 --- /dev/null +++ b/backend/tests/test_anthropic_messages.py @@ -0,0 +1,272 @@ +import json +import os +from unittest.mock import AsyncMock, MagicMock, patch, call + + +def _make_body(model="llama3", messages=None, stream=False, **kwargs): + body = { + "model": model, + "messages": messages or [{"role": "user", "content": "Hello"}], + "max_tokens": 100, + } + if stream: + body["stream"] = True + body.update(kwargs) + return body + + +def _ollama_chat_response(content="Hi!", input_tokens=5, output_tokens=3): + return { + "model": "llama3", + "message": {"role": "assistant", "content": content}, + "prompt_eval_count": input_tokens, + "eval_count": output_tokens, + "done": True, + } + + +# --- Auth --- + +def test_messages_missing_auth_returns_401(test_client): + response = test_client.post("/v1/messages", json=_make_body()) + assert response.status_code == 401 + + +def test_messages_invalid_key_returns_401(test_client): + response = test_client.post( + "/v1/messages", + headers={"x-api-key": "sk-invalid"}, + json=_make_body(), + ) + assert response.status_code == 401 + + +@patch("main.proxy_request", new_callable=AsyncMock) +def test_messages_accepts_anthropic_auth_token_header(mock_proxy, test_client): + mock_proxy.return_value.status_code = 200 + mock_proxy.return_value.json = lambda: _ollama_chat_response() + response = test_client.post( + "/v1/messages", + headers={"anthropic-auth-token": os.environ.get("TEST_API_KEY", "")}, + json=_make_body(), + ) + assert response.status_code == 200 + + +@patch("main.proxy_request", new_callable=AsyncMock) +def test_messages_accepts_x_api_key_header(mock_proxy, test_client): + mock_proxy.return_value.status_code = 200 + mock_proxy.return_value.json = lambda: _ollama_chat_response() + response = test_client.post( + 
# --- Validation ---

def test_messages_missing_model_returns_422(test_client):
    """Without a request model and without ANTHROPIC_DEFAULT_MODEL the call is rejected."""
    payload = {"messages": [{"role": "user", "content": "Hi"}], "max_tokens": 100}
    # patch.dict restores the environment on exit, so the variable is only
    # removed for the duration of the request.
    with patch.dict(os.environ):
        os.environ.pop("ANTHROPIC_DEFAULT_MODEL", None)
        auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}
        resp = test_client.post("/v1/messages", headers=auth, json=payload)
    assert resp.status_code == 422


@patch("main.proxy_request", new_callable=AsyncMock)
def test_messages_anthropic_default_model_used_when_no_model_in_request(mock_proxy, test_client):
    """ANTHROPIC_DEFAULT_MODEL is sent to the backend when the client names no model."""
    backend = mock_proxy.return_value
    backend.status_code = 200
    backend.json = lambda: _ollama_chat_response()
    payload = {"messages": [{"role": "user", "content": "Hi"}], "max_tokens": 100}
    with patch.dict(os.environ, {"ANTHROPIC_DEFAULT_MODEL": "qwen3-coder:q8_0"}):
        auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}
        test_client.post("/v1/messages", headers=auth, json=payload)
    forwarded = mock_proxy.call_args[1]["json_data"]
    assert forwarded["model"] == "qwen3-coder:q8_0"


# --- Quota ---

def test_messages_quota_exceeded_returns_429(test_client):
    """A failed quota check results in HTTP 429."""
    auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}
    with patch("main.crud.check_and_increment_quota", return_value=False):
        resp = test_client.post("/v1/messages", headers=auth, json=_make_body())
    assert resp.status_code == 429
# --- Response format ---

@patch("main.proxy_request", new_callable=AsyncMock)
def test_messages_returns_anthropic_format(mock_proxy, test_client):
    """The backend reply is returned as an Anthropic ``message`` object."""
    backend = mock_proxy.return_value
    backend.status_code = 200
    backend.json = lambda: _ollama_chat_response("Hello!")
    auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}

    resp = test_client.post("/v1/messages", headers=auth, json=_make_body())

    assert resp.status_code == 200
    data = resp.json()
    assert data["type"] == "message"
    assert data["role"] == "assistant"
    assert isinstance(data["content"], list)
    first_block = data["content"][0]
    assert first_block["type"] == "text"
    assert first_block["text"] == "Hello!"
    usage = data["usage"]
    assert usage["input_tokens"] == 5
    assert usage["output_tokens"] == 3


# --- Request transformation ---

@patch("main.proxy_request", new_callable=AsyncMock)
def test_messages_system_prompt_becomes_first_system_message(mock_proxy, test_client):
    """The top-level ``system`` prompt is forwarded as the first chat message."""
    backend = mock_proxy.return_value
    backend.status_code = 200
    backend.json = lambda: _ollama_chat_response()
    auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}

    test_client.post("/v1/messages", headers=auth, json=_make_body(system="You are helpful"))

    first_message = mock_proxy.call_args[1]["json_data"]["messages"][0]
    assert first_message["role"] == "system"
    assert first_message["content"] == "You are helpful"


@patch("main.proxy_request", new_callable=AsyncMock)
def test_messages_tools_transformed_to_ollama_function_format(mock_proxy, test_client):
    """Anthropic tool definitions are forwarded in function-call format."""
    backend = mock_proxy.return_value
    backend.status_code = 200
    backend.json = lambda: _ollama_chat_response()
    auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}
    bash_tool = {
        "name": "bash",
        "description": "Run bash",
        "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}},
    }

    test_client.post("/v1/messages", headers=auth, json=_make_body(tools=[bash_tool]))

    forwarded_tool = mock_proxy.call_args[1]["json_data"]["tools"][0]
    assert forwarded_tool["type"] == "function"
    assert forwarded_tool["function"]["name"] == "bash"
    assert "parameters" in forwarded_tool["function"]
@patch("main.proxy_request", new_callable=AsyncMock)
def test_messages_tool_call_response_transformed_to_anthropic(mock_proxy, test_client):
    """A backend tool call comes back as a ``tool_use`` block with stop_reason ``tool_use``."""
    backend_reply = {
        "model": "llama3",
        "message": {
            "role": "assistant",
            "content": "",
            "tool_calls": [{"function": {"name": "bash", "arguments": {"command": "ls"}}}],
        },
        "prompt_eval_count": 10,
        "eval_count": 5,
        "done": True,
    }
    backend = mock_proxy.return_value
    backend.status_code = 200
    backend.json = lambda: backend_reply
    auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}

    data = test_client.post("/v1/messages", headers=auth, json=_make_body()).json()

    assert data["stop_reason"] == "tool_use"
    tool_block = next(blk for blk in data["content"] if blk["type"] == "tool_use")
    assert tool_block["name"] == "bash"
    assert tool_block["input"] == {"command": "ls"}


# --- Streaming ---

@patch("main.proxy_request", new_callable=AsyncMock)
def test_messages_streaming_returns_anthropic_sse_events(mock_proxy, test_client):
    """With ``stream`` set, the reply is delivered as Anthropic SSE events."""
    backend_reply = {
        "model": "llama3",
        "message": {"role": "assistant", "content": "Hi!"},
        "prompt_eval_count": 5,
        "eval_count": 3,
        "done": True,
    }
    backend = mock_proxy.return_value
    backend.status_code = 200
    backend.json = lambda: backend_reply
    auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}

    resp = test_client.post("/v1/messages", headers=auth, json=_make_body(stream=True))

    assert resp.status_code == 200
    data_prefix = "data: "
    events = []
    for line in resp.text.splitlines():
        if line.startswith(data_prefix):
            events.append(json.loads(line[len(data_prefix):]))

    seen_types = [evt["type"] for evt in events]
    for expected in ("message_start", "content_block_start", "content_block_delta", "message_stop"):
        assert expected in seen_types

    streamed_text = "".join(
        evt["delta"]["text"] for evt in events if evt["type"] == "content_block_delta"
    )
    assert streamed_text == "Hi!"
# --- Backend auth (BACKEND_API_KEY) ---

def _fake_httpx_client():
    """Return an AsyncMock usable as ``httpx.AsyncClient(...)`` in an ``async with``.

    Its ``request`` coroutine resolves to a 200 response whose JSON body is
    ``{"result": "ok"}``.
    """
    reply = MagicMock()
    reply.status_code = 200
    reply.json.return_value = {"result": "ok"}

    client = AsyncMock()
    client.__aenter__ = AsyncMock(return_value=client)
    client.__aexit__ = AsyncMock(return_value=False)
    client.request = AsyncMock(return_value=reply)
    return client


def test_proxy_request_forwards_backend_api_key(test_client):
    """BACKEND_API_KEY is sent to the backend as a Bearer token."""
    with patch("main.httpx.AsyncClient") as mock_cls:
        client = _fake_httpx_client()
        mock_cls.return_value = client

        with patch.dict(os.environ, {"BACKEND_API_KEY": "sk-backend-secret"}):
            auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}
            test_client.post(
                "/api/generate",
                headers=auth,
                json={"model": "llama3", "prompt": "hi"},
            )

        sent_headers = client.request.call_args[1].get("headers", {})
        assert sent_headers.get("Authorization") == "Bearer sk-backend-secret"


def test_proxy_request_omits_auth_header_when_no_backend_key(test_client):
    """Without BACKEND_API_KEY no Authorization header is sent to the backend."""
    with patch("main.httpx.AsyncClient") as mock_cls:
        client = _fake_httpx_client()
        mock_cls.return_value = client

        # patch.dict restores the environment on exit.
        with patch.dict(os.environ):
            os.environ.pop("BACKEND_API_KEY", None)
            auth = {"Authorization": f"Bearer {os.environ.get('TEST_API_KEY', '')}"}
            test_client.post(
                "/api/generate",
                headers=auth,
                json={"model": "llama3", "prompt": "hi"},
            )

        sent_headers = client.request.call_args[1].get("headers", {})
        assert "Authorization" not in sent_headers
#!/bin/bash
# Start the `claude` CLI against the local proxy.
#
# Usage: ./start_claude.sh [API_KEY] [claude arguments...]
#    or: PROXY_API_KEY=sk-... ./start_claude.sh
#
# Reads PROXY_HOST / PROXY_PORT / PROXY_API_KEY from the environment or from
# the .env file next to this script.

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Load .env (set -a exports every variable it defines)
if [ -f "$SCRIPT_DIR/.env" ]; then
    set -a
    source "$SCRIPT_DIR/.env"
    set +a
fi

# API key: the first argument takes precedence, otherwise PROXY_API_KEY
API_KEY="${1:-$PROXY_API_KEY}"

if [ -z "$API_KEY" ]; then
    # Diagnostics go to stderr so they are not mistaken for program output.
    echo "Fehler: Kein API-Key angegeben." >&2
    echo "Verwendung: ./start_claude.sh sk-dein-key" >&2
    echo "      oder: PROXY_API_KEY=sk-dein-key ./start_claude.sh" >&2
    exit 1
fi

# Drop the key argument (if one was given) so the rest can be forwarded.
if [ "$#" -gt 0 ]; then
    shift
fi

if ! command -v claude >/dev/null 2>&1; then
    echo "Fehler: 'claude' wurde nicht im PATH gefunden." >&2
    exit 127
fi

# 0.0.0.0 is a bind address, not a valid client host
PROXY_HOST="${PROXY_HOST:-0.0.0.0}"
PROXY_PORT="${PROXY_PORT:-8000}"
if [ "$PROXY_HOST" = "0.0.0.0" ]; then
    PROXY_HOST="localhost"
fi

export ANTHROPIC_BASE_URL="http://${PROXY_HOST}:${PROXY_PORT}"
export ANTHROPIC_AUTH_TOKEN="$API_KEY"

echo "Verbinde mit Proxy: $ANTHROPIC_BASE_URL"
exec claude "$@"