diff --git a/backend/admin.py b/backend/admin.py index ae72736..27ba7ff 100644 --- a/backend/admin.py +++ b/backend/admin.py @@ -162,9 +162,9 @@ async def get_ollama_models( async with httpx.AsyncClient(timeout=5.0) as client: response = await client.get(f"{ollama_url}/api/tags") models = [m["name"] for m in response.json().get("models", [])] + return {"models": models, "reachable": True} except Exception: - models = [] - return {"models": models} + return {"models": [], "reachable": False} # Statisches Frontend ausliefern (nur im Produktivbetrieb, wenn dist/ existiert) _dist = Path(__file__).parent.parent / "frontend" / "dist" diff --git a/backend/main.py b/backend/main.py index de5e139..331fcba 100644 --- a/backend/main.py +++ b/backend/main.py @@ -10,20 +10,40 @@ from database import get_db import crud import httpx +_log_dir = Path(os.getenv("LOG_FILE", "logs/usage.log")).parent +_log_dir.mkdir(parents=True, exist_ok=True) +_fmt = logging.Formatter("%(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S") + # Rotating usage log (8 KB per file, 3 backups) -_log_path = Path(os.getenv("LOG_FILE", "logs/usage.log")) -_log_path.parent.mkdir(parents=True, exist_ok=True) -_handler = RotatingFileHandler(str(_log_path), maxBytes=8192, backupCount=3, encoding="utf-8") -_handler.setFormatter(logging.Formatter("%(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")) +_usage_handler = RotatingFileHandler(str(_log_dir / "usage.log"), maxBytes=8192, backupCount=3, encoding="utf-8") +_usage_handler.setFormatter(_fmt) usage_log = logging.getLogger("proxy.usage") usage_log.setLevel(logging.INFO) -usage_log.addHandler(_handler) +usage_log.addHandler(_usage_handler) usage_log.propagate = False +# Rotating error log (64 KB per file, 5 backups) +_error_handler = RotatingFileHandler(str(_log_dir / "error.log"), maxBytes=65536, backupCount=5, encoding="utf-8") +_error_handler.setFormatter(_fmt) +error_log = logging.getLogger("proxy.error") +error_log.setLevel(logging.ERROR) +error_log.addHandler(_error_handler) +error_log.propagate = False + +def _content_to_str(content) -> str: + """Normalize OpenAI content: string or array of content parts → plain string.""" + if isinstance(content, list): + return " ".join( + part.get("text", "") if isinstance(part, dict) else str(part) + for part in content + ) + return content or "" + + def _last_user_msg(messages: list, max_len: int = 120) -> str: for msg in reversed(messages): if msg.get("role") == "user": - text = (msg.get("content") or "").replace("\n", " ").strip() + text = _content_to_str(msg.get("content")).replace("\n", " ").strip() return text[:max_len] + ("…" if len(text) > max_len else "") return "" @@ -43,6 +63,25 @@ async def require_api_key(request: Request, db: Session = Depends(get_db)): app = FastAPI(title="Ollama Proxy", dependencies=[Depends(require_api_key)]) +@app.on_event("startup") +def apply_env_settings(): + """Write env-configured values into DB so they take effect until next restart.""" + db = next(get_db()) + try: + if url := os.getenv("OLLAMA_URL"): + crud.set_setting(db, "ollama_url", url) + if model := os.getenv("DEFAULT_MODEL"): + crud.set_setting(db, "default_model", model) + db.commit() + finally: + db.close() + +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception): + error_log.error("Unhandled exception | %s %s | %s: %s", + request.method, request.url.path, type(exc).__name__, exc, exc_info=exc) + return JSONResponse(status_code=500, content={"error": {"message": "Internal server error", "type": "server_error"}}) + async def proxy_request(url: str, method: str = "GET", json_data: dict = None): async with httpx.AsyncClient(timeout=300.0) as client: response = await client.request(method=method, url=url, json=json_data) @@ -60,23 +99,33 @@ async def generate(request: Request, db: Session = Depends(get_db)): prompt_preview = (body.get("prompt", "").replace("\n", " ").strip())[:120] usage_log.info('%s | /api/generate | %s | ~%d tokens | "%s"', request.state.api_key_name, body.get("model", "?"), prompt_tokens, prompt_preview) - response = await proxy_request(f"{ollama_url}/api/generate", method="POST", json_data=body) - return JSONResponse(content=response.json(), status_code=response.status_code) + try: + response = await proxy_request(f"{ollama_url}/api/generate", method="POST", json_data=body) + return JSONResponse(content=response.json(), status_code=response.status_code) + except Exception as exc: + error_log.error("Proxy error | %s | /api/generate | %s | %s: %s", + request.state.api_key_name, body.get("model", "?"), type(exc).__name__, exc, exc_info=exc) + raise @app.post("/api/chat") async def chat(request: Request, db: Session = Depends(get_db)): ollama_url = crud.get_setting(db, "ollama_url", os.getenv("OLLAMA_URL", "http://localhost:11434")) body = await request.json() messages = body.get("messages", []) - prompt_tokens = sum(crud.count_tokens(msg.get("content") or "") for msg in messages) + prompt_tokens = sum(crud.count_tokens(_content_to_str(msg.get("content"))) for msg in messages) if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1): raise HTTPException(status_code=429, detail="Quota exceeded") usage_log.info('%s | /api/chat | %s | ~%d tokens | "%s"', request.state.api_key_name, body.get("model", "?"), prompt_tokens, _last_user_msg(messages)) - response = await proxy_request(f"{ollama_url}/api/chat", method="POST", json_data=body) - return JSONResponse(content=response.json(), status_code=response.status_code) + try: + response = await proxy_request(f"{ollama_url}/api/chat", method="POST", json_data=body) + return JSONResponse(content=response.json(), status_code=response.status_code) + except Exception as exc: + error_log.error("Proxy error | %s | /api/chat | %s | %s: %s", + request.state.api_key_name, body.get("model", "?"), type(exc).__name__, exc, exc_info=exc) + raise @app.get("/api/tags") async def list_models(db: Session = Depends(get_db)): @@ -103,7 +152,7 @@ async def openai_chat_completions(request: Request, db: Session = Depends(get_db body = await request.json() messages = body.get("messages", []) - prompt_tokens = sum(crud.count_tokens(msg.get("content") or "") for msg in messages) + prompt_tokens = sum(crud.count_tokens(_content_to_str(msg.get("content"))) for msg in messages) if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1): raise HTTPException(status_code=429, detail="Quota exceeded") @@ -119,15 +168,25 @@ async def openai_chat_completions(request: Request, db: Session = Depends(get_db if body.get("stream"): async def generate(): - async with httpx.AsyncClient(timeout=300.0) as client: - async with client.stream("POST", target, json=body) as resp: - async for chunk in resp.aiter_bytes(): - yield chunk + try: + async with httpx.AsyncClient(timeout=300.0) as client: + async with client.stream("POST", target, json=body) as resp: + async for chunk in resp.aiter_bytes(): + yield chunk + except Exception as exc: + error_log.error("Stream error | %s | /v1/chat/completions | %s | %s: %s", + request.state.api_key_name, model_name, type(exc).__name__, exc, exc_info=exc) + raise return StreamingResponse( generate(), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}, ) - response = await proxy_request(target, method="POST", json_data=body) - return JSONResponse(content=response.json(), status_code=response.status_code) + try: + response = await proxy_request(target, method="POST", json_data=body) + return JSONResponse(content=response.json(), status_code=response.status_code) + except Exception as exc: + error_log.error("Proxy error | %s | /v1/chat/completions | %s | %s: %s", + request.state.api_key_name, model_name, type(exc).__name__, exc, exc_info=exc) + raise diff --git a/docker-compose.yml b/docker-compose.yml index 17eba9a..d74e815 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,8 @@ services: ports: - "${PROXY_PORT:-8000}:${PROXY_PORT:-8000}" - "127.0.0.1:8001:8001" + extra_hosts: + - "host.docker.internal:host-gateway" volumes: - ./backend/test.db:/app/backend/test.db - ./backend/logs:/app/backend/logs diff --git a/frontend/src/main.jsx b/frontend/src/main.jsx index 0b66f63..84be61e 100644 --- a/frontend/src/main.jsx +++ b/frontend/src/main.jsx @@ -80,6 +80,7 @@ function SettingsSection({ password }) { const [settings, setSettings] = useState(null); const [availableModels, setAvailableModels] = useState([]); const [modelsLoading, setModelsLoading] = useState(false); + const [ollamaReachable, setOllamaReachable] = useState(true); const [proxyEndpoint, setProxyEndpoint] = useState(null); const [saved, setSaved] = useState(false); const [error, setError] = useState(null); @@ -91,12 +92,14 @@ function SettingsSection({ password }) { headers: authHeaders(password), params: url ? { url } : {}, }); - const models = res.data.models; + const { models, reachable } = res.data; + setOllamaReachable(reachable); setAvailableModels(models); if (models.length > 0 && !models.includes(currentModel)) { setSettings(s => ({ ...s, default_model: models[0] })); } } catch { + setOllamaReachable(false); setAvailableModels([]); } finally { setModelsLoading(false); @@ -144,14 +147,19 @@ function SettingsSection({ password }) {
- setSettings({ ...settings, ollama_url: e.target.value })} - onBlur={(e) => fetchModels(e.target.value, settings.default_model)} - placeholder="http://localhost:11434" - required - /> +
+ setSettings({ ...settings, ollama_url: e.target.value })} + onBlur={(e) => fetchModels(e.target.value, settings.default_model)} + placeholder="http://localhost:11434" + required + /> + {!ollamaReachable && !modelsLoading && ( +
⚠ Ollama nicht erreichbar unter {settings.ollama_url}
+ )} +
diff --git a/frontend/src/styles.css b/frontend/src/styles.css index 0c76504..85a26c8 100644 --- a/frontend/src/styles.css +++ b/frontend/src/styles.css @@ -194,6 +194,31 @@ tr:hover { font-size: 14px; } +.settings-input-wrap { + flex: 1; + display: flex; + flex-direction: column; + gap: 4px; +} + +.settings-input-wrap input { + width: 100%; + padding: 8px 10px; + border: 1px solid #ccc; + border-radius: 4px; + font-size: 14px; + box-sizing: border-box; +} + +.warning { + color: #b8520a; + background: #fff3e0; + border: 1px solid #e67e22; + border-radius: 4px; + padding: 6px 10px; + font-size: 13px; +} + .settings-form button { align-self: flex-start; padding: 8px 20px;