Fix OpenAI array content, add error logging, Ollama reachability warning
- Normalize OpenAI array-format content to a string to fix connection resets
- Add error.log with a rotating handler for proxy and stream errors
- Add a global unhandled-exception handler that returns a JSON 500 response
- Write the OLLAMA_URL/DEFAULT_MODEL env vars to the DB on startup (reset on restart)
- Add extra_hosts to docker-compose.yml for host.docker.internal on Linux
- Show a warning in the admin UI when the Ollama URL is unreachable
- Return reachable: true/false from the /api/ollama-models endpoint
This commit is contained in:
parent
280b3b0762
commit
8d3f9a7661
@ -162,9 +162,9 @@ async def get_ollama_models(
|
|||||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||||
response = await client.get(f"{ollama_url}/api/tags")
|
response = await client.get(f"{ollama_url}/api/tags")
|
||||||
models = [m["name"] for m in response.json().get("models", [])]
|
models = [m["name"] for m in response.json().get("models", [])]
|
||||||
|
return {"models": models, "reachable": True}
|
||||||
except Exception:
|
except Exception:
|
||||||
models = []
|
return {"models": [], "reachable": False}
|
||||||
return {"models": models}
|
|
||||||
|
|
||||||
# Statisches Frontend ausliefern (nur im Produktivbetrieb, wenn dist/ existiert)
|
# Statisches Frontend ausliefern (nur im Produktivbetrieb, wenn dist/ existiert)
|
||||||
_dist = Path(__file__).parent.parent / "frontend" / "dist"
|
_dist = Path(__file__).parent.parent / "frontend" / "dist"
|
||||||
|
|||||||
@ -10,20 +10,40 @@ from database import get_db
|
|||||||
import crud
|
import crud
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
_log_dir = Path(os.getenv("LOG_FILE", "logs/usage.log")).parent
|
||||||
|
_log_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
_fmt = logging.Formatter("%(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
# Rotating usage log (8 KB per file, 3 backups)
|
# Rotating usage log (8 KB per file, 3 backups)
|
||||||
_log_path = Path(os.getenv("LOG_FILE", "logs/usage.log"))
|
_usage_handler = RotatingFileHandler(str(_log_dir / "usage.log"), maxBytes=8192, backupCount=3, encoding="utf-8")
|
||||||
_log_path.parent.mkdir(parents=True, exist_ok=True)
|
_usage_handler.setFormatter(_fmt)
|
||||||
_handler = RotatingFileHandler(str(_log_path), maxBytes=8192, backupCount=3, encoding="utf-8")
|
|
||||||
_handler.setFormatter(logging.Formatter("%(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
|
|
||||||
usage_log = logging.getLogger("proxy.usage")
|
usage_log = logging.getLogger("proxy.usage")
|
||||||
usage_log.setLevel(logging.INFO)
|
usage_log.setLevel(logging.INFO)
|
||||||
usage_log.addHandler(_handler)
|
usage_log.addHandler(_usage_handler)
|
||||||
usage_log.propagate = False
|
usage_log.propagate = False
|
||||||
|
|
||||||
|
# Rotating error log (64 KB per file, 5 backups)
|
||||||
|
_error_handler = RotatingFileHandler(str(_log_dir / "error.log"), maxBytes=65536, backupCount=5, encoding="utf-8")
|
||||||
|
_error_handler.setFormatter(_fmt)
|
||||||
|
error_log = logging.getLogger("proxy.error")
|
||||||
|
error_log.setLevel(logging.ERROR)
|
||||||
|
error_log.addHandler(_error_handler)
|
||||||
|
error_log.propagate = False
|
||||||
|
|
||||||
|
def _content_to_str(content) -> str:
|
||||||
|
"""Normalize OpenAI content: string or array of content parts → plain string."""
|
||||||
|
if isinstance(content, list):
|
||||||
|
return " ".join(
|
||||||
|
part.get("text", "") if isinstance(part, dict) else str(part)
|
||||||
|
for part in content
|
||||||
|
)
|
||||||
|
return content or ""
|
||||||
|
|
||||||
|
|
||||||
def _last_user_msg(messages: list, max_len: int = 120) -> str:
|
def _last_user_msg(messages: list, max_len: int = 120) -> str:
|
||||||
for msg in reversed(messages):
|
for msg in reversed(messages):
|
||||||
if msg.get("role") == "user":
|
if msg.get("role") == "user":
|
||||||
text = (msg.get("content") or "").replace("\n", " ").strip()
|
text = _content_to_str(msg.get("content")).replace("\n", " ").strip()
|
||||||
return text[:max_len] + ("…" if len(text) > max_len else "")
|
return text[:max_len] + ("…" if len(text) > max_len else "")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
@ -43,6 +63,25 @@ async def require_api_key(request: Request, db: Session = Depends(get_db)):
|
|||||||
|
|
||||||
app = FastAPI(title="Ollama Proxy", dependencies=[Depends(require_api_key)])
|
app = FastAPI(title="Ollama Proxy", dependencies=[Depends(require_api_key)])
|
||||||
|
|
||||||
|
@app.on_event("startup")
def apply_env_settings():
    """Copy environment-configured settings into the database at startup.

    For each of ``OLLAMA_URL`` and ``DEFAULT_MODEL`` that is set to a
    non-empty value, the value is stored under the ``ollama_url`` /
    ``default_model`` setting key. Because this runs on every startup, the
    environment values are re-applied after each restart.
    """
    env_to_setting = (
        ("OLLAMA_URL", "ollama_url"),
        ("DEFAULT_MODEL", "default_model"),
    )
    db = next(get_db())
    try:
        for env_name, setting_key in env_to_setting:
            value = os.getenv(env_name)
            if value:  # unset or empty variables are skipped
                crud.set_setting(db, setting_key, value)
        db.commit()
    finally:
        # Always release the session, even if a write failed.
        db.close()
|
||||||
|
|
||||||
|
@app.exception_handler(Exception)
async def unhandled_exception_handler(request: Request, exc: Exception):
    """Log an otherwise unhandled exception and answer with a JSON 500.

    Args:
        request: The request during which the exception was raised; its
            method and URL path are included in the log line.
        exc: The exception instance; its type, message and traceback are
            written to the error log.

    Returns:
        A ``JSONResponse`` with status 500 and a generic error body, so no
        exception details are sent to the client.
    """
    error_log.error(
        "Unhandled exception | %s %s | %s: %s",
        request.method,
        request.url.path,
        type(exc).__name__,
        exc,
        exc_info=exc,
    )
    payload = {"error": {"message": "Internal server error", "type": "server_error"}}
    return JSONResponse(status_code=500, content=payload)
|
||||||
|
|
||||||
async def proxy_request(url: str, method: str = "GET", json_data: dict = None):
|
async def proxy_request(url: str, method: str = "GET", json_data: dict = None):
|
||||||
async with httpx.AsyncClient(timeout=300.0) as client:
|
async with httpx.AsyncClient(timeout=300.0) as client:
|
||||||
response = await client.request(method=method, url=url, json=json_data)
|
response = await client.request(method=method, url=url, json=json_data)
|
||||||
@ -60,23 +99,33 @@ async def generate(request: Request, db: Session = Depends(get_db)):
|
|||||||
prompt_preview = (body.get("prompt", "").replace("\n", " ").strip())[:120]
|
prompt_preview = (body.get("prompt", "").replace("\n", " ").strip())[:120]
|
||||||
usage_log.info('%s | /api/generate | %s | ~%d tokens | "%s"',
|
usage_log.info('%s | /api/generate | %s | ~%d tokens | "%s"',
|
||||||
request.state.api_key_name, body.get("model", "?"), prompt_tokens, prompt_preview)
|
request.state.api_key_name, body.get("model", "?"), prompt_tokens, prompt_preview)
|
||||||
|
try:
|
||||||
response = await proxy_request(f"{ollama_url}/api/generate", method="POST", json_data=body)
|
response = await proxy_request(f"{ollama_url}/api/generate", method="POST", json_data=body)
|
||||||
return JSONResponse(content=response.json(), status_code=response.status_code)
|
return JSONResponse(content=response.json(), status_code=response.status_code)
|
||||||
|
except Exception as exc:
|
||||||
|
error_log.error("Proxy error | %s | /api/generate | %s | %s: %s",
|
||||||
|
request.state.api_key_name, body.get("model", "?"), type(exc).__name__, exc, exc_info=exc)
|
||||||
|
raise
|
||||||
|
|
||||||
@app.post("/api/chat")
|
@app.post("/api/chat")
|
||||||
async def chat(request: Request, db: Session = Depends(get_db)):
|
async def chat(request: Request, db: Session = Depends(get_db)):
|
||||||
ollama_url = crud.get_setting(db, "ollama_url", os.getenv("OLLAMA_URL", "http://localhost:11434"))
|
ollama_url = crud.get_setting(db, "ollama_url", os.getenv("OLLAMA_URL", "http://localhost:11434"))
|
||||||
body = await request.json()
|
body = await request.json()
|
||||||
messages = body.get("messages", [])
|
messages = body.get("messages", [])
|
||||||
prompt_tokens = sum(crud.count_tokens(msg.get("content") or "") for msg in messages)
|
prompt_tokens = sum(crud.count_tokens(_content_to_str(msg.get("content"))) for msg in messages)
|
||||||
|
|
||||||
if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1):
|
if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1):
|
||||||
raise HTTPException(status_code=429, detail="Quota exceeded")
|
raise HTTPException(status_code=429, detail="Quota exceeded")
|
||||||
|
|
||||||
usage_log.info('%s | /api/chat | %s | ~%d tokens | "%s"',
|
usage_log.info('%s | /api/chat | %s | ~%d tokens | "%s"',
|
||||||
request.state.api_key_name, body.get("model", "?"), prompt_tokens, _last_user_msg(messages))
|
request.state.api_key_name, body.get("model", "?"), prompt_tokens, _last_user_msg(messages))
|
||||||
|
try:
|
||||||
response = await proxy_request(f"{ollama_url}/api/chat", method="POST", json_data=body)
|
response = await proxy_request(f"{ollama_url}/api/chat", method="POST", json_data=body)
|
||||||
return JSONResponse(content=response.json(), status_code=response.status_code)
|
return JSONResponse(content=response.json(), status_code=response.status_code)
|
||||||
|
except Exception as exc:
|
||||||
|
error_log.error("Proxy error | %s | /api/chat | %s | %s: %s",
|
||||||
|
request.state.api_key_name, body.get("model", "?"), type(exc).__name__, exc, exc_info=exc)
|
||||||
|
raise
|
||||||
|
|
||||||
@app.get("/api/tags")
|
@app.get("/api/tags")
|
||||||
async def list_models(db: Session = Depends(get_db)):
|
async def list_models(db: Session = Depends(get_db)):
|
||||||
@ -103,7 +152,7 @@ async def openai_chat_completions(request: Request, db: Session = Depends(get_db
|
|||||||
|
|
||||||
body = await request.json()
|
body = await request.json()
|
||||||
messages = body.get("messages", [])
|
messages = body.get("messages", [])
|
||||||
prompt_tokens = sum(crud.count_tokens(msg.get("content") or "") for msg in messages)
|
prompt_tokens = sum(crud.count_tokens(_content_to_str(msg.get("content"))) for msg in messages)
|
||||||
|
|
||||||
if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1):
|
if not crud.check_and_increment_quota(db, request.state.api_key_id, tokens=prompt_tokens, requests=1):
|
||||||
raise HTTPException(status_code=429, detail="Quota exceeded")
|
raise HTTPException(status_code=429, detail="Quota exceeded")
|
||||||
@ -119,15 +168,25 @@ async def openai_chat_completions(request: Request, db: Session = Depends(get_db
|
|||||||
|
|
||||||
if body.get("stream"):
|
if body.get("stream"):
|
||||||
async def generate():
|
async def generate():
|
||||||
|
try:
|
||||||
async with httpx.AsyncClient(timeout=300.0) as client:
|
async with httpx.AsyncClient(timeout=300.0) as client:
|
||||||
async with client.stream("POST", target, json=body) as resp:
|
async with client.stream("POST", target, json=body) as resp:
|
||||||
async for chunk in resp.aiter_bytes():
|
async for chunk in resp.aiter_bytes():
|
||||||
yield chunk
|
yield chunk
|
||||||
|
except Exception as exc:
|
||||||
|
error_log.error("Stream error | %s | /v1/chat/completions | %s | %s: %s",
|
||||||
|
request.state.api_key_name, model_name, type(exc).__name__, exc, exc_info=exc)
|
||||||
|
raise
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
generate(),
|
generate(),
|
||||||
media_type="text/event-stream",
|
media_type="text/event-stream",
|
||||||
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
|
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
response = await proxy_request(target, method="POST", json_data=body)
|
response = await proxy_request(target, method="POST", json_data=body)
|
||||||
return JSONResponse(content=response.json(), status_code=response.status_code)
|
return JSONResponse(content=response.json(), status_code=response.status_code)
|
||||||
|
except Exception as exc:
|
||||||
|
error_log.error("Proxy error | %s | /v1/chat/completions | %s | %s: %s",
|
||||||
|
request.state.api_key_name, model_name, type(exc).__name__, exc, exc_info=exc)
|
||||||
|
raise
|
||||||
|
|||||||
@ -6,6 +6,8 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "${PROXY_PORT:-8000}:${PROXY_PORT:-8000}"
|
- "${PROXY_PORT:-8000}:${PROXY_PORT:-8000}"
|
||||||
- "127.0.0.1:8001:8001"
|
- "127.0.0.1:8001:8001"
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
volumes:
|
volumes:
|
||||||
- ./backend/test.db:/app/backend/test.db
|
- ./backend/test.db:/app/backend/test.db
|
||||||
- ./backend/logs:/app/backend/logs
|
- ./backend/logs:/app/backend/logs
|
||||||
|
|||||||
@ -80,6 +80,7 @@ function SettingsSection({ password }) {
|
|||||||
const [settings, setSettings] = useState(null);
|
const [settings, setSettings] = useState(null);
|
||||||
const [availableModels, setAvailableModels] = useState([]);
|
const [availableModels, setAvailableModels] = useState([]);
|
||||||
const [modelsLoading, setModelsLoading] = useState(false);
|
const [modelsLoading, setModelsLoading] = useState(false);
|
||||||
|
const [ollamaReachable, setOllamaReachable] = useState(true);
|
||||||
const [proxyEndpoint, setProxyEndpoint] = useState(null);
|
const [proxyEndpoint, setProxyEndpoint] = useState(null);
|
||||||
const [saved, setSaved] = useState(false);
|
const [saved, setSaved] = useState(false);
|
||||||
const [error, setError] = useState(null);
|
const [error, setError] = useState(null);
|
||||||
@ -91,12 +92,14 @@ function SettingsSection({ password }) {
|
|||||||
headers: authHeaders(password),
|
headers: authHeaders(password),
|
||||||
params: url ? { url } : {},
|
params: url ? { url } : {},
|
||||||
});
|
});
|
||||||
const models = res.data.models;
|
const { models, reachable } = res.data;
|
||||||
|
setOllamaReachable(reachable);
|
||||||
setAvailableModels(models);
|
setAvailableModels(models);
|
||||||
if (models.length > 0 && !models.includes(currentModel)) {
|
if (models.length > 0 && !models.includes(currentModel)) {
|
||||||
setSettings(s => ({ ...s, default_model: models[0] }));
|
setSettings(s => ({ ...s, default_model: models[0] }));
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
|
setOllamaReachable(false);
|
||||||
setAvailableModels([]);
|
setAvailableModels([]);
|
||||||
} finally {
|
} finally {
|
||||||
setModelsLoading(false);
|
setModelsLoading(false);
|
||||||
@ -144,6 +147,7 @@ function SettingsSection({ password }) {
|
|||||||
</div>
|
</div>
|
||||||
<div className="settings-row">
|
<div className="settings-row">
|
||||||
<label>Ollama-Endpunkt</label>
|
<label>Ollama-Endpunkt</label>
|
||||||
|
<div className="settings-input-wrap">
|
||||||
<input
|
<input
|
||||||
type="url"
|
type="url"
|
||||||
value={settings.ollama_url}
|
value={settings.ollama_url}
|
||||||
@ -152,6 +156,10 @@ function SettingsSection({ password }) {
|
|||||||
placeholder="http://localhost:11434"
|
placeholder="http://localhost:11434"
|
||||||
required
|
required
|
||||||
/>
|
/>
|
||||||
|
{!ollamaReachable && !modelsLoading && (
|
||||||
|
<div className="warning">⚠ Ollama nicht erreichbar unter {settings.ollama_url}</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="settings-row">
|
<div className="settings-row">
|
||||||
<label>Standard-Modell</label>
|
<label>Standard-Modell</label>
|
||||||
|
|||||||
@ -194,6 +194,31 @@ tr:hover {
|
|||||||
font-size: 14px;
|
font-size: 14px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.settings-input-wrap {
|
||||||
|
flex: 1;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-input-wrap input {
|
||||||
|
width: 100%;
|
||||||
|
padding: 8px 10px;
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 14px;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
.warning {
|
||||||
|
color: #b8520a;
|
||||||
|
background: #fff3e0;
|
||||||
|
border: 1px solid #e67e22;
|
||||||
|
border-radius: 4px;
|
||||||
|
padding: 6px 10px;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
|
||||||
.settings-form button {
|
.settings-form button {
|
||||||
align-self: flex-start;
|
align-self: flex-start;
|
||||||
padding: 8px 20px;
|
padding: 8px 20px;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user