diff --git a/services/rag-api/services/chat.py b/services/rag-api/services/chat.py
index b5775f1..801bae8 100644
--- a/services/rag-api/services/chat.py
+++ b/services/rag-api/services/chat.py
@@ -59,6 +59,8 @@ Answer based on the above context:"""
             {'role': 'user', 'content': prompt},
         ],
     }
+
+    logger.info(f"Chat request to {url} with model {model}")
 
     # Yield sources first
     sources = [
@@ -68,20 +70,25 @@ Answer based on the above context:"""
     yield f'data: {json.dumps({"type": "sources", "sources": sources})}\n\n'
 
     # Stream tokens
-    async with httpx.AsyncClient(timeout=120.0) as client:
-        async with client.stream('POST', url, json=payload) as resp:
-            resp.raise_for_status()
-            async for line in resp.aiter_lines():
-                if not line.strip():
-                    continue
-                try:
-                    chunk_data = json.loads(line)
-                    token = chunk_data.get('message', {}).get('content', '')
-                    if token:
-                        yield f'data: {json.dumps({"type": "token", "token": token})}\n\n'
-                    if chunk_data.get('done', False):
-                        break
-                except json.JSONDecodeError:
-                    continue
+    try:
+        async with httpx.AsyncClient(timeout=180.0) as client:
+            async with client.stream('POST', url, json=payload) as resp:
+                logger.info(f"Ollama response status: {resp.status_code}")
+                resp.raise_for_status()
+                async for line in resp.aiter_lines():
+                    if not line.strip():
+                        continue
+                    try:
+                        chunk_data = json.loads(line)
+                        token = chunk_data.get('message', {}).get('content', '')
+                        if token:
+                            yield f'data: {json.dumps({"type": "token", "token": token})}\n\n'
+                        if chunk_data.get('done', False):
+                            break
+                    except json.JSONDecodeError:
+                        continue
+    except Exception as e:
+        logger.error(f"Chat stream error: {e}")
+        yield f'data: {json.dumps({"type": "error", "error": str(e)})}\n\n'
 
     yield f'data: {json.dumps({"type": "done"})}\n\n'