|
|
|
|
@@ -59,6 +59,8 @@ Answer based on the above context:"""
|
|
|
|
|
{'role': 'user', 'content': prompt},
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
logger.info(f"Chat request to {url} with model {model}")
|
|
|
|
|
|
|
|
|
|
# Yield sources first
|
|
|
|
|
sources = [
|
|
|
|
|
@@ -68,20 +70,25 @@ Answer based on the above context:"""
|
|
|
|
|
yield f'data: {json.dumps({"type": "sources", "sources": sources})}\n\n'
|
|
|
|
|
|
|
|
|
|
# Stream tokens
|
|
|
|
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
|
|
|
|
async with client.stream('POST', url, json=payload) as resp:
|
|
|
|
|
resp.raise_for_status()
|
|
|
|
|
async for line in resp.aiter_lines():
|
|
|
|
|
if not line.strip():
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
chunk_data = json.loads(line)
|
|
|
|
|
token = chunk_data.get('message', {}).get('content', '')
|
|
|
|
|
if token:
|
|
|
|
|
yield f'data: {json.dumps({"type": "token", "token": token})}\n\n'
|
|
|
|
|
if chunk_data.get('done', False):
|
|
|
|
|
break
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
async with httpx.AsyncClient(timeout=180.0) as client:
|
|
|
|
|
async with client.stream('POST', url, json=payload) as resp:
|
|
|
|
|
logger.info(f"Ollama response status: {resp.status_code}")
|
|
|
|
|
resp.raise_for_status()
|
|
|
|
|
async for line in resp.aiter_lines():
|
|
|
|
|
if not line.strip():
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
chunk_data = json.loads(line)
|
|
|
|
|
token = chunk_data.get('message', {}).get('content', '')
|
|
|
|
|
if token:
|
|
|
|
|
yield f'data: {json.dumps({"type": "token", "token": token})}\n\n'
|
|
|
|
|
if chunk_data.get('done', False):
|
|
|
|
|
break
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
continue
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.error(f"Chat stream error: {e}")
|
|
|
|
|
yield f'data: {json.dumps({"type": "error", "error": str(e)})}\n\n'
|
|
|
|
|
|
|
|
|
|
yield f'data: {json.dumps({"type": "done"})}\n\n'
|
|
|
|
|
|