You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
53 lines
1.4 KiB
53 lines
1.4 KiB
"""
|
|
routers/chat.py — /chat endpoint with SSE streaming.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from fastapi import APIRouter, Depends
|
|
from fastapi.responses import StreamingResponse
|
|
|
|
from core.database import get_pool
|
|
from core.settings import Settings
|
|
from models.requests import ChatRequest
|
|
from services.chat import stream_chat
|
|
from services.embedder import EmbedService
|
|
from services.retriever import hybrid_search
|
|
|
|
router = APIRouter(prefix='/chat', tags=['chat'])
|
|
|
|
|
|
def _get_settings() -> Settings:
    """Return the ``app_settings`` object exposed by the ``main`` module.

    Used as a FastAPI dependency provider for the routes in this module.
    The import happens at call time rather than at module import time.
    NOTE(review): presumably this avoids a circular import between ``main``
    and this router; confirm before hoisting it to the top of the file.
    """
    from main import app_settings as current_settings

    return current_settings
|
|
|
|
|
|
@router.post('')
async def chat(req: ChatRequest, settings: Settings = Depends(_get_settings)):
    """Answer a chat message as a ``text/event-stream`` response.

    The incoming message is embedded, context chunks are fetched with
    ``hybrid_search``, and the iterator produced by ``stream_chat`` is
    wrapped in a ``StreamingResponse``.

    Args:
        req: Request body; ``message`` and ``context_limit`` are read here.
        settings: Application settings injected through ``_get_settings``.
            The Ollama URL, embedding model, chat model and search
            threshold are taken from it.

    Returns:
        A ``StreamingResponse`` that streams the output of ``stream_chat``.
    """
    db_pool = await get_pool()
    embed_service = EmbedService(settings.ollama_url, settings.embedding_model)
    query_vector = await embed_service.embed(req.message)

    # The connection is held only for the retrieval query; it is released
    # when the ``async with`` block exits, before the response is built.
    async with db_pool.acquire() as db_conn:
        search_result = await hybrid_search(
            conn=db_conn,
            query=req.message,
            embedding=query_vector,
            limit=req.context_limit,
            threshold=settings.search_threshold,
        )
        # Only the first element of the pair is used; the second is discarded.
        context_chunks, _ = search_result

    event_stream = stream_chat(
        message=req.message,
        context_chunks=context_chunks,
        ollama_url=settings.ollama_url,
        model=settings.chat_model,
    )
    # Ask clients and intermediaries not to cache the stream.
    # 'X-Accel-Buffering: no' is the conventional hint for nginx-style
    # reverse proxies to pass chunks through unbuffered.
    sse_headers = {
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no',
    }
    return StreamingResponse(
        event_stream,
        media_type='text/event-stream',
        headers=sse_headers,
    )
|