# second-brain/services/rag-api/routers/chat.py

"""
routers/chat.py — /chat endpoint with SSE streaming.
"""

from __future__ import annotations

from fastapi import APIRouter, Depends
from fastapi.responses import StreamingResponse

from core.database import get_pool
from core.settings import Settings
from models.requests import ChatRequest
from services.chat import stream_chat
from services.embedder import EmbedService
from services.retriever import hybrid_search

router = APIRouter(prefix='/chat', tags=['chat'])


def _get_settings() -> Settings:
    """Dependency provider: return the ``app_settings`` object from ``main``.

    The import is performed at call time rather than at module import
    time — presumably to avoid a circular import between ``main`` and this
    router module (TODO confirm against ``main``).
    """
    from main import app_settings as current_settings

    return current_settings


@router.post('')
async def chat(req: ChatRequest, settings: Settings = Depends(_get_settings)):
    """Answer a chat message as a ``text/event-stream`` response.

    Steps, in order:

    1. Obtain the database pool and embed ``req.message`` with the
       configured embedding model.
    2. Run ``hybrid_search`` on a pooled connection to collect up to
       ``req.context_limit`` context chunks, using
       ``settings.search_threshold`` as the threshold.
    3. Hand the message and the chunks to ``stream_chat`` and wrap what it
       returns in a ``StreamingResponse``.

    Args:
        req: Request body; ``message`` and ``context_limit`` are read.
        settings: Application settings injected through ``_get_settings``.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """
    db_pool = await get_pool()
    query_vector = await EmbedService(
        settings.ollama_url,
        settings.embedding_model,
    ).embed(req.message)

    # The connection is only needed for retrieval; it is released when this
    # block exits, i.e. before the response starts streaming.
    async with db_pool.acquire() as connection:
        search_kwargs = {
            'conn': connection,
            'query': req.message,
            'embedding': query_vector,
            'limit': req.context_limit,
            'threshold': settings.search_threshold,
        }
        # hybrid_search yields a pair; only the first element is used here.
        retrieved_chunks, _ = await hybrid_search(**search_kwargs)

    event_stream = stream_chat(
        message=req.message,
        context_chunks=retrieved_chunks,
        ollama_url=settings.ollama_url,
        model=settings.chat_model,
    )
    # Ask clients not to cache the stream; 'X-Accel-Buffering: no' is the
    # header nginx-style reverse proxies honour to turn off response buffering.
    sse_headers = {
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no',
    }
    return StreamingResponse(
        event_stream,
        media_type='text/event-stream',
        headers=sse_headers,
    )