# second-brain/services/agents/summarization/agent.py

"""
summarization/agent.py — Summarization Agent: generates summaries for long documents.
"""

from __future__ import annotations

import json
import logging
import re

import httpx

from base_agent import BaseAgent

# Module-level logger used by the summarization agent.
logger = logging.getLogger('agent.summarization')

# Prompt template sent to the language model. The ``{title}`` and
# ``{content}`` placeholders are filled in with ``str.format`` before the
# request is made; the text asks for a 2-4 sentence summary with no preamble.
SUMMARY_PROMPT = """You are a knowledge management assistant.
Write a concise 2-4 sentence summary of the following document.
The summary should capture the main ideas and be useful for quick reference.
Respond with only the summary, no preamble.

Title: {title}

Content:
{content}

Summary:"""


class SummarizationAgent(BaseAgent):
    """Agent that stores LLM-generated summaries in document frontmatter.

    Each run selects up to 10 rows from ``documents`` with
    ``word_count > 500`` whose frontmatter has no (or an empty) ``summary``
    key, requests a short summary from the ``/api/generate`` endpoint at
    ``settings.ollama_url``, and writes it back under
    ``frontmatter['summary']``.

    NOTE(review): ``self.pool`` and ``self.settings`` are not defined in this
    file; they are presumably provided by ``BaseAgent`` — confirm.
    """

    agent_type = 'summarization'

    async def process(self, job_id: str, payload: dict) -> dict:
        """Summarize one batch of long documents that lack a summary.

        Failures on an individual document are logged as warnings and the
        document is skipped, so one bad row does not abort the batch.

        Args:
            job_id: Identifier of the job being run (not used here).
            payload: Job payload (not used here).

        Returns:
            ``{'documents_summarized': n}`` where ``n`` is the number of
            documents whose frontmatter was updated in this run.
        """
        ollama_url = self.settings.ollama_url
        model = self.settings.chat_model

        async with self.pool.acquire() as conn:
            # Long documents that don't have a summary in frontmatter
            docs = await conn.fetch(
                """
                SELECT id::text, title, content, frontmatter
                FROM documents
                WHERE word_count > 500
                  AND (frontmatter->>'summary' IS NULL OR frontmatter->>'summary' = '')
                LIMIT 10
                """
            )

            summarized = 0
            for doc in docs:
                doc_id = doc['id']
                title = doc['title'] or ''
                # Only the first 4000 characters are sent, to bound prompt size.
                content = (doc['content'] or '')[:4000]

                try:
                    summary = await self._generate_summary(title, content, ollama_url, model)
                    if summary:
                        fm = self._load_frontmatter(doc['frontmatter'])
                        fm['summary'] = summary
                        await conn.execute(
                            "UPDATE documents SET frontmatter = $2::jsonb WHERE id = $1::uuid",
                            doc_id, json.dumps(fm),
                        )
                        summarized += 1
                        logger.debug('Summarized: %s', title)
                except Exception as exc:
                    # Deliberate best-effort boundary: log and move on to the
                    # next document instead of failing the whole job.
                    logger.warning('Failed to summarize %s: %s', doc_id, exc)

        return {'documents_summarized': summarized}

    @staticmethod
    def _load_frontmatter(raw: object) -> dict:
        """Return the stored frontmatter as a fresh, mutable ``dict``.

        Accepts ``None``/empty values (treated as no frontmatter), an
        already-decoded mapping, or JSON text. The text case matters because
        some drivers hand back ``json``/``jsonb`` columns as strings unless a
        codec is registered (assumed to apply to the pool used here — TODO
        confirm); calling ``dict()`` directly on such a string would raise.

        Raises:
            TypeError: If JSON text decodes to something other than an
                object or ``null``.
            ValueError: If the text is not valid JSON.
        """
        if not raw:
            return {}
        if isinstance(raw, (str, bytes, bytearray)):
            decoded = json.loads(raw)
            if decoded is None:
                return {}
            if not isinstance(decoded, dict):
                raise TypeError(
                    f'frontmatter must be a JSON object, got {type(decoded).__name__}'
                )
            return decoded
        # Copy so the caller can add keys without mutating the fetched row.
        return dict(raw)

    async def _generate_summary(
        self, title: str, content: str, ollama_url: str, model: str
    ) -> str:
        """Request a summary for one document from the generate endpoint.

        Args:
            title: Document title inserted into the prompt.
            content: Document text (already truncated by the caller).
            ollama_url: Base URL of the server; a trailing slash is tolerated.
            model: Model name passed through in the request body.

        Returns:
            The ``response`` field of the JSON reply with surrounding
            whitespace removed, or ``''`` when the field is missing or null.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status
                (via ``raise_for_status``).
        """
        prompt = SUMMARY_PROMPT.format(title=title, content=content)
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(
                f'{ollama_url.rstrip("/")}/api/generate',
                json={'model': model, 'prompt': prompt, 'stream': False},
            )
            resp.raise_for_status()
            # ``or ''`` guards against an explicit null ``response`` value,
            # which would otherwise fail on ``.strip()``.
            return (resp.json().get('response') or '').strip()