""" summarization/agent.py — Summarization Agent: generates summaries for long documents. """ from __future__ import annotations import logging import re import httpx from base_agent import BaseAgent logger = logging.getLogger('agent.summarization') SUMMARY_PROMPT = """You are a knowledge management assistant. Write a concise 2-4 sentence summary of the following document. The summary should capture the main ideas and be useful for quick reference. Respond with only the summary, no preamble. Title: {title} Content: {content} Summary:""" class SummarizationAgent(BaseAgent): agent_type = 'summarization' async def process(self, job_id: str, payload: dict) -> dict: ollama_url = self.settings.ollama_url model = self.settings.chat_model async with self.pool.acquire() as conn: # Long documents that don't have a summary in frontmatter docs = await conn.fetch( """ SELECT id::text, title, content, frontmatter FROM documents WHERE word_count > 500 AND (frontmatter->>'summary' IS NULL OR frontmatter->>'summary' = '') LIMIT 10 """ ) summarized = 0 for doc in docs: doc_id = doc['id'] title = doc['title'] or '' content = (doc['content'] or '')[:4000] try: summary = await self._generate_summary(title, content, ollama_url, model) if summary: fm = dict(doc['frontmatter'] or {}) fm['summary'] = summary await conn.execute( "UPDATE documents SET frontmatter = $2::jsonb WHERE id = $1::uuid", doc_id, __import__('json').dumps(fm), ) summarized += 1 logger.debug('Summarized: %s', title) except Exception as exc: logger.warning('Failed to summarize %s: %s', doc_id, exc) return {'documents_summarized': summarized} async def _generate_summary( self, title: str, content: str, ollama_url: str, model: str ) -> str: prompt = SUMMARY_PROMPT.format(title=title, content=content) async with httpx.AsyncClient(timeout=60.0) as client: resp = await client.post( f'{ollama_url.rstrip("/")}/api/generate', json={'model': model, 'prompt': prompt, 'stream': False}, ) resp.raise_for_status() return resp.json().get('response', '').strip()