""" linking/agent.py — Knowledge Linking Agent: infers and creates AI-powered document links. """ from __future__ import annotations import json import logging import asyncpg import httpx from base_agent import BaseAgent logger = logging.getLogger('agent.linking') class LinkingAgent(BaseAgent): agent_type = 'linking' async def process(self, job_id: str, payload: dict) -> dict: """ For each document without AI-inferred links: 1. Find top-5 semantically similar documents (vector search). 2. Insert 'ai-inferred' relations. """ async with self.pool.acquire() as conn: # Documents that have chunks but no ai-inferred relations docs = await conn.fetch( """ SELECT DISTINCT d.id::text, d.title, d.path FROM documents d JOIN chunks c ON c.document_id = d.id WHERE NOT EXISTS ( SELECT 1 FROM relations r WHERE r.source_doc_id = d.id AND r.relation_type = 'ai-inferred' ) LIMIT 50 """ ) linked = 0 for doc in docs: doc_id = doc['id'] # Find similar docs via average chunk embedding similar = await conn.fetch( """ WITH doc_avg AS ( SELECT AVG(embedding) AS avg_emb FROM chunks WHERE document_id = $1::uuid ) SELECT d2.id::text AS target_id, d2.path AS target_path, 1 - (AVG(c2.embedding) <=> (SELECT avg_emb FROM doc_avg)) AS score FROM chunks c2 JOIN documents d2 ON d2.id = c2.document_id WHERE c2.document_id != $1::uuid GROUP BY d2.id, d2.path HAVING 1 - (AVG(c2.embedding) <=> (SELECT avg_emb FROM doc_avg)) > 0.75 ORDER BY score DESC LIMIT 5 """, doc_id, ) if not similar: continue records = [ (doc_id, row['target_path'], row['target_id'], 'ai-inferred') for row in similar ] await conn.executemany( """ INSERT INTO relations (source_doc_id, target_path, target_doc_id, relation_type) VALUES ($1::uuid, $2, $3::uuid, $4) ON CONFLICT DO NOTHING """, records, ) linked += len(similar) return {'documents_processed': len(docs), 'links_created': linked}