You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

84 lines
2.8 KiB

"""
linking/agent.py — Knowledge Linking Agent: infers and creates AI-powered document links.
"""
from __future__ import annotations
import json
import logging
import asyncpg
import httpx
from base_agent import BaseAgent
# Module-level logger for this agent, registered under the name 'agent.linking'.
logger = logging.getLogger('agent.linking')
class LinkingAgent(BaseAgent):
    """Agent that creates 'ai-inferred' relations between similar documents.

    Similarity is computed in SQL from the average of each document's chunk
    embeddings (the ``<=>`` operator is presumably pgvector's cosine
    distance — TODO confirm against the database extensions in use).
    """

    agent_type = 'linking'

    # Tunables (previously hard-coded in the SQL). Subclasses may override.
    link_batch_size = 50    # max source documents examined per run
    link_top_k = 5          # max relations created per source document
    link_min_score = 0.75   # minimum (1 - distance) for a link to be created

    async def process(self, job_id: str, payload: dict) -> dict:
        """Link documents that have chunks but no 'ai-inferred' relations yet.

        For each such document (up to ``link_batch_size`` per call):
          1. Find up to ``link_top_k`` other documents whose average chunk
             embedding scores above ``link_min_score``.
          2. Insert one 'ai-inferred' row per match into ``relations``.

        Args:
            job_id: Identifier of the job; only used for logging here.
            payload: Job payload; not read by this agent.

        Returns:
            A dict with ``documents_processed`` (number of candidate documents
            examined) and ``links_created`` (number of relation rows actually
            inserted; rows skipped by ``ON CONFLICT DO NOTHING`` are excluded).
        """
        async with self.pool.acquire() as conn:
            # Documents that have chunks but no ai-inferred relations.
            # NOTE(review): a document with no neighbour above the threshold
            # never gets an 'ai-inferred' row, so it is selected again on every
            # run; with no ORDER BY such documents can keep filling the batch.
            docs = await conn.fetch(
                """
                SELECT DISTINCT d.id::text, d.title, d.path
                FROM documents d
                JOIN chunks c ON c.document_id = d.id
                WHERE NOT EXISTS (
                    SELECT 1 FROM relations r
                    WHERE r.source_doc_id = d.id AND r.relation_type = 'ai-inferred'
                )
                LIMIT $1::int
                """,
                self.link_batch_size,
            )

            linked = 0
            for doc in docs:
                doc_id = doc['id']

                # Find similar docs via average chunk embedding.
                similar = await conn.fetch(
                    """
                    WITH doc_avg AS (
                        SELECT AVG(embedding) AS avg_emb
                        FROM chunks WHERE document_id = $1::uuid
                    )
                    SELECT d2.id::text AS target_id, d2.path AS target_path,
                           1 - (AVG(c2.embedding) <=> (SELECT avg_emb FROM doc_avg)) AS score
                    FROM chunks c2
                    JOIN documents d2 ON d2.id = c2.document_id
                    WHERE c2.document_id != $1::uuid
                    GROUP BY d2.id, d2.path
                    HAVING 1 - (AVG(c2.embedding) <=> (SELECT avg_emb FROM doc_avg)) > $2::float8
                    ORDER BY score DESC
                    LIMIT $3::int
                    """,
                    doc_id,
                    self.link_min_score,
                    self.link_top_k,
                )
                if not similar:
                    continue

                # Single-statement bulk insert. RETURNING yields one row per
                # relation actually written, so conflicts that DO NOTHING
                # skipped are not counted as created links.
                inserted = await conn.fetch(
                    """
                    INSERT INTO relations (source_doc_id, target_path, target_doc_id, relation_type)
                    SELECT $1::uuid, t.target_path, t.target_id::uuid, 'ai-inferred'
                    FROM unnest($2::text[], $3::text[]) AS t(target_path, target_id)
                    ON CONFLICT DO NOTHING
                    RETURNING 1
                    """,
                    doc_id,
                    [row['target_path'] for row in similar],
                    [row['target_id'] for row in similar],
                )
                linked += len(inserted)

        logger.info(
            'linking job %s: processed %d documents, created %d links',
            job_id, len(docs), linked,
        )
        return {'documents_processed': len(docs), 'links_created': linked}

Powered by TurnKey Linux.