You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
84 lines
2.8 KiB
84 lines
2.8 KiB
"""
|
|
linking/agent.py — Knowledge Linking Agent: infers and creates AI-powered document links.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
|
|
import asyncpg
|
|
import httpx
|
|
|
|
from base_agent import BaseAgent
|
|
|
|
# Module-level logger, registered under the 'agent.linking' name.
logger = logging.getLogger('agent.linking')
|
|
|
|
|
|
class LinkingAgent(BaseAgent):
    """Agent that links documents to semantically similar documents.

    For every candidate document it compares the average of the document's
    chunk embeddings against the per-document average of all other
    documents and stores the best matches as 'ai-inferred' rows in the
    ``relations`` table.
    """

    agent_type = 'linking'

    # Documents that own at least one chunk but have no outgoing
    # 'ai-inferred' relation yet. At most 50 are handled per run.
    _CANDIDATES_SQL = """
        SELECT DISTINCT d.id::text, d.title, d.path
        FROM documents d
        JOIN chunks c ON c.document_id = d.id
        WHERE NOT EXISTS (
            SELECT 1 FROM relations r
            WHERE r.source_doc_id = d.id AND r.relation_type = 'ai-inferred'
        )
        LIMIT 50
    """

    # Top-5 other documents whose averaged chunk embedding scores above 0.75
    # against the averaged chunk embedding of document $1.
    # NOTE(review): `<=>` is presumably the pgvector cosine-distance operator,
    # which would make `1 - distance` a cosine similarity — confirm.
    _SIMILAR_SQL = """
        WITH doc_avg AS (
            SELECT AVG(embedding) AS avg_emb
            FROM chunks WHERE document_id = $1::uuid
        )
        SELECT d2.id::text AS target_id, d2.path AS target_path,
               1 - (AVG(c2.embedding) <=> (SELECT avg_emb FROM doc_avg)) AS score
        FROM chunks c2
        JOIN documents d2 ON d2.id = c2.document_id
        WHERE c2.document_id != $1::uuid
        GROUP BY d2.id, d2.path
        HAVING 1 - (AVG(c2.embedding) <=> (SELECT avg_emb FROM doc_avg)) > 0.75
        ORDER BY score DESC
        LIMIT 5
    """

    # Rows that violate a uniqueness constraint are skipped silently.
    _INSERT_SQL = """
        INSERT INTO relations (source_doc_id, target_path, target_doc_id, relation_type)
        VALUES ($1::uuid, $2, $3::uuid, $4)
        ON CONFLICT DO NOTHING
    """

    @staticmethod
    def _inserted_count(status: str) -> int:
        """Return the row count from an INSERT command status.

        The status of an INSERT has the form ``'INSERT <oid> <count>'``;
        the trailing integer is the number of rows actually written.
        """
        return int(status.rsplit(' ', 1)[-1])

    async def process(self, job_id: str, payload: dict) -> dict:
        """
        For each document without AI-inferred links:
          1. Find top-5 semantically similar documents (vector search).
          2. Insert 'ai-inferred' relations.

        Args:
            job_id: Identifier of the job; only used in the summary log line.
            payload: Job payload; not read by this agent.

        Returns:
            A dict with ``documents_processed`` (number of candidate
            documents examined) and ``links_created`` (number of relation
            rows actually inserted; rows skipped by ``ON CONFLICT DO
            NOTHING`` are not counted).

        NOTE(review): a document whose matches all score <= 0.75 never
        receives a relation, so it is selected again on every run. Because
        the candidate query has ``LIMIT 50`` and no ``ORDER BY``, such
        documents may keep occupying the batch — consider recording which
        documents were already processed.
        """
        linked = 0

        async with self.pool.acquire() as conn:
            docs = await conn.fetch(self._CANDIDATES_SQL)

            for doc in docs:
                doc_id = doc['id']

                # Find similar docs via average chunk embedding
                similar = await conn.fetch(self._SIMILAR_SQL, doc_id)
                if not similar:
                    continue

                # One transaction per document keeps its links all-or-nothing,
                # while per-row statuses give the true number of inserted rows
                # (a bulk executemany() does not report how many rows it
                # actually wrote).
                async with conn.transaction():
                    for row in similar:
                        status = await conn.execute(
                            self._INSERT_SQL,
                            doc_id,
                            row['target_path'],
                            row['target_id'],
                            'ai-inferred',
                        )
                        linked += self._inserted_count(status)

        logger.info(
            'linking job %s: processed %d documents, created %d links',
            job_id, len(docs), linked,
        )
        return {'documents_processed': len(docs), 'links_created': linked}
|