You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
79 lines
2.8 KiB
79 lines
2.8 KiB
"""
|
|
maintenance/agent.py — Maintenance Agent: detects broken links, orphaned documents, stale content.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
from base_agent import BaseAgent
|
|
|
|
logger = logging.getLogger('agent.maintenance')
|
|
|
|
|
|
class MaintenanceAgent(BaseAgent):
    """Agent that audits the document store and repairs resolvable WikiLinks.

    Each run collects a health report (broken links, orphaned documents,
    stale documents, chunks without embeddings) and then tries to attach
    previously unresolved WikiLinks to documents that now exist.
    """

    agent_type = 'maintenance'

    async def process(self, job_id: str, payload: dict) -> dict:
        """Run all maintenance checks and return a summary report.

        Args:
            job_id: Identifier of the job being processed. Not read here.
            payload: Job payload. Not read here.

        Returns:
            A dict with the keys:

            * ``broken_wikilinks`` - wikilink relations with no target
              document, counted *before* this run's resolution pass.
            * ``orphaned_documents`` - total number of documents that have
              neither incoming nor outgoing relations.
            * ``orphan_paths`` - paths of at most 20 of those documents.
            * ``stale_documents`` - documents whose ``indexed_at`` is NULL
              or older than seven days.
            * ``chunks_missing_embeddings`` - chunks whose ``embedding`` is
              NULL.
            * ``wikilinks_resolved`` - relations updated by the resolution
              pass of this run.
        """
        report: dict = {}

        # Shared by the orphan count and the orphan sample so that both
        # queries always agree on what "orphaned" means.
        orphan_predicate = """
            NOT EXISTS (
                SELECT 1 FROM relations r WHERE r.target_doc_id = d.id
            )
            AND NOT EXISTS (
                SELECT 1 FROM relations r WHERE r.source_doc_id = d.id
            )
        """

        async with self.pool.acquire() as conn:
            # 1. Broken WikiLinks (target_doc_id is NULL but target_path exists)
            report['broken_wikilinks'] = await conn.fetchval(
                """
                SELECT COUNT(*) FROM relations
                WHERE relation_type = 'wikilink' AND target_doc_id IS NULL
                """
            )

            # 2. Orphaned documents (no incoming links and no outgoing links).
            # The total is counted separately: deriving it from the sample
            # below would silently cap the reported number at the LIMIT.
            report['orphaned_documents'] = await conn.fetchval(
                'SELECT COUNT(*) FROM documents d WHERE ' + orphan_predicate
            )
            orphan_rows = await conn.fetch(
                'SELECT d.path FROM documents d WHERE '
                + orphan_predicate
                + ' ORDER BY d.path LIMIT 20'
            )
            report['orphan_paths'] = [row['path'] for row in orphan_rows]

            # 3. Documents not re-indexed in >7 days (or never indexed)
            stale_cutoff = datetime.now(timezone.utc) - timedelta(days=7)
            report['stale_documents'] = await conn.fetchval(
                'SELECT COUNT(*) FROM documents WHERE indexed_at < $1 OR indexed_at IS NULL',
                stale_cutoff,
            )

            # 4. Chunks that have no embedding stored
            report['chunks_missing_embeddings'] = await conn.fetchval(
                'SELECT COUNT(*) FROM chunks WHERE embedding IS NULL'
            )

            # 5. Resolve previously broken WikiLinks that now have matching docs.
            # strpos() is used instead of LIKE so that '_' and '%' inside a
            # link target are matched literally rather than as wildcards, and
            # empty targets are skipped because an empty substring would
            # match every document.
            # NOTE(review): if several documents match one link, the UPDATE
            # uses only one of them and which one is not predictable --
            # confirm whether a deterministic tie-break is wanted.
            status = await conn.execute(
                """
                UPDATE relations r
                SET target_doc_id = d.id
                FROM documents d
                WHERE r.target_doc_id IS NULL
                  AND r.relation_type = 'wikilink'
                  AND r.target_path <> ''
                  AND (strpos(d.path, r.target_path) > 0
                       OR d.title = r.target_path
                       OR r.target_path = ANY(d.aliases))
                """
            )
            # Assumes execute() returns a command tag such as "UPDATE 5",
            # whose last token is the affected-row count -- verify against
            # the driver in use.
            report['wikilinks_resolved'] = int(status.split()[-1])

        logger.info('Maintenance report: %s', report)
        return report
|