You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

79 lines
2.8 KiB

"""
maintenance/agent.py — Maintenance Agent: detects broken WikiLinks, orphaned documents, stale content, and chunks missing embeddings; re-resolves broken WikiLinks whose target document now exists.
"""
from __future__ import annotations
import logging
from datetime import datetime, timezone, timedelta
from base_agent import BaseAgent
logger = logging.getLogger('agent.maintenance')
class MaintenanceAgent(BaseAgent):
    """Agent that audits the document index and repairs resolvable WikiLinks.

    Each run collects health counters (broken WikiLinks, orphaned documents,
    stale documents, chunks without embeddings) and then re-links WikiLinks
    whose target can now be matched to a document.
    """

    agent_type = 'maintenance'

    # Documents whose indexed_at is older than this (or NULL) count as stale.
    _STALE_AFTER = timedelta(days=7)
    # Upper bound on how many orphan paths are listed in the report.
    _ORPHAN_SAMPLE_LIMIT = 20

    async def process(self, job_id: str, payload: dict) -> dict:
        """Run every maintenance check and return the resulting report.

        Args:
            job_id: Identifier of the job being processed (not used here).
            payload: Job payload (not used here).

        Returns:
            A dict with these keys:
              * ``broken_wikilinks`` - wikilink relations without a target
                document, counted *before* the resolution step runs.
              * ``orphaned_documents`` - total number of documents with no
                incoming and no outgoing relations.
              * ``orphan_paths`` - paths of at most ``_ORPHAN_SAMPLE_LIMIT``
                orphaned documents, ordered by path.
              * ``stale_documents`` - documents never indexed or indexed
                longer ago than ``_STALE_AFTER``.
              * ``chunks_missing_embeddings`` - chunks whose embedding is NULL.
              * ``wikilinks_resolved`` - relations updated by the resolution
                step in this run.
        """
        report = {}
        async with self.pool.acquire() as conn:
            # 1. Broken WikiLinks: wikilink relations with no resolved target.
            report['broken_wikilinks'] = await conn.fetchval(
                """
                SELECT COUNT(*) FROM relations
                WHERE relation_type = 'wikilink' AND target_doc_id IS NULL
                """
            )

            # 2. Orphaned documents (no incoming links and no outgoing links).
            # The window COUNT(*) is evaluated before LIMIT, so every returned
            # row carries the true total even though only a bounded sample of
            # paths is fetched. ORDER BY keeps the sample deterministic.
            orphan_rows = await conn.fetch(
                """
                SELECT d.id::text, d.title, d.path,
                       COUNT(*) OVER () AS total
                FROM documents d
                WHERE NOT EXISTS (
                    SELECT 1 FROM relations r WHERE r.target_doc_id = d.id
                )
                AND NOT EXISTS (
                    SELECT 1 FROM relations r WHERE r.source_doc_id = d.id
                )
                ORDER BY d.path
                LIMIT $1
                """,
                self._ORPHAN_SAMPLE_LIMIT,
            )
            report['orphaned_documents'] = (
                orphan_rows[0]['total'] if orphan_rows else 0
            )
            report['orphan_paths'] = [row['path'] for row in orphan_rows]

            # 3. Documents never indexed or not re-indexed within the window.
            stale_cutoff = datetime.now(timezone.utc) - self._STALE_AFTER
            report['stale_documents'] = await conn.fetchval(
                'SELECT COUNT(*) FROM documents '
                'WHERE indexed_at < $1 OR indexed_at IS NULL',
                stale_cutoff,
            )

            # 4. Chunks that have no embedding.
            report['chunks_missing_embeddings'] = await conn.fetchval(
                'SELECT COUNT(*) FROM chunks WHERE embedding IS NULL'
            )

            # 5. Resolve previously broken WikiLinks that now have matching
            # docs. strpos() does a literal substring test, so '_' and '%' in
            # a link target are not treated as LIKE wildcards, and the
            # non-empty guard stops a blank target from matching every
            # document.
            # NOTE(review): when several documents match one relation,
            # PostgreSQL's UPDATE ... FROM picks one of them arbitrarily.
            status = await conn.execute(
                """
                UPDATE relations r
                SET target_doc_id = d.id
                FROM documents d
                WHERE r.target_doc_id IS NULL
                  AND r.relation_type = 'wikilink'
                  AND r.target_path <> ''
                  AND (strpos(d.path, r.target_path) > 0
                       OR d.title = r.target_path
                       OR r.target_path = ANY(d.aliases))
                """
            )
            # The command status has the form "UPDATE <n>"; the last token is
            # the affected-row count.
            report['wikilinks_resolved'] = int(status.split()[-1])

        logger.info('Maintenance report: %s', report)
        return report

Powered by TurnKey Linux.