-- You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
--
-- 196 lines
-- 9.4 KiB

-- AI Second Brain — PostgreSQL Schema
-- Requires: PostgreSQL 14+ with pgvector extension
-- Enable extensions
-- Extensions used by this schema. Each statement is a no-op when the
-- extension is already installed in the current database.
CREATE EXTENSION IF NOT EXISTS vector;       -- VECTOR column type and the hnsw index method used on chunks
CREATE EXTENSION IF NOT EXISTS pg_trgm;      -- gin_trgm_ops operator class, for fuzzy text search
-- NOTE(review): every id default visible in this file uses gen_random_uuid(),
-- which is built into PostgreSQL 13+; confirm whether anything else still
-- needs uuid-ossp before keeping or dropping it.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ---------------------------------------------------------------------------
-- DOCUMENTS
-- Represents a single Markdown file in the vault.
-- ---------------------------------------------------------------------------
-- One row per Markdown file in the vault.
CREATE TABLE IF NOT EXISTS documents (
    id            UUID         DEFAULT gen_random_uuid(),
    -- Path of the file relative to the vault root (unique, see constraint below).
    path          TEXT         NOT NULL,
    title         TEXT,
    -- Full raw Markdown source.
    content       TEXT         NOT NULL,
    -- SHA-256 of the content, used for change detection.
    content_hash  TEXT         NOT NULL,
    frontmatter   JSONB        DEFAULT '{}' NOT NULL,
    tags          TEXT[]       DEFAULT '{}' NOT NULL,
    aliases       TEXT[]       DEFAULT '{}' NOT NULL,
    word_count    INTEGER,
    created_at    TIMESTAMPTZ  DEFAULT now() NOT NULL,
    updated_at    TIMESTAMPTZ  DEFAULT now() NOT NULL,
    indexed_at    TIMESTAMPTZ,
    -- Full-text search vector; filled in by the documents_fts_trigger()
    -- row trigger defined later in this file, not by writers.
    fts_vector    TSVECTOR,

    PRIMARY KEY (id),
    UNIQUE (path)
);
-- Secondary indexes on documents.
--
-- There is deliberately no explicit index on path: the UNIQUE constraint on
-- documents.path already creates a unique B-tree index, so the former
-- idx_documents_path only duplicated it (extra write and storage cost, no
-- extra query capability). Databases built from an earlier version of this
-- file still carry the duplicate; it is safe to drop it there.

-- Array containment / overlap lookups on tags and aliases.
CREATE INDEX IF NOT EXISTS idx_documents_tags
    ON documents USING GIN (tags);
CREATE INDEX IF NOT EXISTS idx_documents_aliases
    ON documents USING GIN (aliases);

-- Full-text search over the trigger-maintained tsvector.
CREATE INDEX IF NOT EXISTS idx_documents_fts
    ON documents USING GIN (fts_vector);

-- JSONB key / containment lookups on the parsed frontmatter.
CREATE INDEX IF NOT EXISTS idx_documents_frontmatter
    ON documents USING GIN (frontmatter);

-- Most-recently-updated-first listings.
CREATE INDEX IF NOT EXISTS idx_documents_updated
    ON documents (updated_at DESC);
-- Auto-update fts_vector on insert/update.
--
-- documents_fts_trigger() rebuilds NEW.fts_vector from the row's title, tags
-- and content using the 'english' text search configuration. The three parts
-- are labelled with weights A (title), B (tags) and C (content).
-- Returns the modified NEW row, as a BEFORE row trigger function must.
CREATE OR REPLACE FUNCTION documents_fts_trigger()
RETURNS TRIGGER AS $$
BEGIN
    -- coalesce() keeps a NULL input (title is nullable) from turning the
    -- whole concatenated tsvector into NULL.
    NEW.fts_vector :=
        setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(array_to_string(NEW.tags, ' '), '')), 'B') ||
        setweight(to_tsvector('english', coalesce(NEW.content, '')), 'C');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE OR REPLACE TRIGGER (available from PostgreSQL 14, which this schema
-- already requires) swaps the definition in a single statement. The earlier
-- DROP TRIGGER + CREATE TRIGGER pair left a window with no trigger on the
-- table when the script ran outside a transaction, so rows written in that
-- window got no fts_vector.
CREATE OR REPLACE TRIGGER trig_documents_fts
    BEFORE INSERT OR UPDATE ON documents
    FOR EACH ROW EXECUTE FUNCTION documents_fts_trigger();
-- Auto-update updated_at timestamp.
--
-- set_updated_at() is a generic BEFORE UPDATE row trigger function: it
-- overwrites NEW.updated_at with now() (the transaction start time) and
-- returns the modified row. It works on any table with an updated_at column.
CREATE OR REPLACE FUNCTION set_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE OR REPLACE TRIGGER (PostgreSQL 14+, already required by this schema)
-- replaces the trigger atomically. The earlier DROP TRIGGER + CREATE TRIGGER
-- pair left the table briefly without the trigger when the script ran outside
-- a transaction.
CREATE OR REPLACE TRIGGER trig_documents_updated_at
    BEFORE UPDATE ON documents
    FOR EACH ROW EXECUTE FUNCTION set_updated_at();
-- ---------------------------------------------------------------------------
-- CHUNKS
-- Sliding-window text chunks from documents, each with an embedding vector.
-- ---------------------------------------------------------------------------
-- One row per text chunk cut from a document; chunks are removed together
-- with their parent document.
CREATE TABLE IF NOT EXISTS chunks (
    id           UUID         DEFAULT gen_random_uuid(),
    document_id  UUID         NOT NULL,
    -- Position of the chunk within its document (unique per document).
    chunk_index  INTEGER      NOT NULL,
    content      TEXT         NOT NULL,
    token_count  INTEGER,
    -- Embedding vector; 768 is the nomic-embed-text dimension.
    embedding    VECTOR(768),
    -- Heading path, page, etc.
    metadata     JSONB        DEFAULT '{}' NOT NULL,
    created_at   TIMESTAMPTZ  DEFAULT now() NOT NULL,

    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE,
    UNIQUE (document_id, chunk_index)
);
-- Secondary indexes on chunks.
--
-- There is deliberately no standalone index on document_id: the
-- UNIQUE (document_id, chunk_index) constraint already creates a B-tree index
-- whose leading column is document_id, which serves document_id lookups and
-- the ON DELETE CASCADE from documents. The former idx_chunks_document_id
-- only duplicated that prefix; databases built from an earlier version of
-- this file can safely drop it.

-- HNSW index: approximate nearest-neighbour search by cosine distance.
-- Requires pgvector >= 0.5.0. On older pgvector versions this statement
-- fails (there is no hnsw access method) -- nothing here falls back to
-- IVFFlat automatically; an ivfflat index would have to be created instead.
CREATE INDEX IF NOT EXISTS idx_chunks_embedding_hnsw
    ON chunks USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);
-- ---------------------------------------------------------------------------
-- ENTITIES
-- Named entities extracted from documents (optional NER layer).
-- ---------------------------------------------------------------------------
-- One row per named-entity mention extracted from a document; rows are
-- removed together with their parent document.
CREATE TABLE IF NOT EXISTS entities (
    id           UUID         DEFAULT gen_random_uuid(),
    document_id  UUID         NOT NULL,
    name         TEXT         NOT NULL,
    -- PERSON, ORG, CONCEPT, PLACE, etc.
    entity_type  TEXT         NOT NULL,
    -- Sentence surrounding the mention.
    context      TEXT,
    confidence   FLOAT,
    created_at   TIMESTAMPTZ  DEFAULT now() NOT NULL,

    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
);

-- Lookups by parent document, by type, and by exact name.
CREATE INDEX IF NOT EXISTS idx_entities_document_id
    ON entities (document_id);
CREATE INDEX IF NOT EXISTS idx_entities_type
    ON entities (entity_type);
CREATE INDEX IF NOT EXISTS idx_entities_name
    ON entities (name);

-- Trigram index on name (operator class from pg_trgm).
CREATE INDEX IF NOT EXISTS idx_entities_name_trgm
    ON entities USING GIN (name gin_trgm_ops);
-- ---------------------------------------------------------------------------
-- RELATIONS
-- WikiLink / explicit relations between documents.
-- ---------------------------------------------------------------------------
-- One row per directed link from a source document to a target.
-- Deleting the source document deletes the row; deleting the target document
-- only clears target_doc_id, leaving target_path in place.
CREATE TABLE IF NOT EXISTS relations (
    id             UUID         DEFAULT gen_random_uuid(),
    source_doc_id  UUID         NOT NULL,
    -- Raw link target as written; may not resolve to a document.
    target_path    TEXT         NOT NULL,
    target_doc_id  UUID,
    -- wikilink | tag | explicit | ai-inferred
    relation_type  TEXT         DEFAULT 'wikilink' NOT NULL,
    -- Optional human label for the edge.
    label          TEXT,
    -- Text surrounding the link.
    context        TEXT,
    created_at     TIMESTAMPTZ  DEFAULT now() NOT NULL,

    PRIMARY KEY (id),
    FOREIGN KEY (source_doc_id) REFERENCES documents (id) ON DELETE CASCADE,
    FOREIGN KEY (target_doc_id) REFERENCES documents (id) ON DELETE SET NULL
);

-- Outgoing links of a document.
CREATE INDEX IF NOT EXISTS idx_relations_source
    ON relations (source_doc_id);

-- Incoming links, by resolved document id or by raw target path.
CREATE INDEX IF NOT EXISTS idx_relations_target_id
    ON relations (target_doc_id);
CREATE INDEX IF NOT EXISTS idx_relations_target_path
    ON relations (target_path);

-- Filtering by kind of relation.
CREATE INDEX IF NOT EXISTS idx_relations_type
    ON relations (relation_type);
-- ---------------------------------------------------------------------------
-- AGENT JOBS
-- Persistent job queue consumed by AI agents.
-- ---------------------------------------------------------------------------
-- One row per queued unit of work for an agent.
CREATE TABLE IF NOT EXISTS agent_jobs (
    id             UUID         DEFAULT gen_random_uuid(),
    -- ingestion | linking | tagging | summarization | maintenance
    agent_type     TEXT         NOT NULL,
    -- pending | running | done | failed | cancelled
    status         TEXT         DEFAULT 'pending' NOT NULL,
    -- 1 (highest) .. 10 (lowest)
    priority       INTEGER      DEFAULT 5 NOT NULL,
    payload        JSONB        DEFAULT '{}' NOT NULL,
    result         JSONB,
    error          TEXT,
    retry_count    INTEGER      DEFAULT 0 NOT NULL,
    max_retries    INTEGER      DEFAULT 3 NOT NULL,
    created_at     TIMESTAMPTZ  DEFAULT now() NOT NULL,
    started_at     TIMESTAMPTZ,
    completed_at   TIMESTAMPTZ,
    scheduled_for  TIMESTAMPTZ  DEFAULT now() NOT NULL,

    PRIMARY KEY (id)
);

-- Filtering by agent type and by status.
CREATE INDEX IF NOT EXISTS idx_agent_jobs_type
    ON agent_jobs (agent_type);
CREATE INDEX IF NOT EXISTS idx_agent_jobs_status
    ON agent_jobs (status);

-- Partial index: only pending jobs are indexed, ordered by scheduled time.
-- NOTE(review): priority is not part of this index; if the dequeue query
-- orders by priority first, it will need an extra sort -- confirm against
-- the consumer's query.
CREATE INDEX IF NOT EXISTS idx_agent_jobs_scheduled
    ON agent_jobs (scheduled_for ASC)
    WHERE status = 'pending';
-- ---------------------------------------------------------------------------
-- AGENT LOGS
-- Structured log entries written by agents.
-- ---------------------------------------------------------------------------
-- One row per log entry written by an agent. Log rows outlive their job:
-- deleting a job only clears job_id.
CREATE TABLE IF NOT EXISTS agent_logs (
    id          UUID         DEFAULT gen_random_uuid(),
    job_id      UUID,
    agent_type  TEXT         NOT NULL,
    -- debug | info | warning | error
    level       TEXT         DEFAULT 'info' NOT NULL,
    message     TEXT         NOT NULL,
    metadata    JSONB        DEFAULT '{}' NOT NULL,
    created_at  TIMESTAMPTZ  DEFAULT now() NOT NULL,

    PRIMARY KEY (id),
    FOREIGN KEY (job_id) REFERENCES agent_jobs (id) ON DELETE SET NULL
);

-- Entries of one job.
CREATE INDEX IF NOT EXISTS idx_agent_logs_job_id
    ON agent_logs (job_id);

-- Filtering by severity.
CREATE INDEX IF NOT EXISTS idx_agent_logs_level
    ON agent_logs (level);

-- Newest-first listings.
CREATE INDEX IF NOT EXISTS idx_agent_logs_created
    ON agent_logs (created_at DESC);
-- ---------------------------------------------------------------------------
-- SYSTEM CONFIG
-- Runtime key-value configuration, editable by agents and admins.
-- ---------------------------------------------------------------------------
-- Key-value settings; each value is stored as JSONB (strings are JSON
-- strings, numbers and booleans are bare JSON literals).
CREATE TABLE IF NOT EXISTS system_config (
    key         TEXT        PRIMARY KEY,
    value       JSONB       NOT NULL,
    description TEXT,
    updated_at  TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Keep updated_at current when a setting is edited. Without this trigger the
-- column only ever held the insert time unless every writer remembered to set
-- it. Reuses the generic set_updated_at() function defined earlier in this
-- file, the same one the documents table uses.
CREATE OR REPLACE TRIGGER trig_system_config_updated_at
    BEFORE UPDATE ON system_config
    FOR EACH ROW EXECUTE FUNCTION set_updated_at();

-- Seed default configuration. ON CONFLICT DO NOTHING leaves any existing
-- key untouched, so re-running this file never overwrites edited settings.
INSERT INTO system_config (key, value, description) VALUES
    ('embedding_model', '"nomic-embed-text"', 'Ollama model for embeddings'),
    ('chat_model', '"mistral"', 'Ollama model for chat/generation'),
    ('chunk_size', '700', 'Target tokens per chunk'),
    ('chunk_overlap', '70', 'Overlap tokens between chunks'),
    ('search_top_k', '10', 'Default number of search results'),
    ('search_threshold', '0.65', 'Minimum cosine similarity score'),
    ('rerank_enabled', 'false', 'Enable cross-encoder reranking'),
    ('auto_tag', 'true', 'Auto-tag documents via LLM'),
    ('auto_summarize', 'true', 'Auto-summarize long documents')
ON CONFLICT (key) DO NOTHING;

-- Powered by TurnKey Linux.