-- AI Second Brain — PostgreSQL Schema
-- Requires: PostgreSQL 14+ with pgvector extension
--
-- The script is written to be re-runnable: objects are created with
-- IF NOT EXISTS / CREATE OR REPLACE, and triggers are dropped and recreated.

-- Enable extensions
-- NOTE(review): ids below use gen_random_uuid(); nothing visible in this
-- schema calls a uuid-ossp function — confirm whether it is still needed.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;  -- for fuzzy text search

-- ---------------------------------------------------------------------------
-- DOCUMENTS
-- Represents a single Markdown file in the vault.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS documents (
    id            UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    path          TEXT NOT NULL UNIQUE,               -- relative path within vault
    title         TEXT,
    content       TEXT NOT NULL,                      -- full raw markdown
    content_hash  TEXT NOT NULL,                      -- SHA-256 for change detection
    frontmatter   JSONB NOT NULL DEFAULT '{}',
    tags          TEXT[] NOT NULL DEFAULT '{}',
    aliases       TEXT[] NOT NULL DEFAULT '{}',
    word_count    INTEGER,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT now(), -- refreshed by trig_documents_updated_at
    indexed_at    TIMESTAMPTZ,
    fts_vector    TSVECTOR                            -- maintained by trig_documents_fts
);

-- NOTE(review): the UNIQUE constraint on path already creates an index on
-- that column, so idx_documents_path looks redundant — confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_documents_path ON documents (path);
CREATE INDEX IF NOT EXISTS idx_documents_tags ON documents USING GIN (tags);
CREATE INDEX IF NOT EXISTS idx_documents_aliases ON documents USING GIN (aliases);
CREATE INDEX IF NOT EXISTS idx_documents_fts ON documents USING GIN (fts_vector);
CREATE INDEX IF NOT EXISTS idx_documents_frontmatter ON documents USING GIN (frontmatter);
CREATE INDEX IF NOT EXISTS idx_documents_updated ON documents (updated_at DESC);

-- Rebuild fts_vector from title (weight A), tags (weight B) and content
-- (weight C), all parsed with the 'english' text search configuration.
-- NULL inputs are treated as empty strings.
CREATE OR REPLACE FUNCTION documents_fts_trigger()
RETURNS TRIGGER AS $$
BEGIN
    NEW.fts_vector :=
        setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(array_to_string(NEW.tags, ' '), '')), 'B') ||
        setweight(to_tsvector('english', coalesce(NEW.content, '')), 'C');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Fire only when a column that feeds the vector is assigned. Updates that
-- touch other columns (e.g. indexed_at) no longer re-parse the whole markdown
-- body. Keep this column list in sync with documents_fts_trigger().
DROP TRIGGER IF EXISTS trig_documents_fts ON documents;
CREATE TRIGGER trig_documents_fts
    BEFORE INSERT OR UPDATE OF title, tags, content ON documents
    FOR EACH ROW EXECUTE FUNCTION documents_fts_trigger();

-- Generic trigger function: stamp updated_at with now() on the row being
-- written. Usable on any table that has an updated_at column.
CREATE OR REPLACE FUNCTION set_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = now();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS trig_documents_updated_at ON documents;
CREATE TRIGGER trig_documents_updated_at
    BEFORE UPDATE ON documents
    FOR EACH ROW EXECUTE FUNCTION set_updated_at();

-- ---------------------------------------------------------------------------
-- CHUNKS
-- Sliding-window text chunks from documents, each with an embedding vector.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chunks (
    id           UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id  UUID NOT NULL REFERENCES documents (id) ON DELETE CASCADE,
    chunk_index  INTEGER NOT NULL,
    content      TEXT NOT NULL,
    token_count  INTEGER,
    embedding    VECTOR(768),                         -- nomic-embed-text dimension
    metadata     JSONB NOT NULL DEFAULT '{}',         -- heading path, page, etc.
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE (document_id, chunk_index)
);

-- NOTE(review): the UNIQUE (document_id, chunk_index) index has document_id
-- as its leading column, so this index may be redundant — confirm.
CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks (document_id);

-- HNSW index — fast approximate nearest-neighbour search (cosine distance).
-- Requires pgvector >= 0.5.0. There is no automatic fallback: on an older
-- pgvector this statement fails and an IVFFlat index has to be created instead.
CREATE INDEX IF NOT EXISTS idx_chunks_embedding_hnsw
    ON chunks USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

-- ---------------------------------------------------------------------------
-- ENTITIES
-- Named entities extracted from documents (optional NER layer).
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS entities (
    id           UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id  UUID NOT NULL REFERENCES documents (id) ON DELETE CASCADE,
    name         TEXT NOT NULL,
    entity_type  TEXT NOT NULL,                       -- PERSON, ORG, CONCEPT, PLACE, etc.
    context      TEXT,                                -- surrounding sentence
    confidence   FLOAT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_entities_document_id ON entities (document_id);
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities (name);
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities (entity_type);
-- Trigram index (pg_trgm) on entity names.
CREATE INDEX IF NOT EXISTS idx_entities_name_trgm ON entities USING GIN (name gin_trgm_ops);

-- ---------------------------------------------------------------------------
-- RELATIONS
-- WikiLink / explicit relations between documents.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS relations (
    id             UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    source_doc_id  UUID NOT NULL REFERENCES documents (id) ON DELETE CASCADE,
    target_path    TEXT NOT NULL,                     -- raw link target (may be unresolved)
    target_doc_id  UUID REFERENCES documents (id) ON DELETE SET NULL,
    relation_type  TEXT NOT NULL DEFAULT 'wikilink',  -- wikilink | tag | explicit | ai-inferred
    label          TEXT,                              -- optional human label for the edge
    context        TEXT,                              -- surrounding text of the link
    created_at     TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_relations_source ON relations (source_doc_id);
CREATE INDEX IF NOT EXISTS idx_relations_target_id ON relations (target_doc_id);
CREATE INDEX IF NOT EXISTS idx_relations_target_path ON relations (target_path);
CREATE INDEX IF NOT EXISTS idx_relations_type ON relations (relation_type);

-- ---------------------------------------------------------------------------
-- AGENT JOBS
-- Persistent job queue consumed by AI agents.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS agent_jobs (
    id             UUID DEFAULT gen_random_uuid(),
    -- ingestion | linking | tagging | summarization | maintenance
    agent_type     TEXT NOT NULL,
    -- pending | running | done | failed | cancelled
    status         TEXT NOT NULL DEFAULT 'pending',
    -- 1 (highest) .. 10 (lowest)
    priority       INTEGER NOT NULL DEFAULT 5,
    payload        JSONB NOT NULL DEFAULT '{}',
    result         JSONB,
    error          TEXT,
    retry_count    INTEGER NOT NULL DEFAULT 0,
    max_retries    INTEGER NOT NULL DEFAULT 3,
    created_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
    started_at     TIMESTAMPTZ,
    completed_at   TIMESTAMPTZ,
    scheduled_for  TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id)
);

-- Partial index: only rows still in 'pending' status, ordered by schedule time.
CREATE INDEX IF NOT EXISTS idx_agent_jobs_scheduled
    ON agent_jobs (scheduled_for)
    WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_agent_jobs_status
    ON agent_jobs (status);
CREATE INDEX IF NOT EXISTS idx_agent_jobs_type
    ON agent_jobs (agent_type);

-- ---------------------------------------------------------------------------
-- AGENT LOGS
-- Structured log entries written by agents.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS agent_logs (
    id          UUID DEFAULT gen_random_uuid(),
    -- Nullable: set back to NULL when the referenced job row is deleted.
    job_id      UUID,
    agent_type  TEXT NOT NULL,
    -- debug | info | warning | error
    level       TEXT NOT NULL DEFAULT 'info',
    message     TEXT NOT NULL,
    metadata    JSONB NOT NULL DEFAULT '{}',
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id),
    FOREIGN KEY (job_id) REFERENCES agent_jobs (id) ON DELETE SET NULL
);

CREATE INDEX IF NOT EXISTS idx_agent_logs_created
    ON agent_logs (created_at DESC);
CREATE INDEX IF NOT EXISTS idx_agent_logs_job_id
    ON agent_logs (job_id);
CREATE INDEX IF NOT EXISTS idx_agent_logs_level
    ON agent_logs (level);

-- ---------------------------------------------------------------------------
-- SYSTEM CONFIG
-- Runtime key-value configuration, editable by agents and admins.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS system_config (
    key          TEXT PRIMARY KEY,
    value        JSONB NOT NULL,                      -- JSON-encoded: strings quoted, numbers/booleans bare
    description  TEXT,
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Keep updated_at current when a setting is edited. Without this the column
-- would keep its insert-time value unless every writer set it by hand.
-- Reuses set_updated_at(), the trigger function this file defines for documents.
DROP TRIGGER IF EXISTS trig_system_config_updated_at ON system_config;
CREATE TRIGGER trig_system_config_updated_at
    BEFORE UPDATE ON system_config
    FOR EACH ROW EXECUTE FUNCTION set_updated_at();

-- Seed default configuration.
-- ON CONFLICT DO NOTHING leaves any existing value untouched, so re-running
-- the script never overwrites a setting that was changed at runtime.
INSERT INTO system_config (key, value, description) VALUES
    ('embedding_model',  '"nomic-embed-text"', 'Ollama model for embeddings'),
    ('chat_model',       '"mistral"',          'Ollama model for chat/generation'),
    ('chunk_size',       '700',                'Target tokens per chunk'),
    ('chunk_overlap',    '70',                 'Overlap tokens between chunks'),
    ('search_top_k',     '10',                 'Default number of search results'),
    ('search_threshold', '0.65',               'Minimum cosine similarity score'),
    ('rerank_enabled',   'false',              'Enable cross-encoder reranking'),
    ('auto_tag',         'true',               'Auto-tag documents via LLM'),
    ('auto_summarize',   'true',               'Auto-summarize long documents')
ON CONFLICT (key) DO NOTHING;