-- AI Second Brain — PostgreSQL Schema
-- Requires: PostgreSQL 14+ with the pgvector extension
-- Extensions used by this schema. Each statement is a no-op when the
-- extension is already installed.
CREATE EXTENSION IF NOT EXISTS vector;       -- pgvector: VECTOR column type and its index methods
CREATE EXTENSION IF NOT EXISTS pg_trgm;      -- trigram operator classes for fuzzy text search
-- NOTE(review): the tables in this file default their ids with
-- gen_random_uuid(), not a uuid-ossp function; confirm whether anything
-- else still needs this extension.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ---------------------------------------------------------------------------
-- DOCUMENTS
-- One row per Markdown file in the vault.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS documents (
    id           UUID        NOT NULL DEFAULT gen_random_uuid(),
    path         TEXT        NOT NULL,                        -- relative path within vault
    title        TEXT,
    content      TEXT        NOT NULL,                        -- full raw markdown
    content_hash TEXT        NOT NULL,                        -- SHA-256 for change detection
    frontmatter  JSONB       NOT NULL DEFAULT '{}'::jsonb,
    tags         TEXT[]      NOT NULL DEFAULT '{}'::text[],
    aliases      TEXT[]      NOT NULL DEFAULT '{}'::text[],
    word_count   INT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT now(),          -- refreshed by trig_documents_updated_at
    indexed_at   TIMESTAMPTZ,
    fts_vector   TSVECTOR,                                    -- filled in by trig_documents_fts
    PRIMARY KEY (id),
    UNIQUE (path)
);
-- Secondary indexes on documents.
--
-- There is deliberately no separate index on path: the UNIQUE constraint on
-- documents.path is already backed by a unique B-tree index, so a second
-- index on the same column only adds write and storage overhead.
-- Databases created from an earlier version of this file may still carry
-- idx_documents_path; it can safely be dropped there.

-- Array containment / overlap lookups on tags and aliases.
CREATE INDEX IF NOT EXISTS idx_documents_tags
    ON documents USING GIN (tags);
CREATE INDEX IF NOT EXISTS idx_documents_aliases
    ON documents USING GIN (aliases);

-- Full-text search over the trigger-maintained tsvector.
CREATE INDEX IF NOT EXISTS idx_documents_fts
    ON documents USING GIN (fts_vector);

-- JSONB lookups on frontmatter.
CREATE INDEX IF NOT EXISTS idx_documents_frontmatter
    ON documents USING GIN (frontmatter);

-- Most-recently-updated-first listings.
CREATE INDEX IF NOT EXISTS idx_documents_updated
    ON documents (updated_at DESC);
-- Row-level trigger function that rebuilds NEW.fts_vector from the row's
-- title (weight A), tags (weight B) and content (weight C), all parsed with
-- the 'english' text search configuration. Returns the modified NEW row.
CREATE OR REPLACE FUNCTION documents_fts_trigger()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    title_vec   TSVECTOR;
    tags_vec    TSVECTOR;
    content_vec TSVECTOR;
BEGIN
    -- coalesce() keeps a NULL field from turning the whole vector into NULL.
    title_vec   := setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A');
    tags_vec    := setweight(to_tsvector('english', coalesce(array_to_string(NEW.tags, ' '), '')), 'B');
    content_vec := setweight(to_tsvector('english', coalesce(NEW.content, '')), 'C');

    NEW.fts_vector := title_vec || tags_vec || content_vec;
    RETURN NEW;
END;
$$;
-- Recompute fts_vector before every insert or update of a documents row.
--
-- CREATE OR REPLACE TRIGGER (available from PostgreSQL 14, which this schema
-- already requires) swaps the definition in a single statement. The previous
-- DROP TRIGGER + CREATE TRIGGER pair left a window, when the script is run
-- outside a transaction, in which rows could be written with no trigger
-- attached and therefore with a stale or NULL fts_vector.
CREATE OR REPLACE TRIGGER trig_documents_fts
    BEFORE INSERT OR UPDATE ON documents
    FOR EACH ROW
    EXECUTE FUNCTION documents_fts_trigger();
-- Generic row-level trigger function: stamps NEW.updated_at with now() and
-- returns the modified row. Usable on any table with an updated_at column.
CREATE OR REPLACE FUNCTION set_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$$;
-- Refresh documents.updated_at before every row update.
--
-- CREATE OR REPLACE TRIGGER (available from PostgreSQL 14, which this schema
-- already requires) replaces the definition in one statement. The previous
-- DROP TRIGGER + CREATE TRIGGER pair left a window, when the script is run
-- outside a transaction, in which updates would not touch updated_at.
CREATE OR REPLACE TRIGGER trig_documents_updated_at
    BEFORE UPDATE ON documents
    FOR EACH ROW
    EXECUTE FUNCTION set_updated_at();
-- ---------------------------------------------------------------------------
-- CHUNKS
-- Sliding-window text chunks cut from documents; each chunk can carry an
-- embedding vector. Chunks are deleted together with their document.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chunks (
    id          UUID        NOT NULL DEFAULT gen_random_uuid(),
    document_id UUID        NOT NULL,
    chunk_index INT         NOT NULL,
    content     TEXT        NOT NULL,
    token_count INT,
    embedding   VECTOR(768),                                -- nomic-embed-text dimension
    metadata    JSONB       NOT NULL DEFAULT '{}'::jsonb,   -- heading path, page, etc.
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE,
    UNIQUE (document_id, chunk_index)                       -- one row per chunk position in a document
);
-- Lookup of all chunks that belong to one document.
CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks (document_id);

-- Approximate nearest-neighbour index over chunk embeddings (cosine distance).
--
-- HNSW is preferred but its access method only exists in pgvector >= 0.5.0.
-- A plain CREATE INDEX ... USING hnsw aborts the script on older pgvector
-- builds, so check the catalog first and fall back to IVFFlat when hnsw is
-- not available.
DO $$
BEGIN
    IF EXISTS (SELECT 1 FROM pg_am WHERE amname = 'hnsw') THEN
        CREATE INDEX IF NOT EXISTS idx_chunks_embedding_hnsw
            ON chunks USING hnsw (embedding vector_cosine_ops)
            WITH (m = 16, ef_construction = 64);
    ELSE
        -- NOTE(review): an IVFFlat index picks its list centres from the rows
        -- present at build time; if this runs on an empty table, rebuild the
        -- index (REINDEX) once chunks have been loaded.
        CREATE INDEX IF NOT EXISTS idx_chunks_embedding_ivfflat
            ON chunks USING ivfflat (embedding vector_cosine_ops)
            WITH (lists = 100);
    END IF;
END;
$$;
-- ---------------------------------------------------------------------------
-- ENTITIES
-- Named entities extracted from documents (optional NER layer). Entities are
-- deleted together with the document they were found in.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS entities (
    id          UUID             NOT NULL DEFAULT gen_random_uuid(),
    document_id UUID             NOT NULL,
    name        TEXT             NOT NULL,
    entity_type TEXT             NOT NULL,   -- PERSON, ORG, CONCEPT, PLACE, etc.
    context     TEXT,                        -- surrounding sentence
    confidence  DOUBLE PRECISION,
    created_at  TIMESTAMPTZ      NOT NULL DEFAULT now(),
    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
);
-- Entities of one document.
CREATE INDEX IF NOT EXISTS idx_entities_document_id
    ON entities (document_id);

-- Exact-match lookups by name and by entity type.
CREATE INDEX IF NOT EXISTS idx_entities_name
    ON entities (name);
CREATE INDEX IF NOT EXISTS idx_entities_type
    ON entities (entity_type);

-- Trigram index (pg_trgm) for fuzzy matching on entity names.
CREATE INDEX IF NOT EXISTS idx_entities_name_trgm
    ON entities USING GIN (name gin_trgm_ops);
-- ---------------------------------------------------------------------------
-- RELATIONS
-- WikiLink / explicit relations between documents. A relation is removed with
-- its source document; when the target document is deleted the edge is kept
-- and target_doc_id is reset to NULL.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS relations (
    id            UUID        NOT NULL DEFAULT gen_random_uuid(),
    source_doc_id UUID        NOT NULL,
    target_path   TEXT        NOT NULL,                     -- raw link target (may be unresolved)
    target_doc_id UUID,
    relation_type TEXT        NOT NULL DEFAULT 'wikilink',  -- wikilink | tag | explicit | ai-inferred
    label         TEXT,                                     -- optional human label for the edge
    context       TEXT,                                     -- surrounding text of the link
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id),
    FOREIGN KEY (source_doc_id) REFERENCES documents (id) ON DELETE CASCADE,
    FOREIGN KEY (target_doc_id) REFERENCES documents (id) ON DELETE SET NULL
);
-- Edge lookups by source document and by target document.
CREATE INDEX IF NOT EXISTS idx_relations_source
    ON relations (source_doc_id);
CREATE INDEX IF NOT EXISTS idx_relations_target_id
    ON relations (target_doc_id);

-- Lookup by raw link target.
CREATE INDEX IF NOT EXISTS idx_relations_target_path
    ON relations (target_path);

-- Filter edges by relation type.
CREATE INDEX IF NOT EXISTS idx_relations_type
    ON relations (relation_type);
-- ---------------------------------------------------------------------------
-- AGENT JOBS
-- Persistent job queue consumed by AI agents.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS agent_jobs (
    id            UUID        NOT NULL DEFAULT gen_random_uuid(),
    agent_type    TEXT        NOT NULL,                    -- ingestion | linking | tagging | summarization | maintenance
    status        TEXT        NOT NULL DEFAULT 'pending',  -- pending | running | done | failed | cancelled
    priority      INT         NOT NULL DEFAULT 5,          -- 1 (highest) .. 10 (lowest)
    payload       JSONB       NOT NULL DEFAULT '{}'::jsonb,
    result        JSONB,
    error         TEXT,
    retry_count   INT         NOT NULL DEFAULT 0,
    max_retries   INT         NOT NULL DEFAULT 3,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    started_at    TIMESTAMPTZ,
    completed_at  TIMESTAMPTZ,
    scheduled_for TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id)
    -- NOTE(review): the status and priority domains above are documented only
    -- in comments; nothing in this table enforces them.
);
-- Filter jobs by status and by agent type.
CREATE INDEX IF NOT EXISTS idx_agent_jobs_status
    ON agent_jobs (status);
CREATE INDEX IF NOT EXISTS idx_agent_jobs_type
    ON agent_jobs (agent_type);

-- Partial index: only pending jobs, ordered by when they are due.
CREATE INDEX IF NOT EXISTS idx_agent_jobs_scheduled
    ON agent_jobs (scheduled_for ASC)
    WHERE status = 'pending';
-- ---------------------------------------------------------------------------
-- AGENT LOGS
-- Structured log entries written by agents. Log rows outlive their job:
-- deleting a job resets job_id to NULL instead of removing the entries.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS agent_logs (
    id         UUID        NOT NULL DEFAULT gen_random_uuid(),
    job_id     UUID,
    agent_type TEXT        NOT NULL,
    level      TEXT        NOT NULL DEFAULT 'info',   -- debug | info | warning | error
    message    TEXT        NOT NULL,
    metadata   JSONB       NOT NULL DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id),
    FOREIGN KEY (job_id) REFERENCES agent_jobs (id) ON DELETE SET NULL
);
-- Log entries of one job.
CREATE INDEX IF NOT EXISTS idx_agent_logs_job_id
    ON agent_logs (job_id);

-- Newest-first log listings.
CREATE INDEX IF NOT EXISTS idx_agent_logs_created
    ON agent_logs (created_at DESC);

-- Filter by severity level.
CREATE INDEX IF NOT EXISTS idx_agent_logs_level
    ON agent_logs (level);
-- ---------------------------------------------------------------------------
-- SYSTEM CONFIG
-- Runtime key-value configuration, editable by agents and admins.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS system_config (
    key         TEXT        NOT NULL,
    value       JSONB       NOT NULL,
    description TEXT,
    updated_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (key)
);

-- Keep updated_at truthful when a setting is edited. Without this trigger the
-- column only ever holds the insert time, because its DEFAULT applies to
-- INSERT alone. Reuses the generic set_updated_at() trigger function defined
-- earlier in this file. CREATE OR REPLACE TRIGGER needs PostgreSQL 14+, which
-- this schema already requires.
CREATE OR REPLACE TRIGGER trig_system_config_updated_at
    BEFORE UPDATE ON system_config
    FOR EACH ROW
    EXECUTE FUNCTION set_updated_at();
-- Default settings. Values are JSON literals, so strings carry their own
-- double quotes. Keys that already exist are left untouched, which makes the
-- seed safe to re-run without overwriting edited settings.
INSERT INTO system_config
    (key, value, description)
VALUES
    ('embedding_model',  '"nomic-embed-text"', 'Ollama model for embeddings'),
    ('chat_model',       '"mistral"',          'Ollama model for chat/generation'),
    ('chunk_size',       '700',                'Target tokens per chunk'),
    ('chunk_overlap',    '70',                 'Overlap tokens between chunks'),
    ('search_top_k',     '10',                 'Default number of search results'),
    ('search_threshold', '0.65',               'Minimum cosine similarity score'),
    ('rerank_enabled',   'false',              'Enable cross-encoder reranking'),
    ('auto_tag',         'true',               'Auto-tag documents via LLM'),
    ('auto_summarize',   'true',               'Auto-summarize long documents')
ON CONFLICT (key) DO NOTHING;