Fix indexer: handle date serialization and pgvector format

main
Clawd 3 weeks ago
parent 90961cb62e
commit 773199bcb6

@@ -7,6 +7,7 @@ from __future__ import annotations
import hashlib
import json
import logging
from datetime import date, datetime
from typing import Any
import asyncpg
@@ -21,6 +22,14 @@ def sha256(text: str) -> str:
return hashlib.sha256(text.encode('utf-8')).hexdigest()
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that handles date/datetime objects.

    Pass it as ``cls=DateTimeEncoder`` to ``json.dumps``; ``date`` and
    ``datetime`` values are then emitted as their ``isoformat()`` strings
    instead of raising ``TypeError``.
    """

    def default(self, obj: Any) -> Any:
        """Return a JSON-serializable stand-in for *obj*.

        The encoder calls this hook only for objects it cannot serialize
        natively.

        Raises:
            TypeError: if *obj* is neither a ``date`` nor a ``datetime``
                (raised by the base-class implementation).
        """
        # datetime is a subclass of date; both are listed for explicitness.
        if isinstance(obj, (date, datetime)):
            return obj.isoformat()
        # Defer to the base class, which raises TypeError for unknown types.
        return super().default(obj)
async def upsert_document(
conn: asyncpg.Connection,
doc: ParsedDocument,
@@ -56,7 +65,7 @@ async def upsert_document(
doc.title,
doc.content_raw,
content_hash,
json.dumps(doc.frontmatter),
json.dumps(doc.frontmatter, cls=DateTimeEncoder),
doc.tags,
doc.aliases,
doc.word_count,
@@ -71,12 +80,14 @@ async def upsert_document(
# ---- Insert chunks + embeddings ----
chunk_records = []
for chunk, embedding in zip(chunks, embeddings):
# Convert embedding list to pgvector string format
embedding_str = '[' + ','.join(str(x) for x in embedding) + ']'
chunk_records.append((
row['id'],
chunk.chunk_index,
chunk.content,
chunk.token_count,
embedding,
embedding_str,
json.dumps(chunk.metadata),
))

Loading…
Cancel
Save

Powered by TurnKey Linux.