#!/usr/bin/env bash
# =============================================================================
# ALPHA_PROJECT — Qdrant — Set up collections and payload indexes
# =============================================================================
# The collections were already created on 2026-03-21. This script is kept for
# traceability and disaster recovery (run it against an empty Qdrant instance).
#
# Prerequisites:
#   sudo microk8s kubectl port-forward svc/qdrant -n persistence 6333:6333
#   curl and python3 available on PATH
#
# Usage:
#   bash alpha/db/qdrant.sh
#
# Environment (all optional):
#   QDRANT_URL      base URL of the Qdrant REST API (default http://localhost:6333)
#   QDRANT_API_KEY  API key sent in the "api-key" header
#   VECTOR_SIZE     dimension of the stored vectors (default 1536)
# =============================================================================

set -euo pipefail

QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"

# NOTE(review): the fallback below is a credential committed to the repository.
# It is kept only so existing invocations keep working; it should be rotated
# and supplied exclusively through the environment.
if [[ -z "${QDRANT_API_KEY:-}" ]]; then
  printf '%s\n' \
    "ATTENZIONE: QDRANT_API_KEY non impostata, uso la chiave di default." >&2
  QDRANT_API_KEY='__Montecarlo00!'
fi

# Vector dimension: 1536 = text-embedding-3-small (Copilot, bootstrap phase).
# To be changed to 768 when migrating to nomic-embed-text on Ollama.
VECTOR_SIZE="${VECTOR_SIZE:-1536}"

# Multi-tenant architecture: isolation happens through the user_id payload
# field. user_id values: "martin" | "shared" | ...
readonly -a COLLECTIONS=(episodes knowledge preferences)

# Payload indexes as "field:schema" pairs; they allow efficient pre-filtering
# before the vector search.
readonly -a PAYLOAD_INDEXES=(
  "user_id:keyword"
  "source:keyword"
  "category:keyword"
  "date:datetime"
  "action_required:bool"
)

#######################################
# Print a message on stderr and abort the script.
# Arguments: $* - message
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Join the arguments with ", " and write the result to stdout (no newline).
# Arguments: $@ - items to join
#######################################
join_by_comma() {
  local joined
  printf -v joined '%s, ' "$@"
  printf '%s' "${joined%, }"
}

#######################################
# curl wrapper: silent, but prints the error (-S) and returns non-zero on
# HTTP errors (-f); always sends the API key header.
# Globals:   QDRANT_API_KEY (read)
# Arguments: $@ - additional curl arguments
#######################################
qdrant_curl() {
  curl -sS -f -H "api-key: ${QDRANT_API_KEY}" "$@"
}

#######################################
# PUT a JSON document to the Qdrant REST API; the response goes to stdout.
# Globals:   QDRANT_URL (read)
# Arguments: $1 - request path (starting with /), $2 - JSON body
#######################################
qdrant_put_json() {
  local path=$1 body=$2
  qdrant_curl -X PUT "${QDRANT_URL}${path}" \
    -H "Content-Type: application/json" \
    -d "${body}"
}

#######################################
# Read a JSON response on stdin and print its "status":"..." fragment.
# Returns non-zero (aborting the script under pipefail) when no status is found.
#######################################
print_status() {
  grep -o '"status":"[^"]*"'
}

#######################################
# Create one collection with Cosine distance and VECTOR_SIZE dimensions.
# Globals:   VECTOR_SIZE (read)
# Arguments: $1 - collection name
#######################################
create_collection() {
  local name=$1 body
  printf -v body '{
    "vectors": { "size": %s, "distance": "Cosine" },
    "optimizers_config": { "default_segment_number": 2 },
    "replication_factor": 1
  }' "${VECTOR_SIZE}"

  echo "==> Creazione collection: ${name}"
  qdrant_put_json "/collections/${name}" "${body}" | print_status
}

#######################################
# Create one payload index on a collection and print the returned status.
# Arguments: $1 - collection name, $2 - field name, $3 - field schema
#######################################
create_payload_index() {
  local collection=$1 field=$2 schema=$3
  printf " %-20s (%s) → " "${field}" "${schema}"
  qdrant_put_json "/collections/${collection}/index" \
    "{\"field_name\": \"${field}\", \"field_schema\": \"${schema}\"}" \
    | print_status
}

#######################################
# Entry point: connectivity check, collections, payload indexes, final listing.
#######################################
main() {
  local tool collection spec

  # Fail before touching the server if a required tool is missing
  # (python3 is only needed by the final verification step).
  for tool in curl python3; do
    command -v "${tool}" >/dev/null \
      || die "Errore: comando richiesto non trovato: ${tool}"
  done

  # VECTOR_SIZE is interpolated into JSON, so it must be a plain positive integer.
  [[ "${VECTOR_SIZE}" =~ ^[1-9][0-9]*$ ]] \
    || die "Errore: VECTOR_SIZE deve essere un intero positivo (valore: '${VECTOR_SIZE}')"

  echo "==> Connessione a ${QDRANT_URL}"
  qdrant_curl "${QDRANT_URL}/" | grep -o '"version":"[^"]*"'
  echo ""

  # ---------------------------------------------------------------------------
  # Collections
  # ---------------------------------------------------------------------------
  for collection in "${COLLECTIONS[@]}"; do
    create_collection "${collection}"
  done
  echo ""

  # ---------------------------------------------------------------------------
  # Payload indexes
  # ---------------------------------------------------------------------------
  for collection in "${COLLECTIONS[@]}"; do
    echo "==> Indexes per collection: ${collection}"
    for spec in "${PAYLOAD_INDEXES[@]}"; do
      # spec is "field:schema"
      create_payload_index "${collection}" "${spec%%:*}" "${spec#*:}"
    done
  done
  echo ""

  # ---------------------------------------------------------------------------
  # Final verification: list the collections the server reports.
  # ---------------------------------------------------------------------------
  echo "==> Collections attive:"
  qdrant_curl "${QDRANT_URL}/collections" \
    | python3 -c '
import json, sys
for c in json.load(sys.stdin)["result"]["collections"]:
    print(" -", c["name"])
'
  echo ""

  echo "✅ Setup Qdrant completato."
  echo " Collections: $(join_by_comma "${COLLECTIONS[@]}")"
  echo " Payload indexes: $(join_by_comma "${PAYLOAD_INDEXES[@]%%:*}")"
}

main "$@"