Testing Pinecone, Weaviate, and Qdrant Integrations
Vector databases are infrastructure. Like any database, they can be misconfigured, queried incorrectly, or updated in ways that corrupt your index. Unlike relational databases, the failure modes are subtle: incorrect metadata filters silently narrow results, wrong namespace routing returns data from a different index, and stale vectors from deleted documents still appear in search results.
Testing your vector database integration is not optional if your application depends on semantic search or RAG.
What to Test for Vector Database Integrations
- Upsert correctness — vectors are stored with correct IDs, metadata, and values
- Query accuracy — similarity search returns correct top-K results for known vectors
- Namespace/collection isolation — queries don't bleed across namespaces or tenants
- Metadata filtering — filters correctly narrow results
- Delete propagation — deleted vectors no longer appear in results
- Index freshness — recently upserted vectors appear in subsequent queries
Testing Pinecone Integration
Setting Up a Test Index
Use a separate Pinecone index for tests, or a separate namespace within your development index:
import os

import numpy as np
import pytest
from pinecone import Pinecone, ServerlessSpec
from pinecone.exceptions import NotFoundException
@pytest.fixture(scope="session")
def pinecone_test_index():
    """Yield a handle to the dedicated Pinecone test index for the session.

    Creates a 1536-dimension, cosine-metric serverless index named
    ``test-index`` when it is not already listed, authenticating with the
    key in ``PINECONE_TEST_API_KEY``. After the last test the ``test``
    namespace is wiped.

    Yields:
        The ``pc.Index`` handle for ``test-index``.

    Raises:
        KeyError: if ``PINECONE_TEST_API_KEY`` is not set.
    """
    pc = Pinecone(api_key=os.environ["PINECONE_TEST_API_KEY"])
    index_name = "test-index"
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=1536,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
    index = pc.Index(index_name)
    yield index
    # Cleanup after all tests. By now the namespace has usually been emptied
    # already, and a serverless index can answer 404 ("Namespace not found")
    # to delete_all on a namespace that no longer exists -- that outcome is
    # exactly the state we want, so it must not fail the session teardown.
    try:
        index.delete(delete_all=True, namespace="test")
    except NotFoundException:
        pass
@pytest.fixture(autouse=True)
def cleanup_test_namespace(pinecone_test_index):
    """Wipe the ``test`` namespace after every test.

    Runs automatically (``autouse``) so vectors written by one test are not
    visible to the next.
    """
    yield
    try:
        pinecone_test_index.delete(delete_all=True, namespace="test")
    except NotFoundException:
        # A test that never wrote to "test" leaves nothing to delete; on
        # serverless indexes that can surface as a 404 rather than a no-op.
        # The namespace being absent is the desired end state.
        pass


# Testing Upsert and Query
def make_random_vector(dim=1536):
    """Return a random unit-length vector as a plain Python list.

    Args:
        dim: Number of components to generate (default 1536).

    Returns:
        A list of ``dim`` floats: a standard-normal sample cast to float32
        and divided by its Euclidean norm.
    """
    raw = np.random.randn(dim).astype(np.float32)
    length = np.linalg.norm(raw)
    unit = raw / length
    return unit.tolist()
def test_pinecone_upsert_and_query(pinecone_test_index):
    """An upserted vector is its own nearest neighbour and keeps its metadata."""
    import time

    # Create a known vector
    probe = make_random_vector()
    record = {
        "id": "doc-1",
        "values": probe,
        "metadata": {"source": "test", "category": "A"},
    }
    pinecone_test_index.upsert(vectors=[record], namespace="test")

    time.sleep(1)  # Wait for indexing

    # Query with the same vector -- it should come back as the top result.
    response = pinecone_test_index.query(
        vector=probe,
        top_k=1,
        namespace="test",
        include_metadata=True,
    )
    matches = response.matches
    assert len(matches) == 1

    best = matches[0]
    assert best.id == "doc-1"
    assert best.score > 0.999  # nearly identical vector
    assert best.metadata["category"] == "A"
def test_pinecone_metadata_filter(pinecone_test_index):
    """A ``category == "A"`` filter returns the A vector and hides the B vector."""
    import time

    vec_a, vec_b = make_random_vector(), make_random_vector()
    records = [
        {"id": "cat-a-1", "values": vec_a, "metadata": {"category": "A"}},
        {"id": "cat-b-1", "values": vec_b, "metadata": {"category": "B"}},
    ]
    pinecone_test_index.upsert(vectors=records, namespace="test")

    time.sleep(1)

    # Query with filter -- only category A may come back.
    only_category_a = {"category": {"$eq": "A"}}
    response = pinecone_test_index.query(
        vector=vec_a,
        top_k=10,
        namespace="test",
        filter=only_category_a,
        include_metadata=True,
    )

    returned_ids = {match.id for match in response.matches}
    assert "cat-a-1" in returned_ids
    assert "cat-b-1" not in returned_ids
def test_pinecone_namespace_isolation(pinecone_test_index):
    """Verify namespace A and namespace B don't share results.

    Writes one vector to ``test-ns-a``, confirms it is searchable there
    (positive control), then confirms the same query against ``test-ns-b``
    returns nothing. The vector is removed again afterwards because this
    test writes outside the ``test`` namespace.
    """
    import time

    vec = make_random_vector()
    pinecone_test_index.upsert(
        vectors=[{"id": "shared-id", "values": vec, "metadata": {"ns": "ns-a"}}],
        namespace="test-ns-a",
    )
    try:
        time.sleep(1)

        # Positive control: without it, an upsert that never became visible
        # would make the isolation assertion below pass vacuously.
        own = pinecone_test_index.query(
            vector=vec,
            top_k=5,
            namespace="test-ns-a",
        )
        assert "shared-id" in [m.id for m in own.matches]

        # Query in different namespace — should not find the vector
        results = pinecone_test_index.query(
            vector=vec,
            top_k=5,
            namespace="test-ns-b",  # different namespace
        )
        assert len(results.matches) == 0
    finally:
        # Remove the vector explicitly so it does not accumulate in
        # "test-ns-a" across runs.
        pinecone_test_index.delete(ids=["shared-id"], namespace="test-ns-a")
def test_pinecone_delete_propagation(pinecone_test_index):
    """A vector deleted by ID no longer shows up in query results.

    The vector is first confirmed to be searchable; otherwise the final
    "not in results" assertion would also pass for a vector that was never
    indexed in the first place.
    """
    import time

    vec = make_random_vector()
    pinecone_test_index.upsert(
        vectors=[{"id": "to-delete", "values": vec}],
        namespace="test",
    )
    time.sleep(1)

    # Precondition: the vector is visible before we delete it.
    before = pinecone_test_index.query(vector=vec, top_k=5, namespace="test")
    assert "to-delete" in [m.id for m in before.matches]

    pinecone_test_index.delete(ids=["to-delete"], namespace="test")
    time.sleep(1)

    after = pinecone_test_index.query(vector=vec, top_k=5, namespace="test")
    returned_ids = [m.id for m in after.matches]
    assert "to-delete" not in returned_ids


# Testing Weaviate Integration
Weaviate's schema-based approach requires testing collection (class) creation and object insertion:
import weaviate
import pytest
@pytest.fixture(scope="session")
def weaviate_client():
    """Yield a Weaviate client with a ``TestDocument`` collection for the session.

    Connects to the instance named by ``WEAVIATE_TEST_HOST`` /
    ``WEAVIATE_TEST_PORT`` (defaults ``localhost`` / ``8080``) and creates
    the ``TestDocument`` collection -- no vectorizer, text properties
    ``content`` and ``category`` -- when it does not exist. On teardown the
    collection is deleted and the connection is closed.

    Yields:
        The connected client.
    """
    # Configure lives next to Property/DataType in weaviate.classes.config;
    # weaviate.config holds connection settings and has no Configure.
    from weaviate.classes.config import Configure, DataType, Property

    client = weaviate.connect_to_local(
        host=os.environ.get("WEAVIATE_TEST_HOST", "localhost"),
        port=int(os.environ.get("WEAVIATE_TEST_PORT", 8080)),
    )
    try:
        # Create test class
        if not client.collections.exists("TestDocument"):
            client.collections.create(
                name="TestDocument",
                vectorizer_config=Configure.Vectorizer.none(),
                properties=[
                    Property(name="content", data_type=DataType.TEXT),
                    Property(name="category", data_type=DataType.TEXT),
                ],
            )
        yield client
        client.collections.delete("TestDocument")
    finally:
        # Close the connection even when setup or the collection delete fails.
        client.close()
def test_weaviate_insert_and_search(weaviate_client):
    """An object inserted with a known vector is the top near-vector hit."""
    documents = weaviate_client.collections.get("TestDocument")

    # Insert with known vector
    probe = make_random_vector()
    inserted_id = documents.data.insert(
        properties={"content": "Test document content", "category": "tech"},
        vector=probe,
    )

    # Near-vector search
    with_certainty = weaviate.classes.query.MetadataQuery(certainty=True)
    response = documents.query.near_vector(
        near_vector=probe,
        limit=1,
        return_metadata=with_certainty,
    )

    hits = response.objects
    assert len(hits) == 1

    best = hits[0]
    assert str(best.uuid) == str(inserted_id)
    assert best.metadata.certainty > 0.99
def test_weaviate_where_filter(weaviate_client):
    """A ``category == "tech"`` filter keeps the tech object and drops the science one.

    Both objects are stored with the same vector, so only the filter can
    tell them apart. The tech object's UUID must be present in the results,
    which stops the test from passing on an empty result set.
    """
    collection = weaviate_client.collections.get("TestDocument")
    vec_tech = make_random_vector()

    tech_uuid = collection.data.insert(
        properties={"content": "Tech article", "category": "tech"},
        vector=vec_tech,
    )
    science_uuid = collection.data.insert(
        properties={"content": "Science article", "category": "science"},
        vector=vec_tech,  # same vector — only filter should distinguish
    )

    results = collection.query.near_vector(
        near_vector=vec_tech,
        limit=10,
        filters=weaviate.classes.query.Filter.by_property("category").equal("tech"),
        return_properties=["content", "category"],
    )

    returned_uuids = [str(obj.uuid) for obj in results.objects]
    categories = [obj.properties["category"] for obj in results.objects]

    # Non-vacuous check: the filter must let the matching object through.
    assert str(tech_uuid) in returned_uuids
    assert str(science_uuid) not in returned_uuids
    assert all(c == "tech" for c in categories)
    assert "science" not in categories


# Testing Qdrant Integration
Qdrant uses collections with explicit vector configuration:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
@pytest.fixture(scope="session")
def qdrant_client():
    """Yield ``(client, collection_name)`` for a session-wide Qdrant test collection.

    Connects to ``QDRANT_TEST_HOST`` / ``QDRANT_TEST_PORT`` (defaults
    ``localhost`` / ``6333``), creates the 1536-dimension cosine collection
    ``test_documents`` if it is missing, and deletes it on teardown.
    """
    host = os.environ.get("QDRANT_TEST_HOST", "localhost")
    port = int(os.environ.get("QDRANT_TEST_PORT", 6333))
    client = QdrantClient(host=host, port=port)

    collection_name = "test_documents"
    already_present = client.collection_exists(collection_name)
    if not already_present:
        vector_params = VectorParams(size=1536, distance=Distance.COSINE)
        client.create_collection(
            collection_name=collection_name,
            vectors_config=vector_params,
        )

    yield client, collection_name

    client.delete_collection(collection_name)
def test_qdrant_upsert_and_search(qdrant_client):
    """A point upserted with a known vector is the top search hit with its payload."""
    client, collection = qdrant_client

    probe = make_random_vector()
    point = PointStruct(
        id=1,
        vector=probe,
        payload={"content": "Test document", "category": "A"},
    )
    client.upsert(collection_name=collection, points=[point])

    hits = client.search(
        collection_name=collection,
        query_vector=probe,
        limit=1,
        with_payload=True,
    )
    assert len(hits) == 1

    best = hits[0]
    assert best.id == 1
    assert best.score > 0.999
    assert best.payload["category"] == "A"
def test_qdrant_payload_filter(qdrant_client):
    """A ``category == "A"`` payload filter returns A points and hides B points.

    Point 12 is stored with the query vector and category ``A``, so it must
    be returned; asserting that stops the test from passing on an empty
    result set.
    """
    client, collection = qdrant_client
    vec = make_random_vector()
    client.upsert(
        collection_name=collection,
        points=[
            PointStruct(id=10, vector=make_random_vector(), payload={"category": "A"}),
            PointStruct(id=11, vector=make_random_vector(), payload={"category": "B"}),
            PointStruct(id=12, vector=vec, payload={"category": "A"}),
        ],
    )

    results = client.search(
        collection_name=collection,
        query_vector=vec,
        limit=10,
        query_filter=Filter(
            must=[FieldCondition(key="category", match=MatchValue(value="A"))]
        ),
        with_payload=True,
    )

    ids = [r.id for r in results]
    assert 12 in ids  # the matching point must pass the filter
    assert 11 not in ids  # category B should be filtered out
    assert all(r.payload["category"] == "A" for r in results)


# Testing Your Application-Level Abstraction
If your application wraps the vector DB client in a service layer, test that abstraction:
from your_app.vector_store import VectorStore
def test_vector_store_add_and_search():
    """A document added through ``VectorStore`` is the top hit for a matching query."""
    store = VectorStore(backend="qdrant", collection="test")

    doc_metadata = {"source": "docs", "doc_type": "database"}
    doc_id = store.add_document(
        text="PostgreSQL is an open-source relational database",
        metadata=doc_metadata,
    )

    hits = store.search(
        query="open source relational database",
        top_k=5,
        filter={"doc_type": "database"},
    )
    assert len(hits) > 0

    best = hits[0]
    assert best.doc_id == doc_id
    assert best.score > 0.7
    assert best.metadata["source"] == "docs"
def test_vector_store_delete_removes_from_search():
    """A document deleted through ``VectorStore`` is absent from later searches."""
    store = VectorStore(backend="qdrant", collection="test")

    doc_id = store.add_document("Temporary document to be deleted", metadata={})
    store.delete_document(doc_id)

    hits = store.search("Temporary document to be deleted", top_k=5)
    returned_ids = [hit.doc_id for hit in hits]
    assert doc_id not in returned_ids


# Key Takeaways
- Always use a separate test index or namespace — never run tests against your production index
- Test upsert-then-query immediately to catch indexing latency issues
- Test metadata filters explicitly — they are a common source of silent bugs
- Test namespace/collection isolation to catch multi-tenant routing errors
- Test delete propagation — deleted vectors must not appear in subsequent queries
- Test your application-layer abstraction, not just the raw client — that's where application bugs live