Pinecone Integration Testing: Index Operations, Namespaces, and Validation

Pinecone is a managed vector database used in production RAG systems, semantic search, and recommendation engines. Testing Pinecone integrations differs from testing local databases — you're working with a remote, managed service, which introduces network latency, eventual consistency, and cost considerations.

This guide covers integration testing strategies for Pinecone: index operations, namespace isolation, upsert/query validation, and how to structure tests for both serverless and pod-based indexes.

Testing Strategy for Pinecone

Pinecone tests fall into three categories:

1. Unit tests (no Pinecone calls): Test your application code that builds queries, processes results, and handles errors. Use mocks.

2. Integration tests (Pinecone test index): Test against a dedicated test index in Pinecone. These are slower (100-500ms per operation) and cost real money, but verify that your code works with the actual Pinecone API.

3. Smoke tests (production index): Minimal tests against the real index to verify deployment health. Very few, read-only.

Structure your CI to run unit tests on every commit, integration tests on PRs and scheduled runs, and smoke tests after deploy.

Setup

pip install pinecone-client pytest pytest-asyncio python-dotenv
# tests/conftest.py
import pytest
import os
import time
from pinecone import Pinecone, ServerlessSpec

@pytest.fixture(scope="session")
def pc():
    """Build the session-wide Pinecone client, or skip when no key is configured.

    The key is read from the ``PINECONE_TEST_API_KEY`` environment variable.
    """
    key = os.environ.get("PINECONE_TEST_API_KEY")
    if key:
        return Pinecone(api_key=key)
    # pytest.skip raises, so nothing after this line runs.
    pytest.skip("PINECONE_TEST_API_KEY not set — skipping Pinecone integration tests")

@pytest.fixture(scope="session")
def test_index_name():
    """Return a unique index name for this test session.

    The timestamp keeps names human-sortable; the random suffix prevents
    collisions when two sessions (e.g. parallel CI jobs) start within the
    same second against the same Pinecone project.
    """
    import uuid

    return f"integration-test-{int(time.time())}-{uuid.uuid4().hex[:8]}"

@pytest.fixture(scope="session")
def test_index(pc, test_index_name):
    """Create a test index, yield a handle to it, and delete it afterwards.

    Raises:
        TimeoutError: if the index does not report ready within the deadline.
    """
    ready_timeout_seconds = 120

    pc.create_index(
        name=test_index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

    # Everything after creation runs under try/finally so the index is deleted
    # even when the readiness wait fails; otherwise it would be orphaned.
    try:
        deadline = time.monotonic() + ready_timeout_seconds
        while not pc.describe_index(test_index_name).status["ready"]:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Index '{test_index_name}' not ready after {ready_timeout_seconds}s"
                )
            time.sleep(1)

        yield pc.Index(test_index_name)
    finally:
        # Cleanup after all tests complete (or after a failed setup)
        pc.delete_index(test_index_name)

Testing Index Operations

# tests/pinecone/test_index_operations.py
import pytest
import time
from pinecone import Pinecone, ServerlessSpec, PodSpec

class TestIndexLifecycle:
    """Index-level checks: creation/description, listing, and stats."""

    def test_create_and_describe_serverless_index(self, pc):
        """Serverless index should be created with correct configuration."""
        index_name = f"test-serverless-{int(time.time())}"

        pc.create_index(
            name=index_name,
            dimension=256,
            metric="dotproduct",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )

        try:
            # Poll for the ready state: 30 attempts, 2 seconds apart.
            for _ in range(30):
                desc = pc.describe_index(index_name)
                if desc.status["ready"]:
                    break
                time.sleep(2)
            else:
                # Fail explicitly instead of asserting on an index that never
                # became ready.
                pytest.fail(f"Index '{index_name}' was not ready after 30 polls")

            assert desc.dimension == 256
            assert desc.metric == "dotproduct"
            assert "serverless" in str(desc.spec).lower()
        finally:
            pc.delete_index(index_name)

    def test_list_indexes_includes_new_index(self, pc, test_index, test_index_name):
        """Newly created index should appear in list_indexes.

        ``test_index`` is requested only for its side effect: it is the
        fixture that creates the index, whereas ``test_index_name`` merely
        returns the name.
        """
        index_names = [idx.name for idx in pc.list_indexes()]

        assert test_index_name in index_names

    def test_describe_index_stats(self, test_index):
        """describe_index_stats should report the index dimension and a vector count."""
        stats = test_index.describe_index_stats()

        assert stats.dimension == 384
        assert isinstance(stats.total_vector_count, int)
        assert stats.total_vector_count >= 0

Testing Upsert Operations

# tests/pinecone/test_upsert.py
import pytest
import time
import numpy as np

def random_vector(dim=384) -> list[float]:
    """Return a random unit-length vector of ``dim`` floats as a plain list."""
    raw = np.random.randn(dim).astype(np.float32)
    length = np.linalg.norm(raw)
    unit = raw / length
    return unit.tolist()

def wait_for_upsert(
    index,
    namespace: str = "",
    expected_count: int = 1,
    timeout: int = 30,
    poll_interval: float = 1.0,
):
    """Poll until upserted vectors are available (eventual consistency).

    Args:
        index: Object exposing ``describe_index_stats()`` whose result has a
            ``namespaces`` mapping keyed by namespace name.
        namespace: Namespace whose vector count is checked.
        expected_count: Minimum vector count that ends the wait.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Raises:
        TimeoutError: if the count is still below ``expected_count`` once the
            timeout has elapsed.
    """
    # monotonic() is immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout
    while True:
        stats = index.describe_index_stats()
        ns_stats = stats.namespaces.get(namespace)
        if ns_stats is None:
            count = 0
        elif isinstance(ns_stats, dict):
            # Dict-shaped summaries (e.g. from mocks) expose the count by key.
            count = ns_stats.get("vector_count", 0)
        else:
            count = getattr(ns_stats, "vector_count", 0)

        if count >= expected_count:
            return
        # Check the deadline after polling so at least one poll always
        # happens, even with timeout=0.
        if time.monotonic() >= deadline:
            break
        time.sleep(poll_interval)
    raise TimeoutError(f"Timed out waiting for {expected_count} vectors in namespace '{namespace}'")

class TestUpsertOperations:
    """Upsert behavior: single, metadata, batch, and overwrite semantics."""

    def test_upsert_single_vector(self, test_index):
        """Single vector upsert should succeed."""
        vector_id = f"single-{int(time.time())}"

        response = test_index.upsert(
            vectors=[{"id": vector_id, "values": random_vector()}],
            namespace="test-upsert",
        )

        assert response.upserted_count == 1

    def test_upsert_with_metadata(self, test_index):
        """Metadata should be stored alongside vector values."""
        vector_id = f"meta-{int(time.time())}"
        metadata = {"source": "test", "category": "integration", "score": 0.95}

        test_index.upsert(
            vectors=[{
                "id": vector_id,
                "values": random_vector(),
                "metadata": metadata,
            }],
            namespace="test-metadata",
        )

        wait_for_upsert(test_index, namespace="test-metadata", expected_count=1)

        # Fetch and verify metadata
        result = test_index.fetch(ids=[vector_id], namespace="test-metadata")
        stored = result.vectors[vector_id]

        assert stored.metadata["source"] == "test"
        assert stored.metadata["category"] == "integration"
        # Tolerance avoids relying on an exact float round-trip.
        assert abs(stored.metadata["score"] - 0.95) < 0.001

    def test_batch_upsert(self, test_index):
        """Batch upsert should report every vector in the batch as upserted."""
        prefix = f"batch-{int(time.time())}"
        vectors = [
            {"id": f"{prefix}-{i}", "values": random_vector()}
            for i in range(50)
        ]

        response = test_index.upsert(
            vectors=vectors,
            namespace="test-batch",
        )

        assert response.upserted_count == 50

    def test_upsert_updates_existing_vector(self, test_index):
        """Upserting with an existing ID should replace the vector."""
        vector_id = f"update-{int(time.time())}"
        original = random_vector()
        updated = random_vector()

        test_index.upsert(
            vectors=[{"id": vector_id, "values": original}],
            namespace="test-update",
        )
        wait_for_upsert(test_index, "test-update", 1)

        test_index.upsert(
            vectors=[{"id": vector_id, "values": updated}],
            namespace="test-update",
        )

        # An overwrite does not change the namespace's vector count, so
        # count-based polling cannot tell when it is visible. Poll the fetch
        # itself until the new values appear or the deadline passes.
        deadline = time.monotonic() + 30
        while True:
            result = test_index.fetch(ids=[vector_id], namespace="test-update")
            fetched = result.vectors[vector_id].values
            if np.allclose(fetched, updated, atol=1e-4) or time.monotonic() >= deadline:
                break
            time.sleep(1)

        # Fetched vector should match updated, not original
        assert not np.allclose(fetched, original, atol=1e-4)
        assert np.allclose(fetched, updated, atol=1e-4)

Testing Namespace Isolation

Namespaces are Pinecone's mechanism for logical data separation. Incorrect namespace configuration is a common production bug:

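# tests/pinecone/test_namespaces.py
# NOTE: random_vector and wait_for_upsert are the helpers defined in test_upsert.py;
# move them to a shared helpers module (or import them) so this file can use them.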
import pytest
import time

class TestNamespaceIsolation:
    """Verify that namespaces keep vectors, deletes, and stats separate."""

    def test_vectors_isolated_by_namespace(self, test_index):
        """Vectors in namespace A should not appear in queries to namespace B."""
        prefix = f"iso-{int(time.time())}"
        vec = random_vector()

        # Upsert to namespace A
        test_index.upsert(
            vectors=[{"id": f"{prefix}-vec", "values": vec}],
            namespace="namespace-a",
        )
        wait_for_upsert(test_index, "namespace-a", 1)

        # Query namespace B with the same vector; the upserted id must not come back
        results = test_index.query(
            vector=vec,
            top_k=5,
            namespace="namespace-b",
            include_metadata=False,
        )

        result_ids = [m["id"] for m in results["matches"]]
        assert f"{prefix}-vec" not in result_ids, (
            "Vector from namespace-a leaked into namespace-b query"
        )

    def test_query_default_namespace_does_not_include_named_namespace(self, test_index):
        """The default (empty string) namespace should be separate from named namespaces."""
        prefix = f"ns-test-{int(time.time())}"
        vec = random_vector()

        test_index.upsert(
            vectors=[{"id": f"{prefix}-named", "values": vec}],
            namespace="custom-namespace",
        )
        wait_for_upsert(test_index, "custom-namespace", 1)

        # Query the default namespace explicitly
        results = test_index.query(
            vector=vec,
            top_k=5,
            namespace="",  # Default namespace
        )

        result_ids = [m["id"] for m in results["matches"]]
        assert f"{prefix}-named" not in result_ids

    def test_delete_from_specific_namespace(self, test_index):
        """Delete should only affect the specified namespace."""
        vector_id = f"del-ns-{int(time.time())}"
        vec = random_vector()

        # Add the same id to two namespaces
        test_index.upsert(
            vectors=[{"id": vector_id, "values": vec}],
            namespace="ns-delete",
        )
        test_index.upsert(
            vectors=[{"id": vector_id, "values": vec}],
            namespace="ns-keep",
        )
        # Both writes must be visible before deleting; otherwise the final
        # fetch from ns-keep can fail purely because of write latency.
        wait_for_upsert(test_index, "ns-delete", 1)
        wait_for_upsert(test_index, "ns-keep", 1)

        # Delete from ns-delete
        test_index.delete(ids=[vector_id], namespace="ns-delete")

        # Poll until the delete is observable rather than sleeping for a fixed time.
        deadline = time.monotonic() + 30
        while time.monotonic() < deadline:
            deleted = test_index.fetch(ids=[vector_id], namespace="ns-delete")
            if vector_id not in deleted.vectors:
                break
            time.sleep(1)
        else:
            pytest.fail(f"Vector '{vector_id}' still present in ns-delete after delete")

        # Fetch from ns-keep — should still exist
        result = test_index.fetch(ids=[vector_id], namespace="ns-keep")
        assert vector_id in result.vectors

    def test_namespace_stats_tracked_separately(self, test_index):
        """Each namespace should have its own vector count in stats."""
        prefix = f"stats-{int(time.time())}"

        test_index.upsert(
            vectors=[
                {"id": f"{prefix}-1", "values": random_vector()},
                {"id": f"{prefix}-2", "values": random_vector()},
            ],
            namespace="stats-ns",
        )
        wait_for_upsert(test_index, "stats-ns", 2)

        stats = test_index.describe_index_stats()

        # The stats-ns namespace should have at least our 2 vectors
        assert "stats-ns" in stats.namespaces
        assert stats.namespaces["stats-ns"].vector_count >= 2

Testing Query and Result Validation

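# tests/pinecone/test_query.py
# NOTE: these tests assume the random_vector helper (defined in test_upsert.py) is importable
# and that a populated_namespace fixture (not shown in this guide) has loaded vectors with
# 'category' metadata into a namespace and returns that namespace's name.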
import pytest

class TestQueryBehavior:
    """Query-side checks: result count, ordering, score range, filters, values."""

    def test_query_returns_top_k_results(self, test_index, populated_namespace):
        """Asking for top_k=5 should yield exactly five matches."""
        query_vec = random_vector()
        response = test_index.query(
            namespace=populated_namespace,
            vector=query_vec,
            top_k=5,
        )
        matches = response["matches"]

        assert len(matches) == 5

    def test_scores_are_in_descending_order(self, test_index, populated_namespace):
        """Matches should come back ordered from highest to lowest score."""
        response = test_index.query(
            vector=random_vector(),
            namespace=populated_namespace,
            top_k=10,
        )

        scores = [m["score"] for m in response["matches"]]
        expected_order = sorted(scores, reverse=True)
        assert scores == expected_order, (
            f"Scores not in descending order: {scores}"
        )

    def test_cosine_scores_between_minus_one_and_one(self, test_index, populated_namespace):
        """Every returned cosine score should lie within [-1, 1]."""
        response = test_index.query(
            vector=random_vector(),
            namespace=populated_namespace,
            top_k=10,
        )

        matches = response["matches"]
        for match in matches:
            assert -1.0 <= match["score"] <= 1.0, (
                f"Score {match['score']} out of [-1, 1] range for {match['id']}"
            )

    def test_metadata_filter_narrows_results(self, test_index, populated_namespace):
        """Only documents whose category equals 'testing' may be returned."""
        # Pre-condition: populated_namespace has docs with 'category' metadata
        category_filter = {"category": {"$eq": "testing"}}
        response = test_index.query(
            vector=random_vector(),
            namespace=populated_namespace,
            top_k=10,
            include_metadata=True,
            filter=category_filter,
        )

        matches = response["matches"]
        for match in matches:
            assert match["metadata"]["category"] == "testing", (
                f"Metadata filter leaked non-testing doc: {match['id']}"
            )

    def test_include_values_returns_vector_data(self, test_index, populated_namespace):
        """With include_values=True each match should carry its 384-dim vector."""
        response = test_index.query(
            vector=random_vector(),
            namespace=populated_namespace,
            include_values=True,
            top_k=3,
        )

        matches = response["matches"]
        for match in matches:
            assert match.get("values") is not None
            assert len(match["values"]) == 384  # Our test index dimension

Mocking Pinecone for Unit Tests

For fast unit tests that don't hit the Pinecone API:

# tests/unit/test_rag_service.py
import pytest
from unittest.mock import MagicMock, patch
from app.rag_service import RagService

@pytest.fixture
def mock_pinecone_index():
    """Provide a MagicMock index whose ``query`` returns two canned matches."""
    canned_matches = [
        {"id": "doc-1", "score": 0.92, "metadata": {"text": "HelpMeTest uses Robot Framework"}},
        {"id": "doc-2", "score": 0.85, "metadata": {"text": "Playwright is used for browser automation"}},
    ]
    fake_index = MagicMock()
    fake_index.query.return_value = {"matches": canned_matches}
    return fake_index

class TestRagService:
    """Unit tests for RagService against a mocked Pinecone index."""

    def test_retrieves_and_formats_context(self, mock_pinecone_index):
        """Context should contain text from both canned matches, using one query call."""
        rag = RagService(index=mock_pinecone_index, top_k=2)
        question = "What testing frameworks does HelpMeTest use?"

        context = rag.get_context(question)

        assert "Robot Framework" in context
        assert "Playwright" in context
        mock_pinecone_index.query.assert_called_once()

    def test_applies_score_threshold(self, mock_pinecone_index):
        """Matches scoring below the threshold should be left out of the context."""
        scored_matches = [
            {"id": "doc-1", "score": 0.92, "metadata": {"text": "Relevant"}},
            {"id": "doc-2", "score": 0.45, "metadata": {"text": "Barely relevant"}},
        ]
        mock_pinecone_index.query.return_value = {"matches": scored_matches}

        rag = RagService(index=mock_pinecone_index, score_threshold=0.7)
        context = rag.get_context("test query")

        assert "Relevant" in context
        assert "Barely relevant" not in context

Summary

Pinecone integration testing requires handling eventual consistency, namespace isolation, and the cost of real API calls:

  • Separate unit tests from integration tests — use mocks for unit tests, a dedicated test index for integration
  • Handle eventual consistency — upserted vectors aren't immediately queryable; poll with describe_index_stats
  • Test namespace isolation explicitly — it's a common source of production bugs
  • Validate result ordering and score ranges — don't assume the API always returns sorted results
  • Clean up test indexes — use session-scoped fixtures with cleanup to avoid orphaned indexes
  • Skip integration tests when API key is absent — don't block CI if credentials aren't available

With this structure, your Pinecone integration tests run reliably in CI and catch real configuration bugs before they reach production.

Read more