Gemini API Testing Guide: Test Google AI with Python
Google's Gemini API powers everything from simple text generation to multimodal applications that process images, audio, and documents. Testing Gemini-powered apps requires handling non-deterministic outputs, managing API costs, and validating complex multimodal behaviors. Here's a complete testing framework.
Gemini SDK Overview
Google provides two ways to use Gemini:
- google-generativeai — Python SDK for direct Gemini access
- Vertex AI SDK — Enterprise-grade, GCP-integrated
This guide covers both. Install:
pip install google-generativeai google-cloud-aiplatform pillow pytest pytest-asyncio pytest-mock pytest-timeout

Project Structure
gemini_app/
├── src/
│ ├── gemini_client.py # Wrapper around google-generativeai
│ └── vertex_client.py # Vertex AI alternative
├── tests/
│ ├── conftest.py
│ ├── test_unit.py
│ ├── test_integration.py
│ └── test_multimodal.py
└── .env

.env:
GOOGLE_API_KEY=AIza...
GEMINI_MODEL=gemini-1.5-flash
VERTEX_PROJECT=my-gcp-project
VERTEX_LOCATION=us-central1

The Gemini Client
# src/gemini_client.py
import google.generativeai as genai
import os
class GeminiClient:
def __init__(self):
    """Configure the SDK from the environment and build the model wrapper.

    Reads ``GOOGLE_API_KEY`` (required; a missing variable raises
    ``KeyError``) and ``GEMINI_MODEL`` (optional, defaults to
    ``gemini-1.5-flash``).
    """
    api_key = os.environ["GOOGLE_API_KEY"]
    genai.configure(api_key=api_key)

    model_name = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash")
    generation_config = genai.GenerationConfig(
        temperature=0.7,
        max_output_tokens=2048,
    )
    self.model = genai.GenerativeModel(
        model_name=model_name,
        generation_config=generation_config,
    )
    # Running total of the token counts added by generate().
    self.total_tokens = 0
def generate(self, prompt: str) -> str:
    """Send a text prompt to the model and return the response text.

    Args:
        prompt: The text to send; must contain non-whitespace characters.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        ValueError: If the prompt is empty or whitespace-only, or if the
            response reports a block reason for the prompt.
    """
    if prompt.strip() == "":
        raise ValueError("Prompt cannot be empty")

    response = self.model.generate_content(prompt)

    feedback = response.prompt_feedback
    if feedback.block_reason:
        raise ValueError(f"Prompt blocked: {feedback.block_reason}")

    # Usage is recorded before the text is read, so the count is kept even
    # if accessing the text fails.
    used = response.usage_metadata.total_token_count
    self.total_tokens = self.total_tokens + used
    return response.text
def generate_with_image(self, prompt: str, image_path: str) -> str:
    """Generate text from a prompt plus an image loaded from disk.

    Applies the same post-processing as ``generate``: a blocked prompt
    raises ``ValueError`` and the response's token count is added to
    ``self.total_tokens``.

    Args:
        prompt: Instruction or question about the image.
        image_path: Path of the image file to send with the prompt.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        ValueError: If the response reports a block reason for the prompt.
    """
    import PIL.Image

    # Close the image's file handle as soon as the request has been made
    # instead of leaving it open until garbage collection.
    with PIL.Image.open(image_path) as img:
        response = self.model.generate_content([prompt, img])

    block_reason = response.prompt_feedback.block_reason
    if block_reason:
        raise ValueError(f"Prompt blocked: {block_reason}")
    self.total_tokens += response.usage_metadata.total_token_count
    return response.text
def start_chat(self):
return self.model.start_chat(history=[])

Unit Tests with Mocks
Gemini's SDK structure requires careful mocking. The key objects to mock are the genai module, the GenerativeModel instance it returns, and the response object:
# tests/conftest.py
import pytest
from unittest.mock import MagicMock, patch
def make_gemini_response(
    text: str,
    blocked: bool = False,
    *,
    prompt_tokens: int = 50,
    candidates_tokens: int = 100,
):
    """Build a mock object shaped like a Gemini response.

    Args:
        text: Value exposed as ``response.text``.
        blocked: When true, ``prompt_feedback.block_reason`` is ``"SAFETY"``;
            otherwise it is ``None``.
        prompt_tokens: Value for ``usage_metadata.prompt_token_count``.
        candidates_tokens: Value for
            ``usage_metadata.candidates_token_count``.

    Returns:
        A ``MagicMock`` with ``text``, ``prompt_feedback`` and
        ``usage_metadata`` populated. ``total_token_count`` is the sum of the
        two token arguments (150 with the defaults).
    """
    response = MagicMock()
    response.text = text

    response.prompt_feedback = MagicMock()
    response.prompt_feedback.block_reason = "SAFETY" if blocked else None

    response.usage_metadata = MagicMock()
    response.usage_metadata.prompt_token_count = prompt_tokens
    response.usage_metadata.candidates_token_count = candidates_tokens
    # Derive the total so the three counts can never disagree.
    response.usage_metadata.total_token_count = prompt_tokens + candidates_tokens
    return response
@pytest.fixture
def mock_gemini(monkeypatch):
    """Patch the SDK used by ``src.gemini_client`` and yield the mock model.

    The yielded mock is what ``genai.GenerativeModel(...)`` returns inside
    the client; its ``generate_content`` returns a default
    ``make_gemini_response("Test response")`` that tests may override.
    """
    # GeminiClient.__init__ reads os.environ["GOOGLE_API_KEY"] directly, so a
    # dummy key is needed even though the SDK itself is mocked.
    monkeypatch.setenv("GOOGLE_API_KEY", "test-api-key")
    with patch("src.gemini_client.genai") as mock_genai:
        mock_model = MagicMock()
        mock_genai.GenerativeModel.return_value = mock_model
        mock_model.generate_content.return_value = make_gemini_response("Test response")
        yield mock_model


# tests/test_unit.py
from unittest.mock import MagicMock, patch

import pytest

from src.gemini_client import GeminiClient
class TestGeminiClient:
def test_generate_returns_text(self, mock_gemini):
client = GeminiClient()
result = client.generate("Explain unit testing")
assert isinstance(result, str)
assert len(result) > 0
def test_empty_prompt_raises(self, mock_gemini):
client = GeminiClient()
with pytest.raises(ValueError, match="cannot be empty"):
client.generate("")
def test_blocked_prompt_raises(self, mock_gemini):
from tests.conftest import make_gemini_response
mock_gemini.generate_content.return_value = make_gemini_response("", blocked=True)
client = GeminiClient()
with pytest.raises(ValueError, match="Prompt blocked"):
client.generate("Problematic content")
def test_tracks_token_usage(self, mock_gemini):
client = GeminiClient()
assert client.total_tokens == 0
client.generate("Hello")
assert client.total_tokens == 150
def test_accumulates_tokens_across_calls(self, mock_gemini):
client = GeminiClient()
client.generate("First call")
client.generate("Second call")
assert client.total_tokens == 300
def test_model_configured_from_env(self, mock_gemini, monkeypatch):
monkeypatch.setenv("GEMINI_MODEL", "gemini-1.5-pro")
with patch("src.gemini_client.genai") as mock_genai:
mock_genai.GenerativeModel.return_value = MagicMock()
client = GeminiClient()
call_args = mock_genai.GenerativeModel.call_args
assert call_args.kwargs["model_name"] == "gemini-1.5-pro"

Testing Chat Sessions
Gemini's multi-turn chat requires testing that conversation history is maintained:
# tests/test_chat.py
import pytest
from unittest.mock import MagicMock, call
from src.gemini_client import GeminiClient
class TestChatSession:
def test_chat_sends_messages_sequentially(self, mock_gemini):
mock_chat = MagicMock()
mock_gemini.start_chat.return_value = mock_chat
mock_chat.send_message.side_effect = [
MagicMock(text="I'm Gemini, a helpful AI assistant."),
MagicMock(text="Sure, the capital of France is Paris."),
]
client = GeminiClient()
chat = client.start_chat()
response1 = chat.send_message("Who are you?")
response2 = chat.send_message("What's the capital of France?")
assert "Gemini" in response1.text or "AI" in response1.text
assert "Paris" in response2.text
assert mock_chat.send_message.call_count == 2
def test_chat_history_preserved(self, mock_gemini):
"""Verify that history parameter is passed correctly."""
history = [
{"role": "user", "parts": ["Hello"]},
{"role": "model", "parts": ["Hi there!"]}
]
client = GeminiClient()
mock_gemini.start_chat.return_value = MagicMock()
mock_gemini.start_chat(history=history)
call_kwargs = mock_gemini.start_chat.call_args.kwargs
assert call_kwargs["history"] == history

Integration Tests
# tests/test_integration.py
import pytest
import google.generativeai as genai
import os
import json
pytestmark = pytest.mark.skipif(
os.environ.get("RUN_INTEGRATION_TESTS") != "true",
reason="Set RUN_INTEGRATION_TESTS=true to enable"
)
@pytest.fixture(scope="session")
def gemini_model():
    """Return a real Gemini model shared by the whole test session.

    Configures the SDK with ``GOOGLE_API_KEY`` (required) and selects the
    model from ``GEMINI_MODEL``, falling back to ``gemini-1.5-flash``.
    """
    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    # Honour GEMINI_MODEL the same way GeminiClient does, so integration
    # tests run against the model the application is configured to use.
    return genai.GenerativeModel(os.environ.get("GEMINI_MODEL", "gemini-1.5-flash"))
class TestGeminiIntegration:
def test_simple_generation(self, gemini_model):
response = gemini_model.generate_content(
"Reply with the single word: testing"
)
assert "testing" in response.text.lower()
def test_json_structured_output(self, gemini_model):
response = gemini_model.generate_content(
'Return ONLY valid JSON: {"language": "Python", "version": 3}',
generation_config=genai.GenerationConfig(
response_mime_type="application/json"
)
)
data = json.loads(response.text)
assert data["language"] == "Python"
assert data["version"] == 3
def test_safety_filters_active(self, gemini_model):
"""Verify a benign prompt is answered with two safety thresholds set to BLOCK_NONE.

Only a harmless arithmetic question is sent, so this checks that benign
content passes; it does not exercise blocking of problematic content.
"""
response = gemini_model.generate_content(
"What is 2 + 2?",
safety_settings={
"HARASSMENT": "BLOCK_NONE",
"HATE_SPEECH": "BLOCK_NONE"
}
)
# Benign content should never be blocked, and the answer should contain "4"
assert response.text is not None
assert "4" in response.text
def test_token_counting(self, gemini_model):
text = "Hello, how does token counting work in Gemini?"
token_count = gemini_model.count_tokens(text)
assert token_count.total_tokens > 0
assert token_count.total_tokens < 50 # Simple sentence, few tokens
def test_multi_turn_conversation(self, gemini_model):
chat = gemini_model.start_chat()
r1 = chat.send_message("My name is Alex. Remember it.")
r2 = chat.send_message("What's my name?")
assert "Alex" in r2.text
def test_long_context_window(self, gemini_model):
"""Gemini 1.5 Flash supports 1M token context."""
# Test with a moderately long input
long_text = "The quick brown fox jumps over the lazy dog. " * 500
response = gemini_model.generate_content(
f"How many words does this text start with? Just give the number.\n\n{long_text[:500]}"
)
assert response.text is not None

Testing Multimodal Features
Gemini's image understanding requires specialized tests:
# tests/test_multimodal.py
import pytest
import io
import os
from unittest.mock import MagicMock, patch
from PIL import Image
@pytest.fixture
def sample_image():
"""Create a simple test image in memory."""
img = Image.new("RGB", (100, 100), color=(255, 0, 0)) # Red square
img_bytes = io.BytesIO()
img.save(img_bytes, format="PNG")
img_bytes.seek(0)
return img_bytes
class TestMultimodalProcessing:
def test_image_description_mock(self, mock_gemini, sample_image):
mock_gemini.generate_content.return_value = MagicMock(
text="A red square on a white background.",
prompt_feedback=MagicMock(block_reason=None),
usage_metadata=MagicMock(total_token_count=100)
)
from src.gemini_client import GeminiClient
client = GeminiClient()
with patch("PIL.Image.open", return_value=Image.open(sample_image)):
result = client.generate_with_image("Describe this image", "test.png")
assert isinstance(result, str)
assert len(result) > 0
@pytest.mark.skipif(
os.environ.get("RUN_INTEGRATION_TESTS") != "true",
reason="Integration test"
)
def test_image_description_integration(self, sample_image):
import google.generativeai as genai
import PIL.Image
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
model = genai.GenerativeModel("gemini-1.5-flash")
img = PIL.Image.open(sample_image)
response = model.generate_content(["What color is the dominant color?", img])
assert "red" in response.text.lower()

Testing Vertex AI (Enterprise)
For production deployments using Vertex AI:
# tests/test_vertex.py
import pytest
from unittest.mock import MagicMock, patch
class TestVertexGemini:
@pytest.fixture
def mock_vertex(self):
with patch("vertexai.generative_models.GenerativeModel") as mock_cls:
mock_model = MagicMock()
mock_cls.return_value = mock_model
response = MagicMock()
response.text = "Vertex AI response"
response.usage_metadata.total_token_count = 120
mock_model.generate_content.return_value = response
yield mock_model
def test_vertex_model_generates(self, mock_vertex):
from src.vertex_client import VertexGeminiClient
client = VertexGeminiClient(
project="test-project",
location="us-central1"
)
result = client.generate("Test prompt")
assert result == "Vertex AI response"
def test_vertex_uses_correct_location(self, mock_vertex):
with patch("vertexai.init") as mock_init:
from src.vertex_client import VertexGeminiClient
VertexGeminiClient(project="proj", location="europe-west4")
mock_init.assert_called_with(project="proj", location="europe-west4")

Error Handling Tests
# tests/test_error_handling.py
import pytest
from unittest.mock import MagicMock, patch
import google.generativeai as genai
class TestGeminiErrors:
def test_handles_quota_exceeded(self, mocker, monkeypatch):
    """A quota error raised by the SDK propagates out of ``generate``."""
    import google.api_core.exceptions as gexc

    # GeminiClient.__init__ reads GOOGLE_API_KEY from the environment even
    # when the SDK is mocked; a dummy value lets the test run without secrets.
    monkeypatch.setenv("GOOGLE_API_KEY", "test-api-key")
    mock = mocker.patch("src.gemini_client.genai")
    mock.GenerativeModel.return_value.generate_content.side_effect = gexc.ResourceExhausted(
        "Quota exceeded"
    )
    from src.gemini_client import GeminiClient

    client = GeminiClient()
    with pytest.raises(gexc.ResourceExhausted):
        client.generate("test prompt")
def test_handles_invalid_api_key(self, mocker, monkeypatch):
    """A permission error raised by the SDK propagates out of ``generate``."""
    import google.api_core.exceptions as gexc

    # The client constructor needs GOOGLE_API_KEY to exist; the value is
    # irrelevant because the SDK is mocked and the error is injected below.
    monkeypatch.setenv("GOOGLE_API_KEY", "invalid-test-key")
    mock = mocker.patch("src.gemini_client.genai")
    mock.GenerativeModel.return_value.generate_content.side_effect = gexc.PermissionDenied(
        "API key not valid"
    )
    from src.gemini_client import GeminiClient

    client = GeminiClient()
    with pytest.raises(gexc.PermissionDenied):
        client.generate("test")
def test_retry_on_transient_error(self, mocker):
import google.api_core.exceptions as gexc
call_count = 0
def side_effect(*args, **kwargs):
nonlocal call_count
call_count += 1
if call_count < 3:
raise gexc.ServiceUnavailable("Temporary overload")
m = MagicMock()
m.text = "Success"
m.prompt_feedback.block_reason = None
m.usage_metadata.total_token_count = 50
return m
mock = mocker.patch("src.gemini_client.genai")
mock.GenerativeModel.return_value.generate_content.side_effect = side_effect
from src.gemini_client_with_retry import GeminiClientWithRetry
client = GeminiClientWithRetry(max_retries=3, base_delay=0.01)
result = client.generate("test")
assert result == "Success"
assert call_count == 3

CI/CD Configuration
# .github/workflows/test.yml
name: Gemini App Tests
on:
push:
branches: [main]
schedule:
- cron: '0 7 * * 2' # Weekly Tuesday runs
jobs:
unit-tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- run: pip install -r requirements.txt
- run: pytest tests/test_unit.py tests/test_chat.py tests/test_error_handling.py -v
integration-tests:
runs-on: ubuntu-latest
if: github.event_name == 'schedule'
env:
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
RUN_INTEGRATION_TESTS: "true"
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- run: pip install -r requirements.txt
- run: pytest tests/test_integration.py tests/test_multimodal.py -v --timeout=120

End-to-End Testing
Unit tests verify your Gemini integration layer. But if you've built a user-facing product — a document analyzer, a multimodal chatbot, an AI-powered search tool — you need end-to-end tests that validate the complete user experience.
HelpMeTest runs automated browser tests against your live application:
*** Test Cases ***
Gemini Chatbot Answers Document Questions
Go To https://your-app.com/chat
Upload File .file-upload ${CURDIR}/test-document.pdf
Wait Until Element Is Visible .upload-complete
Fill Text .question-input Summarize the key findings
Click .submit-button
Wait Until Element Is Visible .ai-response timeout=45s
Element Should Contain .ai-response findings

This catches the class of bugs that only appear in production: UI state management issues, timeouts on large documents, or Gemini API errors that your error handling doesn't catch properly.
Summary
- Mock genai.GenerativeModel at the class level in unit tests
- Test blocked responses — Gemini's safety filters can block valid prompts in edge cases
- Gate integration tests with env vars — never run real API calls on every commit
- Test chat history explicitly — multi-turn conversations have statefulness bugs
- Use response_mime_type="application/json" when you need structured output — more reliable than prompting for JSON
- Test multimodal inputs with synthetic images in unit tests, real images in integration tests
- Monitor token usage across integration test runs to control costs