Audio Testing: How to Test Audio Processing, Streaming, and Playback
Audio applications span podcasts, voice messages, music processing, and real-time communication. Testing audio is harder than testing images because you can't easily "see" the output — you need to assert on metadata, waveform properties, or transcriptions.
Testing Audio Format Conversion
// services/audio-processor.js
import { execFile } from 'child_process'
import { promisify } from 'util'
import path from 'path'
// Promise-returning form of child_process.execFile, so the ffmpeg/ffprobe calls below can be awaited.
const execFileAsync = promisify(execFile)
/**
 * Transcode an audio file to MP3 by running the `ffmpeg` binary.
 *
 * The audio codec is libmp3lame at VBR quality 2, and `-y` is passed so
 * ffmpeg does not stop to ask before writing over an existing output file.
 *
 * @param {string} inputPath - Path of the source audio file.
 * @param {string} outputPath - Path the MP3 is written to.
 * @returns {Promise<void>} Resolves once the ffmpeg invocation has completed.
 */
export async function convertToMP3(inputPath, outputPath) {
  const encoderFlags = [
    '-codec:a', 'libmp3lame',
    '-qscale:a', '2', // VBR quality 2 (~190kbps)
  ]
  const ffmpegArgs = ['-i', inputPath, ...encoderFlags, '-y', outputPath]

  await execFileAsync('ffmpeg', ffmpegArgs)
}
/**
 * Describe a media file by running `ffprobe` and parsing its JSON report.
 *
 * ffprobe is asked for both the container section (`-show_format`) and the
 * per-stream section (`-show_streams`), with its own logging silenced.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {Promise<object>} The object produced by JSON-parsing ffprobe's stdout.
 */
export async function getAudioMetadata(filePath) {
  const probeArgs = [
    '-v', 'quiet',
    '-print_format', 'json',
    '-show_format',
    '-show_streams',
    filePath,
  ]
  const probe = await execFileAsync('ffprobe', probeArgs)

  return JSON.parse(probe.stdout)
}
// services/audio-processor.test.js
import fs from 'fs'
import path from 'path'
import os from 'os'
import { convertToMP3, getAudioMetadata } from './audio-processor'
// Scratch directory for the current test; created before and removed after each one.
let outputDir

beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'audio-test-')
  outputDir = fs.mkdtempSync(prefix)
})

afterEach(() => {
  // Recursive so files the test wrote into the directory go with it.
  const removal = { recursive: true }
  fs.rmSync(outputDir, removal)
})
test('converts WAV to MP3', async () => {
  const source = 'tests/fixtures/test-5s.wav'
  const target = path.join(outputDir, 'output.mp3')

  await convertToMP3(source, target)

  // Check the file was written before asking ffprobe to describe it.
  expect(fs.existsSync(target)).toBe(true)

  const { streams } = await getAudioMetadata(target)
  const firstStream = streams[0]
  expect(firstStream.codec_name).toBe('mp3')
  expect(firstStream.codec_type).toBe('audio')
})
test('output duration matches input', async () => {
const inputPath = 'tests/fixtures/test-5s.wav'
const outputPath = path.join(outputDir, 'output.mp3')
const inputMeta = await getAudioMetadata(inputPath)
await convertToMP3(inputPath, outputPath)
const outputMeta = await getAudioMetadata(outputPath)
const inputDuration = parseFloat(inputMeta.format.duration)
const outputDuration = parseFloat(outputMeta.format.duration)
// Duration should be within 0.1 seconds
expect(Math.abs(outputDuration - inputDuration)).toBeLessThan(0.1)
})Testing Speech-to-Text
// services/transcription.js
import OpenAI from 'openai'
// Upload file extension for each MIME type this helper recognises.
const WHISPER_FILE_EXTENSIONS = new Map([
  ['audio/wav', 'wav'],
  ['audio/x-wav', 'wav'],
  ['audio/wave', 'wav'],
  ['audio/mpeg', 'mp3'],
  ['audio/mp3', 'mp3'],
  ['audio/mp4', 'mp4'],
  ['video/mp4', 'mp4'],
  ['audio/m4a', 'm4a'],
  ['audio/x-m4a', 'm4a'],
  ['audio/ogg', 'ogg'],
  ['audio/webm', 'webm'],
  ['video/webm', 'webm'],
  ['audio/flac', 'flac'],
  ['audio/x-flac', 'flac'],
])

/**
 * Transcribe audio with the `whisper-1` model and return the recognised text.
 *
 * The upload is named `audio.<ext>`, where `<ext>` is looked up from
 * `mimeType`, so the file name no longer claims `.wav` for non-WAV payloads.
 * NOTE(review): the transcription endpoint is understood to rely on the file
 * name's extension to identify the format — confirm against the OpenAI API
 * reference.
 *
 * @param {Buffer|ArrayBuffer|Uint8Array|Blob} audioBuffer - Encoded audio bytes; wrapped in a File unchanged.
 * @param {string} [mimeType='audio/wav'] - MIME type of the bytes. Parameters such as
 *   `;codecs=opus` are ignored for the extension lookup but kept on the File's type.
 * @returns {Promise<string>} The `text` field of the transcription response.
 */
export async function transcribeAudio(audioBuffer, mimeType = 'audio/wav') {
  // "audio/webm;codecs=opus" -> "audio/webm"
  const baseType = String(mimeType).split(';')[0].trim().toLowerCase()
  // Unrecognised types keep the previous hard-coded "audio.wav" name.
  const extension = WHISPER_FILE_EXTENSIONS.get(baseType) ?? 'wav'
  const upload = new File([audioBuffer], `audio.${extension}`, { type: mimeType })

  const openai = new OpenAI()
  const transcription = await openai.audio.transcriptions.create({
    file: upload,
    model: 'whisper-1',
  })
  return transcription.text
}
// Unit test: mock the API
vi.mock('openai')
test('calls Whisper API with correct parameters', async () => {
const mockCreate = vi.fn().mockResolvedValue({ text: 'Hello world' })
OpenAI.prototype.audio = { transcriptions: { create: mockCreate } }
const audioBuffer = Buffer.from('fake-wav-data')
const result = await transcribeAudio(audioBuffer)
expect(result).toBe('Hello world')
expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
model: 'whisper-1',
}))
})Testing Podcast Feed Generation
// services/podcast-feed.js
/**
 * Render a podcast and its episodes as an RSS 2.0 document with iTunes tags.
 *
 * Every free-form value is passed through escapeXML before it is written,
 * including the channel link, the enclosure URL, the duration and the guid.
 * Previously these were interpolated raw, so a URL with a query string such
 * as `?a=1&b=2` produced malformed XML.
 * NOTE(review): escapeXML is defined outside this snippet; this assumes it
 * escapes `&`, `<`, `>` and double quotes (the enclosure URL sits inside a
 * double-quoted attribute) — confirm.
 *
 * @param {object} podcast
 * @param {string} podcast.title
 * @param {string} podcast.url - Written to the channel `<link>`.
 * @param {string} podcast.description
 * @param {string} podcast.author
 * @param {Array<object>} podcast.episodes - Each with `id`, `title`, `audioUrl`,
 *   `fileSize`, `duration`, and `publishedAt` (must provide `toUTCString()`, e.g. a Date).
 * @returns {string} The feed as an XML string.
 */
export function generateRSSFeed(podcast) {
  // String() first so numeric ids/durations can be escaped like any other text.
  const escapeValue = (value) => escapeXML(String(value))

  // Template lines are kept flush-left so the emitted whitespace is unchanged.
  const items = podcast.episodes.map(ep => `
<item>
<title>${escapeXML(ep.title)}</title>
<enclosure url="${escapeValue(ep.audioUrl)}" type="audio/mpeg" length="${ep.fileSize}"/>
<pubDate>${ep.publishedAt.toUTCString()}</pubDate>
<itunes:duration>${escapeValue(ep.duration)}</itunes:duration>
<guid isPermaLink="false">${escapeValue(ep.id)}</guid>
</item>`).join('')

  return `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<channel>
<title>${escapeXML(podcast.title)}</title>
<link>${escapeValue(podcast.url)}</link>
<description>${escapeXML(podcast.description)}</description>
<itunes:author>${escapeXML(podcast.author)}</itunes:author>
${items}
</channel>
</rss>`
}
// services/podcast-feed.test.js
import { XMLParser } from 'fast-xml-parser'
import { generateRSSFeed } from './podcast-feed'
// Shared parser for the feed tests. Attributes are kept (ignoreAttributes: false);
// the assertions below read them through '@_'-prefixed keys such as '@_version'.
const parser = new XMLParser({ ignoreAttributes: false })
test('generates valid RSS 2.0 feed', () => {
  // One fully populated episode is enough to produce an <item>.
  const episode = {
    id: 'ep-1',
    title: 'Episode 1',
    audioUrl: 'https://cdn.example.com/ep1.mp3',
    fileSize: 12345678,
    publishedAt: new Date('2024-01-15T10:00:00Z'),
    duration: '00:30:00',
  }
  const podcast = {
    title: 'Test Podcast',
    url: 'https://example.com',
    description: 'A test podcast',
    author: 'Test Author',
    episodes: [episode],
  }

  const xml = generateRSSFeed(podcast)
  const { rss } = parser.parse(xml)

  expect(rss['@_version']).toBe('2.0')
  expect(rss.channel.title).toBe('Test Podcast')
  expect(rss.channel.item).toBeTruthy()
})
test('escapes special characters in titles', () => {
  const podcast = {
    title: 'Podcast & More: "Testing"',
    // Filler for the other fields generateRSSFeed reads; without `episodes`
    // it would throw on `.map` before producing any output.
    url: 'https://example.com',
    description: 'A test podcast',
    author: 'Test Author',
    episodes: [],
  }
  const feed = generateRSSFeed(podcast)
  // XML special chars should be escaped
  expect(feed).not.toContain('<title>Podcast & More')
  // Look for the entity itself: a bare '&' is part of every entity, so
  // checking for '&' alone would pass whether or not escaping happened.
  expect(feed).toContain('<title>Podcast &amp; More')
})
test('episode has required enclosure element', () => {
const feed = generateRSSFeed(testPodcast)
const parsed = parser.parse(feed)
const item = parsed.rss.channel.item
expect(item.enclosure['@_url']).toContain('.mp3')
expect(item.enclosure['@_type']).toBe('audio/mpeg')
expect(parseInt(item.enclosure['@_length'])).toBeGreaterThan(0)
})Mocking Web Audio API
// test setup
// Single stub objects: mockReturnValue hands the same instance back on every
// createBufferSource()/createGain() call, so tests can inspect what the code under test used.
const bufferSourceStub = {
  buffer: null,
  connect: vi.fn(),
  start: vi.fn(),
  stop: vi.fn(),
  onended: null,
}

const gainNodeStub = {
  gain: { value: 1, setValueAtTime: vi.fn() },
  connect: vi.fn(),
}

const mockAudioContext = {
  createBufferSource: vi.fn().mockReturnValue(bufferSourceStub),
  createGain: vi.fn().mockReturnValue(gainNodeStub),
  decodeAudioData: vi.fn(),
  destination: {},
  currentTime: 0,
  state: 'running',
  resume: vi.fn().mockResolvedValue(undefined),
  suspend: vi.fn().mockResolvedValue(undefined),
}

// Both the standard and the webkit-prefixed constructor produce the shared mock context.
const AudioContextMock = vi.fn().mockImplementation(() => mockAudioContext)
global.AudioContext = AudioContextMock
global.webkitAudioContext = global.AudioContext
test('AudioPlayer loads and plays audio', async () => {
const fakeArrayBuffer = new ArrayBuffer(1024)
const fakePCMData = { duration: 30.5 }
mockAudioContext.decodeAudioData.mockResolvedValue(fakePCMData)
const player = new AudioPlayer()
await player.load('https://example.com/episode.mp3')
player.play()
const source = mockAudioContext.createBufferSource()
expect(source.start).toHaveBeenCalled()
})Waveform Data Testing
// services/waveform.js
/**
 * Reduce decoded audio to a fixed number of peak values for drawing a waveform.
 *
 * Channel 0 is divided into `samples` consecutive buckets that together span
 * the whole signal. Each bucket contributes its largest absolute amplitude,
 * multiplied by 100 and rounded.
 *
 * Bucket edges are computed proportionally rather than from a single
 * `Math.floor(length / samples)` block size. That older scheme ignored every
 * frame past `samples * blockSize` and returned all zeros when there were
 * fewer frames than buckets. Results are unchanged when the frame count is
 * an exact multiple of `samples`.
 *
 * @param {*} audioBuffer - Encoded audio, passed to decodeAudioBuffer as-is.
 * @param {number} [samples=100] - Number of buckets, i.e. entries in the result.
 * @returns {Promise<number[]>} One peak per bucket; all zeros for silent or empty audio.
 *   NOTE(review): values stay within 0-100 only if amplitudes are within [-1, 1] — confirm for your decoder.
 */
export async function generateWaveform(audioBuffer, samples = 100) {
  const audioData = await decodeAudioBuffer(audioBuffer)
  const channelData = audioData.getChannelData(0)
  const frameCount = channelData.length
  const waveform = []

  for (let i = 0; i < samples; i++) {
    const start = Math.floor((i * frameCount) / samples)
    const proportionalEnd = Math.floor(((i + 1) * frameCount) / samples)
    // Give every bucket at least one frame when the audio is shorter than
    // `samples`, but never read past the end (or at all, if there are no frames).
    const end = Math.min(frameCount, Math.max(start + 1, proportionalEnd))

    let peak = 0
    for (let j = start; j < end; j++) {
      peak = Math.max(peak, Math.abs(channelData[j]))
    }
    waveform.push(Math.round(peak * 100))
  }
  return waveform
}
test('generates waveform with correct sample count', async () => {
  const sampleCount = 100
  const peaks = await generateWaveform(testAudioBuffer, sampleCount)

  expect(peaks).toHaveLength(sampleCount)
  // Every entry must be a value from 0 to 100 inclusive.
  const withinRange = peaks.every(peak => peak >= 0 && peak <= 100)
  expect(withinRange).toBe(true)
})
test('silent audio generates zero waveform', async () => {
  const silence = createSilentAudioBuffer(10) // 10 seconds silence
  const peaks = await generateWaveform(silence)

  // Silence should produce a flat line: every entry exactly 0.
  const isFlat = peaks.every(peak => peak === 0)
  expect(isFlat).toBe(true)
})

Summary
Audio testing approaches:
- Format conversion — FFmpeg integration tests, assert on metadata
- Duration validation — input/output duration within tolerance
- Transcription — mock external APIs (Whisper) in unit tests
- RSS/Podcast feeds — XML validation, required fields, entity escaping
- Web Audio API — mock AudioContext for player logic tests
- Waveform generation — unit test the algorithm with synthetic data
For CI, generate short test audio files (2-5 seconds silence/tone) with FFmpeg rather than committing real audio files. Silence is sufficient for format conversion and metadata tests.