Audio Testing: How to Test Audio Processing, Streaming, and Playback

Audio Testing: How to Test Audio Processing, Streaming, and Playback

Audio applications span podcasts, voice messages, music processing, and real-time communication. Testing audio is harder than testing images because you can't easily "see" the output — instead, you assert on metadata, waveform properties, or transcriptions.

Testing Audio Format Conversion

// services/audio-processor.js
import { execFile } from 'child_process'
import { promisify } from 'util'
import path from 'path'

const execFileAsync = promisify(execFile)

/**
 * Transcode an audio file to MP3 by running `ffmpeg` as a child process.
 *
 * @param {string} inputPath - Source audio file, passed to ffmpeg's `-i` option.
 * @param {string} outputPath - Destination file; `-y` tells ffmpeg to overwrite it without prompting.
 * @returns {Promise<void>} Settles when the `execFileAsync` call settles.
 */
export async function convertToMP3(inputPath, outputPath) {
  const encoderOptions = ['-codec:a', 'libmp3lame', '-qscale:a', '2'] // VBR quality 2 (~190kbps)
  const ffmpegArgs = ['-i', inputPath, ...encoderOptions, '-y', outputPath]

  await execFileAsync('ffmpeg', ffmpegArgs)
}

/**
 * Inspect a media file with `ffprobe` and return its JSON report as an object.
 *
 * @param {string} filePath - File to probe.
 * @returns {Promise<object>} Parsed ffprobe output; the `-show_format` and
 *   `-show_streams` flags request the `format` and `streams` sections.
 * @throws {SyntaxError} If ffprobe's stdout is not valid JSON.
 */
export async function getAudioMetadata(filePath) {
  const outputOptions = ['-v', 'quiet', '-print_format', 'json']
  const sections = ['-show_format', '-show_streams']

  const probe = await execFileAsync('ffprobe', [...outputOptions, ...sections, filePath])
  return JSON.parse(probe.stdout)
}
// services/audio-processor.test.js
import fs from 'fs'
import path from 'path'
import os from 'os'
import { convertToMP3, getAudioMetadata } from './audio-processor'

// Scratch directory for the current test; recreated before and removed after each one.
let outputDir

beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'audio-test-')
  outputDir = fs.mkdtempSync(prefix)
})

afterEach(() => {
  const removeOptions = { recursive: true }
  fs.rmSync(outputDir, removeOptions)
})

// The converted file must exist and its first stream must be reported as MP3 audio.
test('converts WAV to MP3', async () => {
  const source = 'tests/fixtures/test-5s.wav'
  const target = path.join(outputDir, 'output.mp3')

  await convertToMP3(source, target)
  expect(fs.existsSync(target)).toBe(true)

  const { streams } = await getAudioMetadata(target)
  const { codec_name: codecName, codec_type: codecType } = streams[0]

  expect(codecName).toBe('mp3')
  expect(codecType).toBe('audio')
})

// Conversion must not change the clip length by 0.1 seconds or more.
test('output duration matches input', async () => {
  const source = 'tests/fixtures/test-5s.wav'
  const target = path.join(outputDir, 'output.mp3')
  // ffprobe output carries the duration as a string, hence the parse
  const durationOf = (meta) => parseFloat(meta.format.duration)

  const sourceMeta = await getAudioMetadata(source)
  await convertToMP3(source, target)
  const targetMeta = await getAudioMetadata(target)

  const sourceSeconds = durationOf(sourceMeta)
  const targetSeconds = durationOf(targetMeta)
  const drift = Math.abs(targetSeconds - sourceSeconds)

  expect(drift).toBeLessThan(0.1)
})

Testing Speech-to-Text

// services/transcription.js
import OpenAI from 'openai'

// File extension to use for the upload name, keyed by base MIME type.
// A Map (rather than an object) keeps inherited keys such as "constructor" from matching.
const EXTENSION_BY_MIME_TYPE = new Map([
  ['audio/wav', 'wav'],
  ['audio/wave', 'wav'],
  ['audio/x-wav', 'wav'],
  ['audio/mpeg', 'mp3'],
  ['audio/mp3', 'mp3'],
  ['audio/mp4', 'mp4'],
  ['audio/m4a', 'm4a'],
  ['audio/x-m4a', 'm4a'],
  ['audio/ogg', 'ogg'],
  ['audio/webm', 'webm'],
  ['audio/flac', 'flac'],
  ['audio/x-flac', 'flac'],
])

/**
 * Send audio to the OpenAI transcription endpoint and return the recognized text.
 *
 * @param {Buffer|ArrayBuffer|Uint8Array} audioBuffer - Raw bytes of the audio file.
 * @param {string} [mimeType='audio/wav'] - MIME type of the audio; also selects the
 *   extension of the upload name. Unrecognized types keep the previous `audio.wav` name.
 * @returns {Promise<string>} The `text` field of the transcription response.
 */
export async function transcribeAudio(audioBuffer, mimeType = 'audio/wav') {
  const openai = new OpenAI()

  // Drop parameters such as "; codecs=opus" before the lookup.
  const baseType = String(mimeType).split(';')[0].trim().toLowerCase()
  const extension = EXTENSION_BY_MIME_TYPE.get(baseType) ?? 'wav'

  // NOTE(review): the name previously said ".wav" for every MIME type. The API is
  // understood to take the audio format from the file name — confirm against the OpenAI docs.
  const transcription = await openai.audio.transcriptions.create({
    file: new File([audioBuffer], `audio.${extension}`, { type: mimeType }),
    model: 'whisper-1',
  })

  return transcription.text
}

// Unit test: mock the OpenAI SDK module
vi.mock('openai')

// transcribeAudio must return the response text and request the whisper-1 model.
test('calls Whisper API with correct parameters', async () => {
  const createTranscription = vi.fn().mockResolvedValue({ text: 'Hello world' })
  const fakeAudioApi = { transcriptions: { create: createTranscription } }
  OpenAI.prototype.audio = fakeAudioApi

  const text = await transcribeAudio(Buffer.from('fake-wav-data'))

  expect(text).toBe('Hello world')
  const expectedParams = expect.objectContaining({ model: 'whisper-1' })
  expect(createTranscription).toHaveBeenCalledWith(expectedParams)
})

Testing Podcast Feed Generation

// services/podcast-feed.js
/**
 * Render a podcast and its episodes as an RSS 2.0 document with iTunes tags.
 *
 * Every caller-supplied value is passed through `escapeXML`, including URLs and
 * ids: a URL with a query string contains `&`, which is not valid in XML unescaped.
 *
 * @param {object} podcast
 * @param {string} podcast.title
 * @param {string} podcast.url
 * @param {string} podcast.description
 * @param {string} podcast.author
 * @param {Array<{id: *, title: string, audioUrl: string, fileSize: *, publishedAt: Date, duration: *}>} podcast.episodes
 * @returns {string} The feed as an XML string.
 */
export function generateRSSFeed(podcast) {
  // String() mirrors what template interpolation did to these values before,
  // so a numeric id or size still reaches escapeXML as a string.
  // NOTE(review): assumes escapeXML also escapes double quotes, which the
  // enclosure attributes need — confirm against its definition.
  const xml = (value) => escapeXML(String(value))

  return `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  <channel>
    <title>${escapeXML(podcast.title)}</title>
    <link>${xml(podcast.url)}</link>
    <description>${escapeXML(podcast.description)}</description>
    <itunes:author>${escapeXML(podcast.author)}</itunes:author>
    ${podcast.episodes.map(ep => `
    <item>
      <title>${escapeXML(ep.title)}</title>
      <enclosure url="${xml(ep.audioUrl)}" type="audio/mpeg" length="${xml(ep.fileSize)}"/>
      <pubDate>${ep.publishedAt.toUTCString()}</pubDate>
      <itunes:duration>${xml(ep.duration)}</itunes:duration>
      <guid isPermaLink="false">${xml(ep.id)}</guid>
    </item>`).join('')}
  </channel>
</rss>`
}
// services/podcast-feed.test.js
import { XMLParser } from 'fast-xml-parser'
import { generateRSSFeed } from './podcast-feed'

// Keep attributes when parsing; the assertions read them under the "@_" prefix.
const parser = new XMLParser({ ignoreAttributes: false })

// A one-episode podcast must parse back as RSS 2.0 with its title and an item.
test('generates valid RSS 2.0 feed', () => {
  const episode = {
    id: 'ep-1',
    title: 'Episode 1',
    audioUrl: 'https://cdn.example.com/ep1.mp3',
    fileSize: 12345678,
    publishedAt: new Date('2024-01-15T10:00:00Z'),
    duration: '00:30:00',
  }
  const podcast = {
    title: 'Test Podcast',
    url: 'https://example.com',
    description: 'A test podcast',
    author: 'Test Author',
    episodes: [episode],
  }

  const { rss } = parser.parse(generateRSSFeed(podcast))

  expect(rss['@_version']).toBe('2.0')
  expect(rss.channel.title).toBe('Test Podcast')
  expect(rss.channel.item).toBeTruthy()
})

// The raw "&" in the title must come out as an XML entity.
test('escapes special characters in titles', () => {
  // Every field generateRSSFeed reads is supplied; without `episodes` the
  // generator throws before any assertion can run.
  const podcast = {
    title: 'Podcast & More: "Testing"',
    url: 'https://example.com',
    description: 'A test podcast',
    author: 'Test Author',
    episodes: [],
  }

  const feed = generateRSSFeed(podcast)

  // XML special chars should be escaped
  expect(feed).not.toContain('<title>Podcast & More')
  // Pin the entity to the title itself, not just anywhere in the document
  expect(feed).toContain('<title>Podcast &amp; More')
})

// The first episode must carry an <enclosure> with an MP3 URL, MIME type and positive length.
test('episode has required enclosure element', () => {
  const feed = generateRSSFeed(testPodcast)
  const parsed = parser.parse(feed)

  // Guard against `item` being an array when the feed has several episodes
  // (TODO confirm against fast-xml-parser); either way, look at the first one.
  const items = parsed.rss.channel.item
  const item = Array.isArray(items) ? items[0] : items

  expect(item.enclosure['@_url']).toContain('.mp3')
  expect(item.enclosure['@_type']).toBe('audio/mpeg')
  // Explicit radix: the length attribute is a decimal byte count
  expect(Number.parseInt(item.enclosure['@_length'], 10)).toBeGreaterThan(0)
})

Mocking Web Audio API

// test setup: stand-ins for the Web Audio objects the player touches

// Single source node handed back by every createBufferSource() call.
const mockBufferSource = {
  buffer: null,
  connect: vi.fn(),
  start: vi.fn(),
  stop: vi.fn(),
  onended: null,
}

// Single gain node handed back by every createGain() call.
const mockGainNode = {
  gain: { value: 1, setValueAtTime: vi.fn() },
  connect: vi.fn(),
}

const mockAudioContext = {
  createBufferSource: vi.fn().mockReturnValue(mockBufferSource),
  createGain: vi.fn().mockReturnValue(mockGainNode),
  decodeAudioData: vi.fn(),
  destination: {},
  currentTime: 0,
  state: 'running',
  resume: vi.fn().mockResolvedValue(undefined),
  suspend: vi.fn().mockResolvedValue(undefined),
}

// Both the standard and the webkit-prefixed constructor yield the shared mock context.
const MockAudioContextConstructor = vi.fn().mockImplementation(() => mockAudioContext)
global.AudioContext = MockAudioContextConstructor
global.webkitAudioContext = MockAudioContextConstructor

// Loading then playing must start a buffer source created through the audio context.
test('AudioPlayer loads and plays audio', async () => {
  const fakePCMData = { duration: 30.5 }

  mockAudioContext.decodeAudioData.mockResolvedValue(fakePCMData)

  const player = new AudioPlayer()
  await player.load('https://example.com/episode.mp3')
  player.play()

  // Read back the node the player itself was given instead of calling
  // createBufferSource() again, which would add a call of our own to the mock.
  expect(mockAudioContext.createBufferSource).toHaveBeenCalled()
  const source = mockAudioContext.createBufferSource.mock.results.at(-1).value
  expect(source.start).toHaveBeenCalled()
})

Waveform Data Testing

// services/waveform.js
/**
 * Reduce the first channel of decoded audio to a list of peak values for drawing.
 *
 * The channel is cut into `samples` consecutive blocks of equal size and each
 * block contributes its largest absolute amplitude, scaled by 100 and rounded.
 * Frames left over after the last full block are ignored. When the channel has
 * fewer frames than `samples`, each frame becomes its own entry and the
 * remaining entries are 0.
 *
 * @param {*} audioBuffer - Encoded audio, passed straight to `decodeAudioBuffer`.
 * @param {number} [samples=100] - Number of entries to produce.
 * @returns {Promise<number[]>} `samples` rounded peak values (0–100 for amplitudes within [-1, 1]).
 */
export async function generateWaveform(audioBuffer, samples = 100) {
  const audioData = await decodeAudioBuffer(audioBuffer)
  const channelData = audioData.getChannelData(0)
  // At least one frame per block: a floor of 0 would make every block empty
  // and report silence for short clips.
  const blockSize = Math.max(1, Math.floor(channelData.length / samples))

  const waveform = []
  for (let i = 0; i < samples; i++) {
    const start = i * blockSize
    // Never read past the end of the channel
    const end = Math.min(start + blockSize, channelData.length)
    let max = 0
    for (let j = start; j < end; j++) {
      max = Math.max(max, Math.abs(channelData[j]))
    }
    waveform.push(Math.round(max * 100))
  }

  return waveform
}

// Asking for 100 samples must yield 100 values, each between 0 and 100.
test('generates waveform with correct sample count', async () => {
  const sampleCount = 100
  const peaks = await generateWaveform(testAudioBuffer, sampleCount)

  expect(peaks).toHaveLength(sampleCount)

  const allInRange = peaks.every(peak => peak >= 0 && peak <= 100)
  expect(allInRange).toBe(true)
})

// Silence in must give an all-zero waveform out.
test('silent audio generates zero waveform', async () => {
  const tenSecondsOfSilence = createSilentAudioBuffer(10)
  const peaks = await generateWaveform(tenSecondsOfSilence)

  const isFlat = peaks.every(peak => peak === 0)
  expect(isFlat).toBe(true)
})

Summary

Audio testing approaches:

  • Format conversion — FFmpeg integration tests, assert on metadata
  • Duration validation — input/output duration within tolerance
  • Transcription — mock external APIs (Whisper) in unit tests
  • RSS/Podcast feeds — XML validation, required fields, entity escaping
  • Web Audio API — mock AudioContext for player logic tests
  • Waveform generation — unit test the algorithm with synthetic data

For CI, generate short test audio files (2–5 seconds of silence or a simple tone) with FFmpeg rather than committing real audio files. Silence is sufficient for format conversion and metadata tests.

Read more