Quickstart

Convert live audio streams into accurate text transcriptions in real time, with sub-second latency.

Overview

By the end of this tutorial, you'll be able to transcribe live audio streams using our WebSocket API.

Get Your API Key

To access the API, you’ll need an API key. You can generate one in the console here [link]. Store the key as a managed secret rather than hard-coding it in your source.

Step 1: Establish WebSocket connection

// Open a WebSocket connection to the transcription service.
const ws = new WebSocket('wss://tl.shunyalabs.ai/');

// Fired once the connection has been established.
ws.onopen = () => {
  console.log('Connected to transcription service');
};

// Each incoming message is parsed as JSON and logged.
ws.onmessage = (event) => {
  let result;
  try {
    result = JSON.parse(event.data);
  } catch (err) {
    // A malformed or non-JSON frame must not throw out of the handler;
    // report it and keep the connection usable for later messages.
    console.error('Failed to parse transcription message:', err);
    return;
  }
  console.log('Transcription:', result);
};

// Fired when the connection encounters an error.
ws.onerror = (error) => {
  console.error('WebSocket error:', error);
};

// Fired when the connection is closed by either side.
ws.onclose = () => {
  console.log('Connection closed');
};

Step 2: Initialize transcription session

// Build the initialization message that configures the session.
const initMessage = JSON.stringify({
  action: 'send',
  type: 'init',
  config: {
    language: 'en',
    api_key: 'YOUR_API_KEY_HERE'
  }
});

// WebSocket.send() throws an InvalidStateError while the socket is still
// connecting, so send right away only if the connection is already open;
// otherwise wait for the 'open' event. addEventListener is used so that an
// existing ws.onopen handler is not replaced.
if (ws.readyState === WebSocket.OPEN) {
  ws.send(initMessage);
} else {
  ws.addEventListener('open', () => ws.send(initMessage), { once: true });
}

Step 3: Send audio data

// Sequence number assigned to the next audio frame; starts at 0.
let frameCounter = 0;

/**
 * Base64-encode one chunk of audio and send it over the socket as a
 * JSON 'frame' message, tagging it with the next sequence number.
 *
 * @param {ArrayBuffer} audioBuffer - Audio bytes for this chunk.
 */
function sendAudioChunk(audioBuffer) {
  const encodedAudio = arrayBufferToBase64(audioBuffer);

  // Take the current sequence number, then advance the counter.
  const sequence = frameCounter;
  frameCounter += 1;

  const frameMessage = {
    action: 'send',
    type: 'frame',
    frame_seq: sequence,
    audio_inline_b64: encodedAudio,
    dtype: 'float32',
    channels: 1,
    sr: 16000
  };

  ws.send(JSON.stringify(frameMessage));
}

/**
 * Helper: encode raw bytes as a base64 string.
 *
 * Accepts either an ArrayBuffer or a view over one (e.g. Float32Array,
 * Uint8Array, DataView). For a view, the underlying bytes of exactly the
 * viewed region are encoded; the element values are never numerically
 * converted to bytes.
 *
 * @param {ArrayBuffer|ArrayBufferView} buffer - Bytes to encode.
 * @returns {string} Base64 representation of the bytes.
 */
function arrayBufferToBase64(buffer) {
  // new Uint8Array(typedArray) copies element VALUES (a Float32Array sample
  // of 0.5 would become the byte 0), so reinterpret a view's memory instead.
  const bytes = ArrayBuffer.isView(buffer)
    ? new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength)
    : new Uint8Array(buffer);

  // Convert in bounded chunks: one call per chunk instead of one per byte,
  // while keeping the spread argument count well below engine limits.
  const CHUNK_SIZE = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
  }
  return btoa(binary);
}