Speaker Intelligence
Speaker Diarization
Set enable_diarization=True to label each segment with its speaker.
Python SDK
python
config = TranscriptionConfig(model="zero-indic", enable_diarization=True)
result = await client.asr.transcribe("call.wav", config=config)
print(result.text)
# [SPEAKER_00] नमस्ते, आप कैसे हैं [SPEAKER_01] मैं ठीक हूँ धन्यवाद
for seg in result.segments:
    print(f"[{seg.start:.1f}s-{seg.end:.1f}s] [{seg.speaker}] {seg.text}")
# [0.5s-3.2s] [SPEAKER_00] नमस्ते, आप कैसे हैं
# [4.1s-6.8s] [SPEAKER_01] मैं ठीक हूँ धन्यवाद
print(result.speakers)
# ['SPEAKER_00', 'SPEAKER_01']
REST API
terminal
curl -X POST https://asr.shunyalabs.ai/v1/audio/transcriptions \
-H "Authorization: Bearer <API_KEY>" \
-F "file=@call.wav" \
-F "model=zero-indic" \
-F "enable_diarization=true"
Output
json
{
"text": "[SPEAKER_00] नमस्ते, आप कैसे हैं [SPEAKER_01] मैं ठीक हूँ धन्यवाद",
"segments": [
{ "start": 0.5, "end": 3.2, "text": "नमस्ते, आप कैसे हैं", "speaker": "SPEAKER_00" },
{ "start": 4.1, "end": 6.8, "text": "मैं ठीक हूँ धन्यवाद", "speaker": "SPEAKER_01" }
],
"speakers": ["SPEAKER_00", "SPEAKER_01"]
}