/**
 * Options accepted by the speech-to-text (STT) integration described on this
 * page. Required members have no `?`; every other member may be omitted.
 * Only members whose meaning is documented on this page carry a comment.
 */
interface STTOptions {
    apiKey?: string;
    baseUrl: string;
    /**
     * How large each chunk of audio is, in milliseconds, before it is sent to
     * AssemblyAI. Counterpart of Python's `buffer_size_seconds` (seconds
     * there, milliseconds here).
     */
    bufferSizeMs: number;
    domain?: string;
    encoding: STTEncoding;
    endOfTurnConfidenceThreshold?: number;
    formatTurns?: boolean;
    keytermsPrompt?: string[];
    languageDetection?: boolean;
    maxSpeakers?: number;
    /** Maximum silence (ms) before end-of-turn is forced regardless of confidence. */
    maxTurnSilence?: number;
    /** Minimum silence (ms) before a confident end-of-turn is finalized. */
    minTurnSilence?: number;
    /** Only supported with the u3-rt-pro model. */
    prompt?: string;
    sampleRate: number;
    /**
     * Enable speaker diarization. The setting takes effect server-side, but
     * speaker labels are not currently surfaced on emitted events because
     * `stt.SpeechData` does not yet expose a `speakerId` field.
     */
    speakerLabels?: boolean;
    speechModel: STTModels;
    vadThreshold?: number;
}

Properties

apiKey?: string
baseUrl: string
bufferSizeMs: number

The size of each audio chunk, in milliseconds, that is buffered before being sent to AssemblyAI. Corresponds to Python's buffer_size_seconds; the Python option is expressed in seconds, whereas this one is in milliseconds, per this repo's time-unit convention.

domain?: string
encoding: STTEncoding
endOfTurnConfidenceThreshold?: number
formatTurns?: boolean
keytermsPrompt?: string[]
languageDetection?: boolean
maxSpeakers?: number
maxTurnSilence?: number

Maximum silence (ms) before end-of-turn is forced regardless of confidence.

minTurnSilence?: number

Minimum silence (ms) before a confident end-of-turn is finalized.

prompt?: string

Only supported with the u3-rt-pro model.

sampleRate: number
speakerLabels?: boolean

Enable speaker diarization. Note: AssemblyAI returns per-word speaker labels, but the JS framework's stt.SpeechData type does not yet expose a speakerId field (unlike the Python framework), so the labels are not currently surfaced on emitted events. Setting this to true still takes effect server-side. Once the base SpeechData interface gains speaker support, #processStreamEvent should also forward data.words[].speaker.

speechModel: STTModels
vadThreshold?: number