Configuration options for the speech-to-text (STT) client.

/**
 * Configuration options for speech-to-text (STT).
 *
 * Only `apiKey` and `vadThreshold` are optional; every other field is
 * required by this type.
 */
interface STTOptions {
  // --- Connection -----------------------------------------------------------

  /** Inworld API key. Defaults to `$INWORLD_API_KEY`. */
  apiKey?: string;
  /** Base URL for the REST API. Default: `'https://api.inworld.ai/'`. */
  baseURL: string;
  /** Base URL for the WebSocket API. Default: `'wss://api.inworld.ai/'`. */
  wsURL: string;

  // --- Recognition ----------------------------------------------------------

  /** Model to use. Default: `'inworld/inworld-stt-1'`. */
  model: string;
  /** BCP-47 language tag. Default: `'en-US'`. */
  language: string;

  // --- Input audio ----------------------------------------------------------

  /** Input audio sample rate in Hz. Default: `16000`. */
  sampleRate: number;
  /** Number of audio channels. Default: `1`. */
  numChannels: number;

  // --- Turn detection -------------------------------------------------------

  /** VAD activity threshold (0–1). Omit to use the server default. */
  vadThreshold?: number;
  /** Confidence threshold for end-of-turn detection. Default: `0.3`. */
  endOfTurnConfidenceThreshold: number;
  /**
   * Minimum silence in ms before committing end-of-turn when confidence is
   * high. Default: `200`.
   */
  minEndOfTurnSilenceWhenConfident: number;

  // --- Voice profiling ------------------------------------------------------

  /**
   * Enable acoustic voice profiling (emotion, accent, age, pitch, style).
   * Default: `true`.
   */
  enableVoiceProfile: boolean;
  /**
   * Number of top candidates to return per voice profile dimension.
   * Default: `1`.
   */
  voiceProfileTopN: number;
}

Properties

apiKey?: string

Inworld API key. Defaults to the value of the INWORLD_API_KEY environment variable.

baseURL: string

Base URL for the REST API. Default: 'https://api.inworld.ai/'.

enableVoiceProfile: boolean

Enable acoustic voice profiling (emotion, accent, age, pitch, style). Default: true.

endOfTurnConfidenceThreshold: number

Confidence threshold for end-of-turn detection. Default: 0.3.

language: string

BCP-47 language tag. Default: 'en-US'.

minEndOfTurnSilenceWhenConfident: number

Minimum silence, in milliseconds, required before committing an end-of-turn when the end-of-turn confidence is high. Default: 200.

model: string

Model to use. Default: 'inworld/inworld-stt-1'.

numChannels: number

Number of audio channels. Default: 1.

sampleRate: number

Input audio sample rate in Hz. Default: 16000.

vadThreshold?: number

Voice activity detection (VAD) threshold, from 0 to 1. Omit to use the server default.

voiceProfileTopN: number

Number of top candidates to return per voice profile dimension. Default: 1.

wsURL: string

Base URL for the WebSocket API. Default: 'wss://api.inworld.ai/'.