/**
 * Optional settings for Inworld STT transcription.
 *
 * Every member may be omitted; where a default is documented it is noted
 * on the individual member.
 */
interface InworldSTTOptions {
  /** Wire-format encoding sent to Inworld. Default: `LINEAR16`. */
  audio_encoding?: "LINEAR16" | "AUTO_DETECT";

  /** Turns Voice Profile detection on or off. Default: `true`. */
  enable_voice_profile?: boolean;

  /** End-of-turn confidence threshold, in the range 0.0–1.0. Default: `0.5`. */
  end_of_turn_confidence_threshold?: number;

  /**
   * Number of seconds of silence after which transcription stops.
   * A value of `0` disables the timeout.
   */
  inactivity_timeout_seconds?: number;

  /** Turns word-level timestamps on or off. Default: `true`. */
  include_word_timestamps?: boolean;

  /** Minimum end-of-turn silence, in milliseconds, when confident. */
  min_end_of_turn_silence_when_confident?: number;

  /** Domain-specific contextual hints passed to the model. */
  prompts?: string[];

  /** VAD threshold, in the range 0.0–1.0. Default: `0.5`. */
  vad_threshold?: number;

  /**
   * Maximum number of labels per category in voice-profile responses
   * (1–20). Default: `10`.
   */
  voice_profile_top_n?: number;
}

Properties

audio_encoding?: "LINEAR16" | "AUTO_DETECT"

Wire-format encoding sent to Inworld. Default: LINEAR16.

enable_voice_profile?: boolean

Enable Voice Profile detection. Default: true.

end_of_turn_confidence_threshold?: number

End-of-turn confidence threshold (0.0–1.0). Default: 0.5.

inactivity_timeout_seconds?: number

Stop transcription after this many seconds of silence; 0 disables.

include_word_timestamps?: boolean

Enable word-level timestamps. Default: true.

min_end_of_turn_silence_when_confident?: number

Minimum silence, in milliseconds, required to end a turn when the end-of-turn prediction is confident.

prompts?: string[]

Domain-specific contextual hints passed to the model.

vad_threshold?: number

Voice activity detection (VAD) threshold (0.0–1.0). Default: 0.5.

voice_profile_top_n?: number

Max labels per category in voice-profile responses (1–20). Default: 10.