Module livekit.plugins.murf

Murf AI plugin for LiveKit Agents

Classes

class ChunkedStream (*,
tts: TTS,
input_text: str,
conn_options: APIConnectOptions)
Expand source code
class ChunkedStream(tts.ChunkedStream):
    """Synthesize a complete text input via Murf's HTTP chunked streaming endpoint."""

    def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts: TTS = tts
        # Snapshot the options so a later update_options() on the TTS instance
        # does not affect this in-flight request.
        self._opts = replace(tts._opts)

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        request_id = utils.shortuuid()

        payload = {
            "text": self._input_text,
            "model": self._opts.model,
            "multiNativeLocale": self._opts.locale,
            "voice_id": self._opts.voice,
            "style": self._opts.style,
            "rate": self._opts.speed,
            "pitch": self._opts.pitch,
            "format": self._opts.encoding,
            "sample_rate": self._opts.sample_rate,
        }
        # Overall 30s cap for the whole request; connect phase bounded by conn_options.
        request_timeout = aiohttp.ClientTimeout(
            total=30, sock_connect=self._conn_options.timeout
        )

        try:
            session = self._tts._ensure_session()
            async with session.post(
                self._opts.get_http_url("/v1/speech/stream"),
                headers={API_AUTH_HEADER: self._opts.api_key},
                json=payload,
                timeout=request_timeout,
            ) as response:
                response.raise_for_status()

                # Only initialize the emitter once we know the request succeeded.
                output_emitter.initialize(
                    request_id=request_id,
                    sample_rate=self._opts.sample_rate,
                    num_channels=1,
                    mime_type="audio/pcm",
                )

                async for chunk, _ in response.content.iter_chunks():
                    output_emitter.push(chunk)

                output_emitter.flush()
        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message, status_code=e.status, request_id=None, body=None
            ) from None
        except Exception as e:
            raise APIConnectionError() from e

Synthesize chunked text using the http streaming output endpoint

Ancestors

  • livekit.agents.tts.tts.ChunkedStream
  • abc.ABC
class TTS (*,
api_key: str | None = None,
model: TTSModels | str = 'FALCON',
locale: TTSLocales | str | None = None,
voice: str = 'en-US-matthew',
style: TTSStyles | str | None = None,
speed: int | None = None,
pitch: int | None = None,
sample_rate: int = 24000,
encoding: TTSEncoding = 'pcm',
base_url: str = 'https://global.api.murf.ai',
http_session: aiohttp.ClientSession | None = None,
tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
text_pacing: tts.SentenceStreamPacer | bool = False,
min_buffer_size: int = 3,
max_buffer_delay_in_ms: int = 0,
streaming: bool = True)
Expand source code
class TTS(tts.TTS):
    def __init__(
        self,
        *,
        api_key: str | None = None,
        model: TTSModels | str = "FALCON",
        locale: TTSLocales | str | None = None,
        voice: str = TTSDefaultVoiceId,
        style: TTSStyles | str | None = None,
        speed: int | None = None,
        pitch: int | None = None,
        sample_rate: int = 24000,
        encoding: TTSEncoding = "pcm",
        base_url: str = "https://global.api.murf.ai",
        http_session: aiohttp.ClientSession | None = None,
        tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
        text_pacing: tts.SentenceStreamPacer | bool = False,
        min_buffer_size: int = 3,
        max_buffer_delay_in_ms: int = 0,
        streaming: bool = True,
    ) -> None:
        """
        Create a new instance of Murf AI TTS.

        See https://murf.ai/api/docs/api-reference/text-to-speech/stream-input for more details on the Murf AI API.

        Args:
            api_key (str | None, optional): The Murf AI API key. If not provided, it will be read from the MURF_API_KEY environment variable.
            model (TTSModels | str, optional): The Murf AI TTS model to use. Defaults to "FALCON".
            locale (str | None, optional): The locale for synthesis (e.g., "en-US", "en-UK"). If not provided, will be inferred from voice.
            voice (str, optional): The voice ID from Murf AI's voice library (e.g., "en-US-matthew"). Defaults to TTSDefaultVoiceId.
            style (TTSStyles | str | None, optional): The voice style to apply (e.g., "Conversation"). Can be None for default style.
            speed (int | None, optional): The speech speed control. Higher values = faster speech. None for default speed.
            pitch (int | None, optional): The speech pitch control. Higher values = higher pitch. None for default pitch.
            sample_rate (int, optional): The audio sample rate in Hz. Defaults to 24000.
            encoding (str, optional): The audio encoding format. Defaults to "pcm".
            http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
            base_url (str, optional): The base URL for the Murf AI API. Defaults to "https://global.api.murf.ai".
            tokenizer (tokenize.SentenceTokenizer, optional): The tokenizer to use. Defaults to tokenize.blingfire.SentenceTokenizer(min_sentence_len=min_buffer_size).
            text_pacing (tts.SentenceStreamPacer | bool, optional): Stream pacer for the TTS. Set to True to use the default pacer, False to disable.
            min_buffer_size (int, optional): Minimum characters to buffer before sending text to audio when no sentence boundary is detected. Higher values improve quality; lower values reduce TTFB. Defaults to 3.
            max_buffer_delay_in_ms (int, optional): Maximum wait time before sending buffered text if min_buffer_size isn't reached. Defaults to 0.
            streaming (bool, optional): If True, uses WebSocket streaming for real-time audio. If False, uses HTTP requests. Defaults to True.
        """  # noqa: E501

        self._streaming = streaming

        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=streaming),
            sample_rate=sample_rate,
            num_channels=1,
        )

        murf_api_key = api_key or os.environ.get("MURF_API_KEY")
        if not murf_api_key:
            raise ValueError("MURF_API_KEY must be set")

        self._opts = _TTSOptions(
            api_key=murf_api_key,
            model=model,
            locale=locale,
            voice=voice,
            style=style or TTSDefaultVoiceStyle,
            speed=speed,
            pitch=pitch,
            sample_rate=sample_rate,
            encoding=encoding,
            base_url=base_url,
            min_buffer_size=min_buffer_size,
            max_buffer_delay_in_ms=max_buffer_delay_in_ms,
        )
        self._session = http_session
        # Pooled WebSocket connections for streaming synthesis; each connection is
        # recycled after 300s, and fetching one from the pool refreshes its timer.
        self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
            connect_cb=self._connect_ws,
            close_cb=self._close_ws,
            max_session_duration=300,
            mark_refreshed_on_get=True,
        )
        # WeakSet so finished streams can be garbage-collected without explicit removal.
        self._streams = weakref.WeakSet[SynthesizeStream]()
        self._sentence_tokenizer = (
            tokenizer
            if is_given(tokenizer)
            else tokenize.blingfire.SentenceTokenizer(min_sentence_len=min_buffer_size)
        )
        self._stream_pacer: tts.SentenceStreamPacer | None = None
        if text_pacing is True:
            self._stream_pacer = tts.SentenceStreamPacer()
        elif isinstance(text_pacing, tts.SentenceStreamPacer):
            self._stream_pacer = text_pacing

    @property
    def model(self) -> str:
        """The configured Murf model identifier (e.g. "FALCON")."""
        return self._opts.model

    @property
    def provider(self) -> str:
        """The TTS provider name."""
        return "Murf"

    async def _connect_ws(self, timeout: float) -> aiohttp.ClientWebSocketResponse:
        """Open a new WebSocket to the Murf stream-input endpoint (used by the pool)."""
        session = self._ensure_session()
        # NOTE(review): the API key is passed as a query parameter here rather than a
        # header — confirm this matches Murf's documented WebSocket auth scheme.
        url = self._opts.get_ws_url(
            f"/v1/speech/stream-input?api-key={self._opts.api_key}&sample_rate={self._opts.sample_rate}&format={self._opts.encoding}&model={self._opts.model}"
        )
        return await asyncio.wait_for(session.ws_connect(url), timeout)

    async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
        """Close a pooled WebSocket connection (used by the pool)."""
        await ws.close()

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the user-supplied HTTP session, creating one lazily if none was given."""
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    def prewarm(self) -> None:
        """Eagerly establish a pooled WebSocket connection to reduce first-request latency."""
        self._pool.prewarm()

    def update_options(
        self,
        *,
        locale: NotGivenOr[str] = NOT_GIVEN,
        voice: NotGivenOr[str] = NOT_GIVEN,
        style: NotGivenOr[str | None] = NOT_GIVEN,
        speed: NotGivenOr[int | None] = NOT_GIVEN,
        pitch: NotGivenOr[int | None] = NOT_GIVEN,
    ) -> None:
        """
        Update the Text-to-Speech (TTS) configuration options.

        This method allows updating the TTS settings, including locale, voice, style,
        speed and pitch. If any parameter is not provided, the existing value will be retained.

        Args:
            locale (str, optional): The locale for synthesis (e.g., "en-US", "en-UK").
            voice (str, optional): The voice ID from Murf AI's voice library. (e.g. "en-US-matthew")
            style (str | None, optional): The voice style to apply (e.g., "Conversation", "Promo").
            speed (int | None, optional): Controls the speech speed. Positive values increase speed, negative values decrease it. Valid range: -50 to 50.
            pitch (int | None, optional): Controls the speech pitch. Positive values raise pitch, negative values lower it. Valid range: -50 to 50.
        """
        # is_given() distinguishes "argument omitted" from an explicit None, which is a
        # meaningful value for style/speed/pitch.
        if is_given(locale):
            self._opts.locale = locale
        if is_given(voice):
            self._opts.voice = voice
        if is_given(style):
            self._opts.style = style
        if is_given(speed):
            self._opts.speed = speed
        if is_given(pitch):
            self._opts.pitch = pitch

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> ChunkedStream:
        """Synthesize *text* in one shot over the HTTP streaming-output endpoint."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        """Create an incremental WebSocket synthesis stream and track it for aclose()."""
        stream = SynthesizeStream(tts=self, conn_options=conn_options)
        self._streams.add(stream)
        return stream

    async def aclose(self) -> None:
        """Close all live synthesis streams, then shut down the connection pool."""
        for stream in list(self._streams):
            await stream.aclose()

        self._streams.clear()
        await self._pool.aclose()

Helper class that provides a standard way to create an ABC using inheritance.

Create a new instance of Murf AI TTS.

See https://murf.ai/api/docs/api-reference/text-to-speech/stream-input for more details on the Murf AI API.

Args

api_key : str | None, optional
The Murf AI API key. If not provided, it will be read from the MURF_API_KEY environment variable.
model : TTSModels | str, optional
The Murf AI TTS model to use. Defaults to "FALCON".
locale : str | None, optional
The locale for synthesis (e.g., "en-US", "en-UK"). If not provided, will be inferred from voice.
voice : str, optional
The voice ID from Murf AI's voice library (e.g., "en-US-matthew"). Defaults to TTSDefaultVoiceId.
style : TTSStyles | str | None, optional
The voice style to apply (e.g., "Conversation"). Can be None for default style.
speed : int | None, optional
The speech speed control. Higher values = faster speech. None for default speed.
pitch : int | None, optional
The speech pitch control. Higher values = higher pitch. None for default pitch.
sample_rate : int, optional
The audio sample rate in Hz. Defaults to 24000.
encoding : str, optional
The audio encoding format. Defaults to "pcm".
http_session : aiohttp.ClientSession | None, optional
An existing aiohttp ClientSession to use. If not provided, a new session will be created.
base_url : str, optional
The base URL for the Murf AI API. Defaults to "https://global.api.murf.ai".
tokenizer : tokenize.SentenceTokenizer, optional
The tokenizer to use. Defaults to tokenize.blingfire.SentenceTokenizer(min_sentence_len=min_buffer_size).
text_pacing : tts.SentenceStreamPacer | bool, optional
Stream pacer for the TTS. Set to True to use the default pacer, False to disable.
min_buffer_size : int, optional
Minimum characters to buffer before sending text to audio when no sentence boundary is detected. Higher values improve quality; lower values reduce TTFB. Defaults to 3.
max_buffer_delay_in_ms : int, optional
Maximum wait time before sending buffered text if min_buffer_size isn’t reached. Defaults to 0.
streaming : bool, optional
If True, uses WebSocket streaming for real-time audio. If False, uses HTTP requests. Defaults to True.

Ancestors

  • livekit.agents.tts.tts.TTS
  • abc.ABC
  • EventEmitter
  • typing.Generic

Instance variables

prop model : str
Expand source code
@property
def model(self) -> str:
    """The configured Murf model identifier (e.g. "FALCON")."""
    return self._opts.model

Get the model name/identifier for this TTS instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str
Expand source code
@property
def provider(self) -> str:
    """The TTS provider name."""
    return "Murf"

Get the provider name/identifier for this TTS instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

async def aclose(self) ‑> None
Expand source code
async def aclose(self) -> None:
    """Close all live synthesis streams, then shut down the connection pool."""
    for stream in list(self._streams):
        await stream.aclose()

    self._streams.clear()
    await self._pool.aclose()
def prewarm(self) ‑> None
Expand source code
def prewarm(self) -> None:
    """Eagerly establish a pooled connection to reduce first-request latency."""
    self._pool.prewarm()

Pre-warm connection to the TTS service

def stream(self,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.murf.tts.SynthesizeStream
Expand source code
def stream(
    self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> SynthesizeStream:
    """Create an incremental WebSocket synthesis stream and track it for aclose()."""
    stream = SynthesizeStream(tts=self, conn_options=conn_options)
    self._streams.add(stream)
    return stream
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.murf.tts.ChunkedStream
Expand source code
def synthesize(
    self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> ChunkedStream:
    """Synthesize *text* in one shot over the HTTP streaming-output endpoint."""
    return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
def update_options(self,
*,
locale: NotGivenOr[str] = NOT_GIVEN,
voice: NotGivenOr[str] = NOT_GIVEN,
style: NotGivenOr[str | None] = NOT_GIVEN,
speed: NotGivenOr[int | None] = NOT_GIVEN,
pitch: NotGivenOr[int | None] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    locale: NotGivenOr[str] = NOT_GIVEN,
    voice: NotGivenOr[str] = NOT_GIVEN,
    style: NotGivenOr[str | None] = NOT_GIVEN,
    speed: NotGivenOr[int | None] = NOT_GIVEN,
    pitch: NotGivenOr[int | None] = NOT_GIVEN,
) -> None:
    """
    Update the Text-to-Speech (TTS) configuration options.

    This method allows updating the TTS settings, including locale, voice, style,
    speed and pitch. If any parameter is not provided, the existing value will be retained.

    Args:
        locale (str, optional): The locale for synthesis (e.g., "en-US", "en-UK").
        voice (str, optional): The voice ID from Murf AI's voice library. (e.g. "en-US-matthew")
        style (str | None, optional): The voice style to apply (e.g., "Conversation", "Promo").
        speed (int | None, optional): Controls the speech speed. Positive values increase speed, negative values decrease it. Valid range: -50 to 50.
        pitch (int | None, optional): Controls the speech pitch. Positive values raise pitch, negative values lower it. Valid range: -50 to 50.
    """
    # is_given() distinguishes "argument omitted" from an explicit None, which is a
    # meaningful value for style/speed/pitch.
    if is_given(locale):
        self._opts.locale = locale
    if is_given(voice):
        self._opts.voice = voice
    if is_given(style):
        self._opts.style = style
    if is_given(speed):
        self._opts.speed = speed
    if is_given(pitch):
        self._opts.pitch = pitch

Update the Text-to-Speech (TTS) configuration options.

This method allows updating the TTS settings, including model, locale, voice, style, speed and pitch. If any parameter is not provided, the existing value will be retained.

Args

locale : str, optional
The locale for synthesis (e.g., "en-US", "en-UK").
voice : str, optional
The voice ID from Murf AI's voice library. (e.g. "en-US-matthew")
style : str | None, optional
The voice style to apply (e.g., "Conversation", "Promo").
speed : int | None, optional
Controls the speech speed. Positive values increase speed, negative values decrease it. Valid range: -50 to 50.
pitch : int | None, optional
Controls the speech pitch. Positive values raise pitch, negative values lower it. Valid range: -50 to 50.

Inherited members