Module livekit.plugins.aws.tts
Classes
class ChunkedStream (*,
tts: TTS,
text: str,
session: aioboto3.Session,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0),
opts: _TTSOptions)-
Expand source code
class ChunkedStream(tts.ChunkedStream):
    """Single-request synthesis stream backed by AWS Polly ``synthesize_speech``.

    The whole input text is sent in one request. The MP3 response body is fed
    to an audio decoder and the decoded frames are forwarded to the stream's
    event channel.
    """

    def __init__(
        self,
        *,
        tts: TTS,
        text: str,
        session: aioboto3.Session,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
        opts: _TTSOptions,
    ) -> None:
        """Store the session and options used later by ``_run``.

        Args:
            tts: The TTS instance that created this stream.
            text: Text to synthesize.
            session: aioboto3 session used to open the Polly client.
            conn_options: Connection options forwarded to the base class.
            opts: Voice, engine, language and sample-rate options.
        """
        super().__init__(tts=tts, input_text=text, conn_options=conn_options)
        self._opts = opts
        self._segment_id = utils.shortuuid()
        self._session = session

    async def _run(self) -> None:
        """Call Polly once and emit the decoded audio frames.

        Raises:
            APITimeoutError: If an ``asyncio.TimeoutError`` occurs.
            APIStatusError: If an ``aiohttp.ClientResponseError`` occurs.
            APIConnectionError: For any other exception.
        """
        request_id = utils.shortuuid()

        try:
            async with self._session.client("polly") as client:
                # Options that were not given fall back to module defaults;
                # None values are removed by _strip_nones before the call.
                params = {
                    "Text": self._input_text,
                    "OutputFormat": "mp3",
                    "Engine": self._opts.speech_engine
                    if is_given(self._opts.speech_engine)
                    else DEFAULT_SPEECH_ENGINE,
                    "VoiceId": self._opts.voice
                    if is_given(self._opts.voice)
                    else DEFAULT_VOICE,
                    "TextType": "text",
                    "SampleRate": str(self._opts.sample_rate),
                    "LanguageCode": self._opts.language
                    if is_given(self._opts.language)
                    else None,
                }
                response = await client.synthesize_speech(**_strip_nones(params))

                if "AudioStream" in response:
                    decoder = utils.codecs.AudioStreamDecoder(
                        sample_rate=self._opts.sample_rate,
                        num_channels=1,
                    )

                    async def push_data() -> None:
                        """Feed the response body into the decoder chunk by chunk."""
                        try:
                            async with response["AudioStream"] as resp:
                                async for data, _ in resp.content.iter_chunks():
                                    decoder.push(data)
                        finally:
                            # Always signal end of input so the frame loop
                            # below terminates even if the download fails.
                            decoder.end_input()

                    # Download and decode concurrently.
                    push_task = asyncio.create_task(push_data())

                    try:
                        emitter = tts.SynthesizedAudioEmitter(
                            event_ch=self._event_ch,
                            request_id=request_id,
                            segment_id=self._segment_id,
                        )
                        async for frame in decoder:
                            emitter.push(frame)
                        emitter.flush()
                        # Surface any error raised while downloading.
                        await push_task
                    finally:
                        try:
                            await utils.aio.gracefully_cancel(push_task)
                        finally:
                            # Release the decoder's resources; previously the
                            # decoder was never closed.
                            await decoder.aclose()

        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except aiohttp.ClientResponseError as e:
            # NOTE(review): service errors from aioboto3 may surface as
            # botocore ClientError instead and would then fall through to
            # APIConnectionError below - confirm.
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=request_id,
                body=None,
            ) from None
        except Exception as e:
            raise APIConnectionError() from e
Used by the non-streamed synthesize API; some providers support chunked HTTP responses.
Ancestors
- livekit.agents.tts.tts.ChunkedStream
- abc.ABC
class TTS (*,
voice: NotGivenOr[str] = NOT_GIVEN,
language: NotGivenOr[TTS_LANGUAGE | str] = NOT_GIVEN,
speech_engine: NotGivenOr[TTS_SPEECH_ENGINE] = NOT_GIVEN,
sample_rate: int = 16000,
region: NotGivenOr[str] = NOT_GIVEN,
api_key: NotGivenOr[str] = NOT_GIVEN,
api_secret: NotGivenOr[str] = NOT_GIVEN,
session: aioboto3.Session | None = None)-
Expand source code
class TTS(tts.TTS):
    """AWS Polly text-to-speech provider (non-streaming)."""

    def __init__(
        self,
        *,
        voice: NotGivenOr[str] = NOT_GIVEN,
        language: NotGivenOr[TTS_LANGUAGE | str] = NOT_GIVEN,
        speech_engine: NotGivenOr[TTS_SPEECH_ENGINE] = NOT_GIVEN,
        sample_rate: int = DEFAULT_SAMPLE_RATE,
        region: NotGivenOr[str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        api_secret: NotGivenOr[str] = NOT_GIVEN,
        session: aioboto3.Session | None = None,
    ) -> None:
        """
        Create a new instance of AWS Polly TTS.

        ``api_key`` and ``api_secret`` must be set to your AWS access key ID
        and secret access key, either using the arguments or by setting the
        ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment
        variables.

        See
        https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html
        for more details on AWS Polly TTS.

        Args:
            voice (str, optional): Voice ID to use for the synthesis.
                Defaults to "Ruth".
            language (TTS_LANGUAGE, optional): Language code for the
                Synthesize Speech request. This is only necessary if using a
                bilingual voice, such as Aditi, which can be used for either
                Indian English (en-IN) or Hindi (hi-IN).
            sample_rate (int, optional): The audio frequency specified in Hz.
                Defaults to 16000.
            speech_engine (TTS_SPEECH_ENGINE, optional): The engine to use
                for the synthesis. Defaults to "generative".
            region (str, optional): The region to use for the synthesis.
                Defaults to "us-east-1".
            api_key (str, optional): AWS access key id.
            api_secret (str, optional): AWS secret access key.
            session (aioboto3.Session, optional): Optional aioboto3 session
                to use.
        """
        capabilities = tts.TTSCapabilities(streaming=False)
        super().__init__(
            capabilities=capabilities,
            sample_rate=sample_rate,
            num_channels=TTS_NUM_CHANNELS,
        )

        if session:
            # A caller-supplied session takes precedence over credentials.
            self._session = session
        else:
            # Arguments that were not given are passed on as None.
            self._session = get_aws_async_session(
                api_key=api_key if is_given(api_key) else None,
                api_secret=api_secret if is_given(api_secret) else None,
                region=region if is_given(region) else None,
            )

        self._opts = _TTSOptions(
            voice=voice,
            speech_engine=speech_engine,
            region=region,
            language=language,
            sample_rate=sample_rate,
        )

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        """Return a ``ChunkedStream`` for ``text`` using this instance's session and options."""
        stream = ChunkedStream(
            tts=self,
            text=text,
            session=self._session,
            opts=self._opts,
            conn_options=conn_options,
        )
        return stream
Helper class that provides a standard way to create an ABC using inheritance.
Create a new instance of AWS Polly TTS.
`api_key` and `api_secret` must be set to your AWS access key ID and secret access key, either using the arguments or by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
See https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html for more details on AWS Polly TTS.
Args
voice (str, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
language (TTS_LANGUAGE, optional): Language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
sample_rate (int, optional): The audio frequency specified in Hz. Defaults to 16000.
speech_engine (TTS_SPEECH_ENGINE, optional): The engine to use for the synthesis. Defaults to "generative".
region (str, optional): The region to use for the synthesis. Defaults to "us-east-1".
api_key (str, optional): AWS access key id.
api_secret (str, optional): AWS secret access key.
session (aioboto3.Session, optional): Optional aioboto3 session to use.
Ancestors
- livekit.agents.tts.tts.TTS
- abc.ABC
- EventEmitter
- typing.Generic
Methods
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> ChunkedStream-
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> ChunkedStream:
    """Return a ``ChunkedStream`` for ``text`` using this instance's session and options."""
    stream = ChunkedStream(
        tts=self,
        text=text,
        session=self._session,
        opts=self._opts,
        conn_options=conn_options,
    )
    return stream
Inherited members