#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from typing import AsyncGenerator, Optional
from loguru import logger
from pipecat.frames.frames import (
ErrorFrame,
Frame,
TTSAudioRawFrame,
TTSStartedFrame,
TTSStoppedFrame,
)
from pipecat.services.tts_service import TTSService
from pipecat.utils.tracing.service_decorators import traced_tts
try:
from deepgram import DeepgramClient, DeepgramClientOptions, SpeakOptions
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error("In order to use Deepgram, you need to `pip install pipecat-ai[deepgram]`.")
raise Exception(f"Missing module: {e}")
[docs]
class DeepgramTTSService(TTSService):
def __init__(
self,
*,
api_key: str,
voice: str = "aura-2-helena-en",
base_url: str = "",
sample_rate: Optional[int] = None,
encoding: str = "linear16",
**kwargs,
):
super().__init__(sample_rate=sample_rate, **kwargs)
self._settings = {
"encoding": encoding,
}
self.set_voice(voice)
client_options = DeepgramClientOptions(url=base_url)
self._deepgram_client = DeepgramClient(api_key, config=client_options)
[docs]
def can_generate_metrics(self) -> bool:
return True
[docs]
@traced_tts
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"{self}: Generating TTS [{text}]")
options = SpeakOptions(
model=self._voice_id,
encoding=self._settings["encoding"],
sample_rate=self.sample_rate,
container="none",
)
try:
await self.start_ttfb_metrics()
response = await self._deepgram_client.speak.asyncrest.v("1").stream_raw(
{"text": text}, options
)
await self.start_tts_usage_metrics(text)
yield TTSStartedFrame()
async for data in response.aiter_bytes():
await self.stop_ttfb_metrics()
if data:
yield TTSAudioRawFrame(audio=data, sample_rate=self.sample_rate, num_channels=1)
yield TTSStoppedFrame()
except Exception as e:
logger.exception(f"{self} exception: {e}")
yield ErrorFrame(f"Error getting audio: {str(e)}")