Source code for pipecat.services.deepgram.tts

#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD-2-Clause
#

from typing import AsyncGenerator, Optional

from loguru import logger

from pipecat.frames.frames import (
    ErrorFrame,
    Frame,
    TTSAudioRawFrame,
    TTSStartedFrame,
    TTSStoppedFrame,
)
from pipecat.services.tts_service import TTSService
from pipecat.utils.tracing.service_decorators import traced_tts

try:
    from deepgram import DeepgramClient, DeepgramClientOptions, SpeakOptions
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use Deepgram, you need to `pip install pipecat-ai[deepgram]`.")
    raise Exception(f"Missing module: {e}")


class DeepgramTTSService(TTSService):
    """Text-to-speech service backed by Deepgram's Speak REST API.

    Streams the synthesized audio back as raw ``TTSAudioRawFrame`` chunks
    using the async Deepgram SDK client.
    """

    def __init__(
        self,
        *,
        api_key: str,
        voice: str = "aura-2-helena-en",
        base_url: str = "",
        sample_rate: Optional[int] = None,
        encoding: str = "linear16",
        **kwargs,
    ):
        super().__init__(sample_rate=sample_rate, **kwargs)

        self._settings = {
            "encoding": encoding,
        }
        self.set_voice(voice)

        client_options = DeepgramClientOptions(url=base_url)
        self._deepgram_client = DeepgramClient(api_key, config=client_options)

    def can_generate_metrics(self) -> bool:
        return True

    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"{self}: Generating TTS [{text}]")

        options = SpeakOptions(
            model=self._voice_id,
            encoding=self._settings["encoding"],
            sample_rate=self.sample_rate,
            container="none",
        )

        try:
            await self.start_ttfb_metrics()

            response = await self._deepgram_client.speak.asyncrest.v("1").stream_raw(
                {"text": text}, options
            )

            await self.start_tts_usage_metrics(text)

            yield TTSStartedFrame()

            async for data in response.aiter_bytes():
                # The first received chunk marks time-to-first-byte for metrics.
                await self.stop_ttfb_metrics()
                if data:
                    yield TTSAudioRawFrame(
                        audio=data, sample_rate=self.sample_rate, num_channels=1
                    )

            yield TTSStoppedFrame()
        except Exception as e:
            logger.exception(f"{self} exception: {e}")
            yield ErrorFrame(f"Error getting audio: {str(e)}")
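
Below is a minimal usage sketch, not part of the module above. It assumes pipecat's standard Pipeline / PipelineTask / PipelineRunner APIs and the TTSSpeakFrame and EndFrame frame types, and it reads the API key from a hypothetical DEEPGRAM_API_KEY environment variable. A real application would add an output transport (or another downstream processor) after the service to consume the generated TTSAudioRawFrame chunks.

import asyncio
import os

from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.services.deepgram.tts import DeepgramTTSService


async def main():
    # Construct the service; voice and encoding mirror the constructor defaults above.
    tts = DeepgramTTSService(
        api_key=os.getenv("DEEPGRAM_API_KEY", ""),  # hypothetical env var
        voice="aura-2-helena-en",
        encoding="linear16",
    )

    # A single-processor pipeline: TTSSpeakFrame text goes in, raw audio frames come out.
    # A real app would append an output transport here to actually play the audio.
    pipeline = Pipeline([tts])
    task = PipelineTask(pipeline)

    # Queue one utterance, then an EndFrame so the pipeline shuts down when done.
    await task.queue_frames([TTSSpeakFrame("Hello from Deepgram."), EndFrame()])

    await PipelineRunner().run(task)


if __name__ == "__main__":
    asyncio.run(main())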