Source code for pipecat.services.gladia.config

#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel

from pipecat.transcriptions.language import Language


class LanguageConfig(BaseModel):
    """Language selection and detection settings.

    Every field defaults to ``None``.

    Attributes:
        languages: Language codes that transcription should use.
        code_switching: Whether language changes are auto-detected while
            transcription is in progress.
    """

    languages: Optional[List[str]] = None
    code_switching: Optional[bool] = None
class PreProcessingConfig(BaseModel):
    """Settings for the audio pre-processing options.

    Attributes:
        speech_threshold: Speech-detection sensitivity on a 0-1 scale.
            Defaults to ``None``; no range constraint is declared on the
            field itself.
    """

    speech_threshold: Optional[float] = None
class CustomVocabularyItem(BaseModel):
    """A single custom vocabulary entry paired with its intensity.

    Both fields are required (neither declares a default).

    Attributes:
        value: The word or phrase to add to the vocabulary.
        intensity: Bias intensity applied to this entry, on a 0-1 scale.
    """

    value: str
    intensity: float
class CustomVocabularyConfig(BaseModel):
    """Settings describing the custom vocabulary.

    Every field defaults to ``None``.

    Attributes:
        vocabulary: Vocabulary entries; each one is either a plain
            word/phrase string or a ``CustomVocabularyItem``.
        default_intensity: Intensity used by default for entries given as
            plain strings.
    """

    vocabulary: Optional[List[Union[str, CustomVocabularyItem]]] = None
    default_intensity: Optional[float] = None
class CustomSpellingConfig(BaseModel):
    """Settings describing custom spelling rules.

    Attributes:
        spelling_dictionary: Maps each correct spelling to the list of
            phonetic variations associated with it. Defaults to ``None``.
    """

    spelling_dictionary: Optional[Dict[str, List[str]]] = None
class TranslationConfig(BaseModel):
    """Settings for real-time translation.

    Every field defaults to ``None``.

    Attributes:
        target_languages: Language codes to translate into.
        model: Which translation model to use ("base" or "enhanced").
        match_original_utterances: Whether translations are aligned with the
            original utterances.
        lipsync: Whether lip-sync optimization is enabled for translations.
        context_adaptation: Whether context-aware translation adaptation is
            enabled.
        context: Extra context intended to improve translation accuracy.
        informal: Force informal language forms where they are available.
    """

    target_languages: Optional[List[str]] = None
    model: Optional[str] = None
    match_original_utterances: Optional[bool] = None
    lipsync: Optional[bool] = None
    context_adaptation: Optional[bool] = None
    context: Optional[str] = None
    informal: Optional[bool] = None
class RealtimeProcessingConfig(BaseModel):
    """Settings for the real-time processing features.

    Each feature that takes extra settings is expressed as a pair: a boolean
    switch plus a matching ``*_config`` model. Every field defaults to
    ``None``.

    Attributes:
        words_accurate_timestamps: Whether per-word timestamps are provided.
        custom_vocabulary: Whether custom vocabulary is enabled.
        custom_vocabulary_config: Settings for custom vocabulary.
        custom_spelling: Whether custom spelling is enabled.
        custom_spelling_config: Settings for custom spelling.
        translation: Whether translation is enabled.
        translation_config: Settings for translation.
        named_entity_recognition: Whether named entity recognition is enabled.
        sentiment_analysis: Whether sentiment analysis is enabled.
    """

    words_accurate_timestamps: Optional[bool] = None
    custom_vocabulary: Optional[bool] = None
    custom_vocabulary_config: Optional[CustomVocabularyConfig] = None
    custom_spelling: Optional[bool] = None
    custom_spelling_config: Optional[CustomSpellingConfig] = None
    translation: Optional[bool] = None
    translation_config: Optional[TranslationConfig] = None
    named_entity_recognition: Optional[bool] = None
    sentiment_analysis: Optional[bool] = None
class MessagesConfig(BaseModel):
    """Selects which message types are delivered over the WebSocket.

    Every field is an optional boolean that defaults to ``None``.

    Attributes:
        receive_partial_transcripts: Receive intermediate transcription
            results.
        receive_final_transcripts: Receive final transcription results.
        receive_speech_events: Receive speech begin/end events.
        receive_pre_processing_events: Receive pre-processing events.
        receive_realtime_processing_events: Receive real-time processing
            events.
        receive_post_processing_events: Receive post-processing events.
        receive_acknowledgments: Receive acknowledgment messages.
        receive_errors: Receive error messages.
        receive_lifecycle_events: Receive lifecycle events.
    """

    receive_partial_transcripts: Optional[bool] = None
    receive_final_transcripts: Optional[bool] = None
    receive_speech_events: Optional[bool] = None
    receive_pre_processing_events: Optional[bool] = None
    receive_realtime_processing_events: Optional[bool] = None
    receive_post_processing_events: Optional[bool] = None
    receive_acknowledgments: Optional[bool] = None
    receive_errors: Optional[bool] = None
    receive_lifecycle_events: Optional[bool] = None
class GladiaInputParams(BaseModel):
    """Input parameters for configuring the Gladia STT service.

    Attributes:
        encoding: Audio encoding format. Defaults to "wav/pcm".
        bit_depth: Audio bit depth. Defaults to 16.
        channels: Number of audio channels. Defaults to 1.
        custom_metadata: Extra metadata to include with requests.
        endpointing: Duration of silence, in seconds, that marks the end of
            speech.
        maximum_duration_without_endpointing: Longest utterance duration
            allowed without silence. Defaults to 10.
        language: DEPRECATED - use ``language_config`` instead.
        language_config: Detailed language configuration.
        pre_processing: Audio pre-processing options.
        realtime_processing: Real-time processing features.
        messages_config: WebSocket message filtering options.
    """

    encoding: Optional[str] = "wav/pcm"
    bit_depth: Optional[int] = 16
    channels: Optional[int] = 1
    custom_metadata: Optional[Dict[str, Any]] = None
    endpointing: Optional[float] = None
    maximum_duration_without_endpointing: Optional[int] = 10
    language: Optional[Language] = None  # Deprecated
    language_config: Optional[LanguageConfig] = None
    pre_processing: Optional[PreProcessingConfig] = None
    realtime_processing: Optional[RealtimeProcessingConfig] = None
    messages_config: Optional[MessagesConfig] = None