Source code for pipecat.services.assemblyai.models

from typing import List, Literal, Optional

from pydantic import BaseModel, Field



[docs]
class Word(BaseModel):
    """Represents a single word in a transcription with timing and confidence.

    Attributes:
        start: Integer start timing of the word. The unit is not stated in
            this file (presumably milliseconds; TODO confirm against the
            AssemblyAI API reference).
        end: Integer end timing of the word (presumably the same unit as
            ``start``; TODO confirm).
        text: The word's text.
        confidence: Confidence value reported for this word.
        word_is_final: Required boolean. Judging by the name, it marks the
            word as finalized; TODO confirm the exact semantics.
    """

    start: int
    end: int
    text: str
    confidence: float
    # `...` makes the field required (no default). The alias is spelled
    # exactly like the attribute name, so the external key and the Python
    # attribute coincide.
    word_is_final: bool = Field(..., alias="word_is_final")




[docs]
class BaseMessage(BaseModel):
    """Base class for all AssemblyAI WebSocket messages.

    Attributes:
        type: Name of the message type as a plain string. The concrete
            message classes in this module (``BeginMessage``,
            ``TurnMessage``, ``TerminationMessage``) each narrow this field
            to a single ``Literal`` value.
    """

    type: str




[docs]
class BeginMessage(BaseMessage):
    """Message sent when a new session begins.

    Attributes:
        type: Restricted to the literal ``"Begin"``, which is also the
            default.
        id: Required string identifier carried by the message (presumably
            the session ID; TODO confirm).
        expires_at: Required integer expiry value (presumably a Unix
            timestamp for the session; TODO confirm).
    """

    type: Literal["Begin"] = "Begin"
    id: str
    expires_at: int




[docs]
class TurnMessage(BaseMessage):
    """Transcription payload for one turn of speech.

    Attributes:
        type: Restricted to the literal ``"Turn"``, which is also the
            default.
        turn_order: Required integer order of the turn (presumably a
            sequence index within the session; TODO confirm).
        turn_is_formatted: Required boolean; by its name, indicates whether
            the transcript has been formatted (TODO confirm).
        end_of_turn: Required boolean; by its name, indicates whether the
            turn has ended (TODO confirm).
        transcript: Transcribed text for the turn.
        end_of_turn_confidence: Confidence value associated with the
            end-of-turn decision (range not stated in this file).
        words: The ``Word`` entries that belong to this turn.
    """

    type: Literal["Turn"] = "Turn"
    turn_order: int
    turn_is_formatted: bool
    end_of_turn: bool
    transcript: str
    end_of_turn_confidence: float
    # Builtin generic spelling; describes the same "list of Word" schema.
    words: list[Word]




[docs]
class TerminationMessage(BaseMessage):
    """Message sent when the session is terminated.

    Attributes:
        type: Restricted to the literal ``"Termination"``, which is also
            the default.
        audio_duration_seconds: Required float; per its name, the duration
            of audio in seconds.
        session_duration_seconds: Required float; per its name, the
            duration of the session in seconds.
    """

    type: Literal["Termination"] = "Termination"
    audio_duration_seconds: float
    session_duration_seconds: float



# Union type for all possible message types: a value annotated with
# AnyMessage is exactly one of the three concrete message models below.
# Each member pins its `type` field to a different Literal ("Begin", "Turn",
# "Termination"), which is what tells them apart.
AnyMessage = BeginMessage | TurnMessage | TerminationMessage



[docs]
class AssemblyAIConnectionParams(BaseModel):
    """Connection settings model for the AssemblyAI service.

    Every field has a default, so the model can be created without
    arguments. The precise meaning and units of each setting are defined by
    the AssemblyAI API and are not visible in this file; the notes below
    follow the field names and should be confirmed against the API
    reference.

    Attributes:
        sample_rate: Integer audio sample rate; defaults to 16000
            (presumably Hz).
        encoding: Audio encoding, either ``"pcm_s16le"`` or
            ``"pcm_mulaw"``; defaults to ``"pcm_s16le"``.
        formatted_finals: Boolean flag; defaults to ``True``.
        word_finalization_max_wait_time: Optional integer; ``None`` by
            default (unit not stated here).
        end_of_turn_confidence_threshold: Optional float; ``None`` by
            default.
        min_end_of_turn_silence_when_confident: Optional integer; ``None``
            by default (unit not stated here).
        max_turn_silence: Optional integer; ``None`` by default (unit not
            stated here).
    """

    sample_rate: int = 16000
    encoding: Literal["pcm_s16le", "pcm_mulaw"] = "pcm_s16le"
    formatted_finals: bool = True
    # The remaining settings are unset (None) unless the caller supplies them.
    word_finalization_max_wait_time: int | None = None
    end_of_turn_confidence_threshold: float | None = None
    min_end_of_turn_silence_when_confident: int | None = None
    max_turn_silence: int | None = None