# Source code for pipecat.audio.turn.smart_turn.local_smart_turn

#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#


from typing import Any, Dict

import numpy as np
from loguru import logger

from pipecat.audio.turn.smart_turn.base_smart_turn import BaseSmartTurn

# Optional heavy dependencies: torch and transformers ship with the
# `local-smart-turn` extra rather than the base package, so fail fast with
# install guidance when they are missing.
try:
    import torch
    from transformers import AutoFeatureExtractor, Wav2Vec2BertForSequenceClassification
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use the LocalSmartTurnAnalyzer, you need to `pip install pipecat-ai[local-smart-turn]`."
    )
    # Re-raise as a generic Exception; the original ModuleNotFoundError is
    # still available as __context__ via implicit exception chaining.
    raise Exception(f"Missing module: {e}")


class LocalSmartTurnAnalyzer(BaseSmartTurn):
    """End-of-turn analyzer backed by a locally-run Wav2Vec2-BERT classifier.

    Loads a Hugging Face sequence-classification checkpoint (defaulting to
    ``pipecat-ai/smart-turn``) together with its feature extractor, and runs
    inference on GPU when available, otherwise CPU.
    """

    def __init__(self, *, smart_turn_model_path: str, **kwargs):
        """Initialize the analyzer and load the model onto the best device.

        Args:
            smart_turn_model_path: Path or Hugging Face identifier of the
                smart-turn checkpoint; when empty/falsy, the published
                ``pipecat-ai/smart-turn`` checkpoint is used.
            **kwargs: Forwarded unchanged to ``BaseSmartTurn``.
        """
        super().__init__(**kwargs)

        # Fall back to the pretrained checkpoint on Hugging Face when no
        # explicit path was supplied.
        model_path = smart_turn_model_path or "pipecat-ai/smart-turn"

        logger.debug("Loading Local Smart Turn model...")

        # Classification head on top of Wav2Vec2-BERT, plus the paired
        # feature extractor that turns raw audio into model inputs.
        self._turn_model = Wav2Vec2BertForSequenceClassification.from_pretrained(model_path)
        self._turn_processor = AutoFeatureExtractor.from_pretrained(model_path)

        # Prefer CUDA when present; this analyzer is inference-only, so the
        # model is placed in evaluation mode.
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self._turn_model = self._turn_model.to(self._device)
        self._turn_model.eval()

        logger.debug("Loaded Local Smart Turn")

    async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Classify whether the current speaker's turn is complete.

        Args:
            audio_array: Raw audio samples; passed to the feature extractor
                at a fixed 16 kHz sampling rate — assumes the caller supplies
                16 kHz audio (TODO confirm against BaseSmartTurn's contract).

        Returns:
            Dict with ``prediction`` (1 = turn complete, 0 = incomplete) and
            ``probability`` (softmax score of the "complete" class).
        """
        features = self._turn_processor(
            audio_array,
            sampling_rate=16000,
            padding="max_length",
            truncation=True,
            max_length=800,  # Maximum length as specified in training
            return_attention_mask=True,
            return_tensors="pt",
        )

        # Input tensors must live on the same device as the model weights.
        features = {name: tensor.to(self._device) for name, tensor in features.items()}

        # Pure inference — gradients are never needed here.
        with torch.no_grad():
            logits = self._turn_model(**features).logits
            probs = torch.nn.functional.softmax(logits, dim=1)
            # Index [0, 1] is the probability of class 1 ("Complete").
            complete_prob = probs[0, 1].item()

        return {
            "prediction": 1 if complete_prob > 0.5 else 0,
            "probability": complete_prob,
        }