Source code for pipecat.services.azure.common

#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Optional

from loguru import logger

from pipecat.transcriptions.language import Language


def language_to_azure_language(language: Language) -> Optional[str]:
    """Look up the Azure language code string for a pipecat ``Language``.

    Args:
        language: The ``Language`` enum member to translate.

    Returns:
        The code string stored for ``language`` in the table below, or
        ``None`` when the table has no entry for it.
    """
    # Bare language members (e.g. Language.EN) share the code of one
    # region-specific member; the chosen default is noted where the original
    # table called it out.
    azure_codes = {
        # --- Afrikaans ---
        Language.AF: "af-ZA",
        Language.AF_ZA: "af-ZA",
        # --- Amharic ---
        Language.AM: "am-ET",
        Language.AM_ET: "am-ET",
        # --- Arabic (bare code defaults to UAE Arabic) ---
        Language.AR: "ar-AE",
        Language.AR_AE: "ar-AE",
        Language.AR_BH: "ar-BH",
        Language.AR_DZ: "ar-DZ",
        Language.AR_EG: "ar-EG",
        Language.AR_IQ: "ar-IQ",
        Language.AR_JO: "ar-JO",
        Language.AR_KW: "ar-KW",
        Language.AR_LB: "ar-LB",
        Language.AR_LY: "ar-LY",
        Language.AR_MA: "ar-MA",
        Language.AR_OM: "ar-OM",
        Language.AR_QA: "ar-QA",
        Language.AR_SA: "ar-SA",
        Language.AR_SY: "ar-SY",
        Language.AR_TN: "ar-TN",
        Language.AR_YE: "ar-YE",
        # --- Assamese ---
        Language.AS: "as-IN",
        Language.AS_IN: "as-IN",
        # --- Azerbaijani ---
        Language.AZ: "az-AZ",
        Language.AZ_AZ: "az-AZ",
        # --- Bulgarian ---
        Language.BG: "bg-BG",
        Language.BG_BG: "bg-BG",
        # --- Bengali (bare code defaults to Indian Bengali) ---
        Language.BN: "bn-IN",
        Language.BN_BD: "bn-BD",
        Language.BN_IN: "bn-IN",
        # --- Bosnian ---
        Language.BS: "bs-BA",
        Language.BS_BA: "bs-BA",
        # --- Catalan ---
        Language.CA: "ca-ES",
        Language.CA_ES: "ca-ES",
        # --- Czech ---
        Language.CS: "cs-CZ",
        Language.CS_CZ: "cs-CZ",
        # --- Welsh ---
        Language.CY: "cy-GB",
        Language.CY_GB: "cy-GB",
        # --- Danish ---
        Language.DA: "da-DK",
        Language.DA_DK: "da-DK",
        # --- German ---
        Language.DE: "de-DE",
        Language.DE_AT: "de-AT",
        Language.DE_CH: "de-CH",
        Language.DE_DE: "de-DE",
        # --- Greek ---
        Language.EL: "el-GR",
        Language.EL_GR: "el-GR",
        # --- English (bare code defaults to US English) ---
        Language.EN: "en-US",
        Language.EN_AU: "en-AU",
        Language.EN_CA: "en-CA",
        Language.EN_GB: "en-GB",
        Language.EN_HK: "en-HK",
        Language.EN_IE: "en-IE",
        Language.EN_IN: "en-IN",
        Language.EN_KE: "en-KE",
        Language.EN_NG: "en-NG",
        Language.EN_NZ: "en-NZ",
        Language.EN_PH: "en-PH",
        Language.EN_SG: "en-SG",
        Language.EN_TZ: "en-TZ",
        Language.EN_US: "en-US",
        Language.EN_ZA: "en-ZA",
        # --- Spanish (bare code defaults to Spain Spanish) ---
        Language.ES: "es-ES",
        Language.ES_AR: "es-AR",
        Language.ES_BO: "es-BO",
        Language.ES_CL: "es-CL",
        Language.ES_CO: "es-CO",
        Language.ES_CR: "es-CR",
        Language.ES_CU: "es-CU",
        Language.ES_DO: "es-DO",
        Language.ES_EC: "es-EC",
        Language.ES_ES: "es-ES",
        Language.ES_GQ: "es-GQ",
        Language.ES_GT: "es-GT",
        Language.ES_HN: "es-HN",
        Language.ES_MX: "es-MX",
        Language.ES_NI: "es-NI",
        Language.ES_PA: "es-PA",
        Language.ES_PE: "es-PE",
        Language.ES_PR: "es-PR",
        Language.ES_PY: "es-PY",
        Language.ES_SV: "es-SV",
        Language.ES_US: "es-US",
        Language.ES_UY: "es-UY",
        Language.ES_VE: "es-VE",
        # --- Estonian ---
        Language.ET: "et-EE",
        Language.ET_EE: "et-EE",
        # --- Basque ---
        Language.EU: "eu-ES",
        Language.EU_ES: "eu-ES",
        # --- Persian ---
        Language.FA: "fa-IR",
        Language.FA_IR: "fa-IR",
        # --- Finnish ---
        Language.FI: "fi-FI",
        Language.FI_FI: "fi-FI",
        # --- Filipino ---
        Language.FIL: "fil-PH",
        Language.FIL_PH: "fil-PH",
        # --- French ---
        Language.FR: "fr-FR",
        Language.FR_BE: "fr-BE",
        Language.FR_CA: "fr-CA",
        Language.FR_CH: "fr-CH",
        Language.FR_FR: "fr-FR",
        # --- Irish ---
        Language.GA: "ga-IE",
        Language.GA_IE: "ga-IE",
        # --- Galician ---
        Language.GL: "gl-ES",
        Language.GL_ES: "gl-ES",
        # --- Gujarati ---
        Language.GU: "gu-IN",
        Language.GU_IN: "gu-IN",
        # --- Hebrew ---
        Language.HE: "he-IL",
        Language.HE_IL: "he-IL",
        # --- Hindi ---
        Language.HI: "hi-IN",
        Language.HI_IN: "hi-IN",
        # --- Croatian ---
        Language.HR: "hr-HR",
        Language.HR_HR: "hr-HR",
        # --- Hungarian ---
        Language.HU: "hu-HU",
        Language.HU_HU: "hu-HU",
        # --- Armenian ---
        Language.HY: "hy-AM",
        Language.HY_AM: "hy-AM",
        # --- Indonesian ---
        Language.ID: "id-ID",
        Language.ID_ID: "id-ID",
        # --- Icelandic ---
        Language.IS: "is-IS",
        Language.IS_IS: "is-IS",
        # --- Italian ---
        Language.IT: "it-IT",
        Language.IT_IT: "it-IT",
        # --- Inuktitut ---
        Language.IU_CANS_CA: "iu-Cans-CA",
        Language.IU_LATN_CA: "iu-Latn-CA",
        # --- Japanese ---
        Language.JA: "ja-JP",
        Language.JA_JP: "ja-JP",
        # --- Javanese ---
        Language.JV: "jv-ID",
        Language.JV_ID: "jv-ID",
        # --- Georgian ---
        Language.KA: "ka-GE",
        Language.KA_GE: "ka-GE",
        # --- Kazakh ---
        Language.KK: "kk-KZ",
        Language.KK_KZ: "kk-KZ",
        # --- Khmer ---
        Language.KM: "km-KH",
        Language.KM_KH: "km-KH",
        # --- Kannada ---
        Language.KN: "kn-IN",
        Language.KN_IN: "kn-IN",
        # --- Korean ---
        Language.KO: "ko-KR",
        Language.KO_KR: "ko-KR",
        # --- Lao ---
        Language.LO: "lo-LA",
        Language.LO_LA: "lo-LA",
        # --- Lithuanian ---
        Language.LT: "lt-LT",
        Language.LT_LT: "lt-LT",
        # --- Latvian ---
        Language.LV: "lv-LV",
        Language.LV_LV: "lv-LV",
        # --- Macedonian ---
        Language.MK: "mk-MK",
        Language.MK_MK: "mk-MK",
        # --- Malayalam ---
        Language.ML: "ml-IN",
        Language.ML_IN: "ml-IN",
        # --- Mongolian ---
        Language.MN: "mn-MN",
        Language.MN_MN: "mn-MN",
        # --- Marathi ---
        Language.MR: "mr-IN",
        Language.MR_IN: "mr-IN",
        # --- Malay ---
        Language.MS: "ms-MY",
        Language.MS_MY: "ms-MY",
        # --- Maltese ---
        Language.MT: "mt-MT",
        Language.MT_MT: "mt-MT",
        # --- Burmese ---
        Language.MY: "my-MM",
        Language.MY_MM: "my-MM",
        # --- Norwegian ---
        Language.NB: "nb-NO",
        Language.NB_NO: "nb-NO",
        Language.NO: "nb-NO",
        # --- Nepali ---
        Language.NE: "ne-NP",
        Language.NE_NP: "ne-NP",
        # --- Dutch ---
        Language.NL: "nl-NL",
        Language.NL_BE: "nl-BE",
        Language.NL_NL: "nl-NL",
        # --- Odia ---
        Language.OR: "or-IN",
        Language.OR_IN: "or-IN",
        # --- Punjabi ---
        Language.PA: "pa-IN",
        Language.PA_IN: "pa-IN",
        # --- Polish ---
        Language.PL: "pl-PL",
        Language.PL_PL: "pl-PL",
        # --- Pashto ---
        Language.PS: "ps-AF",
        Language.PS_AF: "ps-AF",
        # --- Portuguese ---
        Language.PT: "pt-PT",
        Language.PT_BR: "pt-BR",
        Language.PT_PT: "pt-PT",
        # --- Romanian ---
        Language.RO: "ro-RO",
        Language.RO_RO: "ro-RO",
        # --- Russian ---
        Language.RU: "ru-RU",
        Language.RU_RU: "ru-RU",
        # --- Sinhala ---
        Language.SI: "si-LK",
        Language.SI_LK: "si-LK",
        # --- Slovak ---
        Language.SK: "sk-SK",
        Language.SK_SK: "sk-SK",
        # --- Slovenian ---
        Language.SL: "sl-SI",
        Language.SL_SI: "sl-SI",
        # --- Somali ---
        Language.SO: "so-SO",
        Language.SO_SO: "so-SO",
        # --- Albanian ---
        Language.SQ: "sq-AL",
        Language.SQ_AL: "sq-AL",
        # --- Serbian ---
        Language.SR: "sr-RS",
        Language.SR_RS: "sr-RS",
        Language.SR_LATN: "sr-Latn-RS",
        Language.SR_LATN_RS: "sr-Latn-RS",
        # --- Sundanese ---
        Language.SU: "su-ID",
        Language.SU_ID: "su-ID",
        # --- Swedish ---
        Language.SV: "sv-SE",
        Language.SV_SE: "sv-SE",
        # --- Swahili ---
        Language.SW: "sw-KE",
        Language.SW_KE: "sw-KE",
        Language.SW_TZ: "sw-TZ",
        # --- Tamil ---
        Language.TA: "ta-IN",
        Language.TA_IN: "ta-IN",
        Language.TA_LK: "ta-LK",
        Language.TA_MY: "ta-MY",
        Language.TA_SG: "ta-SG",
        # --- Telugu ---
        Language.TE: "te-IN",
        Language.TE_IN: "te-IN",
        # --- Thai ---
        Language.TH: "th-TH",
        Language.TH_TH: "th-TH",
        # --- Turkish ---
        Language.TR: "tr-TR",
        Language.TR_TR: "tr-TR",
        # --- Ukrainian ---
        Language.UK: "uk-UA",
        Language.UK_UA: "uk-UA",
        # --- Urdu ---
        Language.UR: "ur-IN",
        Language.UR_IN: "ur-IN",
        Language.UR_PK: "ur-PK",
        # --- Uzbek ---
        Language.UZ: "uz-UZ",
        Language.UZ_UZ: "uz-UZ",
        # --- Vietnamese ---
        Language.VI: "vi-VN",
        Language.VI_VN: "vi-VN",
        # --- Wu Chinese ---
        Language.WUU: "wuu-CN",
        Language.WUU_CN: "wuu-CN",
        # --- Yue Chinese ---
        Language.YUE: "yue-CN",
        Language.YUE_CN: "yue-CN",
        # --- Chinese ---
        Language.ZH: "zh-CN",
        Language.ZH_CN: "zh-CN",
        Language.ZH_CN_GUANGXI: "zh-CN-guangxi",
        Language.ZH_CN_HENAN: "zh-CN-henan",
        Language.ZH_CN_LIAONING: "zh-CN-liaoning",
        Language.ZH_CN_SHAANXI: "zh-CN-shaanxi",
        Language.ZH_CN_SHANDONG: "zh-CN-shandong",
        Language.ZH_CN_SICHUAN: "zh-CN-sichuan",
        Language.ZH_HK: "zh-HK",
        Language.ZH_TW: "zh-TW",
        # --- Zulu ---
        Language.ZU: "zu-ZA",
        Language.ZU_ZA: "zu-ZA",
    }

    # dict.get yields None for any language missing from the table.
    return azure_codes.get(language)