#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from typing import Optional
from loguru import logger
from pipecat.transcriptions.language import Language
# Lookup table from pipecat ``Language`` members to Azure locale codes.
# Built once at import time rather than on every call, since it never changes.
# A bare language code (e.g. ``Language.EN``) maps to one chosen regional
# default; region-specific members map to their own locale.
_AZURE_LANGUAGE_MAP = {
    # Afrikaans
    Language.AF: "af-ZA",
    Language.AF_ZA: "af-ZA",
    # Amharic
    Language.AM: "am-ET",
    Language.AM_ET: "am-ET",
    # Arabic
    Language.AR: "ar-AE",  # Default to UAE Arabic
    Language.AR_AE: "ar-AE",
    Language.AR_BH: "ar-BH",
    Language.AR_DZ: "ar-DZ",
    Language.AR_EG: "ar-EG",
    Language.AR_IQ: "ar-IQ",
    Language.AR_JO: "ar-JO",
    Language.AR_KW: "ar-KW",
    Language.AR_LB: "ar-LB",
    Language.AR_LY: "ar-LY",
    Language.AR_MA: "ar-MA",
    Language.AR_OM: "ar-OM",
    Language.AR_QA: "ar-QA",
    Language.AR_SA: "ar-SA",
    Language.AR_SY: "ar-SY",
    Language.AR_TN: "ar-TN",
    Language.AR_YE: "ar-YE",
    # Assamese
    Language.AS: "as-IN",
    Language.AS_IN: "as-IN",
    # Azerbaijani
    Language.AZ: "az-AZ",
    Language.AZ_AZ: "az-AZ",
    # Bulgarian
    Language.BG: "bg-BG",
    Language.BG_BG: "bg-BG",
    # Bengali
    Language.BN: "bn-IN",  # Default to Indian Bengali
    Language.BN_BD: "bn-BD",
    Language.BN_IN: "bn-IN",
    # Bosnian
    Language.BS: "bs-BA",
    Language.BS_BA: "bs-BA",
    # Catalan
    Language.CA: "ca-ES",
    Language.CA_ES: "ca-ES",
    # Czech
    Language.CS: "cs-CZ",
    Language.CS_CZ: "cs-CZ",
    # Welsh
    Language.CY: "cy-GB",
    Language.CY_GB: "cy-GB",
    # Danish
    Language.DA: "da-DK",
    Language.DA_DK: "da-DK",
    # German
    Language.DE: "de-DE",
    Language.DE_AT: "de-AT",
    Language.DE_CH: "de-CH",
    Language.DE_DE: "de-DE",
    # Greek
    Language.EL: "el-GR",
    Language.EL_GR: "el-GR",
    # English
    Language.EN: "en-US",  # Default to US English
    Language.EN_AU: "en-AU",
    Language.EN_CA: "en-CA",
    Language.EN_GB: "en-GB",
    Language.EN_HK: "en-HK",
    Language.EN_IE: "en-IE",
    Language.EN_IN: "en-IN",
    Language.EN_KE: "en-KE",
    Language.EN_NG: "en-NG",
    Language.EN_NZ: "en-NZ",
    Language.EN_PH: "en-PH",
    Language.EN_SG: "en-SG",
    Language.EN_TZ: "en-TZ",
    Language.EN_US: "en-US",
    Language.EN_ZA: "en-ZA",
    # Spanish
    Language.ES: "es-ES",  # Default to Spain Spanish
    Language.ES_AR: "es-AR",
    Language.ES_BO: "es-BO",
    Language.ES_CL: "es-CL",
    Language.ES_CO: "es-CO",
    Language.ES_CR: "es-CR",
    Language.ES_CU: "es-CU",
    Language.ES_DO: "es-DO",
    Language.ES_EC: "es-EC",
    Language.ES_ES: "es-ES",
    Language.ES_GQ: "es-GQ",
    Language.ES_GT: "es-GT",
    Language.ES_HN: "es-HN",
    Language.ES_MX: "es-MX",
    Language.ES_NI: "es-NI",
    Language.ES_PA: "es-PA",
    Language.ES_PE: "es-PE",
    Language.ES_PR: "es-PR",
    Language.ES_PY: "es-PY",
    Language.ES_SV: "es-SV",
    Language.ES_US: "es-US",
    Language.ES_UY: "es-UY",
    Language.ES_VE: "es-VE",
    # Estonian
    Language.ET: "et-EE",
    Language.ET_EE: "et-EE",
    # Basque
    Language.EU: "eu-ES",
    Language.EU_ES: "eu-ES",
    # Persian
    Language.FA: "fa-IR",
    Language.FA_IR: "fa-IR",
    # Finnish
    Language.FI: "fi-FI",
    Language.FI_FI: "fi-FI",
    # Filipino
    Language.FIL: "fil-PH",
    Language.FIL_PH: "fil-PH",
    # French
    Language.FR: "fr-FR",
    Language.FR_BE: "fr-BE",
    Language.FR_CA: "fr-CA",
    Language.FR_CH: "fr-CH",
    Language.FR_FR: "fr-FR",
    # Irish
    Language.GA: "ga-IE",
    Language.GA_IE: "ga-IE",
    # Galician
    Language.GL: "gl-ES",
    Language.GL_ES: "gl-ES",
    # Gujarati
    Language.GU: "gu-IN",
    Language.GU_IN: "gu-IN",
    # Hebrew
    Language.HE: "he-IL",
    Language.HE_IL: "he-IL",
    # Hindi
    Language.HI: "hi-IN",
    Language.HI_IN: "hi-IN",
    # Croatian
    Language.HR: "hr-HR",
    Language.HR_HR: "hr-HR",
    # Hungarian
    Language.HU: "hu-HU",
    Language.HU_HU: "hu-HU",
    # Armenian
    Language.HY: "hy-AM",
    Language.HY_AM: "hy-AM",
    # Indonesian
    Language.ID: "id-ID",
    Language.ID_ID: "id-ID",
    # Icelandic
    Language.IS: "is-IS",
    Language.IS_IS: "is-IS",
    # Italian
    Language.IT: "it-IT",
    Language.IT_IT: "it-IT",
    # Inuktitut
    Language.IU_CANS_CA: "iu-Cans-CA",
    Language.IU_LATN_CA: "iu-Latn-CA",
    # Japanese
    Language.JA: "ja-JP",
    Language.JA_JP: "ja-JP",
    # Javanese
    Language.JV: "jv-ID",
    Language.JV_ID: "jv-ID",
    # Georgian
    Language.KA: "ka-GE",
    Language.KA_GE: "ka-GE",
    # Kazakh
    Language.KK: "kk-KZ",
    Language.KK_KZ: "kk-KZ",
    # Khmer
    Language.KM: "km-KH",
    Language.KM_KH: "km-KH",
    # Kannada
    Language.KN: "kn-IN",
    Language.KN_IN: "kn-IN",
    # Korean
    Language.KO: "ko-KR",
    Language.KO_KR: "ko-KR",
    # Lao
    Language.LO: "lo-LA",
    Language.LO_LA: "lo-LA",
    # Lithuanian
    Language.LT: "lt-LT",
    Language.LT_LT: "lt-LT",
    # Latvian
    Language.LV: "lv-LV",
    Language.LV_LV: "lv-LV",
    # Macedonian
    Language.MK: "mk-MK",
    Language.MK_MK: "mk-MK",
    # Malayalam
    Language.ML: "ml-IN",
    Language.ML_IN: "ml-IN",
    # Mongolian
    Language.MN: "mn-MN",
    Language.MN_MN: "mn-MN",
    # Marathi
    Language.MR: "mr-IN",
    Language.MR_IN: "mr-IN",
    # Malay
    Language.MS: "ms-MY",
    Language.MS_MY: "ms-MY",
    # Maltese
    Language.MT: "mt-MT",
    Language.MT_MT: "mt-MT",
    # Burmese
    Language.MY: "my-MM",
    Language.MY_MM: "my-MM",
    # Norwegian
    Language.NB: "nb-NO",
    Language.NB_NO: "nb-NO",
    Language.NO: "nb-NO",
    # Nepali
    Language.NE: "ne-NP",
    Language.NE_NP: "ne-NP",
    # Dutch
    Language.NL: "nl-NL",
    Language.NL_BE: "nl-BE",
    Language.NL_NL: "nl-NL",
    # Odia
    Language.OR: "or-IN",
    Language.OR_IN: "or-IN",
    # Punjabi
    Language.PA: "pa-IN",
    Language.PA_IN: "pa-IN",
    # Polish
    Language.PL: "pl-PL",
    Language.PL_PL: "pl-PL",
    # Pashto
    Language.PS: "ps-AF",
    Language.PS_AF: "ps-AF",
    # Portuguese
    Language.PT: "pt-PT",
    Language.PT_BR: "pt-BR",
    Language.PT_PT: "pt-PT",
    # Romanian
    Language.RO: "ro-RO",
    Language.RO_RO: "ro-RO",
    # Russian
    Language.RU: "ru-RU",
    Language.RU_RU: "ru-RU",
    # Sinhala
    Language.SI: "si-LK",
    Language.SI_LK: "si-LK",
    # Slovak
    Language.SK: "sk-SK",
    Language.SK_SK: "sk-SK",
    # Slovenian
    Language.SL: "sl-SI",
    Language.SL_SI: "sl-SI",
    # Somali
    Language.SO: "so-SO",
    Language.SO_SO: "so-SO",
    # Albanian
    Language.SQ: "sq-AL",
    Language.SQ_AL: "sq-AL",
    # Serbian
    Language.SR: "sr-RS",
    Language.SR_RS: "sr-RS",
    Language.SR_LATN: "sr-Latn-RS",
    Language.SR_LATN_RS: "sr-Latn-RS",
    # Sundanese
    Language.SU: "su-ID",
    Language.SU_ID: "su-ID",
    # Swedish
    Language.SV: "sv-SE",
    Language.SV_SE: "sv-SE",
    # Swahili
    Language.SW: "sw-KE",
    Language.SW_KE: "sw-KE",
    Language.SW_TZ: "sw-TZ",
    # Tamil
    Language.TA: "ta-IN",
    Language.TA_IN: "ta-IN",
    Language.TA_LK: "ta-LK",
    Language.TA_MY: "ta-MY",
    Language.TA_SG: "ta-SG",
    # Telugu
    Language.TE: "te-IN",
    Language.TE_IN: "te-IN",
    # Thai
    Language.TH: "th-TH",
    Language.TH_TH: "th-TH",
    # Turkish
    Language.TR: "tr-TR",
    Language.TR_TR: "tr-TR",
    # Ukrainian
    Language.UK: "uk-UA",
    Language.UK_UA: "uk-UA",
    # Urdu
    Language.UR: "ur-IN",
    Language.UR_IN: "ur-IN",
    Language.UR_PK: "ur-PK",
    # Uzbek
    Language.UZ: "uz-UZ",
    Language.UZ_UZ: "uz-UZ",
    # Vietnamese
    Language.VI: "vi-VN",
    Language.VI_VN: "vi-VN",
    # Wu Chinese
    Language.WUU: "wuu-CN",
    Language.WUU_CN: "wuu-CN",
    # Yue Chinese
    Language.YUE: "yue-CN",
    Language.YUE_CN: "yue-CN",
    # Chinese
    Language.ZH: "zh-CN",
    Language.ZH_CN: "zh-CN",
    Language.ZH_CN_GUANGXI: "zh-CN-guangxi",
    Language.ZH_CN_HENAN: "zh-CN-henan",
    Language.ZH_CN_LIAONING: "zh-CN-liaoning",
    Language.ZH_CN_SHAANXI: "zh-CN-shaanxi",
    Language.ZH_CN_SHANDONG: "zh-CN-shandong",
    Language.ZH_CN_SICHUAN: "zh-CN-sichuan",
    Language.ZH_HK: "zh-HK",
    Language.ZH_TW: "zh-TW",
    # Zulu
    Language.ZU: "zu-ZA",
    Language.ZU_ZA: "zu-ZA",
}


def language_to_azure_language(language: Language) -> Optional[str]:
    """Convert a pipecat ``Language`` value to its Azure locale code.

    Bare language codes resolve to a single regional default (for example
    ``Language.EN`` resolves to ``"en-US"``), while region-specific members
    resolve to their own locale string.

    Args:
        language: The ``Language`` member to convert.

    Returns:
        The matching Azure locale code (e.g. ``"en-US"``), or ``None`` when
        the language has no entry in the lookup table.
    """
    return _AZURE_LANGUAGE_MAP.get(language)