Skip to content

Commit 7a1afa9

Browse files
tjbck and NoMoreFood committed
feat: custom stt content type
Co-Authored-By: Bryan Berns <[email protected]>
1 parent 6a5aac4 commit 7a1afa9

File tree

5 files changed

+227
-182
lines changed

5 files changed

+227
-182
lines changed

backend/open_webui/config.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2906,6 +2906,12 @@ class BannerModel(BaseModel):
29062906
os.getenv("AUDIO_STT_MODEL", ""),
29072907
)
29082908

2909+
AUDIO_STT_SUPPORTED_CONTENT_TYPES = PersistentConfig(
2910+
"AUDIO_STT_SUPPORTED_CONTENT_TYPES",
2911+
"audio.stt.supported_content_types",
2912+
os.getenv("AUDIO_STT_SUPPORTED_CONTENT_TYPES", "").split(","),
2913+
)
2914+
29092915
AUDIO_STT_AZURE_API_KEY = PersistentConfig(
29102916
"AUDIO_STT_AZURE_API_KEY",
29112917
"audio.stt.azure.api_key",

backend/open_webui/main.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,7 @@
159159
# Audio
160160
AUDIO_STT_ENGINE,
161161
AUDIO_STT_MODEL,
162+
AUDIO_STT_SUPPORTED_CONTENT_TYPES,
162163
AUDIO_STT_OPENAI_API_BASE_URL,
163164
AUDIO_STT_OPENAI_API_KEY,
164165
AUDIO_STT_AZURE_API_KEY,
@@ -959,10 +960,12 @@ async def lifespan(app: FastAPI):
959960
#
960961
########################################
961962

962-
app.state.config.STT_OPENAI_API_BASE_URL = AUDIO_STT_OPENAI_API_BASE_URL
963-
app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY
964963
app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
965964
app.state.config.STT_MODEL = AUDIO_STT_MODEL
965+
app.state.config.STT_SUPPORTED_CONTENT_TYPES = AUDIO_STT_SUPPORTED_CONTENT_TYPES
966+
967+
app.state.config.STT_OPENAI_API_BASE_URL = AUDIO_STT_OPENAI_API_BASE_URL
968+
app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY
966969

967970
app.state.config.WHISPER_MODEL = WHISPER_MODEL
968971
app.state.config.WHISPER_VAD_FILTER = WHISPER_VAD_FILTER

backend/open_webui/routers/audio.py

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from concurrent.futures import ThreadPoolExecutor
1111
from typing import Optional
1212

13-
13+
from fnmatch import fnmatch
1414
import aiohttp
1515
import aiofiles
1616
import requests
@@ -168,6 +168,7 @@ class STTConfigForm(BaseModel):
168168
OPENAI_API_KEY: str
169169
ENGINE: str
170170
MODEL: str
171+
SUPPORTED_CONTENT_TYPES: list[str] = []
171172
WHISPER_MODEL: str
172173
DEEPGRAM_API_KEY: str
173174
AZURE_API_KEY: str
@@ -202,6 +203,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
202203
"OPENAI_API_KEY": request.app.state.config.STT_OPENAI_API_KEY,
203204
"ENGINE": request.app.state.config.STT_ENGINE,
204205
"MODEL": request.app.state.config.STT_MODEL,
206+
"SUPPORTED_CONTENT_TYPES": request.app.state.config.STT_SUPPORTED_CONTENT_TYPES,
205207
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
206208
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
207209
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
@@ -236,6 +238,10 @@ async def update_audio_config(
236238
request.app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
237239
request.app.state.config.STT_ENGINE = form_data.stt.ENGINE
238240
request.app.state.config.STT_MODEL = form_data.stt.MODEL
241+
request.app.state.config.STT_SUPPORTED_CONTENT_TYPES = (
242+
form_data.stt.SUPPORTED_CONTENT_TYPES
243+
)
244+
239245
request.app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
240246
request.app.state.config.DEEPGRAM_API_KEY = form_data.stt.DEEPGRAM_API_KEY
241247
request.app.state.config.AUDIO_STT_AZURE_API_KEY = form_data.stt.AZURE_API_KEY
@@ -269,6 +275,7 @@ async def update_audio_config(
269275
"OPENAI_API_KEY": request.app.state.config.STT_OPENAI_API_KEY,
270276
"ENGINE": request.app.state.config.STT_ENGINE,
271277
"MODEL": request.app.state.config.STT_MODEL,
278+
"SUPPORTED_CONTENT_TYPES": request.app.state.config.STT_SUPPORTED_CONTENT_TYPES,
272279
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
273280
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
274281
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
@@ -910,10 +917,14 @@ def transcription(
910917
):
911918
log.info(f"file.content_type: {file.content_type}")
912919

913-
SUPPORTED_CONTENT_TYPES = {"video/webm"} # Extend if you add more video types!
914-
if not (
915-
file.content_type.startswith("audio/")
916-
or file.content_type in SUPPORTED_CONTENT_TYPES
920+
supported_content_types = request.app.state.config.STT_SUPPORTED_CONTENT_TYPES or [
921+
"audio/*",
922+
"video/webm",
923+
]
924+
925+
if not any(
926+
fnmatch(file.content_type, content_type)
927+
for content_type in supported_content_types
917928
):
918929
raise HTTPException(
919930
status_code=status.HTTP_400_BAD_REQUEST,

backend/open_webui/routers/files.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -155,9 +155,18 @@ def upload_file(
155155
if process:
156156
try:
157157
if file.content_type:
158-
if file.content_type.startswith("audio/") or file.content_type in {
159-
"video/webm"
160-
}:
158+
stt_supported_content_types = (
159+
request.app.state.config.STT_SUPPORTED_CONTENT_TYPES
160+
or [
161+
"audio/*",
162+
"video/webm",
163+
]
164+
)
165+
166+
if any(
167+
fnmatch(file.content_type, content_type)
168+
for content_type in stt_supported_content_types
169+
):
161170
file_path = Storage.get_file(file_path)
162171
result = transcribe(request, file_path, file_metadata)
163172

0 commit comments

Comments
 (0)