auto-git:

 [add] dist/assets/index-DifKT69O.js
 [change] backend/main.py
 [change] backend/schemas.py
 [change] backend/whisper_admin.py
 [change] dist/index.html
 [change] electron/main.cjs
 [change] src/App.jsx
 [change] src/GeneralSettings.jsx
 [change] src/audioInput.js
 [unlink] dist/assets/index-Dm7DZNSo.js
2026-04-17 04:43:28 +02:00
parent 67d9f41f17
commit 48c35698cb
9 changed files with 85 additions and 14 deletions

backend/main.py

@@ -159,6 +159,7 @@ async def transcribe_audio_route(req: schemas.AudioTranscriptionRequest):
             audio_bytes,
             mime_type,
             req.model or DEFAULT_WHISPER_MODEL,
+            req.language,
         )
     except RuntimeError as exc:
         raise HTTPException(status_code=400, detail=str(exc)) from exc

backend/schemas.py

@@ -84,6 +84,7 @@ class AudioTranscriptionRequest(BaseModel):
     mime_type: str
     audio_base64: str
     model: Optional[str] = None
+    language: Optional[str] = None


 class AudioTranscriptionResponse(BaseModel):
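
With the new field in place, a forced language is just one extra key in the request body. The sketch below is a minimal client example, not part of this commit: the field names follow AudioTranscriptionRequest above and the base URL matches DEFAULT_BACKEND_API_URL, but the /transcribe-audio path is an assumption, not taken from this diff.

import base64

import requests

# Hypothetical client-side sketch: field names come from AudioTranscriptionRequest,
# the endpoint path is assumed.
with open("sample.webm", "rb") as audio_file:
    audio_base64 = base64.b64encode(audio_file.read()).decode("ascii")

payload = {
    "mime_type": "audio/webm",
    "audio_base64": audio_base64,
    "model": None,       # backend falls back to DEFAULT_WHISPER_MODEL
    "language": "de",    # new optional field; omit or send null for auto-detection
}

response = requests.post("http://127.0.0.1:8000/transcribe-audio", json=payload)
response.raise_for_status()
print(response.json())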

backend/whisper_admin.py

@@ -241,6 +241,7 @@ def transcribe_audio_bytes(
     audio_bytes: bytes,
     mime_type: str,
     model_name: str = DEFAULT_WHISPER_MODEL,
+    language: Optional[str] = None,
 ) -> Dict[str, Any]:
     if not audio_bytes:
         raise RuntimeError("Recorded audio was empty.")
@@ -256,7 +257,14 @@ def transcribe_audio_bytes(
         _convert_audio_to_wav(input_path, wav_path)
         source_path = wav_path
-    result = model.transcribe(str(source_path), task="transcribe", fp16=device == "cuda")
+    transcription_options = {
+        "task": "transcribe",
+        "fp16": device == "cuda",
+    }
+    if language:
+        transcription_options["language"] = str(language).strip().lower()
+    result = model.transcribe(str(source_path), **transcription_options)
     return {
         "model": model_name,
         "device": device,

File diff suppressed because one or more lines are too long (the minified dist/assets bundles)

dist/index.html (vendored)

@@ -5,7 +5,7 @@
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>LLM Desktop</title>
-    <script type="module" crossorigin src="/assets/index-Dm7DZNSo.js"></script>
+    <script type="module" crossorigin src="/assets/index-DifKT69O.js"></script>
     <link rel="stylesheet" crossorigin href="/assets/index-BFOWWhCS.css">
   </head>
   <body>

electron/main.cjs

@@ -28,6 +28,7 @@ const MAX_UI_SCALE = 1.3
 const DEFAULT_OPEN_DEVTOOLS_ON_STARTUP = false
 const DEFAULT_AUDIO_INPUT_ENABLED = true
 const DEFAULT_AUDIO_INPUT_DEVICE_ID = ''
+const DEFAULT_AUDIO_INPUT_LANGUAGE = ''

 const defaultSettings = {
   backendApiUrl: DEFAULT_BACKEND_API_URL,
@@ -38,6 +39,7 @@ const defaultSettings = {
   openDevToolsOnStartup: DEFAULT_OPEN_DEVTOOLS_ON_STARTUP,
   audioInputEnabled: DEFAULT_AUDIO_INPUT_ENABLED,
   audioInputDeviceId: DEFAULT_AUDIO_INPUT_DEVICE_ID,
+  audioInputLanguage: DEFAULT_AUDIO_INPUT_LANGUAGE,
   chatModel: 'llama3',
 }
@@ -92,6 +94,7 @@ function migrateSettings(rawSettings) {
   nextSettings.openDevToolsOnStartup = normalizeOpenDevToolsOnStartup(nextSettings.openDevToolsOnStartup)
   nextSettings.audioInputEnabled = normalizeBooleanSetting(nextSettings.audioInputEnabled)
   nextSettings.audioInputDeviceId = String(nextSettings.audioInputDeviceId || '').trim()
+  nextSettings.audioInputLanguage = String(nextSettings.audioInputLanguage || '').trim().toLowerCase()

   return { nextSettings, migrated }
 }
@@ -553,6 +556,8 @@ ipcMain.handle('set-setting', (event, key, value) => {
     appSettings[key] = normalizeBooleanSetting(value)
   } else if (key === 'audioInputDeviceId') {
     appSettings[key] = String(value || '').trim()
+  } else if (key === 'audioInputLanguage') {
+    appSettings[key] = String(value || '').trim().toLowerCase()
   } else {
     appSettings[key] = value
   }
@@ -572,6 +577,7 @@ ipcMain.handle('update-settings', (event, settings) => {
   appSettings.openDevToolsOnStartup = normalizeOpenDevToolsOnStartup(appSettings.openDevToolsOnStartup)
   appSettings.audioInputEnabled = normalizeBooleanSetting(appSettings.audioInputEnabled)
   appSettings.audioInputDeviceId = String(appSettings.audioInputDeviceId || '').trim()
+  appSettings.audioInputLanguage = String(appSettings.audioInputLanguage || '').trim().toLowerCase()
   saveSettings()
   if (Object.prototype.hasOwnProperty.call(settings, 'uiScale')) {
     applyUiScaleToAllWindows()

src/App.jsx

@@ -295,6 +295,7 @@ export default function App() {
   const imageDragDepthRef = useRef(0)
   const [audioInputEnabled, setAudioInputEnabled] = useState(false)
   const [audioInputDeviceId, setAudioInputDeviceId] = useState('')
+  const [audioInputLanguage, setAudioInputLanguage] = useState('')
   const [isRecordingAudio, setIsRecordingAudio] = useState(false)
   const [isTranscribingAudio, setIsTranscribingAudio] = useState(false)
   const [audioRecordingMs, setAudioRecordingMs] = useState(0)
@@ -554,6 +555,7 @@ export default function App() {
         body: JSON.stringify({
           mime_type: mimeType || detectedMimeType || 'audio/webm',
           audio_base64: payload,
+          language: audioInputLanguage || null,
         }),
       })
       const data = await expectBackendJson(response)
@@ -1144,6 +1146,7 @@ async function regenerateFromIndex(index, overrideUserText = null) {
       setStreamOutput(settings.streamOutput || false);
       setAudioInputEnabled(settings.audioInputEnabled === true);
       setAudioInputDeviceId(typeof settings.audioInputDeviceId === 'string' ? settings.audioInputDeviceId : '');
+      setAudioInputLanguage(typeof settings.audioInputLanguage === 'string' ? settings.audioInputLanguage : '');
       setScrollPositions(settings.scrollPositions || {}); // Load scroll positions
       applyColorScheme(settings.colorScheme || 'Default'); // Apply initial scheme
     }).finally(() => {
@@ -2788,6 +2791,7 @@ async function createNewChat() {
           onStreamOutputChange={setStreamOutput}
           onAudioInputEnabledChange={setAudioInputEnabled}
           onAudioInputDeviceChange={setAudioInputDeviceId}
+          onAudioInputLanguageChange={setAudioInputLanguage}
           onLibrariesPurged={handleLibrariesPurged}
         />
       )}

src/GeneralSettings.jsx

@@ -2,6 +2,8 @@ import React, { useEffect, useState } from 'react';
 import {
   AUDIO_INPUT_DEVICE_ID_KEY,
   AUDIO_INPUT_ENABLED_KEY,
+  AUDIO_INPUT_LANGUAGE_KEY,
+  AUDIO_INPUT_LANGUAGE_OPTIONS,
   ensureAudioInputPermission,
   listAudioInputDevices,
   supportsAudioInputCapture,
@@ -13,6 +15,7 @@ const EMBED_MODEL_KEY = 'embedModel';
 const MODEL_KEY = 'chatModel';
 const STREAM_KEY = 'streamOutput';
 const DEFAULT_AUDIO_INPUT_DEVICE_ID = '';
+const DEFAULT_AUDIO_INPUT_LANGUAGE = '';
 const DEFAULT_BACKEND_API_URL = 'http://127.0.0.1:8000';
 const DEFAULT_OLLAMA_API_URL = 'http://127.0.0.1:11434';
 const DEFAULT_EMBED_MODEL = 'nomic-embed-text:latest';
@@ -47,6 +50,7 @@ export default function GeneralSettings({
   onBackendApiUrlChange,
   onAudioInputEnabledChange,
   onAudioInputDeviceChange,
+  onAudioInputLanguageChange,
 }) {
   const [backendApiUrl, setBackendApiUrl] = useState('');
   const [ollamaApiUrl, setOllamaApiUrl] = useState('');
@@ -56,6 +60,7 @@ export default function GeneralSettings({
   const [streamOutput, setStreamOutput] = useState(false);
   const [audioInputEnabled, setAudioInputEnabled] = useState(false);
   const [audioInputDeviceId, setAudioInputDeviceId] = useState(DEFAULT_AUDIO_INPUT_DEVICE_ID);
+  const [audioInputLanguage, setAudioInputLanguage] = useState(DEFAULT_AUDIO_INPUT_LANGUAGE);
   const [audioInputDevices, setAudioInputDevices] = useState([]);
   const [isRefreshingAudioDevices, setIsRefreshingAudioDevices] = useState(false);
   const [audioInputStatus, setAudioInputStatus] = useState({ tone: 'neutral', message: '' });
@@ -87,6 +92,11 @@ export default function GeneralSettings({
           ? settings.audioInputDeviceId
           : DEFAULT_AUDIO_INPUT_DEVICE_ID
       );
+      setAudioInputLanguage(
+        typeof settings.audioInputLanguage === 'string'
+          ? settings.audioInputLanguage
+          : DEFAULT_AUDIO_INPUT_LANGUAGE
+      );
       setUpdateStatus(status || DEFAULT_UPDATE_STATUS);
     });
@@ -268,6 +278,15 @@ export default function GeneralSettings({
     }
   };

+  const handleAudioInputLanguageChange = (event) => {
+    const nextLanguage = event.target.value;
+    setAudioInputLanguage(nextLanguage);
+    window.electronAPI.setSetting(AUDIO_INPUT_LANGUAGE_KEY, nextLanguage);
+    if (onAudioInputLanguageChange) {
+      onAudioInputLanguageChange(nextLanguage);
+    }
+  };
+
   const handleCheckForUpdates = async () => {
     setIsCheckingForUpdates(true);
     try {
@@ -425,6 +444,18 @@ export default function GeneralSettings({
               </option>
             ))}
           </select>
+          <select
+            className="select"
+            value={audioInputLanguage}
+            onChange={handleAudioInputLanguageChange}
+            disabled={!audioInputSupported}
+          >
+            {AUDIO_INPUT_LANGUAGE_OPTIONS.map(language => (
+              <option key={language.value || 'auto'} value={language.value}>
+                {language.label}
+              </option>
+            ))}
+          </select>
           <button
             type="button"
             className="button"
@@ -437,6 +468,9 @@ export default function GeneralSettings({
           {audioInputStatus.message && (
             <p className={`setting-status ${audioInputStatus.tone}`}>{audioInputStatus.message}</p>
           )}
+          <p className="setting-description">
+            Whisper can auto-detect the spoken language, but you can force a fixed input language here when auto-detection drifts.
+          </p>
         </>
       )}
     </div>

src/audioInput.js

@@ -1,5 +1,22 @@
 export const AUDIO_INPUT_ENABLED_KEY = 'audioInputEnabled'
 export const AUDIO_INPUT_DEVICE_ID_KEY = 'audioInputDeviceId'
+export const AUDIO_INPUT_LANGUAGE_KEY = 'audioInputLanguage'
+export const AUDIO_INPUT_LANGUAGE_OPTIONS = [
+  { value: '', label: 'Auto' },
+  { value: 'de', label: 'German' },
+  { value: 'en', label: 'English' },
+  { value: 'fr', label: 'French' },
+  { value: 'es', label: 'Spanish' },
+  { value: 'it', label: 'Italian' },
+  { value: 'pt', label: 'Portuguese' },
+  { value: 'nl', label: 'Dutch' },
+  { value: 'pl', label: 'Polish' },
+  { value: 'tr', label: 'Turkish' },
+  { value: 'ru', label: 'Russian' },
+  { value: 'ja', label: 'Japanese' },
+  { value: 'zh', label: 'Chinese' },
+]

 const AUDIO_RECORDER_MIME_CANDIDATES = [
   'audio/webm;codecs=opus',