auto-git:
[add] dist/assets/index-DifKT69O.js [change] backend/main.py [change] backend/schemas.py [change] backend/whisper_admin.py [change] dist/index.html [change] electron/main.cjs [change] src/App.jsx [change] src/GeneralSettings.jsx [change] src/audioInput.js [unlink] dist/assets/index-Dm7DZNSo.js
This commit is contained in:
@@ -159,6 +159,7 @@ async def transcribe_audio_route(req: schemas.AudioTranscriptionRequest):
|
||||
audio_bytes,
|
||||
mime_type,
|
||||
req.model or DEFAULT_WHISPER_MODEL,
|
||||
req.language,
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
|
||||
@@ -84,6 +84,7 @@ class AudioTranscriptionRequest(BaseModel):
|
||||
mime_type: str
|
||||
audio_base64: str
|
||||
model: Optional[str] = None
|
||||
language: Optional[str] = None
|
||||
|
||||
|
||||
class AudioTranscriptionResponse(BaseModel):
|
||||
|
||||
@@ -241,6 +241,7 @@ def transcribe_audio_bytes(
|
||||
audio_bytes: bytes,
|
||||
mime_type: str,
|
||||
model_name: str = DEFAULT_WHISPER_MODEL,
|
||||
language: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
if not audio_bytes:
|
||||
raise RuntimeError("Recorded audio was empty.")
|
||||
@@ -256,7 +257,14 @@ def transcribe_audio_bytes(
|
||||
_convert_audio_to_wav(input_path, wav_path)
|
||||
source_path = wav_path
|
||||
|
||||
result = model.transcribe(str(source_path), task="transcribe", fp16=device == "cuda")
|
||||
transcription_options = {
|
||||
"task": "transcribe",
|
||||
"fp16": device == "cuda",
|
||||
}
|
||||
if language:
|
||||
transcription_options["language"] = str(language).strip().lower()
|
||||
|
||||
result = model.transcribe(str(source_path), **transcription_options)
|
||||
return {
|
||||
"model": model_name,
|
||||
"device": device,
|
||||
|
||||
File diff suppressed because one or more lines are too long
2
dist/index.html
vendored
2
dist/index.html
vendored
@@ -5,7 +5,7 @@
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>LLM Desktop</title>
|
||||
<script type="module" crossorigin src="/assets/index-Dm7DZNSo.js"></script>
|
||||
<script type="module" crossorigin src="/assets/index-DifKT69O.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-BFOWWhCS.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
@@ -28,6 +28,7 @@ const MAX_UI_SCALE = 1.3
|
||||
const DEFAULT_OPEN_DEVTOOLS_ON_STARTUP = false
|
||||
const DEFAULT_AUDIO_INPUT_ENABLED = true
|
||||
const DEFAULT_AUDIO_INPUT_DEVICE_ID = ''
|
||||
const DEFAULT_AUDIO_INPUT_LANGUAGE = ''
|
||||
|
||||
const defaultSettings = {
|
||||
backendApiUrl: DEFAULT_BACKEND_API_URL,
|
||||
@@ -38,6 +39,7 @@ const defaultSettings = {
|
||||
openDevToolsOnStartup: DEFAULT_OPEN_DEVTOOLS_ON_STARTUP,
|
||||
audioInputEnabled: DEFAULT_AUDIO_INPUT_ENABLED,
|
||||
audioInputDeviceId: DEFAULT_AUDIO_INPUT_DEVICE_ID,
|
||||
audioInputLanguage: DEFAULT_AUDIO_INPUT_LANGUAGE,
|
||||
chatModel: 'llama3',
|
||||
}
|
||||
|
||||
@@ -92,6 +94,7 @@ function migrateSettings(rawSettings) {
|
||||
nextSettings.openDevToolsOnStartup = normalizeOpenDevToolsOnStartup(nextSettings.openDevToolsOnStartup)
|
||||
nextSettings.audioInputEnabled = normalizeBooleanSetting(nextSettings.audioInputEnabled)
|
||||
nextSettings.audioInputDeviceId = String(nextSettings.audioInputDeviceId || '').trim()
|
||||
nextSettings.audioInputLanguage = String(nextSettings.audioInputLanguage || '').trim().toLowerCase()
|
||||
|
||||
return { nextSettings, migrated }
|
||||
}
|
||||
@@ -553,6 +556,8 @@ ipcMain.handle('set-setting', (event, key, value) => {
|
||||
appSettings[key] = normalizeBooleanSetting(value)
|
||||
} else if (key === 'audioInputDeviceId') {
|
||||
appSettings[key] = String(value || '').trim()
|
||||
} else if (key === 'audioInputLanguage') {
|
||||
appSettings[key] = String(value || '').trim().toLowerCase()
|
||||
} else {
|
||||
appSettings[key] = value
|
||||
}
|
||||
@@ -572,6 +577,7 @@ ipcMain.handle('update-settings', (event, settings) => {
|
||||
appSettings.openDevToolsOnStartup = normalizeOpenDevToolsOnStartup(appSettings.openDevToolsOnStartup)
|
||||
appSettings.audioInputEnabled = normalizeBooleanSetting(appSettings.audioInputEnabled)
|
||||
appSettings.audioInputDeviceId = String(appSettings.audioInputDeviceId || '').trim()
|
||||
appSettings.audioInputLanguage = String(appSettings.audioInputLanguage || '').trim().toLowerCase()
|
||||
saveSettings()
|
||||
if (Object.prototype.hasOwnProperty.call(settings, 'uiScale')) {
|
||||
applyUiScaleToAllWindows()
|
||||
|
||||
@@ -295,6 +295,7 @@ export default function App() {
|
||||
const imageDragDepthRef = useRef(0)
|
||||
const [audioInputEnabled, setAudioInputEnabled] = useState(false)
|
||||
const [audioInputDeviceId, setAudioInputDeviceId] = useState('')
|
||||
const [audioInputLanguage, setAudioInputLanguage] = useState('')
|
||||
const [isRecordingAudio, setIsRecordingAudio] = useState(false)
|
||||
const [isTranscribingAudio, setIsTranscribingAudio] = useState(false)
|
||||
const [audioRecordingMs, setAudioRecordingMs] = useState(0)
|
||||
@@ -554,6 +555,7 @@ export default function App() {
|
||||
body: JSON.stringify({
|
||||
mime_type: mimeType || detectedMimeType || 'audio/webm',
|
||||
audio_base64: payload,
|
||||
language: audioInputLanguage || null,
|
||||
}),
|
||||
})
|
||||
const data = await expectBackendJson(response)
|
||||
@@ -1144,6 +1146,7 @@ async function regenerateFromIndex(index, overrideUserText = null) {
|
||||
setStreamOutput(settings.streamOutput || false);
|
||||
setAudioInputEnabled(settings.audioInputEnabled === true);
|
||||
setAudioInputDeviceId(typeof settings.audioInputDeviceId === 'string' ? settings.audioInputDeviceId : '');
|
||||
setAudioInputLanguage(typeof settings.audioInputLanguage === 'string' ? settings.audioInputLanguage : '');
|
||||
setScrollPositions(settings.scrollPositions || {}); // Load scroll positions
|
||||
applyColorScheme(settings.colorScheme || 'Default'); // Apply initial scheme
|
||||
}).finally(() => {
|
||||
@@ -2788,6 +2791,7 @@ async function createNewChat() {
|
||||
onStreamOutputChange={setStreamOutput}
|
||||
onAudioInputEnabledChange={setAudioInputEnabled}
|
||||
onAudioInputDeviceChange={setAudioInputDeviceId}
|
||||
onAudioInputLanguageChange={setAudioInputLanguage}
|
||||
onLibrariesPurged={handleLibrariesPurged}
|
||||
/>
|
||||
)}
|
||||
|
||||
@@ -2,6 +2,8 @@ import React, { useEffect, useState } from 'react';
|
||||
import {
|
||||
AUDIO_INPUT_DEVICE_ID_KEY,
|
||||
AUDIO_INPUT_ENABLED_KEY,
|
||||
AUDIO_INPUT_LANGUAGE_KEY,
|
||||
AUDIO_INPUT_LANGUAGE_OPTIONS,
|
||||
ensureAudioInputPermission,
|
||||
listAudioInputDevices,
|
||||
supportsAudioInputCapture,
|
||||
@@ -13,6 +15,7 @@ const EMBED_MODEL_KEY = 'embedModel';
|
||||
const MODEL_KEY = 'chatModel';
|
||||
const STREAM_KEY = 'streamOutput';
|
||||
const DEFAULT_AUDIO_INPUT_DEVICE_ID = '';
|
||||
const DEFAULT_AUDIO_INPUT_LANGUAGE = '';
|
||||
const DEFAULT_BACKEND_API_URL = 'http://127.0.0.1:8000';
|
||||
const DEFAULT_OLLAMA_API_URL = 'http://127.0.0.1:11434';
|
||||
const DEFAULT_EMBED_MODEL = 'nomic-embed-text:latest';
|
||||
@@ -47,6 +50,7 @@ export default function GeneralSettings({
|
||||
onBackendApiUrlChange,
|
||||
onAudioInputEnabledChange,
|
||||
onAudioInputDeviceChange,
|
||||
onAudioInputLanguageChange,
|
||||
}) {
|
||||
const [backendApiUrl, setBackendApiUrl] = useState('');
|
||||
const [ollamaApiUrl, setOllamaApiUrl] = useState('');
|
||||
@@ -56,6 +60,7 @@ export default function GeneralSettings({
|
||||
const [streamOutput, setStreamOutput] = useState(false);
|
||||
const [audioInputEnabled, setAudioInputEnabled] = useState(false);
|
||||
const [audioInputDeviceId, setAudioInputDeviceId] = useState(DEFAULT_AUDIO_INPUT_DEVICE_ID);
|
||||
const [audioInputLanguage, setAudioInputLanguage] = useState(DEFAULT_AUDIO_INPUT_LANGUAGE);
|
||||
const [audioInputDevices, setAudioInputDevices] = useState([]);
|
||||
const [isRefreshingAudioDevices, setIsRefreshingAudioDevices] = useState(false);
|
||||
const [audioInputStatus, setAudioInputStatus] = useState({ tone: 'neutral', message: '' });
|
||||
@@ -87,6 +92,11 @@ export default function GeneralSettings({
|
||||
? settings.audioInputDeviceId
|
||||
: DEFAULT_AUDIO_INPUT_DEVICE_ID
|
||||
);
|
||||
setAudioInputLanguage(
|
||||
typeof settings.audioInputLanguage === 'string'
|
||||
? settings.audioInputLanguage
|
||||
: DEFAULT_AUDIO_INPUT_LANGUAGE
|
||||
);
|
||||
setUpdateStatus(status || DEFAULT_UPDATE_STATUS);
|
||||
});
|
||||
|
||||
@@ -268,6 +278,15 @@ export default function GeneralSettings({
|
||||
}
|
||||
};
|
||||
|
||||
const handleAudioInputLanguageChange = (event) => {
|
||||
const nextLanguage = event.target.value;
|
||||
setAudioInputLanguage(nextLanguage);
|
||||
window.electronAPI.setSetting(AUDIO_INPUT_LANGUAGE_KEY, nextLanguage);
|
||||
if (onAudioInputLanguageChange) {
|
||||
onAudioInputLanguageChange(nextLanguage);
|
||||
}
|
||||
};
|
||||
|
||||
const handleCheckForUpdates = async () => {
|
||||
setIsCheckingForUpdates(true);
|
||||
try {
|
||||
@@ -425,6 +444,18 @@ export default function GeneralSettings({
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
<select
|
||||
className="select"
|
||||
value={audioInputLanguage}
|
||||
onChange={handleAudioInputLanguageChange}
|
||||
disabled={!audioInputSupported}
|
||||
>
|
||||
{AUDIO_INPUT_LANGUAGE_OPTIONS.map(language => (
|
||||
<option key={language.value || 'auto'} value={language.value}>
|
||||
{language.label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
<button
|
||||
type="button"
|
||||
className="button"
|
||||
@@ -437,6 +468,9 @@ export default function GeneralSettings({
|
||||
{audioInputStatus.message && (
|
||||
<p className={`setting-status ${audioInputStatus.tone}`}>{audioInputStatus.message}</p>
|
||||
)}
|
||||
<p className="setting-description">
|
||||
Whisper can auto-detect the spoken language, but you can force a fixed input language here when auto-detection drifts.
|
||||
</p>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -1,5 +1,22 @@
|
||||
export const AUDIO_INPUT_ENABLED_KEY = 'audioInputEnabled'
|
||||
export const AUDIO_INPUT_DEVICE_ID_KEY = 'audioInputDeviceId'
|
||||
export const AUDIO_INPUT_LANGUAGE_KEY = 'audioInputLanguage'
|
||||
|
||||
export const AUDIO_INPUT_LANGUAGE_OPTIONS = [
|
||||
{ value: '', label: 'Auto' },
|
||||
{ value: 'de', label: 'German' },
|
||||
{ value: 'en', label: 'English' },
|
||||
{ value: 'fr', label: 'French' },
|
||||
{ value: 'es', label: 'Spanish' },
|
||||
{ value: 'it', label: 'Italian' },
|
||||
{ value: 'pt', label: 'Portuguese' },
|
||||
{ value: 'nl', label: 'Dutch' },
|
||||
{ value: 'pl', label: 'Polish' },
|
||||
{ value: 'tr', label: 'Turkish' },
|
||||
{ value: 'ru', label: 'Russian' },
|
||||
{ value: 'ja', label: 'Japanese' },
|
||||
{ value: 'zh', label: 'Chinese' },
|
||||
]
|
||||
|
||||
const AUDIO_RECORDER_MIME_CANDIDATES = [
|
||||
'audio/webm;codecs=opus',
|
||||
|
||||
Reference in New Issue
Block a user