fitness: TTS volume control, audio ducking, and workout start/finish announcements

Add TTS volume slider (default 80%) and audio duck toggle to voice guidance settings. Announce "Workout started" when TTS initializes and speak a full workout summary (time, distance, avg pace) on finish. The finish summary reuses the existing TTS instance via handoff so it plays fully without blocking the completion screen.
2026-03-30 08:48:46 +02:00
parent 660fec44c2
commit 5bed3f3781
4 changed files with 268 additions and 49 deletions
--- a/src-tauri/gen/android/app/src/main/java/org/bocken/app/AndroidBridge.kt
+++ b/src-tauri/gen/android/app/src/main/java/org/bocken/app/AndroidBridge.kt
@@ -16,8 +16,6 @@ import java.util.Locale
 class AndroidBridge(private val context: Context) {
    private var ttsForVoices: TextToSpeech? = null
    @JavascriptInterface
    fun startLocationService(ttsConfigJson: String, startPaused: Boolean) {
        if (context is Activity) {
@@ -122,8 +120,6 @@ class AndroidBridge(private val context: Context) {
    /**
     * Returns available TTS voices as a JSON array.
     * Each entry: { "id": "...", "name": "...", "language": "en-US" }
     * This initializes a temporary TTS engine; the result is returned asynchronously
     * via a callback, but since @JavascriptInterface is synchronous we block briefly.
     */
    @JavascriptInterface
    fun getAvailableTtsVoices(): String {
--- a/src-tauri/gen/android/app/src/main/java/org/bocken/app/LocationForegroundService.kt
+++ b/src-tauri/gen/android/app/src/main/java/org/bocken/app/LocationForegroundService.kt
@@ -9,11 +9,16 @@ import android.content.Context
 import android.content.Intent
 import android.location.LocationListener
 import android.location.LocationManager
 import android.media.AudioAttributes
 import android.media.AudioFocusRequest
 import android.media.AudioManager
 import android.os.Build
 import android.os.Bundle
 import android.os.Handler
 import android.os.IBinder
 import android.os.Looper
 import android.speech.tts.TextToSpeech
 import android.speech.tts.UtteranceProgressListener
 import android.util.Log
 import org.json.JSONArray
 import org.json.JSONObject
@@ -55,6 +60,11 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
    private var intervalStartTimeMs: Long = 0L
    private var intervalsComplete: Boolean = false
    // Audio focus / ducking
    private var audioManager: AudioManager? = null
    private var audioFocusRequest: AudioFocusRequest? = null
    private var hasAudioFocus = false
    data class IntervalStep(
        val label: String,
        val durationType: String, // "distance" or "time"
@@ -68,6 +78,8 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
        val metrics: List<String> = listOf("totalTime", "totalDistance", "avgPace"),
        val language: String = "en",
        val voiceId: String? = null,
        val ttsVolume: Float = 0.8f,           // 0.0–1.0 relative TTS volume
        val audioDuck: Boolean = false,        // duck other audio during TTS
        val intervals: List<IntervalStep> = emptyList()
    ) {
        companion object {
@@ -100,6 +112,8 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
                        metrics = metrics,
                        language = obj.optString("language", "en"),
                        voiceId = obj.optString("voiceId", null),
                        ttsVolume = obj.optDouble("ttsVolume", 0.8).toFloat().coerceIn(0f, 1f),
                        audioDuck = obj.optBoolean("audioDuck", false),
                        intervals = intervals
                    )
                } catch (_: Exception) {
@@ -246,13 +260,11 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
            splitDistanceAtLastAnnouncement = 0.0
            splitTimeAtLastAnnouncement = startTimeMs
            // Log available TTS engines
            val dummyTts = TextToSpeech(applicationContext, null)
            val engines = dummyTts.engines
            Log.d(TAG, "Available TTS engines: ${engines.map { "${it.label} (${it.name})" }}")
            dummyTts.shutdown()
            // Try with explicit engine if available
            if (engines.isNotEmpty()) {
                val engineName = engines[0].name
                Log.d(TAG, "Trying TTS with explicit engine: $engineName")
@@ -268,6 +280,45 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
    // --- TTS ---
    /**
     * Runs once the TTS engine is usable, whether it was pre-warmed or has just
     * reported success through onInit.
     *
     * Applies the configured voice (when one is set and the engine offers it),
     * speaks the "Workout started" cue, queues the first interval step behind it
     * when intervals are configured and not yet complete, and starts the periodic
     * trigger when announcements are time-based.
     */
    private fun onTtsReady() {
        val cfg = ttsConfig ?: return
        Log.d(TAG, "TTS ready! triggerType=${cfg.triggerType}, triggerValue=${cfg.triggerValue}")

        // Switch to the requested voice, but only if the engine lists it.
        val requestedVoice = cfg.voiceId
        if (!requestedVoice.isNullOrEmpty()) {
            val match = tts?.voices?.find { it.name == requestedVoice }
            if (match != null) {
                tts?.voice = match
            }
        }

        // Opening cue; uses the default flush mode, so it replaces anything already queued.
        speakWithConfig("Workout started", "workout_started")

        // First interval step is appended (flush = false) so it plays after the opening cue.
        val firstStep = intervalSteps.firstOrNull()
        if (firstStep != null && !intervalsComplete) {
            // Meters for distance steps, seconds for every other duration type.
            val amount = firstStep.durationValue.toInt()
            val durationText = when {
                firstStep.durationType == "distance" -> "$amount meters"
                amount < 60 -> "$amount seconds"
                amount % 60 > 0 -> "${amount / 60} minutes ${amount % 60} seconds"
                else -> "${amount / 60} minutes"
            }
            speakWithConfig("${firstStep.label}. $durationText", "interval_announcement", flush = false)
        }

        // Time-based announcements need their periodic trigger started here.
        if (cfg.triggerType == "time") {
            startTimeTrigger(cfg.triggerValue)
        }
    }
    override fun onInit(status: Int) {
        Log.d(TAG, "TTS onInit status=$status (SUCCESS=${TextToSpeech.SUCCESS})")
        if (status == TextToSpeech.SUCCESS) {
@@ -275,44 +326,79 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
            val locale = Locale.forLanguageTag(config.language)
            val langResult = tts?.setLanguage(locale)
            Log.d(TAG, "TTS setLanguage($locale) result=$langResult")
            // Set specific voice if requested
            if (!config.voiceId.isNullOrEmpty()) {
                tts?.voices?.find { it.name == config.voiceId }?.let { voice ->
                    tts?.voice = voice
                }
            }
            ttsReady = true
-            Log.d(TAG, "TTS ready! triggerType=${config.triggerType}, triggerValue=${config.triggerValue}")
+            onTtsReady()
            // Announce first interval step if intervals are configured
            if (intervalSteps.isNotEmpty() && !intervalsComplete) {
                val first = intervalSteps[0]
                val durationText = if (first.durationType == "distance") {
                    "${first.durationValue.toInt()} meters"
                } else {
                    val secs = first.durationValue.toInt()
                    if (secs >= 60) {
                        val m = secs / 60
                        val s = secs % 60
                        if (s > 0) "$m minutes $s seconds" else "$m minutes"
                    } else {
                        "$secs seconds"
                    }
                }
                announceIntervalTransition("${first.label}. $durationText")
            }
            // Set up time-based trigger if configured
            if (config.triggerType == "time") {
                startTimeTrigger(config.triggerValue)
            }
        } else {
            Log.e(TAG, "TTS init FAILED with status=$status")
        }
    }
    /**
     * Single focus-change listener shared by the request and abandon paths.
     *
     * The pre-O AudioManager API identifies the focus owner by its listener, so
     * abandoning must pass the same instance that was used for the request;
     * passing a fresh lambda would leave the original request in place.
     */
    private val duckFocusListener = AudioManager.OnAudioFocusChangeListener { change ->
        // A permanent loss means we no longer hold focus; forget the claim so the
        // next announcement asks for it again instead of assuming it is still held.
        if (change == AudioManager.AUDIOFOCUS_LOSS) {
            hasAudioFocus = false
        }
    }

    /**
     * Request transient "may duck" audio focus so other audio is lowered while
     * TTS speaks.
     *
     * Does nothing when no TTS config is present, when ducking is disabled in the
     * config, or when focus is already held. Records the outcome in [hasAudioFocus].
     */
    private fun requestAudioFocus() {
        val config = ttsConfig ?: return
        if (!config.audioDuck) return
        if (hasAudioFocus) return

        // Resolve the AudioManager once and cache it for later request/abandon calls.
        val manager = audioManager
            ?: (getSystemService(Context.AUDIO_SERVICE) as AudioManager).also { audioManager = it }

        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            // Build the request once and reuse it, so abandon always refers to the
            // same request object that was granted.
            val focusReq = audioFocusRequest
                ?: AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
                    .setAudioAttributes(
                        AudioAttributes.Builder()
                            .setUsage(AudioAttributes.USAGE_ASSISTANCE_NAVIGATION_GUIDANCE)
                            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                            .build()
                    )
                    .setOnAudioFocusChangeListener(duckFocusListener)
                    .build()
                    .also { audioFocusRequest = it }
            val result = manager.requestAudioFocus(focusReq)
            hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
        } else {
            @Suppress("DEPRECATION")
            val result = manager.requestAudioFocus(
                duckFocusListener,
                AudioManager.STREAM_MUSIC,
                AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK
            )
            hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
        }
        Log.d(TAG, "Audio focus request (duck): granted=$hasAudioFocus")
    }

    /**
     * Release audio focus obtained by [requestAudioFocus] so other audio returns
     * to full volume. No-op when focus is not currently held.
     */
    private fun abandonAudioFocus() {
        if (!hasAudioFocus) return
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            audioFocusRequest?.let { audioManager?.abandonAudioFocusRequest(it) }
        } else {
            // Must be the same listener instance that was passed when requesting.
            @Suppress("DEPRECATION")
            audioManager?.abandonAudioFocus(duckFocusListener)
        }
        hasAudioFocus = false
    }
    /** Guards [pendingSpeechCount] together with the focus request/abandon it gates. */
    private val speechFocusLock = Any()

    /** Utterances handed to the engine that have not yet reported done, error or stop. */
    private var pendingSpeechCount = 0

    /**
     * Speak [text] at the configured TTS volume, holding audio focus (when
     * ducking is enabled) until every outstanding utterance has finished.
     *
     * Focus is released only once the last pending utterance reports back, so an
     * utterance appended with `flush = false` stays ducked after the one in front
     * of it completes.
     *
     * @param text        the sentence to speak
     * @param utteranceId identifier passed through to the engine and its callbacks
     * @param flush       true replaces the current queue (QUEUE_FLUSH); false
     *                    appends behind whatever is already queued (QUEUE_ADD)
     */
    private fun speakWithConfig(text: String, utteranceId: String, flush: Boolean = true) {
        if (!ttsReady) return
        val config = ttsConfig ?: return
        // Bail out before touching audio focus: without an engine nothing would
        // ever report completion and release it again.
        val engine = tts ?: return
        val queueMode = if (flush) TextToSpeech.QUEUE_FLUSH else TextToSpeech.QUEUE_ADD
        val params = Bundle().apply {
            putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, config.ttsVolume)
        }

        // Each utterance ends in exactly one of done / error / stop; all three
        // count it as finished. onStop covers utterances cut short or dropped by
        // a later QUEUE_FLUSH.
        engine.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
            override fun onStart(id: String?) {}
            override fun onDone(id: String?) { onSpeechItemFinished() }
            @Deprecated("Deprecated in Java")
            override fun onError(id: String?) { onSpeechItemFinished() }
            override fun onStop(id: String?, interrupted: Boolean) { onSpeechItemFinished() }
        })

        // Count the utterance before requesting focus, under the same lock the
        // callbacks use, so a completion arriving on another thread cannot drop
        // focus between the two steps.
        synchronized(speechFocusLock) {
            pendingSpeechCount++
            requestAudioFocus()
        }

        val result = engine.speak(text, queueMode, params, utteranceId)
        if (result != TextToSpeech.SUCCESS) {
            // The utterance was not accepted; settle the count here so focus
            // does not stay held waiting for a callback.
            onSpeechItemFinished()
        }
        Log.d(TAG, "speakWithConfig($utteranceId) result=$result vol=${config.ttsVolume} duck=${config.audioDuck}")
    }

    /** Marks one utterance as finished and releases audio focus when none remain. */
    private fun onSpeechItemFinished() {
        synchronized(speechFocusLock) {
            // Floor at zero so a duplicate report can never push the count negative.
            if (pendingSpeechCount > 0) pendingSpeechCount--
            if (pendingSpeechCount == 0) abandonAudioFocus()
        }
    }
    private fun startTimeTrigger(intervalMinutes: Double) {
        val intervalMs = (intervalMinutes * 60 * 1000).toLong()
        Log.d(TAG, "Starting time trigger: every ${intervalMs}ms (${intervalMinutes} min)")
@@ -428,7 +514,7 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
    private fun announceIntervalTransition(text: String) {
        if (!ttsReady) return
        Log.d(TAG, "Interval announcement: $text")
-        tts?.speak(text, TextToSpeech.QUEUE_FLUSH, null, "interval_announcement")
+        speakWithConfig(text, "interval_announcement")
    }
    private fun announceMetrics() {
@@ -493,8 +579,7 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
        if (parts.isNotEmpty()) {
            val text = parts.joinToString(". ")
            Log.d(TAG, "Announcing: $text")
-            val result = tts?.speak(text, TextToSpeech.QUEUE_FLUSH, null, "workout_announcement")
+            speakWithConfig(text, "workout_announcement")
            Log.d(TAG, "TTS speak() result=$result (SUCCESS=${TextToSpeech.SUCCESS})")
        } else {
            Log.d(TAG, "announceMetrics: no parts to announce")
        }
@@ -623,22 +708,105 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
        )
    }
    /**
     * Build the spoken finish summary from the current workout stats.
     *
     * Must be called while service state is still valid (before clearing fields).
     *
     * The summary always contains "Workout finished" and the total active time.
     * Distance and average pace are added only when more than 0.01 km was covered,
     * which also keeps the pace division away from a zero distance.
     *
     * @return the summary sentence(s) joined with ". ", or null when there is no
     *         TTS config or voice guidance is disabled
     */
    private fun buildFinishSummaryText(): String? {
        val config = ttsConfig ?: return null
        if (!config.enabled) return null

        val activeSecs = activeElapsedSecs()
        val h = activeSecs / 3600
        val m = (activeSecs % 3600) / 60
        val s = activeSecs % 60

        // Format numbers for the language the engine speaks rather than the device
        // default, so the decimal separator matches what the voice expects.
        val speechLocale = Locale.forLanguageTag(config.language)

        val parts = mutableListOf<String>()
        parts.add("Workout finished")

        // Seconds are dropped once the workout runs past an hour.
        val timeStr = if (h > 0) "$h hours $m minutes" else "$m minutes $s seconds"
        parts.add("Total time: $timeStr")

        if (totalDistanceKm > 0.01) {
            parts.add("Distance: ${"%.2f".format(speechLocale, totalDistanceKm)} kilometers")

            // Average pace in minutes per kilometer, split into whole minutes and seconds.
            val avgPace = (activeSecs / 60.0) / totalDistanceKm
            val mins = avgPace.toInt()
            val secs = ((avgPace - mins) * 60).toInt()
            parts.add("Average pace: $mins minutes $secs seconds per kilometer")
        }

        return parts.joinToString(". ")
    }
    override fun onDestroy() {
        // Snapshot summary text while stats are still valid
        val summaryText = buildFinishSummaryText()
        val config = ttsConfig
        // Stop time-based TTS triggers
        ttsTimeRunnable?.let { ttsTimeHandler?.removeCallbacks(it) }
        ttsTimeHandler = null
        ttsTimeRunnable = null
        // Hand off the existing TTS instance for the finish summary.
        // We do NOT call tts?.stop() or tts?.shutdown() here — the utterance
        // listener will clean up after the summary finishes speaking.
        val finishTts = tts
        tts = null
        ttsReady = false
        tracking = false
        paused = false
        instance = null
        locationListener?.let { locationManager?.removeUpdates(it) }
        locationListener = null
        locationManager = null
        abandonAudioFocus()
-        // Clean up TTS
+        // Speak finish summary using the handed-off TTS instance (already initialized)
-        ttsTimeRunnable?.let { ttsTimeHandler?.removeCallbacks(it) }
+        if (summaryText != null && finishTts != null && config != null) {
-        ttsTimeHandler = null
+            Log.d(TAG, "Finish summary: $summaryText")
-        ttsTimeRunnable = null
+
-        tts?.stop()
+            // Audio focus for ducking
-        tts?.shutdown()
+            val am = getSystemService(Context.AUDIO_SERVICE) as AudioManager
-        tts = null
+            var focusReq: AudioFocusRequest? = null
-        ttsReady = false
+            if (config.audioDuck && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
                focusReq = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
                    .setAudioAttributes(
                        AudioAttributes.Builder()
                            .setUsage(AudioAttributes.USAGE_ASSISTANCE_NAVIGATION_GUIDANCE)
                            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                            .build()
                    )
                    .setOnAudioFocusChangeListener { }
                    .build()
                am.requestAudioFocus(focusReq)
            }
            finishTts.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
                override fun onStart(id: String?) {}
                override fun onDone(id: String?) { cleanup() }
                @Deprecated("Deprecated in Java")
                override fun onError(id: String?) { cleanup() }
                private fun cleanup() {
                    if (focusReq != null && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
                        am.abandonAudioFocusRequest(focusReq)
                    }
                    finishTts.shutdown()
                }
            })
            val params = Bundle().apply {
                putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, config.ttsVolume)
            }
            finishTts.speak(summaryText, TextToSpeech.QUEUE_FLUSH, params, "workout_finished")
        } else {
            finishTts?.shutdown()
        }
        super.onDestroy()
    }
--- a/src/lib/js/gps.svelte.ts
+++ b/src/lib/js/gps.svelte.ts
@@ -26,6 +26,8 @@ export interface VoiceGuidanceConfig {
 	metrics: string[];
 	language: string;
 	voiceId?: string;
 	ttsVolume?: number; // 0.0–1.0, relative TTS volume
 	audioDuck?: boolean; // duck other audio during TTS
 	intervals?: IntervalStep[];
 }
--- a/src/routes/fitness/[workout=fitnessWorkout]/[active=fitnessActive]/+page.svelte
+++ b/src/routes/fitness/[workout=fitnessWorkout]/[active=fitnessActive]/+page.svelte
@@ -47,6 +47,8 @@
 	let vgTriggerType = $state('distance');
 	let vgTriggerValue = $state(1);
 	let vgMetrics = $state(['totalTime', 'totalDistance', 'avgPace']);
 	let vgVolume = $state(0.8);
 	let vgAudioDuck = $state(false);
 	const vgLanguage = $derived(lang);
 	let vgShowPanel = $state(false);
 	let vgLoaded = $state(false);
@@ -58,6 +60,8 @@
 			triggerType: vgTriggerType,
 			triggerValue: vgTriggerValue,
 			metrics: vgMetrics,
 			volume: vgVolume,
 			audioDuck: vgAudioDuck,
 		};
 		if (!vgLoaded) return;
 		localStorage.setItem('vg_settings', JSON.stringify(settings));
@@ -196,6 +200,8 @@
 			triggerValue: vgTriggerValue,
 			metrics: vgEnabled ? vgMetrics : [],
 			language: vgLanguage,
 			ttsVolume: vgVolume,
 			audioDuck: vgAudioDuck,
 			...(hasIntervals ? { intervals: selectedInterval.steps } : {})
 		};
 	}
@@ -367,6 +373,8 @@
 				if (s.triggerType === 'distance' || s.triggerType === 'time') vgTriggerType = s.triggerType;
 				if (typeof s.triggerValue === 'number' && s.triggerValue > 0) vgTriggerValue = s.triggerValue;
 				if (Array.isArray(s.metrics)) vgMetrics = s.metrics;
 				if (typeof s.volume === 'number' && s.volume >= 0 && s.volume <= 1) vgVolume = s.volume;
 				if (typeof s.audioDuck === 'boolean') vgAudioDuck = s.audioDuck;
 			}
 		} catch {}
 		vgLoaded = true;
@@ -1150,6 +1158,23 @@
 								</div>
 							</div>
 							<div class="vg-group">
 								<label class="vg-label">
 									TTS Volume
 									<span class="vg-volume-value">{Math.round(vgVolume * 100)}%</span>
 								</label>
 								<input
 									class="vg-range"
 									type="range"
 									min="0"
 									max="1"
 									step="0.05"
 									bind:value={vgVolume}
 								/>
 							</div>
 							<Toggle bind:checked={vgAudioDuck} label="Duck other audio during TTS" />
 						{/if}
 						{/if}
 					</div>
@@ -1375,6 +1400,24 @@
 										{/each}
 									</div>
 								</div>
 								<div class="vg-group">
 									<label class="vg-label">
 										TTS Volume
 										<span class="vg-volume-value">{Math.round(vgVolume * 100)}%</span>
 									</label>
 									<input
 										class="vg-range"
 										type="range"
 										min="0"
 										max="1"
 										step="0.05"
 										bind:value={vgVolume}
 									/>
 								</div>
 								<Toggle bind:checked={vgAudioDuck} label="Duck other audio during TTS" />
 							{/if}
 							{/if}
 						</div>
@@ -2013,6 +2056,16 @@
 		text-transform: uppercase;
 		letter-spacing: 0.05em;
 		color: var(--color-text-secondary);
 		display: flex;
 		justify-content: space-between;
 		align-items: center;
 	}
 	.vg-volume-value {
 		font-variant-numeric: tabular-nums;
 	}
 	.vg-range {
 		width: 100%;
 		accent-color: var(--nord10);
 	}
 	.vg-trigger-row {
 		display: flex;