From 5bed3f3781c5bdf0da695c9102ff3392c2377a58 Mon Sep 17 00:00:00 2001
From: Alexander Bocken <alexander@bocken.org>
Date: Mon, 30 Mar 2026 08:48:46 +0200
Subject: [PATCH] fitness: TTS volume control, audio ducking, and workout
 start/finish announcements

Add TTS volume slider (default 80%) and audio duck toggle to voice
guidance settings. Announce "Workout started" when TTS initializes and
speak a full workout summary (time, distance, avg pace) on finish.
The finish summary reuses the existing TTS instance via handoff so it
plays fully without blocking the completion screen.
---
 .../main/java/org/bocken/app/AndroidBridge.kt |   4 -
 .../bocken/app/LocationForegroundService.kt   | 258 +++++++++++++++---
 src/lib/js/gps.svelte.ts                      |   2 +
 .../[active=fitnessActive]/+page.svelte       |  53 ++++
 4 files changed, 268 insertions(+), 49 deletions(-)
diff --git a/src-tauri/gen/android/app/src/main/java/org/bocken/app/AndroidBridge.kt b/src-tauri/gen/android/app/src/main/java/org/bocken/app/AndroidBridge.kt
index 3f1df67..598f9c8 100644
--- a/src-tauri/gen/android/app/src/main/java/org/bocken/app/AndroidBridge.kt
+++ b/src-tauri/gen/android/app/src/main/java/org/bocken/app/AndroidBridge.kt
@@ -16,8 +16,6 @@ import java.util.Locale
 
 class AndroidBridge(private val context: Context) {
 
-    private var ttsForVoices: TextToSpeech? = null
-
     @JavascriptInterface
     fun startLocationService(ttsConfigJson: String, startPaused: Boolean) {
         if (context is Activity) {
@@ -122,8 +120,6 @@ class AndroidBridge(private val context: Context) {
     /**
      * Returns available TTS voices as a JSON array.
      * Each entry: { "id": "...", "name": "...", "language": "en-US" }
-     * This initializes a temporary TTS engine; the result is returned asynchronously
-     * via a callback, but since @JavascriptInterface is synchronous we block briefly.
      */
     @JavascriptInterface
     fun getAvailableTtsVoices(): String {
diff --git a/src-tauri/gen/android/app/src/main/java/org/bocken/app/LocationForegroundService.kt b/src-tauri/gen/android/app/src/main/java/org/bocken/app/LocationForegroundService.kt
index dd9ec7b..a718829 100644
--- a/src-tauri/gen/android/app/src/main/java/org/bocken/app/LocationForegroundService.kt
+++ b/src-tauri/gen/android/app/src/main/java/org/bocken/app/LocationForegroundService.kt
@@ -9,11 +9,16 @@ import android.content.Context
 import android.content.Intent
 import android.location.LocationListener
 import android.location.LocationManager
+import android.media.AudioAttributes
+import android.media.AudioFocusRequest
+import android.media.AudioManager
 import android.os.Build
+import android.os.Bundle
 import android.os.Handler
 import android.os.IBinder
 import android.os.Looper
 import android.speech.tts.TextToSpeech
+import android.speech.tts.UtteranceProgressListener
 import android.util.Log
 import org.json.JSONArray
 import org.json.JSONObject
@@ -55,6 +60,11 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
     private var intervalStartTimeMs: Long = 0L
     private var intervalsComplete: Boolean = false
 
+    // Audio focus / ducking
+    private var audioManager: AudioManager? = null
+    private var audioFocusRequest: AudioFocusRequest? = null
+    private var hasAudioFocus = false
+
     data class IntervalStep(
         val label: String,
         val durationType: String, // "distance" or "time"
@@ -68,6 +78,8 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
         val metrics: List<String> = listOf("totalTime", "totalDistance", "avgPace"),
         val language: String = "en",
         val voiceId: String? = null,
+        val ttsVolume: Float = 0.8f,           // 0.0–1.0 relative TTS volume
+        val audioDuck: Boolean = false,        // duck other audio during TTS
         val intervals: List<IntervalStep> = emptyList()
     ) {
         companion object {
@@ -100,6 +112,8 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
                         metrics = metrics,
                         language = obj.optString("language", "en"),
                         voiceId = obj.optString("voiceId", null),
+                        ttsVolume = obj.optDouble("ttsVolume", 0.8).toFloat().coerceIn(0f, 1f),
+                        audioDuck = obj.optBoolean("audioDuck", false),
                         intervals = intervals
                     )
                 } catch (_: Exception) {
@@ -246,13 +260,11 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
             splitDistanceAtLastAnnouncement = 0.0
             splitTimeAtLastAnnouncement = startTimeMs
 
-            // Log available TTS engines
             val dummyTts = TextToSpeech(applicationContext, null)
             val engines = dummyTts.engines
             Log.d(TAG, "Available TTS engines: ${engines.map { "${it.label} (${it.name})" }}")
             dummyTts.shutdown()
 
-            // Try with explicit engine if available
             if (engines.isNotEmpty()) {
                 val engineName = engines[0].name
                 Log.d(TAG, "Trying TTS with explicit engine: $engineName")
@@ -268,6 +280,45 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
 
     // --- TTS ---
 
+    /**
+     * Post-init setup, invoked from onInit after a successful engine init.
+     * Selects the configured voice, speaks the opening cues and arms the time trigger.
+     */
+    private fun onTtsReady() {
+        val cfg = ttsConfig ?: return
+        Log.d(TAG, "TTS ready! triggerType=${cfg.triggerType}, triggerValue=${cfg.triggerValue}")
+
+        // Switch to the requested voice when the engine lists one with that name
+        val wantedVoice = cfg.voiceId
+        if (!wantedVoice.isNullOrEmpty()) {
+            val match = tts?.voices?.find { it.name == wantedVoice }
+            if (match != null) tts?.voice = match
+        }
+
+        // Opening cue
+        speakWithConfig("Workout started", "workout_started")
+
+        // Queue the first interval step behind the opening cue (flush = false)
+        val opening = if (intervalsComplete) null else intervalSteps.firstOrNull()
+        if (opening != null) {
+            val amount = opening.durationValue.toInt()
+            val wholeMinutes = amount / 60
+            val restSeconds = amount % 60
+            val durationText = when {
+                opening.durationType == "distance" -> "$amount meters"
+                amount < 60 -> "$amount seconds"
+                restSeconds > 0 -> "$wholeMinutes minutes $restSeconds seconds"
+                else -> "$wholeMinutes minutes"
+            }
+            speakWithConfig("${opening.label}. $durationText", "interval_announcement", flush = false)
+        }
+
+        // Periodic announcements are timer-driven only for the "time" trigger type
+        if (cfg.triggerType == "time") {
+            startTimeTrigger(cfg.triggerValue)
+        }
+    }
+
     override fun onInit(status: Int) {
         Log.d(TAG, "TTS onInit status=$status (SUCCESS=${TextToSpeech.SUCCESS})")
         if (status == TextToSpeech.SUCCESS) {
@@ -275,44 +326,79 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
             val locale = Locale.forLanguageTag(config.language)
             val langResult = tts?.setLanguage(locale)
             Log.d(TAG, "TTS setLanguage($locale) result=$langResult")
-
-            // Set specific voice if requested
-            if (!config.voiceId.isNullOrEmpty()) {
-                tts?.voices?.find { it.name == config.voiceId }?.let { voice ->
-                    tts?.voice = voice
-                }
-            }
-
             ttsReady = true
-            Log.d(TAG, "TTS ready! triggerType=${config.triggerType}, triggerValue=${config.triggerValue}")
-
-            // Announce first interval step if intervals are configured
-            if (intervalSteps.isNotEmpty() && !intervalsComplete) {
-                val first = intervalSteps[0]
-                val durationText = if (first.durationType == "distance") {
-                    "${first.durationValue.toInt()} meters"
-                } else {
-                    val secs = first.durationValue.toInt()
-                    if (secs >= 60) {
-                        val m = secs / 60
-                        val s = secs % 60
-                        if (s > 0) "$m minutes $s seconds" else "$m minutes"
-                    } else {
-                        "$secs seconds"
-                    }
-                }
-                announceIntervalTransition("${first.label}. $durationText")
-            }
-
-            // Set up time-based trigger if configured
-            if (config.triggerType == "time") {
-                startTimeTrigger(config.triggerValue)
-            }
+            onTtsReady()
         } else {
             Log.e(TAG, "TTS init FAILED with status=$status")
         }
     }
 
+    // Shared listener: the pre-O AudioManager API pairs request and abandon by listener identity.
+    private val focusChangeListener = AudioManager.OnAudioFocusChangeListener { }
+
+    /** Requests transient may-duck focus if ducking is enabled and focus is not already held. */
+    private fun requestAudioFocus() {
+        val config = ttsConfig ?: return
+        if (!config.audioDuck || hasAudioFocus) return
+        val am = audioManager ?: (getSystemService(Context.AUDIO_SERVICE) as AudioManager).also { audioManager = it }
+        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+            val focusReq = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
+                .setAudioAttributes(
+                    AudioAttributes.Builder()
+                        .setUsage(AudioAttributes.USAGE_ASSISTANCE_NAVIGATION_GUIDANCE)
+                        .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+                        .build()
+                )
+                .setOnAudioFocusChangeListener(focusChangeListener)
+                .build()
+            // Kept so abandonAudioFocus() can release exactly this request
+            audioFocusRequest = focusReq
+            hasAudioFocus = am.requestAudioFocus(focusReq) == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
+        } else {
+            @Suppress("DEPRECATION")
+            val result = am.requestAudioFocus(
+                focusChangeListener, AudioManager.STREAM_MUSIC, AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK
+            )
+            hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
+        }
+        Log.d(TAG, "Audio focus request (duck): granted=$hasAudioFocus")
+    }
+
+    /** Releases focus obtained by requestAudioFocus(); does nothing when focus is not held. */
+    private fun abandonAudioFocus() {
+        if (!hasAudioFocus) return
+        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+            audioFocusRequest?.let { audioManager?.abandonAudioFocusRequest(it) }
+        } else {
+            @Suppress("DEPRECATION")
+            audioManager?.abandonAudioFocus(focusChangeListener)
+        }
+        hasAudioFocus = false
+    }
+
+    // Utterances handed to the engine and not yet finished; focus is released when this reaches zero.
+    private val pendingUtterances = java.util.concurrent.atomic.AtomicInteger(0)
+
+    /** Speak text at the configured volume; ducking focus is held until the last queued utterance ends. */
+    private fun speakWithConfig(text: String, utteranceId: String, flush: Boolean = true) {
+        if (!ttsReady) return
+        val config = ttsConfig ?: return
+        val engine = tts ?: return // without an engine nothing would ever release the focus
+        val queueMode = if (flush) TextToSpeech.QUEUE_FLUSH else TextToSpeech.QUEUE_ADD
+        val params = Bundle().apply { putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, config.ttsVolume) }
+        requestAudioFocus()
+        engine.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
+            override fun onStart(id: String?) {}
+            override fun onDone(id: String?) { if (pendingUtterances.decrementAndGet() <= 0) abandonAudioFocus() }
+            @Deprecated("Deprecated in Java")
+            override fun onError(id: String?) { onDone(id) }
+        })
+        // A flush drops everything queued earlier, so only the new utterance remains pending.
+        if (flush) pendingUtterances.set(1) else pendingUtterances.incrementAndGet()
+        val result = engine.speak(text, queueMode, params, utteranceId)
+        if (result != TextToSpeech.SUCCESS && pendingUtterances.decrementAndGet() <= 0) abandonAudioFocus()
+        Log.d(TAG, "speakWithConfig($utteranceId) result=$result vol=${config.ttsVolume} duck=${config.audioDuck}")
+    }
+
     private fun startTimeTrigger(intervalMinutes: Double) {
         val intervalMs = (intervalMinutes * 60 * 1000).toLong()
         Log.d(TAG, "Starting time trigger: every ${intervalMs}ms (${intervalMinutes} min)")
@@ -428,7 +514,7 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
     private fun announceIntervalTransition(text: String) {
         if (!ttsReady) return
         Log.d(TAG, "Interval announcement: $text")
-        tts?.speak(text, TextToSpeech.QUEUE_FLUSH, null, "interval_announcement")
+        speakWithConfig(text, "interval_announcement")
     }
 
     private fun announceMetrics() {
@@ -493,8 +579,7 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
         if (parts.isNotEmpty()) {
             val text = parts.joinToString(". ")
             Log.d(TAG, "Announcing: $text")
-            val result = tts?.speak(text, TextToSpeech.QUEUE_FLUSH, null, "workout_announcement")
-            Log.d(TAG, "TTS speak() result=$result (SUCCESS=${TextToSpeech.SUCCESS})")
+            speakWithConfig(text, "workout_announcement")
         } else {
             Log.d(TAG, "announceMetrics: no parts to announce")
         }
@@ -623,22 +708,105 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
         )
     }
 
+    /**
+     * Compose the spoken end-of-workout summary from the service's current counters.
+     * Returns null when there is no TTS config or it is disabled; call before state is cleared.
+     */
+    private fun buildFinishSummaryText(): String? {
+        if (ttsConfig?.enabled != true) return null
+
+        val elapsed = activeElapsedSecs()
+        val hours = elapsed / 3600
+        val minutes = (elapsed % 3600) / 60
+        val seconds = elapsed % 60
+        // Seconds are left out of the spoken time once a full hour has elapsed
+        val spokenTime = when {
+            hours > 0 -> "$hours hours $minutes minutes"
+            else -> "$minutes minutes $seconds seconds"
+        }
+
+        val sentences = mutableListOf("Workout finished", "Total time: $spokenTime")
+
+        // Distance and pace are only reported above the 0.01 km threshold
+        val km = totalDistanceKm
+        if (km > 0.01) {
+            sentences += "Distance: ${"%.2f".format(km)} kilometers"
+
+            val paceMinPerKm = (elapsed / 60.0) / km
+            val paceWholeMin = paceMinPerKm.toInt()
+            val paceRestSec = ((paceMinPerKm - paceWholeMin) * 60).toInt()
+            sentences += "Average pace: $paceWholeMin minutes $paceRestSec seconds per kilometer"
+        }
+
+        return sentences.joinToString(". ")
+    }
+
     override fun onDestroy() {
+        // Snapshot summary text while stats are still valid
+        val summaryText = buildFinishSummaryText()
+        val config = ttsConfig
+
+        // Stop time-based TTS triggers
+        ttsTimeRunnable?.let { ttsTimeHandler?.removeCallbacks(it) }
+        ttsTimeHandler = null
+        ttsTimeRunnable = null
+
+        // Hand off the existing TTS instance for the finish summary.
+        // We do NOT call tts?.stop() or tts?.shutdown() here — the utterance
+        // listener will clean up after the summary finishes speaking.
+        val finishTts = tts
+        tts = null
+        ttsReady = false
+
         tracking = false
         paused = false
         instance = null
         locationListener?.let { locationManager?.removeUpdates(it) }
         locationListener = null
         locationManager = null
+        abandonAudioFocus()
 
-        // Clean up TTS
-        ttsTimeRunnable?.let { ttsTimeHandler?.removeCallbacks(it) }
-        ttsTimeHandler = null
-        ttsTimeRunnable = null
-        tts?.stop()
-        tts?.shutdown()
-        tts = null
-        ttsReady = false
+        // Speak finish summary using the handed-off TTS instance
+        if (summaryText != null && finishTts != null && config != null) {
+            Log.d(TAG, "Finish summary: $summaryText")
+
+            // Audio focus for ducking (only requested on API 26+ when ducking is enabled)
+            val am = getSystemService(Context.AUDIO_SERVICE) as AudioManager
+            val focusReq = if (config.audioDuck && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+                AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
+                    .setAudioAttributes(
+                        AudioAttributes.Builder()
+                            .setUsage(AudioAttributes.USAGE_ASSISTANCE_NAVIGATION_GUIDANCE)
+                            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+                            .build()
+                    )
+                    .setOnAudioFocusChangeListener { }
+                    .build()
+                    .also { am.requestAudioFocus(it) }
+            } else null
+
+            // Releases the ducking focus (if any) and shuts the handed-off engine down
+            fun cleanup() {
+                if (focusReq != null && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+                    am.abandonAudioFocusRequest(focusReq)
+                }
+                finishTts.shutdown()
+            }
+            // Only react to the summary itself: the flush below can still report an earlier utterance
+            finishTts.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
+                override fun onStart(id: String?) {}
+                override fun onDone(id: String?) { if (id == "workout_finished") cleanup() }
+                @Deprecated("Deprecated in Java")
+                override fun onError(id: String?) { if (id == "workout_finished") cleanup() }
+                override fun onStop(id: String?, interrupted: Boolean) { if (id == "workout_finished") cleanup() }
+            })
+            val params = Bundle().apply { putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, config.ttsVolume) }
+            val result = finishTts.speak(summaryText, TextToSpeech.QUEUE_FLUSH, params, "workout_finished")
+            // A rejected utterance never triggers a callback, so release everything right away
+            if (result != TextToSpeech.SUCCESS) cleanup()
+        } else {
+            finishTts?.shutdown()
+        }
 
         super.onDestroy()
     }
diff --git a/src/lib/js/gps.svelte.ts b/src/lib/js/gps.svelte.ts
index 92c6fbf..b7761c1 100644
--- a/src/lib/js/gps.svelte.ts
+++ b/src/lib/js/gps.svelte.ts
@@ -26,6 +26,8 @@ export interface VoiceGuidanceConfig {
 	metrics: string[];
 	language: string;
 	voiceId?: string;
+	ttsVolume?: number; // 0.0–1.0, relative TTS volume
+	audioDuck?: boolean; // duck other audio during TTS
 	intervals?: IntervalStep[];
 }
 
diff --git a/src/routes/fitness/[workout=fitnessWorkout]/[active=fitnessActive]/+page.svelte b/src/routes/fitness/[workout=fitnessWorkout]/[active=fitnessActive]/+page.svelte
index b9a8a43..252f763 100644
--- a/src/routes/fitness/[workout=fitnessWorkout]/[active=fitnessActive]/+page.svelte
+++ b/src/routes/fitness/[workout=fitnessWorkout]/[active=fitnessActive]/+page.svelte
@@ -47,6 +47,8 @@
 	let vgTriggerType = $state('distance');
 	let vgTriggerValue = $state(1);
 	let vgMetrics = $state(['totalTime', 'totalDistance', 'avgPace']);
+	let vgVolume = $state(0.8);
+	let vgAudioDuck = $state(false);
 	const vgLanguage = $derived(lang);
 	let vgShowPanel = $state(false);
 	let vgLoaded = $state(false);
@@ -58,6 +60,8 @@
 			triggerType: vgTriggerType,
 			triggerValue: vgTriggerValue,
 			metrics: vgMetrics,
+			volume: vgVolume,
+			audioDuck: vgAudioDuck,
 		};
 		if (!vgLoaded) return;
 		localStorage.setItem('vg_settings', JSON.stringify(settings));
@@ -196,6 +200,8 @@
 			triggerValue: vgTriggerValue,
 			metrics: vgEnabled ? vgMetrics : [],
 			language: vgLanguage,
+			ttsVolume: vgVolume,
+			audioDuck: vgAudioDuck,
 			...(hasIntervals ? { intervals: selectedInterval.steps } : {})
 		};
 	}
@@ -367,6 +373,8 @@
 				if (s.triggerType === 'distance' || s.triggerType === 'time') vgTriggerType = s.triggerType;
 				if (typeof s.triggerValue === 'number' && s.triggerValue > 0) vgTriggerValue = s.triggerValue;
 				if (Array.isArray(s.metrics)) vgMetrics = s.metrics;
+				if (typeof s.volume === 'number' && s.volume >= 0 && s.volume <= 1) vgVolume = s.volume;
+				if (typeof s.audioDuck === 'boolean') vgAudioDuck = s.audioDuck;
 			}
 		} catch {}
 		vgLoaded = true;
@@ -1150,6 +1158,23 @@
 								</div>
 							</div>
 
+							<div class="vg-group">
+								<label class="vg-label">
+									TTS Volume
+									<span class="vg-volume-value">{Math.round(vgVolume * 100)}%</span>
+								</label>
+								<input
+									class="vg-range"
+									type="range"
+									min="0"
+									max="1"
+									step="0.05"
+									bind:value={vgVolume}
+								/>
+							</div>
+
+							<Toggle bind:checked={vgAudioDuck} label="Duck other audio during TTS" />
+
 						{/if}
 						{/if}
 					</div>
@@ -1375,6 +1400,24 @@
 										{/each}
 									</div>
 								</div>
+
+								<div class="vg-group">
+									<label class="vg-label">
+										TTS Volume
+										<span class="vg-volume-value">{Math.round(vgVolume * 100)}%</span>
+									</label>
+									<input
+										class="vg-range"
+										type="range"
+										min="0"
+										max="1"
+										step="0.05"
+										bind:value={vgVolume}
+									/>
+								</div>
+
+								<Toggle bind:checked={vgAudioDuck} label="Duck other audio during TTS" />
+
 							{/if}
 							{/if}
 						</div>
@@ -2013,6 +2056,16 @@
 		text-transform: uppercase;
 		letter-spacing: 0.05em;
 		color: var(--color-text-secondary);
+		display: flex;
+		justify-content: space-between;
+		align-items: center;
+	}
+	.vg-volume-value {
+		font-variant-numeric: tabular-nums;
+	}
+	.vg-range {
+		width: 100%;
+		accent-color: var(--nord10);
 	}
 	.vg-trigger-row {
 		display: flex;