fitness: TTS volume control, audio ducking, and workout start/finish announcements
All checks were successful
CI / update (push) Successful in 2m37s

Add TTS volume slider (default 80%) and audio duck toggle to voice
guidance settings. Announce "Workout started" when TTS initializes and
speak a full workout summary (time, distance, avg pace) on finish.
The finish summary reuses the existing TTS instance via handoff so it
plays fully without blocking the completion screen.
This commit is contained in:
2026-03-30 08:48:46 +02:00
parent 660fec44c2
commit 5bed3f3781
4 changed files with 268 additions and 49 deletions

View File

@@ -16,8 +16,6 @@ import java.util.Locale
class AndroidBridge(private val context: Context) { class AndroidBridge(private val context: Context) {
private var ttsForVoices: TextToSpeech? = null
@JavascriptInterface @JavascriptInterface
fun startLocationService(ttsConfigJson: String, startPaused: Boolean) { fun startLocationService(ttsConfigJson: String, startPaused: Boolean) {
if (context is Activity) { if (context is Activity) {
@@ -122,8 +120,6 @@ class AndroidBridge(private val context: Context) {
/** /**
* Returns available TTS voices as a JSON array. * Returns available TTS voices as a JSON array.
* Each entry: { "id": "...", "name": "...", "language": "en-US" } * Each entry: { "id": "...", "name": "...", "language": "en-US" }
* This initializes a temporary TTS engine; the result is returned asynchronously
* via a callback, but since @JavascriptInterface is synchronous we block briefly.
*/ */
@JavascriptInterface @JavascriptInterface
fun getAvailableTtsVoices(): String { fun getAvailableTtsVoices(): String {

View File

@@ -9,11 +9,16 @@ import android.content.Context
import android.content.Intent import android.content.Intent
import android.location.LocationListener import android.location.LocationListener
import android.location.LocationManager import android.location.LocationManager
import android.media.AudioAttributes
import android.media.AudioFocusRequest
import android.media.AudioManager
import android.os.Build import android.os.Build
import android.os.Bundle
import android.os.Handler import android.os.Handler
import android.os.IBinder import android.os.IBinder
import android.os.Looper import android.os.Looper
import android.speech.tts.TextToSpeech import android.speech.tts.TextToSpeech
import android.speech.tts.UtteranceProgressListener
import android.util.Log import android.util.Log
import org.json.JSONArray import org.json.JSONArray
import org.json.JSONObject import org.json.JSONObject
@@ -55,6 +60,11 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
private var intervalStartTimeMs: Long = 0L private var intervalStartTimeMs: Long = 0L
private var intervalsComplete: Boolean = false private var intervalsComplete: Boolean = false
// Audio focus / ducking
private var audioManager: AudioManager? = null
private var audioFocusRequest: AudioFocusRequest? = null
private var hasAudioFocus = false
data class IntervalStep( data class IntervalStep(
val label: String, val label: String,
val durationType: String, // "distance" or "time" val durationType: String, // "distance" or "time"
@@ -68,6 +78,8 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
val metrics: List<String> = listOf("totalTime", "totalDistance", "avgPace"), val metrics: List<String> = listOf("totalTime", "totalDistance", "avgPace"),
val language: String = "en", val language: String = "en",
val voiceId: String? = null, val voiceId: String? = null,
val ttsVolume: Float = 0.8f, // 0.01.0 relative TTS volume
val audioDuck: Boolean = false, // duck other audio during TTS
val intervals: List<IntervalStep> = emptyList() val intervals: List<IntervalStep> = emptyList()
) { ) {
companion object { companion object {
@@ -100,6 +112,8 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
metrics = metrics, metrics = metrics,
language = obj.optString("language", "en"), language = obj.optString("language", "en"),
voiceId = obj.optString("voiceId", null), voiceId = obj.optString("voiceId", null),
ttsVolume = obj.optDouble("ttsVolume", 0.8).toFloat().coerceIn(0f, 1f),
audioDuck = obj.optBoolean("audioDuck", false),
intervals = intervals intervals = intervals
) )
} catch (_: Exception) { } catch (_: Exception) {
@@ -246,13 +260,11 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
splitDistanceAtLastAnnouncement = 0.0 splitDistanceAtLastAnnouncement = 0.0
splitTimeAtLastAnnouncement = startTimeMs splitTimeAtLastAnnouncement = startTimeMs
// Log available TTS engines
val dummyTts = TextToSpeech(applicationContext, null) val dummyTts = TextToSpeech(applicationContext, null)
val engines = dummyTts.engines val engines = dummyTts.engines
Log.d(TAG, "Available TTS engines: ${engines.map { "${it.label} (${it.name})" }}") Log.d(TAG, "Available TTS engines: ${engines.map { "${it.label} (${it.name})" }}")
dummyTts.shutdown() dummyTts.shutdown()
// Try with explicit engine if available
if (engines.isNotEmpty()) { if (engines.isNotEmpty()) {
val engineName = engines[0].name val engineName = engines[0].name
Log.d(TAG, "Trying TTS with explicit engine: $engineName") Log.d(TAG, "Trying TTS with explicit engine: $engineName")
@@ -268,6 +280,45 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
// --- TTS --- // --- TTS ---
/** Called when TTS is ready — either immediately (pre-warmed) or from onInit (cold start). */
private fun onTtsReady() {
val config = ttsConfig ?: return
Log.d(TAG, "TTS ready! triggerType=${config.triggerType}, triggerValue=${config.triggerValue}")
// Set specific voice if requested
if (!config.voiceId.isNullOrEmpty()) {
tts?.voices?.find { it.name == config.voiceId }?.let { voice ->
tts?.voice = voice
}
}
// Announce workout started
speakWithConfig("Workout started", "workout_started")
// Announce first interval step if intervals are configured (queue after "Workout started")
if (intervalSteps.isNotEmpty() && !intervalsComplete) {
val first = intervalSteps[0]
val durationText = if (first.durationType == "distance") {
"${first.durationValue.toInt()} meters"
} else {
val secs = first.durationValue.toInt()
if (secs >= 60) {
val m = secs / 60
val s = secs % 60
if (s > 0) "$m minutes $s seconds" else "$m minutes"
} else {
"$secs seconds"
}
}
speakWithConfig("${first.label}. $durationText", "interval_announcement", flush = false)
}
// Set up time-based trigger if configured
if (config.triggerType == "time") {
startTimeTrigger(config.triggerValue)
}
}
override fun onInit(status: Int) { override fun onInit(status: Int) {
Log.d(TAG, "TTS onInit status=$status (SUCCESS=${TextToSpeech.SUCCESS})") Log.d(TAG, "TTS onInit status=$status (SUCCESS=${TextToSpeech.SUCCESS})")
if (status == TextToSpeech.SUCCESS) { if (status == TextToSpeech.SUCCESS) {
@@ -275,44 +326,79 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
val locale = Locale.forLanguageTag(config.language) val locale = Locale.forLanguageTag(config.language)
val langResult = tts?.setLanguage(locale) val langResult = tts?.setLanguage(locale)
Log.d(TAG, "TTS setLanguage($locale) result=$langResult") Log.d(TAG, "TTS setLanguage($locale) result=$langResult")
// Set specific voice if requested
if (!config.voiceId.isNullOrEmpty()) {
tts?.voices?.find { it.name == config.voiceId }?.let { voice ->
tts?.voice = voice
}
}
ttsReady = true ttsReady = true
Log.d(TAG, "TTS ready! triggerType=${config.triggerType}, triggerValue=${config.triggerValue}") onTtsReady()
// Announce first interval step if intervals are configured
if (intervalSteps.isNotEmpty() && !intervalsComplete) {
val first = intervalSteps[0]
val durationText = if (first.durationType == "distance") {
"${first.durationValue.toInt()} meters"
} else {
val secs = first.durationValue.toInt()
if (secs >= 60) {
val m = secs / 60
val s = secs % 60
if (s > 0) "$m minutes $s seconds" else "$m minutes"
} else {
"$secs seconds"
}
}
announceIntervalTransition("${first.label}. $durationText")
}
// Set up time-based trigger if configured
if (config.triggerType == "time") {
startTimeTrigger(config.triggerValue)
}
} else { } else {
Log.e(TAG, "TTS init FAILED with status=$status") Log.e(TAG, "TTS init FAILED with status=$status")
} }
} }
private fun requestAudioFocus() {
val config = ttsConfig ?: return
if (!config.audioDuck) return
if (hasAudioFocus) return
audioManager = audioManager ?: getSystemService(Context.AUDIO_SERVICE) as AudioManager
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
val focusReq = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
.setAudioAttributes(
AudioAttributes.Builder()
.setUsage(AudioAttributes.USAGE_ASSISTANCE_NAVIGATION_GUIDANCE)
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
.build()
)
.setOnAudioFocusChangeListener { }
.build()
audioFocusRequest = focusReq
val result = audioManager?.requestAudioFocus(focusReq)
hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
Log.d(TAG, "Audio focus request (duck): granted=$hasAudioFocus")
} else {
@Suppress("DEPRECATION")
val result = audioManager?.requestAudioFocus(
{ },
AudioManager.STREAM_MUSIC,
AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK
)
hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
}
}
private fun abandonAudioFocus() {
if (!hasAudioFocus) return
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
audioFocusRequest?.let { audioManager?.abandonAudioFocusRequest(it) }
} else {
@Suppress("DEPRECATION")
audioManager?.abandonAudioFocus { }
}
hasAudioFocus = false
}
/** Speak text with configured volume; requests/abandons audio focus for ducking. */
private fun speakWithConfig(text: String, utteranceId: String, flush: Boolean = true) {
if (!ttsReady) return
val config = ttsConfig ?: return
val queueMode = if (flush) TextToSpeech.QUEUE_FLUSH else TextToSpeech.QUEUE_ADD
requestAudioFocus()
val params = Bundle().apply {
putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, config.ttsVolume)
}
// Set up listener to abandon audio focus after utterance completes
tts?.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
override fun onStart(id: String?) {}
override fun onDone(id: String?) { abandonAudioFocus() }
@Deprecated("Deprecated in Java")
override fun onError(id: String?) { abandonAudioFocus() }
})
val result = tts?.speak(text, queueMode, params, utteranceId)
Log.d(TAG, "speakWithConfig($utteranceId) result=$result vol=${config.ttsVolume} duck=${config.audioDuck}")
}
private fun startTimeTrigger(intervalMinutes: Double) { private fun startTimeTrigger(intervalMinutes: Double) {
val intervalMs = (intervalMinutes * 60 * 1000).toLong() val intervalMs = (intervalMinutes * 60 * 1000).toLong()
Log.d(TAG, "Starting time trigger: every ${intervalMs}ms (${intervalMinutes} min)") Log.d(TAG, "Starting time trigger: every ${intervalMs}ms (${intervalMinutes} min)")
@@ -428,7 +514,7 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
private fun announceIntervalTransition(text: String) { private fun announceIntervalTransition(text: String) {
if (!ttsReady) return if (!ttsReady) return
Log.d(TAG, "Interval announcement: $text") Log.d(TAG, "Interval announcement: $text")
tts?.speak(text, TextToSpeech.QUEUE_FLUSH, null, "interval_announcement") speakWithConfig(text, "interval_announcement")
} }
private fun announceMetrics() { private fun announceMetrics() {
@@ -493,8 +579,7 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
if (parts.isNotEmpty()) { if (parts.isNotEmpty()) {
val text = parts.joinToString(". ") val text = parts.joinToString(". ")
Log.d(TAG, "Announcing: $text") Log.d(TAG, "Announcing: $text")
val result = tts?.speak(text, TextToSpeech.QUEUE_FLUSH, null, "workout_announcement") speakWithConfig(text, "workout_announcement")
Log.d(TAG, "TTS speak() result=$result (SUCCESS=${TextToSpeech.SUCCESS})")
} else { } else {
Log.d(TAG, "announceMetrics: no parts to announce") Log.d(TAG, "announceMetrics: no parts to announce")
} }
@@ -623,22 +708,105 @@ class LocationForegroundService : Service(), TextToSpeech.OnInitListener {
) )
} }
/**
* Build the finish summary text from current stats.
* Must be called while service state is still valid (before clearing fields).
*/
private fun buildFinishSummaryText(): String? {
val config = ttsConfig ?: return null
if (!config.enabled) return null
val activeSecs = activeElapsedSecs()
val h = activeSecs / 3600
val m = (activeSecs % 3600) / 60
val s = activeSecs % 60
val parts = mutableListOf<String>()
parts.add("Workout finished")
val timeStr = if (h > 0) "$h hours $m minutes" else "$m minutes $s seconds"
parts.add("Total time: $timeStr")
if (totalDistanceKm > 0.01) {
parts.add("Distance: ${"%.2f".format(totalDistanceKm)} kilometers")
}
if (totalDistanceKm > 0.01) {
val avgPace = (activeSecs / 60.0) / totalDistanceKm
val mins = avgPace.toInt()
val secs = ((avgPace - mins) * 60).toInt()
parts.add("Average pace: $mins minutes $secs seconds per kilometer")
}
return parts.joinToString(". ")
}
override fun onDestroy() { override fun onDestroy() {
// Snapshot summary text while stats are still valid
val summaryText = buildFinishSummaryText()
val config = ttsConfig
// Stop time-based TTS triggers
ttsTimeRunnable?.let { ttsTimeHandler?.removeCallbacks(it) }
ttsTimeHandler = null
ttsTimeRunnable = null
// Hand off the existing TTS instance for the finish summary.
// We do NOT call tts?.stop() or tts?.shutdown() here — the utterance
// listener will clean up after the summary finishes speaking.
val finishTts = tts
tts = null
ttsReady = false
tracking = false tracking = false
paused = false paused = false
instance = null instance = null
locationListener?.let { locationManager?.removeUpdates(it) } locationListener?.let { locationManager?.removeUpdates(it) }
locationListener = null locationListener = null
locationManager = null locationManager = null
abandonAudioFocus()
// Clean up TTS // Speak finish summary using the handed-off TTS instance (already initialized)
ttsTimeRunnable?.let { ttsTimeHandler?.removeCallbacks(it) } if (summaryText != null && finishTts != null && config != null) {
ttsTimeHandler = null Log.d(TAG, "Finish summary: $summaryText")
ttsTimeRunnable = null
tts?.stop() // Audio focus for ducking
tts?.shutdown() val am = getSystemService(Context.AUDIO_SERVICE) as AudioManager
tts = null var focusReq: AudioFocusRequest? = null
ttsReady = false if (config.audioDuck && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
focusReq = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
.setAudioAttributes(
AudioAttributes.Builder()
.setUsage(AudioAttributes.USAGE_ASSISTANCE_NAVIGATION_GUIDANCE)
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
.build()
)
.setOnAudioFocusChangeListener { }
.build()
am.requestAudioFocus(focusReq)
}
finishTts.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
override fun onStart(id: String?) {}
override fun onDone(id: String?) { cleanup() }
@Deprecated("Deprecated in Java")
override fun onError(id: String?) { cleanup() }
private fun cleanup() {
if (focusReq != null && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
am.abandonAudioFocusRequest(focusReq)
}
finishTts.shutdown()
}
})
val params = Bundle().apply {
putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, config.ttsVolume)
}
finishTts.speak(summaryText, TextToSpeech.QUEUE_FLUSH, params, "workout_finished")
} else {
finishTts?.shutdown()
}
super.onDestroy() super.onDestroy()
} }

View File

@@ -26,6 +26,8 @@ export interface VoiceGuidanceConfig {
metrics: string[]; metrics: string[];
language: string; language: string;
voiceId?: string; voiceId?: string;
ttsVolume?: number; // 0.01.0, relative TTS volume
audioDuck?: boolean; // duck other audio during TTS
intervals?: IntervalStep[]; intervals?: IntervalStep[];
} }

View File

@@ -47,6 +47,8 @@
let vgTriggerType = $state('distance'); let vgTriggerType = $state('distance');
let vgTriggerValue = $state(1); let vgTriggerValue = $state(1);
let vgMetrics = $state(['totalTime', 'totalDistance', 'avgPace']); let vgMetrics = $state(['totalTime', 'totalDistance', 'avgPace']);
let vgVolume = $state(0.8);
let vgAudioDuck = $state(false);
const vgLanguage = $derived(lang); const vgLanguage = $derived(lang);
let vgShowPanel = $state(false); let vgShowPanel = $state(false);
let vgLoaded = $state(false); let vgLoaded = $state(false);
@@ -58,6 +60,8 @@
triggerType: vgTriggerType, triggerType: vgTriggerType,
triggerValue: vgTriggerValue, triggerValue: vgTriggerValue,
metrics: vgMetrics, metrics: vgMetrics,
volume: vgVolume,
audioDuck: vgAudioDuck,
}; };
if (!vgLoaded) return; if (!vgLoaded) return;
localStorage.setItem('vg_settings', JSON.stringify(settings)); localStorage.setItem('vg_settings', JSON.stringify(settings));
@@ -196,6 +200,8 @@
triggerValue: vgTriggerValue, triggerValue: vgTriggerValue,
metrics: vgEnabled ? vgMetrics : [], metrics: vgEnabled ? vgMetrics : [],
language: vgLanguage, language: vgLanguage,
ttsVolume: vgVolume,
audioDuck: vgAudioDuck,
...(hasIntervals ? { intervals: selectedInterval.steps } : {}) ...(hasIntervals ? { intervals: selectedInterval.steps } : {})
}; };
} }
@@ -367,6 +373,8 @@
if (s.triggerType === 'distance' || s.triggerType === 'time') vgTriggerType = s.triggerType; if (s.triggerType === 'distance' || s.triggerType === 'time') vgTriggerType = s.triggerType;
if (typeof s.triggerValue === 'number' && s.triggerValue > 0) vgTriggerValue = s.triggerValue; if (typeof s.triggerValue === 'number' && s.triggerValue > 0) vgTriggerValue = s.triggerValue;
if (Array.isArray(s.metrics)) vgMetrics = s.metrics; if (Array.isArray(s.metrics)) vgMetrics = s.metrics;
if (typeof s.volume === 'number' && s.volume >= 0 && s.volume <= 1) vgVolume = s.volume;
if (typeof s.audioDuck === 'boolean') vgAudioDuck = s.audioDuck;
} }
} catch {} } catch {}
vgLoaded = true; vgLoaded = true;
@@ -1150,6 +1158,23 @@
</div> </div>
</div> </div>
<div class="vg-group">
<label class="vg-label">
TTS Volume
<span class="vg-volume-value">{Math.round(vgVolume * 100)}%</span>
</label>
<input
class="vg-range"
type="range"
min="0"
max="1"
step="0.05"
bind:value={vgVolume}
/>
</div>
<Toggle bind:checked={vgAudioDuck} label="Duck other audio during TTS" />
{/if} {/if}
{/if} {/if}
</div> </div>
@@ -1375,6 +1400,24 @@
{/each} {/each}
</div> </div>
</div> </div>
<div class="vg-group">
<label class="vg-label">
TTS Volume
<span class="vg-volume-value">{Math.round(vgVolume * 100)}%</span>
</label>
<input
class="vg-range"
type="range"
min="0"
max="1"
step="0.05"
bind:value={vgVolume}
/>
</div>
<Toggle bind:checked={vgAudioDuck} label="Duck other audio during TTS" />
{/if} {/if}
{/if} {/if}
</div> </div>
@@ -2013,6 +2056,16 @@
text-transform: uppercase; text-transform: uppercase;
letter-spacing: 0.05em; letter-spacing: 0.05em;
color: var(--color-text-secondary); color: var(--color-text-secondary);
display: flex;
justify-content: space-between;
align-items: center;
}
.vg-volume-value {
font-variant-numeric: tabular-nums;
}
.vg-range {
width: 100%;
accent-color: var(--nord10);
} }
.vg-trigger-row { .vg-trigger-row {
display: flex; display: flex;