feat: add AI-powered alt text generation for recipe images

- Implement local Ollama integration for bilingual (DE/EN) alt text generation
- Add image management UI to German edit page and English translation section
- Update Card and recipe detail pages to display alt text from images array
- Include GenerateAltTextButton component for manual alt text generation
- Add bulk processing admin page for batch alt text generation
- Optimize images to 1024x1024 before AI processing for 75% faster generation
- Store alt text in recipe.images[].alt and translations.en.images[].alt
2026-01-05 17:28:17 +01:00
parent cc978e73b4
commit 4ffc0940ef
14 changed files with 1512 additions and 11 deletions
--- a/src/lib/components/Card.svelte
+++ b/src/lib/components/Card.svelte
@@ -32,6 +32,11 @@ const img_name = $derived(
 	recipe.images?.[0]?.mediapath ||
 	`${recipe.germanShortName || recipe.short_name}.webp`
 );
+
+// Alt text of the first image; falls back to recipe.name when that is missing or empty
+const img_alt = $derived(
+	recipe.images?.[0]?.alt || recipe.name
+);
 </script>
 <style>
 .card_anchor{
@@ -288,9 +293,9 @@ const img_name = $derived(
 	<div class=div_div_image >
 		<div class=div_image style="background-image:url(https://bocken.org/static/rezepte/placeholder/{img_name})">
 			<noscript>
-				<img class="image backdrop_blur" src="https://bocken.org/static/rezepte/thumb/{img_name}" loading={loading_strat} alt="{recipe.alt}"/>
+				<img class="image backdrop_blur" src="https://bocken.org/static/rezepte/thumb/{img_name}" loading={loading_strat} alt="{img_alt}"/>
 			</noscript>
-			<img class="image backdrop_blur" class:blur={!isloaded} src={'https://bocken.org/static/rezepte/thumb/' + img_name} loading={loading_strat} alt="{recipe.alt}" on:load={() => isloaded=true}/>
+			<img class="image backdrop_blur" class:blur={!isloaded} src={'https://bocken.org/static/rezepte/thumb/' + img_name} loading={loading_strat} alt="{img_alt}" on:load={() => isloaded=true}/>
 		</div>
 	</div>
 	{#if showFavoriteIndicator && isFavorite}
--- a/src/lib/components/GenerateAltTextButton.svelte
+++ b/src/lib/components/GenerateAltTextButton.svelte
@@ -0,0 +1,92 @@
+<script lang="ts">
+	let { shortName, imageIndex }: { shortName: string; imageIndex: number } = $props();
+
+	let loading = $state(false);
+	let error = $state('');
+	let success = $state('');
+
+	async function generateAltText() {
+		loading = true;
+		error = '';
+		success = '';
+		// POST shortName/imageIndex to the alt-text endpoint and show the returned DE/EN texts
+		try {
+			const response = await fetch('/api/generate-alt-text', {
+				method: 'POST',
+				headers: { 'Content-Type': 'application/json' },
+				body: JSON.stringify({
+					shortName,
+					imageIndex,
+				}),
+			});
+			// Error responses may not carry JSON (e.g. a proxy error page); fall back to {} instead of a parse error
+			const data = await response.json().catch(() => ({}));
+
+			if (!response.ok || !data.altText) {
+				throw new Error(data.message || 'Failed to generate alt text');
+			}
+
+			success = `Generated: DE: "${data.altText.de}" | EN: "${data.altText.en}"`;
+
+			// Reload page to show updated alt text
+			setTimeout(() => {
+				window.location.reload();
+			}, 2000);
+		} catch (err) {
+			error = err instanceof Error ? err.message : 'An error occurred';
+		} finally {
+			loading = false;
+		}
+	}
+</script>
+
+<style>
+	button {
+		padding: 0.5rem 1rem;
+		background-color: var(--nord8);
+		color: white;
+		border: none;
+		border-radius: 0.25rem;
+		cursor: pointer;
+		font-size: 0.9rem;
+		transition: background-color 0.2s;
+	}
+
+	button:hover {
+		background-color: var(--nord7);
+	}
+
+	button:disabled {
+		background-color: var(--nord3);
+		cursor: not-allowed;
+	}
+
+	.message {
+		margin-top: 0.5rem;
+		padding: 0.5rem;
+		border-radius: 0.25rem;
+		font-size: 0.85rem;
+	}
+
+	.success {
+		background-color: var(--nord14);
+		color: var(--nord0);
+	}
+
+	.error {
+		background-color: var(--nord11);
+		color: white;
+	}
+</style>
+
+<button onclick={generateAltText} disabled={loading}>
+	{loading ? '🤖 Generating...' : '✨ Generate Alt Text (AI)'}
+</button>
+
+{#if success}
+	<div class="message success">{success}</div>
+{/if}
+
+{#if error}
+	<div class="message error">{error}</div>
+{/if}
--- a/src/lib/components/TitleImgParallax.svelte
+++ b/src/lib/components/TitleImgParallax.svelte
@@ -1,7 +1,7 @@
 <script>
 	import { onMount } from "svelte";

-	let { src, placeholder_src } = $props();
+	let { src, placeholder_src, alt = "" } = $props();

 	let isloaded = $state(false);
 	let isredirected = $state(false);
@@ -179,12 +179,12 @@ dialog button{
    	<div class:zoom-in={isloaded && !isredirected} onclick={show_dialog_img}>
 		<div class=placeholder style="background-image:url({placeholder_src})" >
 			<div class=placeholder_blur>
-			<img class="image" class:unblur={isloaded} {src} onload={() => {isloaded=true}}  alt=""/>
+			<img class="image" class:unblur={isloaded} {src} onload={() => {isloaded=true}}  {alt}/>
 			</div>
 		</div>
 		<noscript>
 			<div class=placeholder style="background-image:url({placeholder_src})" >
-				<img class="image unblur" {src} onload={() => {isloaded=true}}  alt=""/>
+				<img class="image unblur" {src} onload={() => {isloaded=true}}  {alt}/>
 			</div>
 		</noscript>
 	</div>
@@ -194,7 +194,7 @@ dialog button{

 <dialog id=img_carousel>
 	<div>
-	<img class:unblur={isloaded} {src} alt="">
+	<img class:unblur={isloaded} {src} {alt}>
 		<button class=action_button onkeydown={(event) => do_on_key(event, 'Enter', false, close_dialog_img)} onclick={close_dialog_img}>
 	<Cross fill=white width=2rem height=2rem></Cross>
 		</button>
--- a/src/lib/components/TranslationApproval.svelte
+++ b/src/lib/components/TranslationApproval.svelte
@@ -4,6 +4,7 @@
 	import TranslationFieldComparison from './TranslationFieldComparison.svelte';
 	import CreateIngredientList from './CreateIngredientList.svelte';
 	import CreateStepList from './CreateStepList.svelte';
+	import GenerateAltTextButton from './GenerateAltTextButton.svelte';

 	export let germanData: any;
 	export let englishData: TranslatedRecipeType | null = null;
@@ -17,8 +18,21 @@
 	let errorMessage: string = '';
 	let validationErrors: string[] = [];

+	// Build one blank { alt, caption } entry per German image; returns [] for a missing or empty list
+	function initializeImagesArray(germanImages: any[]): any[] {
+		if (!germanImages || germanImages.length === 0) return [];
+		return germanImages.map(() => ({ // only the count is used, not the image contents
+			alt: '',
+			caption: ''
+		}));
+	}
+
 	// Editable English data (clone of englishData or initialized from germanData)
-	let editableEnglish: any = englishData ? { ...englishData } : null;
+	let editableEnglish: any = englishData ? {
+		...englishData,
+		// One entry per German image: keep an existing English alt/caption, fill missing slots with blanks
+		images: (germanData.images || []).map((_: any, i: number) => englishData?.images?.[i] || { alt: '', caption: '' })
+	} : null;

 	// Store old recipe data for granular change detection
 	export let oldRecipeData: any = null;
@@ -67,7 +81,8 @@
 					...germanData,
 					translationStatus: 'pending',
 					ingredients: JSON.parse(JSON.stringify(germanData.ingredients || [])),
-					instructions: JSON.parse(JSON.stringify(germanData.instructions || []))
+					instructions: JSON.parse(JSON.stringify(germanData.instructions || [])),
+					images: editableEnglish?.images || initializeImagesArray(germanData.images || [])
 				};
 			}
 			checkingBaseRecipes = false;
@@ -128,7 +143,8 @@
 						return translation ? { ...inst, name: translation.enName } : inst;
 					}
 					return inst;
-				})
+				}),
+				images: initializeImagesArray(germanData.images || [])
 			};
 		} else {
 			// Existing English translation - merge German structure with English translations
@@ -182,7 +198,12 @@
 						// If no English translation exists, use German structure (will be translated later)
 						return germanInst;
 					}
-				})
+				}),
+				// Sync images array - keep existing English alt/caption or initialize empty
+				images: germanData.images?.map((germanImg: any, index: number) => {
+					const existingEnImage = editableEnglish.images?.[index];
+					return existingEnImage || { alt: '', caption: '' };
+				}) || []
 			};
 		}
 	}
@@ -758,6 +779,73 @@ button:disabled {
 				</div>
 			{/if}

+			<!-- Images Section -->
+			{#if germanData.images && germanData.images.length > 0}
+				<div class="field-section" style="background-color: var(--nord13); padding: 1rem; border-radius: 5px; margin-top: 1.5rem;">
+					<h4 style="margin-top: 0; color: var(--nord0);">🖼️ Images - English Alt Texts & Captions</h4>
+					{#each germanData.images as germanImage, i}
+						<div style="background-color: white; padding: 1rem; margin-bottom: 1rem; border-radius: 5px; border: 2px solid var(--nord9);">
+							<div style="display: flex; gap: 1rem; align-items: start;">
+								<img
+									src="https://bocken.org/static/rezepte/thumb/{germanImage.mediapath}"
+									alt={germanImage.alt || 'Recipe image'}
+									style="width: 100px; height: 100px; object-fit: cover; border-radius: 5px;"
+								/>
+								<div style="flex: 1;">
+									<p style="margin: 0 0 0.5rem 0; font-size: 0.85rem; color: var(--nord3);"><strong>Image {i + 1}:</strong> {germanImage.mediapath}</p>
+
+									<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-bottom: 0.75rem;">
+										<div>
+											<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇩🇪 German Alt-Text:</label>
+											<input
+												type="text"
+												value={germanImage.alt || ''}
+												disabled
+												style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord4); border-radius: 3px; background-color: var(--nord5); color: var(--nord2); font-size: 0.85rem;"
+											/>
+										</div>
+										<div>
+											<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇬🇧 English Alt-Text:</label>
+											<input
+												type="text"
+												bind:value={editableEnglish.images[i].alt}
+												placeholder="English image description for screen readers"
+												style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord8); border-radius: 3px; font-size: 0.85rem;"
+											/>
+										</div>
+									</div>
+
+									<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem;">
+										<div>
+											<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇩🇪 German Caption:</label>
+											<input
+												type="text"
+												value={germanImage.caption || ''}
+												disabled
+												style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord4); border-radius: 3px; background-color: var(--nord5); color: var(--nord2); font-size: 0.85rem;"
+											/>
+										</div>
+										<div>
+											<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇬🇧 English Caption:</label>
+											<input
+												type="text"
+												bind:value={editableEnglish.images[i].caption}
+												placeholder="English caption (optional)"
+												style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord8); border-radius: 3px; font-size: 0.85rem;"
+											/>
+										</div>
+									</div>
+
+									<div style="margin-top: 0.75rem;">
+										<GenerateAltTextButton shortName={germanData.short_name} imageIndex={i} />
+									</div>
+								</div>
+							</div>
+						</div>
+					{/each}
+				</div>
+			{/if}
+
 			<!-- Ingredients and Instructions in two-column layout -->
 			{#if editableEnglish?.ingredients || editableEnglish?.instructions}
 				<div class="list-wrapper">
--- a/src/lib/server/ai/alttext.ts
+++ b/src/lib/server/ai/alttext.ts
@@ -0,0 +1,176 @@
+import { generateWithOllama, imageToBase64 } from './ollama.js';
+import { resizeAndEncodeImage } from './imageUtils.js';
+import { IMAGE_DIR } from '$env/static/private';
+import { join } from 'path';
+
+export interface RecipeContext {
+	name: string; // recipe name, interpolated into the prompt
+	category: string; // recipe category, interpolated into the prompt
+	ingredients?: string[]; // not read by buildPrompt in this file
+	tags?: string[]; // only the first three are put into the prompt
+}
+
+export interface AltTextResult {
+	de: string; // German alt text
+	en: string; // English alt text
+}
+
+/**
+ * Generate alt text for a recipe image in both German and English
+ * @param imagePath - Absolute path (leading "/") used as-is, or a filename resolved under IMAGE_DIR/rezepte/full (e.g., "brot.a1b2c3d4.webp")
+ * @param context - Recipe context interpolated into the prompts
+ * @param modelName - Ollama model to use (default: "gemma3:latest")
+ * @returns Object with German and English alt text
+ */
+export async function generateAltText(
+	imagePath: string,
+	context: RecipeContext,
+	modelName: string = 'gemma3:latest'
+): Promise<AltTextResult> {
+	// Absolute paths are used as-is; anything else is looked up in IMAGE_DIR/rezepte/full
+	const fullImagePath = imagePath.startsWith('/')
+		? imagePath
+		: join(IMAGE_DIR, 'rezepte', 'full', imagePath);
+
+	// Downscale to fit within 1024x1024 and re-encode as JPEG before base64-encoding
+	// The same encoded image is reused for both language requests below
+	// e.g. 2000x2000 becomes 1024x1024, roughly a quarter of the pixels
+	const imageBase64 = await resizeAndEncodeImage(fullImagePath, {
+		maxWidth: 1024,
+		maxHeight: 1024,
+		quality: 85,
+		format: 'jpeg',
+	});
+
+	// Both languages are requested concurrently; if either request rejects, the whole call rejects
+	const [de, en] = await Promise.all([
+		generateGermanAltText(imageBase64, context, modelName),
+		generateEnglishAltText(imageBase64, context, modelName),
+	]);
+
+	return { de, en };
+}
+
+/**
+ * Send the German prompt and the base64 image to Ollama and return the cleaned reply
+ */
+async function generateGermanAltText(
+	imageBase64: string,
+	context: RecipeContext,
+	modelName: string
+): Promise<string> {
+	const prompt = buildPrompt('de', context);
+
+	const response = await generateWithOllama({
+		model: modelName,
+		prompt,
+		images: [imageBase64],
+		options: {
+			temperature: 0.3, // Lower temperature for consistent descriptions
+			max_tokens: 100,
+		},
+	});
+
+	return cleanAltText(response); // strips label/quotes and caps the length at 125 characters
+}
+
+/**
+ * Send the English prompt and the base64 image to Ollama and return the cleaned reply
+ */
+async function generateEnglishAltText(
+	imageBase64: string,
+	context: RecipeContext,
+	modelName: string
+): Promise<string> {
+	const prompt = buildPrompt('en', context);
+
+	const response = await generateWithOllama({
+		model: modelName,
+		prompt,
+		images: [imageBase64],
+		options: {
+			temperature: 0.3, // same sampling settings as the German request
+			max_tokens: 100,
+		},
+	});
+
+	return cleanAltText(response); // strips label/quotes and caps the length at 125 characters
+}
+
+/**
+ * Build the German or English prompt from name, category and up to three tags (tag line omitted when there are none)
+ */
+function buildPrompt(lang: 'de' | 'en', context: RecipeContext): string {
+	if (lang === 'de') {
+		return `Erstelle einen prägnanten Alt-Text (maximal 10 Wörter, 125 Zeichen) für dieses Rezeptbild auf Deutsch.
+
+Rezept: ${context.name}
+Kategorie: ${context.category}
+${context.tags?.length ? `Stichwörter: ${context.tags.slice(0, 3).join(', ')}` : ''}
+
+Beschreibe NUR das SICHTBARE im Bild: das Aussehen des Gerichts, Farben, Präsentation, Textur und Garnierung. Sei beschreibend aber prägnant für Screenreader. Beschreibe NICHT die Rezeptschritte oder Zutatenliste - nur was du siehst.
+
+Antworte NUR mit dem Alt-Text, ohne Erklärung oder Anführungszeichen.`;
+	} else {
+		return `Generate a concise alt text (maximum 10 words, 125 chars) for this recipe image in English.
+
+Recipe: ${context.name}
+Category: ${context.category}
+${context.tags?.length ? `Keywords: ${context.tags.slice(0, 3).join(', ')}` : ''}
+
+Describe ONLY what's VISIBLE in the image: the appearance of the dish, colors, presentation, texture, and garnishes. Be descriptive but concise for screen readers. Do NOT describe the recipe steps or ingredients list - only what you see.
+
+Respond with ONLY the alt text, no explanation or quotes.`;
+	}
+}
+
+/**
+ * Normalise a raw model reply: drop a leading "Alt text:" label and wrapping quotes, cap at 125 characters
+ */
+function cleanAltText(text: string): string {
+	// Strip the label first (tolerating a quote in front of it) so that quotes after the label are exposed
+	let cleaned = text.trim().replace(/^["']?(Alt[- ]?text|Alternativer Text):\s*/i, '');
+
+	// Remove a leading and/or trailing quote; covers both `"Alt text: x"` and `Alt text: "x"`
+	cleaned = cleaned.replace(/^["']|["']$/g, '');
+
+	// Trim whitespace
+	cleaned = cleaned.trim();
+
+	// Truncate to 125 characters if too long
+	if (cleaned.length > 125) {
+		cleaned = cleaned.substring(0, 122) + '...';
+	}
+
+	return cleaned;
+}
+
+/**
+ * Generate alt text for images sequentially; a failed image yields { de: '', en: '' } so results[i] matches images[i]
+ */
+export async function generateBatchAltText(
+	images: Array<{ path: string; context: RecipeContext }>,
+	modelName: string = 'gemma3:latest',
+	onProgress?: (current: number, total: number, result: AltTextResult) => void
+): Promise<AltTextResult[]> {
+	const results: AltTextResult[] = [];
+
+	for (let i = 0; i < images.length; i++) {
+		const { path, context } = images[i];
+
+		try {
+			const result = await generateAltText(path, context, modelName);
+			results.push(result);
+
+			if (onProgress) {
+				onProgress(i + 1, images.length, result);
+			}
+		} catch (error) {
+			console.error(`Failed to generate alt text for ${path}:`, error);
+			// Empty placeholder on failure; skipped if onProgress threw after the real result was stored
+			if (results.length === i) results.push({ de: '', en: '' });
+		}
+	}
+
+	return results;
+}
--- a/src/lib/server/ai/imageUtils.ts
+++ b/src/lib/server/ai/imageUtils.ts
@@ -0,0 +1,82 @@
+import sharp from 'sharp';
+import { readFile } from 'fs/promises';
+
+export interface ResizeOptions {
+	maxWidth?: number; // resizeAndEncodeImage defaults this to 1024
+	maxHeight?: number; // resizeAndEncodeImage defaults this to 1024
+	quality?: number; // passed to sharp's toFormat; defaults to 85
+	format?: 'jpeg' | 'webp' | 'png'; // output encoding; defaults to 'jpeg'
+}
+
+/**
+ * Resize and optimize image for vision model processing
+ * @param imagePath - Path to the image file
+ * @param options - Resize options (defaults: 1024x1024 bounding box, quality 85, jpeg)
+ * @returns Base64 of the re-encoded image, or of the untouched original file if sharp fails
+ */
+export async function resizeAndEncodeImage(
+	imagePath: string,
+	options: ResizeOptions = {}
+): Promise<string> {
+	const {
+		maxWidth = 1024,
+		maxHeight = 1024,
+		quality = 85,
+		format = 'jpeg',
+	} = options;
+
+	try {
+		// Resize to fit the bounding box, convert to the requested format, collect the bytes
+		const processedImage = await sharp(imagePath)
+			.resize(maxWidth, maxHeight, {
+				fit: 'inside', // Maintain aspect ratio
+				withoutEnlargement: true, // Don't upscale smaller images
+			})
+			.toFormat(format, { quality })
+			.toBuffer();
+
+		return processedImage.toString('base64');
+	} catch (error) {
+		console.error('Error resizing image:', error);
+		// Fall back to the original file bytes; a readFile failure here still propagates to the caller
+		const imageBuffer = await readFile(imagePath);
+		return imageBuffer.toString('base64');
+	}
+}
+
+/**
+ * Read width and height from sharp's metadata; a dimension sharp does not report becomes 0
+ */
+export async function getImageDimensions(
+	imagePath: string
+): Promise<{ width: number; height: number }> {
+	const metadata = await sharp(imagePath).metadata();
+	return {
+		width: metadata.width || 0,
+		height: metadata.height || 0,
+	};
+}
+
+/**
+ * Fit the image's longer side to targetSize, keeping the aspect ratio and never upscaling
+ * Returns { width: 0, height: 0 } for non-positive or NaN input sizes; rounding never collapses a side to 0
+ */
+export function calculateOptimalDimensions(
+	originalWidth: number,
+	originalHeight: number,
+	targetSize: number = 1024
+): { width: number; height: number } {
+	// A 0 or NaN size (getImageDimensions reports 0 when unknown) would otherwise produce NaN below
+	if (!(originalWidth > 0 && originalHeight > 0)) return { width: 0, height: 0 };
+	const aspectRatio = originalWidth / originalHeight;
+	if (originalWidth > originalHeight) {
+		return {
+			width: Math.min(targetSize, originalWidth),
+			height: Math.min(Math.max(1, Math.round(targetSize / aspectRatio)), originalHeight),
+		};
+	}
+	return {
+		width: Math.min(Math.max(1, Math.round(targetSize * aspectRatio)), originalWidth),
+		height: Math.min(targetSize, originalHeight),
+	};
+}
--- a/src/lib/server/ai/ollama.ts
+++ b/src/lib/server/ai/ollama.ts
@@ -0,0 +1,104 @@
+import { OLLAMA_URL } from '$env/static/private';
+import { readFile } from 'fs/promises';
+
+/**
+ * Ollama API client for local vision model inference
+ */
+
+export interface OllamaGenerateRequest {
+	model: string; // model name/tag, e.g. 'gemma3:latest'
+	prompt: string;
+	images?: string[]; // base64 encoded images
+	stream?: boolean; // overridden: generateWithOllama always sends stream: false
+	options?: {
+		temperature?: number;
+		top_p?: number;
+		max_tokens?: number; // NOTE(review): Ollama's native name for this cap appears to be `num_predict` — verify
+	};
+}
+
+export interface OllamaGenerateResponse {
+	model: string;
+	created_at: string;
+	response: string; // generated text; generateWithOllama returns it trimmed
+	done: boolean;
+}
+
+/**
+ * POST a non-streaming request to Ollama's /api/generate and return the trimmed reply; throws on HTTP or network failure
+ */
+export async function generateWithOllama(
+	request: OllamaGenerateRequest
+): Promise<string> {
+	const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';
+	// Ollama's native length cap is `num_predict`; translate `max_tokens` so the caller's limit takes effect
+	const { max_tokens, ...options } = request.options ?? {};
+	try {
+		const response = await fetch(`${ollamaUrl}/api/generate`, {
+			method: 'POST',
+			headers: { 'Content-Type': 'application/json' },
+			body: JSON.stringify({
+				...request,
+				options: max_tokens === undefined ? options : { num_predict: max_tokens, ...options },
+				stream: false, // Always use non-streaming for simpler handling
+			}),
+		});
+
+		if (!response.ok) {
+			throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
+		}
+
+		const data = (await response.json()) as OllamaGenerateResponse;
+		return data.response.trim();
+	} catch (error) {
+		console.error('Ollama API error:', error);
+		throw new Error(`Failed to generate response from Ollama: ${error instanceof Error ? error.message : 'Unknown error'}`);
+	}
+}
+
+/**
+ * Read the file at imagePath and return its raw bytes as a base64 string (no resizing or re-encoding)
+ */
+export async function imageToBase64(imagePath: string): Promise<string> {
+	const imageBuffer = await readFile(imagePath);
+	return imageBuffer.toString('base64');
+}
+
+/**
+ * Probe GET /api/tags; resolves true on an OK response, false on a non-OK status or any fetch error
+ */
+export async function checkOllamaHealth(): Promise<boolean> {
+	const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';
+
+	try {
+		const response = await fetch(`${ollamaUrl}/api/tags`, {
+			method: 'GET',
+		});
+		return response.ok;
+	} catch {
+		return false;
+	}
+}
+
+/**
+ * Return the model names from GET /api/tags; logs and returns [] on any failure or when the reply has no models field
+ */
+export async function listOllamaModels(): Promise<string[]> {
+	const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';
+
+	try {
+		const response = await fetch(`${ollamaUrl}/api/tags`, {
+			method: 'GET',
+		});
+
+		if (!response.ok) {
+			throw new Error('Failed to fetch models');
+		}
+
+		const data = await response.json();
+		return data.models?.map((m: any) => m.name) || [];
+	} catch (error) {
+		console.error('Failed to list Ollama models:', error);
+		return [];
+	}
+}