feat: add AI-powered alt text generation for recipe images
All checks were successful: CI / update (push) in 1m10s

- Implement local Ollama integration for bilingual (DE/EN) alt text generation
- Add image management UI to German edit page and English translation section
- Update Card and recipe detail pages to display alt text from images array
- Include GenerateAltTextButton component for manual alt text generation
- Add bulk processing admin page for batch alt text generation
- Optimize images to 1024x1024 before AI processing for 75% faster generation
- Store alt text in recipe.images[].alt and translations.en.images[].alt
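The last bullet names the two storage locations for the generated text. A minimal sketch of the assumed recipe shape, in TypeScript; every field other than images[].alt and translations.en.images[].alt is illustrative rather than taken from the real schema:

// Illustrative only: the actual recipe schema is not part of this diff.
interface RecipeImage {
	mediapath?: string; // image filename, e.g. "brot.a1b2c3d4.webp" (field name assumed)
	alt?: string;       // alt text written by the generator
}

interface Recipe {
	name: string;
	category: string;
	images: RecipeImage[]; // recipe.images[].alt holds the German text
	translations?: {
		en?: { images?: RecipeImage[] }; // translations.en.images[].alt holds the English text
	};
}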
2026-01-05 17:28:17 +01:00
parent cc978e73b4
commit 4ffc0940ef
14 changed files with 1512 additions and 11 deletions


@@ -0,0 +1,176 @@
import { generateWithOllama, imageToBase64 } from './ollama.js';
import { resizeAndEncodeImage } from './imageUtils.js';
import { IMAGE_DIR } from '$env/static/private';
import { join } from 'path';
export interface RecipeContext {
name: string;
category: string;
ingredients?: string[];
tags?: string[];
}
export interface AltTextResult {
de: string;
en: string;
}
/**
* Generate alt text for a recipe image in both German and English
* @param imagePath - Relative path or filename of the image (e.g., "brot.a1b2c3d4.webp")
* @param context - Recipe context for better descriptions
* @param modelName - Ollama model to use (default: "gemma3:latest")
* @returns Object with German and English alt text
*/
export async function generateAltText(
imagePath: string,
context: RecipeContext,
modelName: string = 'gemma3:latest'
): Promise<AltTextResult> {
// Construct full path to image
const fullImagePath = imagePath.startsWith('/')
? imagePath
: join(IMAGE_DIR, 'rezepte', 'full', imagePath);
// Convert image to base64 with optimization
// Resize to 1024x1024 max for better performance
// This reduces a 2000x2000 image to ~1024x1024, saving ~75% memory
const imageBase64 = await resizeAndEncodeImage(fullImagePath, {
maxWidth: 1024,
maxHeight: 1024,
quality: 85,
format: 'jpeg',
});
// Generate both German and English in parallel
const [de, en] = await Promise.all([
generateGermanAltText(imageBase64, context, modelName),
generateEnglishAltText(imageBase64, context, modelName),
]);
return { de, en };
}
/**
* Generate German alt text
*/
async function generateGermanAltText(
imageBase64: string,
context: RecipeContext,
modelName: string
): Promise<string> {
const prompt = buildPrompt('de', context);
const response = await generateWithOllama({
model: modelName,
prompt,
images: [imageBase64],
options: {
temperature: 0.3, // Lower temperature for consistent descriptions
num_predict: 100, // Ollama's native token limit; the API does not recognize max_tokens
},
});
return cleanAltText(response);
}
/**
* Generate English alt text
*/
async function generateEnglishAltText(
imageBase64: string,
context: RecipeContext,
modelName: string
): Promise<string> {
const prompt = buildPrompt('en', context);
const response = await generateWithOllama({
model: modelName,
prompt,
images: [imageBase64],
options: {
temperature: 0.3,
num_predict: 100,
},
});
return cleanAltText(response);
}
/**
* Build context-aware prompt for alt text generation
*/
function buildPrompt(lang: 'de' | 'en', context: RecipeContext): string {
if (lang === 'de') {
return `Erstelle einen prägnanten Alt-Text (maximal 10 Wörter, 125 Zeichen) für dieses Rezeptbild auf Deutsch.
Rezept: ${context.name}
Kategorie: ${context.category}
${context.tags ? `Stichwörter: ${context.tags.slice(0, 3).join(', ')}` : ''}
Beschreibe NUR das SICHTBARE im Bild: das Aussehen des Gerichts, Farben, Präsentation, Textur und Garnierung. Sei beschreibend aber prägnant für Screenreader. Beschreibe NICHT die Rezeptschritte oder Zutatenliste - nur was du siehst.
Antworte NUR mit dem Alt-Text, ohne Erklärung oder Anführungszeichen.`;
} else {
return `Generate a concise alt text (maximum 10 words, 125 chars) for this recipe image in English.
Recipe: ${context.name}
Category: ${context.category}
${context.tags ? `Keywords: ${context.tags.slice(0, 3).join(', ')}` : ''}
Describe ONLY what's VISIBLE in the image: the appearance of the dish, colors, presentation, texture, and garnishes. Be descriptive but concise for screen readers. Do NOT describe the recipe steps or ingredients list - only what you see.
Respond with ONLY the alt text, no explanation or quotes.`;
}
}
/**
* Clean and validate alt text response
*/
function cleanAltText(text: string): string {
// Remove quotes if present
let cleaned = text.replace(/^["']|["']$/g, '');
// Remove "Alt text:" prefix if present
cleaned = cleaned.replace(/^(Alt[- ]?text|Alternativer Text):\s*/i, '');
// Trim whitespace
cleaned = cleaned.trim();
// Truncate to 125 characters if too long
if (cleaned.length > 125) {
cleaned = cleaned.substring(0, 122) + '...';
}
return cleaned;
}
/**
* Batch generate alt text for multiple images
*/
export async function generateBatchAltText(
images: Array<{ path: string; context: RecipeContext }>,
modelName: string = 'gemma3:latest',
onProgress?: (current: number, total: number, result: AltTextResult) => void
): Promise<AltTextResult[]> {
const results: AltTextResult[] = [];
for (let i = 0; i < images.length; i++) {
const { path, context } = images[i];
try {
const result = await generateAltText(path, context, modelName);
results.push(result);
if (onProgress) {
onProgress(i + 1, images.length, result);
}
} catch (error) {
console.error(`Failed to generate alt text for ${path}:`, error);
// Return empty strings on error
results.push({ de: '', en: '' });
}
}
return results;
}
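
A sketch of how the single-image path might be called from a SvelteKit endpoint. The route, request body, and the module path '$lib/server/ai/altText' are assumptions, since this file's path is not shown in the diff:

// Hypothetical +server.ts: route and module path are assumed, not part of this commit.
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { generateAltText } from '$lib/server/ai/altText';

export const POST: RequestHandler = async ({ request }) => {
	const { image, name, category, tags } = await request.json();

	// One call produces both languages; the result maps directly onto
	// recipe.images[].alt (de) and translations.en.images[].alt (en).
	const alt = await generateAltText(image, { name, category, tags });

	return json(alt); // { de: '...', en: '...' }
};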


@@ -0,0 +1,82 @@
import sharp from 'sharp';
import { readFile } from 'fs/promises';
export interface ResizeOptions {
maxWidth?: number;
maxHeight?: number;
quality?: number;
format?: 'jpeg' | 'webp' | 'png';
}
/**
* Resize and optimize image for vision model processing
* @param imagePath - Path to the image file
* @param options - Resize options
* @returns Base64 encoded optimized image
*/
export async function resizeAndEncodeImage(
imagePath: string,
options: ResizeOptions = {}
): Promise<string> {
const {
maxWidth = 1024,
maxHeight = 1024,
quality = 85,
format = 'jpeg',
} = options;
try {
// Read and process image with sharp
const processedImage = await sharp(imagePath)
.resize(maxWidth, maxHeight, {
fit: 'inside', // Maintain aspect ratio
withoutEnlargement: true, // Don't upscale smaller images
})
.toFormat(format, { quality })
.toBuffer();
return processedImage.toString('base64');
} catch (error) {
console.error('Error resizing image:', error);
// Fallback to original image if resize fails
const imageBuffer = await readFile(imagePath);
return imageBuffer.toString('base64');
}
}
/**
* Get image dimensions without loading full image into memory
*/
export async function getImageDimensions(
imagePath: string
): Promise<{ width: number; height: number }> {
const metadata = await sharp(imagePath).metadata();
return {
width: metadata.width || 0,
height: metadata.height || 0,
};
}
/**
* Estimate optimal resize dimensions for vision models
* Balance between quality and performance
*/
export function calculateOptimalDimensions(
originalWidth: number,
originalHeight: number,
targetSize: number = 1024
): { width: number; height: number } {
const aspectRatio = originalWidth / originalHeight;
if (originalWidth > originalHeight) {
return {
width: Math.min(targetSize, originalWidth),
height: Math.min(Math.round(targetSize / aspectRatio), originalHeight),
};
} else {
return {
width: Math.min(Math.round(targetSize * aspectRatio), originalWidth),
height: Math.min(targetSize, originalHeight),
};
}
}
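
As a quick check of the math above (expected values computed by hand from the function as written): a 2000x1500 photo maps to 1024x768, while an 800x600 image is returned unchanged because neither dimension exceeds the target.

// Worked examples; module path assumed, since this file's full path is not shown in the diff.
import { calculateOptimalDimensions } from '$lib/server/ai/imageUtils';

calculateOptimalDimensions(2000, 1500); // { width: 1024, height: 768 }
calculateOptimalDimensions(800, 600);   // { width: 800, height: 600 } (no upscaling)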

src/lib/server/ai/ollama.ts (new file, 104 lines)

@@ -0,0 +1,104 @@
import { OLLAMA_URL } from '$env/static/private';
import { readFile } from 'fs/promises';
/**
* Ollama API client for local vision model inference
*/
export interface OllamaGenerateRequest {
model: string;
prompt: string;
images?: string[]; // base64 encoded images
stream?: boolean;
options?: {
temperature?: number;
top_p?: number;
num_predict?: number; // token limit; Ollama's native API has no max_tokens option
};
}
export interface OllamaGenerateResponse {
model: string;
created_at: string;
response: string;
done: boolean;
}
/**
* Generate text response from Ollama model with optional image input
*/
export async function generateWithOllama(
request: OllamaGenerateRequest
): Promise<string> {
const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';
try {
const response = await fetch(`${ollamaUrl}/api/generate`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
...request,
stream: false, // Always use non-streaming for simpler handling
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
}
const data = (await response.json()) as OllamaGenerateResponse;
return data.response.trim();
} catch (error) {
console.error('Ollama API error:', error);
throw new Error(`Failed to generate response from Ollama: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Convert image file path to base64 string
*/
export async function imageToBase64(imagePath: string): Promise<string> {
const imageBuffer = await readFile(imagePath);
return imageBuffer.toString('base64');
}
/**
* Check if Ollama server is available
*/
export async function checkOllamaHealth(): Promise<boolean> {
const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';
try {
const response = await fetch(`${ollamaUrl}/api/tags`, {
method: 'GET',
});
return response.ok;
} catch {
return false;
}
}
/**
* List available models on Ollama server
*/
export async function listOllamaModels(): Promise<string[]> {
const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';
try {
const response = await fetch(`${ollamaUrl}/api/tags`, {
method: 'GET',
});
if (!response.ok) {
throw new Error('Failed to fetch models');
}
const data = await response.json();
return data.models?.map((m: { name: string }) => m.name) ?? [];
} catch (error) {
console.error('Failed to list Ollama models:', error);
return [];
}
}
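
A sketch of how the health check and model listing might gate the bulk-processing admin page mentioned in the commit message; the load function and error message are assumptions:

// Hypothetical +page.server.ts for the bulk admin page; structure is assumed.
import { error } from '@sveltejs/kit';
import type { PageServerLoad } from './$types';
import { checkOllamaHealth, listOllamaModels } from '$lib/server/ai/ollama';

export const load: PageServerLoad = async () => {
	// Refuse to render the batch UI when the local Ollama server is unreachable.
	if (!(await checkOllamaHealth())) {
		throw error(503, 'Ollama server is not reachable');
	}

	// Let the admin pick from whatever vision models are actually installed.
	return { models: await listOllamaModels() };
};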