feat: add AI-powered alt text generation for recipe images
All checks were successful
CI / update (push) Successful in 1m10s

- Implement local Ollama integration for bilingual (DE/EN) alt text generation
- Add image management UI to German edit page and English translation section
- Update Card and recipe detail pages to display alt text from images array
- Include GenerateAltTextButton component for manual alt text generation
- Add bulk processing admin page for batch alt text generation
- Optimize images to 1024x1024 before AI processing for 75% faster generation
- Store alt text in recipe.images[].alt and translations.en.images[].alt
This commit is contained in:
2026-01-05 17:28:17 +01:00
parent cc978e73b4
commit 4ffc0940ef
14 changed files with 1512 additions and 11 deletions

View File

@@ -32,6 +32,11 @@ const img_name = $derived(
recipe.images?.[0]?.mediapath ||
`${recipe.germanShortName || recipe.short_name}.webp`
);
// Get alt text from images array
const img_alt = $derived(
recipe.images?.[0]?.alt || recipe.name
);
</script>
<style>
.card_anchor{
@@ -288,9 +293,9 @@ const img_name = $derived(
<div class=div_div_image >
<div class=div_image style="background-image:url(https://bocken.org/static/rezepte/placeholder/{img_name})">
<noscript>
<img class="image backdrop_blur" src="https://bocken.org/static/rezepte/thumb/{img_name}" loading={loading_strat} alt="{recipe.alt}"/>
<img class="image backdrop_blur" src="https://bocken.org/static/rezepte/thumb/{img_name}" loading={loading_strat} alt="{img_alt}"/>
</noscript>
<img class="image backdrop_blur" class:blur={!isloaded} src={'https://bocken.org/static/rezepte/thumb/' + img_name} loading={loading_strat} alt="{recipe.alt}" on:load={() => isloaded=true}/>
<img class="image backdrop_blur" class:blur={!isloaded} src={'https://bocken.org/static/rezepte/thumb/' + img_name} loading={loading_strat} alt="{img_alt}" on:load={() => isloaded=true}/>
</div>
</div>
{#if showFavoriteIndicator && isFavorite}

View File

@@ -0,0 +1,92 @@
<script lang="ts">
// Props: the recipe's short name and the index of the image to describe.
// Both are sent unchanged in the POST body to /api/generate-alt-text.
let { shortName, imageIndex }: { shortName: string; imageIndex: number } = $props();
// True while a generation request is in flight; the button is disabled while set.
let loading = $state(false);
// Text for the error banner; an empty string hides the banner.
let error = $state('');
// Text for the success banner; an empty string hides the banner.
let success = $state('');
/**
 * Ask the server to generate German and English alt text for this image and
 * report the outcome through the `success` / `error` banners.
 *
 * On success the page is reloaded after two seconds so the updated alt text
 * becomes visible.
 */
async function generateAltText() {
	loading = true;
	error = '';
	success = '';
	try {
		const response = await fetch('/api/generate-alt-text', {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify({
				shortName,
				imageIndex,
			}),
		});
		// An error response is not guaranteed to carry a JSON body (e.g. an HTML
		// error page); treat an unparsable body as "no details" so the status
		// check below decides the outcome instead of a JSON parse error.
		const data = await response.json().catch(() => null);
		if (!response.ok) {
			throw new Error(data?.message || 'Failed to generate alt text');
		}
		// A 2xx reply without the expected payload is still a failure for the user.
		if (!data?.altText) {
			throw new Error('Failed to generate alt text');
		}
		success = `Generated: DE: "${data.altText.de}" | EN: "${data.altText.en}"`;
		// Reload page to show updated alt text
		setTimeout(() => {
			window.location.reload();
		}, 2000);
	} catch (err) {
		error = err instanceof Error ? err.message : 'An error occurred';
	} finally {
		loading = false;
	}
}
</script>
<style>
/* Base appearance of the generate button */
button {
padding: 0.5rem 1rem;
background-color: var(--nord8);
color: white;
border: none;
border-radius: 0.25rem;
cursor: pointer;
font-size: 0.9rem;
transition: background-color 0.2s;
}
/* Hover feedback */
button:hover {
background-color: var(--nord7);
}
/* Disabled state, applied while a request is in flight */
button:disabled {
background-color: var(--nord3);
cursor: not-allowed;
}
/* Shared layout for the success and error banners */
.message {
margin-top: 0.5rem;
padding: 0.5rem;
border-radius: 0.25rem;
font-size: 0.85rem;
}
/* Success banner colours */
.success {
background-color: var(--nord14);
color: var(--nord0);
}
/* Error banner colours */
.error {
background-color: var(--nord11);
color: white;
}
</style>
<!-- Trigger button: disabled and relabelled while a request is in flight -->
<button onclick={generateAltText} disabled={loading}>
{loading ? '🤖 Generating...' : '✨ Generate Alt Text (AI)'}
</button>
<!-- Success banner, rendered only when a success message is set -->
{#if success}
<div class="message success">{success}</div>
{/if}
<!-- Error banner, rendered only when an error message is set -->
{#if error}
<div class="message error">{error}</div>
{/if}

View File

@@ -1,7 +1,7 @@
<script>
import { onMount } from "svelte";
let { src, placeholder_src } = $props();
let { src, placeholder_src, alt = "" } = $props();
let isloaded = $state(false);
let isredirected = $state(false);
@@ -179,12 +179,12 @@ dialog button{
<div class:zoom-in={isloaded && !isredirected} onclick={show_dialog_img}>
<div class=placeholder style="background-image:url({placeholder_src})" >
<div class=placeholder_blur>
<img class="image" class:unblur={isloaded} {src} onload={() => {isloaded=true}} alt=""/>
<img class="image" class:unblur={isloaded} {src} onload={() => {isloaded=true}} {alt}/>
</div>
</div>
<noscript>
<div class=placeholder style="background-image:url({placeholder_src})" >
<img class="image unblur" {src} onload={() => {isloaded=true}} alt=""/>
<img class="image unblur" {src} onload={() => {isloaded=true}} {alt}/>
</div>
</noscript>
</div>
@@ -194,7 +194,7 @@ dialog button{
<dialog id=img_carousel>
<div>
<img class:unblur={isloaded} {src} alt="">
<img class:unblur={isloaded} {src} {alt}>
<button class=action_button onkeydown={(event) => do_on_key(event, 'Enter', false, close_dialog_img)} onclick={close_dialog_img}>
<Cross fill=white width=2rem height=2rem></Cross>
</button>

View File

@@ -4,6 +4,7 @@
import TranslationFieldComparison from './TranslationFieldComparison.svelte';
import CreateIngredientList from './CreateIngredientList.svelte';
import CreateStepList from './CreateStepList.svelte';
import GenerateAltTextButton from './GenerateAltTextButton.svelte';
export let germanData: any;
export let englishData: TranslatedRecipeType | null = null;
@@ -17,8 +18,21 @@
let errorMessage: string = '';
let validationErrors: string[] = [];
// Builds the English image-metadata list: one blank { alt, caption } entry per
// German image, or an empty list when there are no German images.
function initializeImagesArray(germanImages: any[]): any[] {
	const hasImages = Boolean(germanImages) && germanImages.length !== 0;
	return hasImages ? germanImages.map(() => ({ alt: '', caption: '' })) : [];
}
// Editable English data (clone of englishData or initialized from germanData)
let editableEnglish: any = englishData ? { ...englishData } : null;
let editableEnglish: any = englishData ? {
...englishData,
// Ensure images array exists and matches German images length
images: englishData.images || initializeImagesArray(germanData.images || [])
} : null;
// Store old recipe data for granular change detection
export let oldRecipeData: any = null;
@@ -67,7 +81,8 @@
...germanData,
translationStatus: 'pending',
ingredients: JSON.parse(JSON.stringify(germanData.ingredients || [])),
instructions: JSON.parse(JSON.stringify(germanData.instructions || []))
instructions: JSON.parse(JSON.stringify(germanData.instructions || [])),
images: editableEnglish?.images || initializeImagesArray(germanData.images || [])
};
}
checkingBaseRecipes = false;
@@ -128,7 +143,8 @@
return translation ? { ...inst, name: translation.enName } : inst;
}
return inst;
})
}),
images: initializeImagesArray(germanData.images || [])
};
} else {
// Existing English translation - merge German structure with English translations
@@ -182,7 +198,12 @@
// If no English translation exists, use German structure (will be translated later)
return germanInst;
}
})
}),
// Sync images array - keep existing English alt/caption or initialize empty
images: germanData.images?.map((germanImg: any, index: number) => {
const existingEnImage = editableEnglish.images?.[index];
return existingEnImage || { alt: '', caption: '' };
}) || []
};
}
}
@@ -758,6 +779,73 @@ button:disabled {
</div>
{/if}
<!-- Images Section -->
{#if germanData.images && germanData.images.length > 0}
<div class="field-section" style="background-color: var(--nord13); padding: 1rem; border-radius: 5px; margin-top: 1.5rem;">
<h4 style="margin-top: 0; color: var(--nord0);">🖼️ Images - English Alt Texts & Captions</h4>
{#each germanData.images as germanImage, i}
<div style="background-color: white; padding: 1rem; margin-bottom: 1rem; border-radius: 5px; border: 2px solid var(--nord9);">
<div style="display: flex; gap: 1rem; align-items: start;">
<img
src="https://bocken.org/static/rezepte/thumb/{germanImage.mediapath}"
alt={germanImage.alt || 'Recipe image'}
style="width: 100px; height: 100px; object-fit: cover; border-radius: 5px;"
/>
<div style="flex: 1;">
<p style="margin: 0 0 0.5rem 0; font-size: 0.85rem; color: var(--nord3);"><strong>Image {i + 1}:</strong> {germanImage.mediapath}</p>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-bottom: 0.75rem;">
<div>
<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇩🇪 German Alt-Text:</label>
<input
type="text"
value={germanImage.alt || ''}
disabled
style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord4); border-radius: 3px; background-color: var(--nord5); color: var(--nord2); font-size: 0.85rem;"
/>
</div>
<div>
<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇬🇧 English Alt-Text:</label>
<input
type="text"
bind:value={editableEnglish.images[i].alt}
placeholder="English image description for screen readers"
style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord8); border-radius: 3px; font-size: 0.85rem;"
/>
</div>
</div>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem;">
<div>
<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇩🇪 German Caption:</label>
<input
type="text"
value={germanImage.caption || ''}
disabled
style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord4); border-radius: 3px; background-color: var(--nord5); color: var(--nord2); font-size: 0.85rem;"
/>
</div>
<div>
<label style="display: block; margin-bottom: 0.25rem; font-weight: bold; font-size: 0.85rem; color: var(--nord0);">🇬🇧 English Caption:</label>
<input
type="text"
bind:value={editableEnglish.images[i].caption}
placeholder="English caption (optional)"
style="width: 100%; padding: 0.4rem; border: 1px solid var(--nord8); border-radius: 3px; font-size: 0.85rem;"
/>
</div>
</div>
<div style="margin-top: 0.75rem;">
<GenerateAltTextButton shortName={germanData.short_name} imageIndex={i} />
</div>
</div>
</div>
</div>
{/each}
</div>
{/if}
<!-- Ingredients and Instructions in two-column layout -->
{#if editableEnglish?.ingredients || editableEnglish?.instructions}
<div class="list-wrapper">

View File

@@ -0,0 +1,176 @@
import { generateWithOllama, imageToBase64 } from './ollama.js';
import { resizeAndEncodeImage } from './imageUtils.js';
import { IMAGE_DIR } from '$env/static/private';
import { join } from 'path';
/**
 * Recipe details handed to the alt text generator alongside the image.
 */
export interface RecipeContext {
// Recipe name; interpolated into the prompt.
name: string;
// Recipe category; interpolated into the prompt.
category: string;
// NOTE(review): not read by the prompt builder in this module — confirm whether it is still needed.
ingredients?: string[];
// Optional keywords; only the first three are interpolated into the prompt.
tags?: string[];
}
/**
 * Alt text for a single image, keyed by language code.
 */
export interface AltTextResult {
// German alt text (an empty string is used by the batch helper when generation failed).
de: string;
// English alt text (an empty string is used by the batch helper when generation failed).
en: string;
}
/**
 * Produce German and English alt text for one recipe image.
 *
 * The image is downscaled to fit within 1024x1024 and re-encoded as JPEG
 * (quality 85) before it is sent to the model; the two languages are requested
 * concurrently.
 *
 * @param imagePath - A path starting with "/" is used as-is; anything else is
 *   treated as a filename (e.g. "brot.a1b2c3d4.webp") under `IMAGE_DIR/rezepte/full`
 * @param context - Recipe details used to build the prompt
 * @param modelName - Ollama model to use (default: "gemma3:latest")
 * @returns Object with German and English alt text
 */
export async function generateAltText(
	imagePath: string,
	context: RecipeContext,
	modelName: string = 'gemma3:latest'
): Promise<AltTextResult> {
	// Resolve the file to read
	const isAbsolute = imagePath.startsWith('/');
	const sourcePath = isAbsolute
		? imagePath
		: join(IMAGE_DIR, 'rezepte', 'full', imagePath);

	// Shrink and encode once; the same payload serves both language requests
	const encodedImage = await resizeAndEncodeImage(sourcePath, {
		maxWidth: 1024,
		maxHeight: 1024,
		quality: 85,
		format: 'jpeg',
	});

	// Start both requests before awaiting either so they run in parallel
	const germanRequest = generateGermanAltText(encodedImage, context, modelName);
	const englishRequest = generateEnglishAltText(encodedImage, context, modelName);
	const [de, en] = await Promise.all([germanRequest, englishRequest]);

	return { de, en };
}
/**
 * Request a German description of the image from the model and clean up the reply.
 */
async function generateGermanAltText(
	imageBase64: string,
	context: RecipeContext,
	modelName: string
): Promise<string> {
	const rawReply = await generateWithOllama({
		model: modelName,
		prompt: buildPrompt('de', context),
		images: [imageBase64],
		// A low temperature keeps descriptions consistent between runs
		options: { temperature: 0.3, max_tokens: 100 },
	});
	return cleanAltText(rawReply);
}
/**
 * Request an English description of the image from the model and clean up the reply.
 */
async function generateEnglishAltText(
	imageBase64: string,
	context: RecipeContext,
	modelName: string
): Promise<string> {
	const rawReply = await generateWithOllama({
		model: modelName,
		prompt: buildPrompt('en', context),
		images: [imageBase64],
		options: { temperature: 0.3, max_tokens: 100 },
	});
	return cleanAltText(rawReply);
}
/**
 * Assemble the instruction text sent to the model together with the image.
 *
 * The prompt names the recipe and its category and, when the context has tags,
 * lists the first three as keywords; otherwise that line is left empty.
 *
 * @param lang - 'de' selects the German prompt, anything else the English one
 * @param context - Recipe details interpolated into the prompt
 */
function buildPrompt(lang: 'de' | 'en', context: RecipeContext): string {
	// Renders the optional keyword line under the given label
	const keywordLine = (label: string): string =>
		context.tags ? `${label}: ${context.tags.slice(0, 3).join(', ')}` : '';

	if (lang === 'de') {
		return [
			`Erstelle einen prägnanten Alt-Text (maximal 10 Wörter, 125 Zeichen) für dieses Rezeptbild auf Deutsch.`,
			`Rezept: ${context.name}`,
			`Kategorie: ${context.category}`,
			keywordLine('Stichwörter'),
			`Beschreibe NUR das SICHTBARE im Bild: das Aussehen des Gerichts, Farben, Präsentation, Textur und Garnierung. Sei beschreibend aber prägnant für Screenreader. Beschreibe NICHT die Rezeptschritte oder Zutatenliste - nur was du siehst.`,
			`Antworte NUR mit dem Alt-Text, ohne Erklärung oder Anführungszeichen.`,
		].join('\n');
	}

	return [
		`Generate a concise alt text (maximum 10 words, 125 chars) for this recipe image in English.`,
		`Recipe: ${context.name}`,
		`Category: ${context.category}`,
		keywordLine('Keywords'),
		`Describe ONLY what's VISIBLE in the image: the appearance of the dish, colors, presentation, texture, and garnishes. Be descriptive but concise for screen readers. Do NOT describe the recipe steps or ingredients list - only what you see.`,
		`Respond with ONLY the alt text, no explanation or quotes.`,
	].join('\n');
}
/**
 * Normalise a raw model reply into a usable alt text.
 *
 * Steps, in order: trim surrounding whitespace, drop one wrapping quote
 * character from each end, drop a leading "Alt text:" / "Alternativer Text:"
 * label, drop quotes again if a label was removed (models often answer
 * `Alt text: "…"`), and finally cap the result at 125 characters.
 *
 * @param text - Raw reply from the model
 * @returns Cleaned alt text, at most 125 characters long
 */
function cleanAltText(text: string): string {
	// Straight quotes plus the typographic quotes commonly used in German and English
	const stripQuotes = (value: string): string =>
		value.replace(/^["'„“”«»]|["'„“”«»]$/g, '');

	// Trim first: quotes hidden behind whitespace would otherwise survive
	let cleaned = stripQuotes(text.trim());

	// Remove "Alt text:" prefix if present
	const withoutLabel = cleaned.replace(/^(Alt[- ]?text|Alternativer Text):\s*/i, '');
	if (withoutLabel !== cleaned) {
		// The label may have been followed by a quoted description
		cleaned = stripQuotes(withoutLabel.trim());
	}

	cleaned = cleaned.trim();

	// Truncate to 125 characters if too long
	if (cleaned.length > 125) {
		cleaned = cleaned.substring(0, 122) + '...';
	}
	return cleaned;
}
/**
 * Generate alt text for several images, one after another.
 *
 * The returned array always has exactly one entry per input image, in input
 * order. An image whose generation fails is logged and represented by empty
 * strings so the rest of the batch still runs.
 *
 * @param images - Image paths with the recipe context for each
 * @param modelName - Ollama model to use (default: "gemma3:latest")
 * @param onProgress - Optional callback invoked after each successfully
 *   generated image with (1-based position, total, result). An exception thrown
 *   by the callback is logged and does not affect the results.
 * @returns One alt text result per input image
 */
export async function generateBatchAltText(
	images: Array<{ path: string; context: RecipeContext }>,
	modelName: string = 'gemma3:latest',
	onProgress?: (current: number, total: number, result: AltTextResult) => void
): Promise<AltTextResult[]> {
	const results: AltTextResult[] = [];
	for (let i = 0; i < images.length; i++) {
		const { path, context } = images[i];

		let result: AltTextResult;
		try {
			result = await generateAltText(path, context, modelName);
		} catch (error) {
			console.error(`Failed to generate alt text for ${path}:`, error);
			// Return empty strings on error
			results.push({ de: '', en: '' });
			continue;
		}
		results.push(result);

		// Keep callback failures apart from generation failures: sharing one
		// catch block would push a second, empty entry for this image and
		// misalign the results with the input list.
		if (onProgress) {
			try {
				onProgress(i + 1, images.length, result);
			} catch (error) {
				console.error(`Progress callback failed for ${path}:`, error);
			}
		}
	}
	return results;
}

View File

@@ -0,0 +1,82 @@
import sharp from 'sharp';
import { readFile } from 'fs/promises';
/**
 * Options accepted by resizeAndEncodeImage; every field is optional.
 */
export interface ResizeOptions {
// Upper bound for the output width in pixels (resizeAndEncodeImage defaults to 1024).
maxWidth?: number;
// Upper bound for the output height in pixels (resizeAndEncodeImage defaults to 1024).
maxHeight?: number;
// Encoder quality passed to sharp's toFormat (resizeAndEncodeImage defaults to 85).
quality?: number;
// Output encoding (resizeAndEncodeImage defaults to 'jpeg').
format?: 'jpeg' | 'webp' | 'png';
}
/**
 * Shrink an image for vision-model input and return it base64 encoded.
 *
 * The image is fitted inside the given bounds with its aspect ratio kept and
 * is never enlarged. If processing fails, the error is logged and the
 * untouched file contents are encoded instead.
 *
 * @param imagePath - Path to the image file
 * @param options - Size bounds, quality and output format (see ResizeOptions)
 * @returns Base64 encoded image data
 */
export async function resizeAndEncodeImage(
	imagePath: string,
	options: ResizeOptions = {}
): Promise<string> {
	const { maxWidth = 1024, maxHeight = 1024, quality = 85, format = 'jpeg' } = options;

	try {
		const fitted = sharp(imagePath).resize(maxWidth, maxHeight, {
			fit: 'inside', // keep the aspect ratio
			withoutEnlargement: true, // smaller images stay as they are
		});
		const encoded = await fitted.toFormat(format, { quality }).toBuffer();
		return encoded.toString('base64');
	} catch (error) {
		console.error('Error resizing image:', error);
		// Best effort: hand over the original bytes when the resize fails
		const original = await readFile(imagePath);
		return original.toString('base64');
	}
}
/**
 * Read an image's pixel dimensions from its metadata via sharp.
 *
 * @param imagePath - Path to the image file
 * @returns Width and height; a dimension missing from the metadata is reported as 0
 */
export async function getImageDimensions(
	imagePath: string
): Promise<{ width: number; height: number }> {
	const { width, height } = await sharp(imagePath).metadata();
	return { width: width || 0, height: height || 0 };
}
/**
 * Compute the dimensions an image should be scaled to so that its longer side
 * does not exceed `targetSize`, keeping the aspect ratio and never enlarging.
 *
 * Non-positive or non-numeric inputs (for example the 0x0 that
 * getImageDimensions reports when metadata is missing) yield 0x0 instead of
 * NaN. For valid input the shorter side is never rounded below 1 pixel.
 *
 * @param originalWidth - Source width in pixels
 * @param originalHeight - Source height in pixels
 * @param targetSize - Maximum length of the longer side (default: 1024)
 * @returns Target width and height in pixels
 */
export function calculateOptimalDimensions(
	originalWidth: number,
	originalHeight: number,
	targetSize: number = 1024
): { width: number; height: number } {
	// `!(x > 0)` also rejects NaN, which a plain `x <= 0` would let through
	if (!(originalWidth > 0) || !(originalHeight > 0) || !(targetSize > 0)) {
		return { width: 0, height: 0 };
	}

	const aspectRatio = originalWidth / originalHeight;
	if (originalWidth > originalHeight) {
		const scaledHeight = Math.round(targetSize / aspectRatio);
		return {
			width: Math.min(targetSize, originalWidth),
			// Extreme aspect ratios would otherwise round the short side to 0
			height: Math.max(1, Math.min(scaledHeight, originalHeight)),
		};
	}

	const scaledWidth = Math.round(targetSize * aspectRatio);
	return {
		width: Math.max(1, Math.min(scaledWidth, originalWidth)),
		height: Math.min(targetSize, originalHeight),
	};
}

104
src/lib/server/ai/ollama.ts Normal file
View File

@@ -0,0 +1,104 @@
import { OLLAMA_URL } from '$env/static/private';
import { readFile } from 'fs/promises';
/**
* Ollama API client for local vision model inference
*/
/**
 * Request payload accepted by generateWithOllama and serialised into the
 * body of the POST to `/api/generate`.
 */
export interface OllamaGenerateRequest {
// Name of the model to run, e.g. 'gemma3:latest'.
model: string;
// Instruction text for the model.
prompt: string;
images?: string[]; // base64 encoded images
// Overridden by generateWithOllama, which always sends `stream: false`.
stream?: boolean;
// Sampling options forwarded to the server.
options?: {
temperature?: number;
top_p?: number;
// NOTE(review): Ollama's documented option for limiting output length is `num_predict`;
// confirm that a field named `max_tokens` is honoured by the server.
max_tokens?: number;
};
}
/**
 * Subset of the non-streaming `/api/generate` reply that this module reads.
 */
export interface OllamaGenerateResponse {
model: string;
created_at: string;
// Generated text; generateWithOllama returns this value trimmed.
response: string;
done: boolean;
}
/**
 * Generate a text response from an Ollama model, optionally with image input.
 *
 * The request is always sent non-streaming. `options.max_tokens` is forwarded
 * under Ollama's native option name `num_predict`, because the server does not
 * recognise an option called `max_tokens`.
 *
 * @param request - Model, prompt, optional base64 images and sampling options
 * @returns The model's reply with surrounding whitespace removed
 * @throws Error when the server is unreachable, answers with a non-2xx status,
 *   or replies without a text `response` field
 */
export async function generateWithOllama(
	request: OllamaGenerateRequest
): Promise<string> {
	const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';

	// Translate the caller-facing option name into the one Ollama understands
	const { options, ...payload } = request;
	let ollamaOptions: Record<string, unknown> | undefined;
	if (options) {
		const { max_tokens, ...passthrough } = options;
		ollamaOptions =
			max_tokens === undefined
				? passthrough
				: { ...passthrough, num_predict: max_tokens };
	}

	try {
		const response = await fetch(`${ollamaUrl}/api/generate`, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
			},
			body: JSON.stringify({
				...payload,
				// Left out of the JSON entirely when the caller passed no options
				options: ollamaOptions,
				stream: false, // Always use non-streaming for simpler handling
			}),
		});
		if (!response.ok) {
			throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
		}
		const data = (await response.json()) as Partial<OllamaGenerateResponse>;
		// Fail with a clear message rather than a TypeError from calling .trim() on undefined
		if (typeof data.response !== 'string') {
			throw new Error('Ollama API returned no response text');
		}
		return data.response.trim();
	} catch (error) {
		console.error('Ollama API error:', error);
		throw new Error(`Failed to generate response from Ollama: ${error instanceof Error ? error.message : 'Unknown error'}`);
	}
}
/**
 * Read a file from disk and return its contents base64 encoded.
 *
 * @param imagePath - Path of the file to read
 * @returns Base64 representation of the file's bytes
 */
export async function imageToBase64(imagePath: string): Promise<string> {
	return (await readFile(imagePath)).toString('base64');
}
/**
 * Probe the Ollama server by requesting `/api/tags`.
 *
 * @returns true when the request succeeds with a 2xx status; false on any other
 *   status or when the request itself fails
 */
export async function checkOllamaHealth(): Promise<boolean> {
	const baseUrl = OLLAMA_URL || 'http://localhost:11434';
	let reachable = false;
	try {
		const { ok } = await fetch(`${baseUrl}/api/tags`, { method: 'GET' });
		reachable = ok;
	} catch {
		reachable = false;
	}
	return reachable;
}
/**
 * Fetch the names of the models reported by the Ollama server's `/api/tags`.
 *
 * @returns The model names; an empty list when the reply has no `models`
 *   entry or when the request fails (the failure is logged)
 */
export async function listOllamaModels(): Promise<string[]> {
	const baseUrl = OLLAMA_URL || 'http://localhost:11434';
	try {
		const response = await fetch(`${baseUrl}/api/tags`, { method: 'GET' });
		if (!response.ok) {
			throw new Error('Failed to fetch models');
		}
		const payload = await response.json();
		const names = payload.models?.map((model: any) => model.name);
		return names || [];
	} catch (error) {
		console.error('Failed to list Ollama models:', error);
		return [];
	}
}