feat: add AI-powered alt text generation for recipe images
All checks were successful
CI / update (push) Successful in 1m10s
All checks were successful
CI / update (push) Successful in 1m10s
- Implement local Ollama integration for bilingual (DE/EN) alt text generation
- Add image management UI to German edit page and English translation section
- Update Card and recipe detail pages to display alt text from images array
- Include GenerateAltTextButton component for manual alt text generation
- Add bulk processing admin page for batch alt text generation
- Optimize images to 1024x1024 before AI processing for 75% faster generation
- Store alt text in recipe.images[].alt and translations.en.images[].alt
This commit is contained in:
176
src/lib/server/ai/alttext.ts
Normal file
176
src/lib/server/ai/alttext.ts
Normal file
@@ -0,0 +1,176 @@
|
||||
import { generateWithOllama, imageToBase64 } from './ollama.js';
|
||||
import { resizeAndEncodeImage } from './imageUtils.js';
|
||||
import { IMAGE_DIR } from '$env/static/private';
|
||||
import { join } from 'path';
|
||||
|
||||
/**
 * Recipe metadata passed along with an image so the generated description
 * can be anchored to the dish being shown.
 */
export interface RecipeContext {
  /** Name of the recipe. */
  name: string;

  /** Category the recipe belongs to. */
  category: string;

  /** Optional list of ingredient names. */
  ingredients?: Array<string>;

  /** Optional list of tags/keywords attached to the recipe. */
  tags?: Array<string>;
}
|
||||
|
||||
/**
 * Alt text for a single image, one string per supported language.
 */
export interface AltTextResult {
  /** German alt text. */
  de: string;

  /** English alt text. */
  en: string;
}
|
||||
|
||||
/**
 * Generate alt text for a recipe image in both German and English.
 *
 * The image is scaled down to fit within 1024x1024 and re-encoded as JPEG
 * before it is sent to the model. The German and English requests are then
 * issued concurrently against that same encoded image.
 *
 * @param imagePath - Either a path starting with "/", which is used as-is, or
 *   a filename resolved relative to `<IMAGE_DIR>/rezepte/full`
 *   (e.g., "brot.a1b2c3d4.webp")
 * @param context - Recipe context for better descriptions
 * @param modelName - Ollama model to use (default: "gemma3:latest")
 * @returns Object with German (`de`) and English (`en`) alt text
 * @throws Error if `imagePath` is empty, if the image cannot be encoded, or
 *   if either of the two generation requests fails
 */
export async function generateAltText(
  imagePath: string,
  context: RecipeContext,
  modelName: string = 'gemma3:latest'
): Promise<AltTextResult> {
  // An empty path would otherwise resolve to the image directory itself and
  // only fail later with an unrelated-looking file system error.
  if (!imagePath) {
    throw new Error('generateAltText: imagePath must not be empty');
  }

  // Construct full path to image
  const fullImagePath = imagePath.startsWith('/')
    ? imagePath
    : join(IMAGE_DIR, 'rezepte', 'full', imagePath);

  // Downscale before encoding: a smaller payload is cheaper to transfer and
  // faster for the vision model to process.
  const imageBase64 = await resizeAndEncodeImage(fullImagePath, {
    maxWidth: 1024,
    maxHeight: 1024,
    quality: 85,
    format: 'jpeg',
  });

  // Generate both German and English in parallel
  const [de, en] = await Promise.all([
    generateGermanAltText(imageBase64, context, modelName),
    generateEnglishAltText(imageBase64, context, modelName),
  ]);

  return { de, en };
}
|
||||
|
||||
/**
 * Shared implementation behind the per-language generators.
 *
 * Builds the prompt for `lang`, sends it together with the image to Ollama
 * and cleans up the model's reply.
 *
 * @param lang - Language of the prompt and of the expected answer
 * @param imageBase64 - Base64 encoded image to describe
 * @param context - Recipe context embedded in the prompt
 * @param modelName - Ollama model to use
 * @returns The cleaned alt text
 */
async function requestAltText(
  lang: 'de' | 'en',
  imageBase64: string,
  context: RecipeContext,
  modelName: string
): Promise<string> {
  const response = await generateWithOllama({
    model: modelName,
    prompt: buildPrompt(lang, context),
    images: [imageBase64],
    options: {
      temperature: 0.3, // Lower temperature for consistent descriptions
      max_tokens: 100,
    },
  });

  return cleanAltText(response);
}

/**
 * Generate German alt text
 */
async function generateGermanAltText(
  imageBase64: string,
  context: RecipeContext,
  modelName: string
): Promise<string> {
  return requestAltText('de', imageBase64, context, modelName);
}

/**
 * Generate English alt text
 */
async function generateEnglishAltText(
  imageBase64: string,
  context: RecipeContext,
  modelName: string
): Promise<string> {
  return requestAltText('en', imageBase64, context, modelName);
}
|
||||
|
||||
/**
 * Build context-aware prompt for alt text generation.
 *
 * The prompt consists of the instruction, a short recipe header (name,
 * category and up to three tags), guidance on what to describe, and the
 * required answer format, separated by blank lines.
 *
 * The keyword line is only emitted when there is at least one non-empty tag.
 * An empty `tags` array would otherwise produce a dangling label with nothing
 * after it.
 *
 * @param lang - Language the prompt is written in and the answer is expected in
 * @param context - Recipe metadata to embed in the prompt
 * @returns The complete prompt text
 */
function buildPrompt(lang: 'de' | 'en', context: RecipeContext): string {
  const text =
    lang === 'de'
      ? {
          instruction:
            'Erstelle einen prägnanten Alt-Text (maximal 10 Wörter, 125 Zeichen) für dieses Rezeptbild auf Deutsch.',
          recipe: 'Rezept',
          category: 'Kategorie',
          keywords: 'Stichwörter',
          guidance:
            'Beschreibe NUR das SICHTBARE im Bild: das Aussehen des Gerichts, Farben, Präsentation, Textur und Garnierung. Sei beschreibend aber prägnant für Screenreader. Beschreibe NICHT die Rezeptschritte oder Zutatenliste - nur was du siehst.',
          closing: 'Antworte NUR mit dem Alt-Text, ohne Erklärung oder Anführungszeichen.',
        }
      : {
          instruction:
            'Generate a concise alt text (maximum 10 words, 125 chars) for this recipe image in English.',
          recipe: 'Recipe',
          category: 'Category',
          keywords: 'Keywords',
          guidance:
            "Describe ONLY what's VISIBLE in the image: the appearance of the dish, colors, presentation, texture, and garnishes. Be descriptive but concise for screen readers. Do NOT describe the recipe steps or ingredients list - only what you see.",
          closing: 'Respond with ONLY the alt text, no explanation or quotes.',
        };

  const header = [
    `${text.recipe}: ${context.name}`,
    `${text.category}: ${context.category}`,
  ];

  // Only the first three tags are used to keep the prompt short.
  const keywords = (context.tags ?? []).slice(0, 3).join(', ');
  if (keywords) {
    header.push(`${text.keywords}: ${keywords}`);
  }

  return [text.instruction, header.join('\n'), text.guidance, text.closing].join('\n\n');
}
|
||||
|
||||
/**
 * Clean and validate alt text response.
 *
 * Normalises a raw model reply into a single-line alt text:
 *  1. collapses all whitespace (including line breaks) to single spaces,
 *  2. strips wrapping quotes (straight and typographic),
 *  3. strips a leading "Alt text:" / "Alternativer Text:" label,
 *  4. strips quotes again, so a reply like `Alt text: "..."` is fully unwrapped,
 *  5. truncates to at most 125 characters, ending in "..." when shortened.
 *
 * @param text - Raw text returned by the model
 * @returns Cleaned alt text, at most 125 UTF-16 code units long
 */
function cleanAltText(text: string): string {
  const maxLength = 125;
  const ellipsis = '...';
  const wrappingQuotes = /^["'„“”‚‘’«»]+|["'„“”‚‘’«»]+$/g;
  const label = /^(Alt[- ]?text|Alternativer Text):\s*/i;

  let cleaned = text.replace(/\s+/g, ' ').trim();

  // Quotes may wrap the whole reply or only the part after the label, so
  // strip them on both sides of the label removal.
  cleaned = cleaned.replace(wrappingQuotes, '').trim();
  cleaned = cleaned.replace(label, '');
  cleaned = cleaned.replace(wrappingQuotes, '').trim();

  if (cleaned.length > maxLength) {
    let head = cleaned.substring(0, maxLength - ellipsis.length);

    // Do not leave half of a surrogate pair (e.g. a split emoji) at the cut.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      head = head.slice(0, -1);
    }

    cleaned = head.trimEnd() + ellipsis;
  }

  return cleaned;
}
|
||||
|
||||
/**
 * Batch generate alt text for multiple images.
 *
 * Images are processed one after another. A failure for one image is logged
 * and recorded as `{ de: '', en: '' }`, so the returned array always has
 * exactly one entry per input image, in input order.
 *
 * @param images - Images to process, each with its path and recipe context
 * @param modelName - Ollama model to use (default: "gemma3:latest")
 * @param onProgress - Optional callback invoked after every image, including
 *   failed ones (which report the empty result). An exception thrown by the
 *   callback is logged and does not affect the batch.
 * @returns One result per input image, in the same order
 */
export async function generateBatchAltText(
  images: Array<{ path: string; context: RecipeContext }>,
  modelName: string = 'gemma3:latest',
  onProgress?: (current: number, total: number, result: AltTextResult) => void
): Promise<AltTextResult[]> {
  const results: AltTextResult[] = [];
  const total = images.length;

  for (const [index, { path, context }] of images.entries()) {
    let result: AltTextResult;

    try {
      result = await generateAltText(path, context, modelName);
    } catch (error) {
      console.error(`Failed to generate alt text for ${path}:`, error);
      // Return empty strings on error
      result = { de: '', en: '' };
    }

    results.push(result);

    // The callback runs outside the generation try/catch: a throwing callback
    // must not be mistaken for a generation failure, which would append a
    // second entry for the same image and shift all following results.
    try {
      onProgress?.(index + 1, total, result);
    } catch (error) {
      console.error(`Progress callback failed for ${path}:`, error);
    }
  }

  return results;
}
|
||||
82
src/lib/server/ai/imageUtils.ts
Normal file
82
src/lib/server/ai/imageUtils.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import sharp from 'sharp';
|
||||
import { readFile } from 'fs/promises';
|
||||
|
||||
/**
 * Options controlling how an image is scaled and re-encoded.
 * Every field is optional.
 */
export interface ResizeOptions {
  /** Upper bound for the output width in pixels. */
  maxWidth?: number;

  /** Upper bound for the output height in pixels. */
  maxHeight?: number;

  /** Encoder quality setting passed to the output format. */
  quality?: number;

  /** Output image format. */
  format?: 'jpeg' | 'webp' | 'png';
}
|
||||
|
||||
/**
 * Resize and optimize image for vision model processing.
 *
 * The image is auto-oriented, scaled to fit inside `maxWidth` x `maxHeight`
 * (aspect ratio preserved, never enlarged), re-encoded in `format` and
 * returned as base64.
 *
 * If processing fails, the error is logged and the unmodified file contents
 * are returned as base64 instead (original size and original format).
 *
 * @param imagePath - Path to the image file
 * @param options - Resize options; defaults are 1024x1024, quality 85, JPEG
 * @returns Base64 encoded optimized image
 * @throws If processing fails and the file cannot be read either
 */
export async function resizeAndEncodeImage(
  imagePath: string,
  options: ResizeOptions = {}
): Promise<string> {
  const {
    maxWidth = 1024,
    maxHeight = 1024,
    quality = 85,
    format = 'jpeg',
  } = options;

  try {
    // Read and process image with sharp
    const processedImage = await sharp(imagePath)
      // Apply the EXIF orientation to the pixels. Re-encoding drops the
      // metadata, so without this a photo stored with a rotation tag would
      // reach the model sideways or upside down.
      .rotate()
      .resize(maxWidth, maxHeight, {
        fit: 'inside', // Maintain aspect ratio
        withoutEnlargement: true, // Don't upscale smaller images
      })
      .toFormat(format, { quality })
      .toBuffer();

    return processedImage.toString('base64');
  } catch (error) {
    console.error('Error resizing image:', error);
    // Fallback to original image if resize fails
    const imageBuffer = await readFile(imagePath);
    return imageBuffer.toString('base64');
  }
}
|
||||
|
||||
/**
 * Look up an image's pixel dimensions from its metadata.
 *
 * @param imagePath - Path to the image file
 * @returns Width and height; a dimension missing from the metadata is
 *   reported as 0
 */
export async function getImageDimensions(
  imagePath: string
): Promise<{ width: number; height: number }> {
  const { width = 0, height = 0 } = await sharp(imagePath).metadata();
  return { width, height };
}
|
||||
|
||||
/**
 * Estimate optimal resize dimensions for vision models.
 *
 * Scales the image so that its longer side is at most `targetSize` while
 * keeping the aspect ratio. Images already within the target are returned
 * unchanged (no upscaling).
 *
 * @param originalWidth - Source width in pixels
 * @param originalHeight - Source height in pixels
 * @param targetSize - Maximum length of the longer side (default: 1024)
 * @returns Target width and height. The shorter side is never rounded below
 *   1 pixel. If any argument is zero, negative or not finite, the result is
 *   `{ width: 0, height: 0 }`.
 */
export function calculateOptimalDimensions(
  originalWidth: number,
  originalHeight: number,
  targetSize: number = 1024
): { width: number; height: number } {
  const isUsable = (value: number): boolean => Number.isFinite(value) && value > 0;

  // Guard against unknown dimensions (e.g. 0x0), which would otherwise make
  // the aspect ratio NaN or Infinity and leak NaN into the result.
  if (!isUsable(originalWidth) || !isUsable(originalHeight) || !isUsable(targetSize)) {
    return { width: 0, height: 0 };
  }

  const aspectRatio = originalWidth / originalHeight;

  // For very elongated images the scaled short side can round to 0; keep it
  // at 1 pixel so the result is still a valid image size.
  const clampShortSide = (scaled: number, original: number): number =>
    Math.max(1, Math.min(Math.round(scaled), original));

  if (originalWidth > originalHeight) {
    return {
      width: Math.min(targetSize, originalWidth),
      height: clampShortSide(targetSize / aspectRatio, originalHeight),
    };
  }

  return {
    width: clampShortSide(targetSize * aspectRatio, originalWidth),
    height: Math.min(targetSize, originalHeight),
  };
}
|
||||
104
src/lib/server/ai/ollama.ts
Normal file
104
src/lib/server/ai/ollama.ts
Normal file
@@ -0,0 +1,104 @@
|
||||
import { OLLAMA_URL } from '$env/static/private';
|
||||
import { readFile } from 'fs/promises';
|
||||
|
||||
/**
|
||||
* Ollama API client for local vision model inference
|
||||
*/
|
||||
|
||||
/**
 * Request payload for Ollama's `/api/generate` endpoint.
 */
export interface OllamaGenerateRequest {
  /** Name of the model to run. */
  model: string;

  /** Prompt text sent to the model. */
  prompt: string;

  /** base64 encoded images */
  images?: string[];

  /** Whether the server should stream the reply. */
  stream?: boolean;

  /** Sampling and generation options. */
  options?: {
    /** Sampling temperature. */
    temperature?: number;

    /** Nucleus sampling threshold. */
    top_p?: number;

    /**
     * Maximum number of tokens to generate, under Ollama's own option name.
     * Prefer this over `max_tokens`.
     */
    num_predict?: number;

    /**
     * Maximum number of tokens to generate.
     * NOTE(review): Ollama's documented option for this is `num_predict`;
     * `max_tokens` is kept for existing callers - confirm it is translated
     * before the request is sent.
     */
    max_tokens?: number;
  };
}
|
||||
|
||||
/**
 * Fields read from the JSON reply of Ollama's `/api/generate` endpoint.
 */
export interface OllamaGenerateResponse {
  /** Name of the model that produced the reply. */
  model: string;

  /** Creation timestamp as reported by the server. */
  created_at: string;

  /** The generated text. */
  response: string;

  /** Completion flag reported by the server. */
  done: boolean;
}
|
||||
|
||||
/**
 * Generate text response from Ollama model with optional image input.
 *
 * Sends a non-streaming POST to `<OLLAMA_URL>/api/generate`, falling back to
 * `http://localhost:11434` when `OLLAMA_URL` is empty.
 *
 * `options.max_tokens` is sent to the server as `num_predict`, which is the
 * option name Ollama uses to cap the generated length. A `num_predict` already
 * present in `options` takes precedence.
 *
 * @param request - Model, prompt, optional images and generation options
 * @returns The generated text with surrounding whitespace removed
 * @throws Error if the request fails, the server answers with a non-2xx
 *   status, or the reply contains no generated text
 */
export async function generateWithOllama(
  request: OllamaGenerateRequest
): Promise<string> {
  const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';

  // Translate the caller-facing `max_tokens` into Ollama's `num_predict`.
  // Spreading the remaining options last lets an explicit `num_predict` win.
  const { max_tokens: maxTokens, ...nativeOptions } = request.options ?? {};
  const options =
    maxTokens === undefined ? nativeOptions : { num_predict: maxTokens, ...nativeOptions };

  try {
    const response = await fetch(`${ollamaUrl}/api/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        ...request,
        // `undefined` is dropped by JSON.stringify, so a request without
        // options is still sent without an `options` key.
        options: request.options ? options : undefined,
        stream: false, // Always use non-streaming for simpler handling
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
    }

    const data = (await response.json()) as Partial<OllamaGenerateResponse> | null;
    const generated = data?.response;
    if (typeof generated !== 'string') {
      throw new Error('Ollama reply did not contain a "response" string');
    }

    return generated.trim();
  } catch (error) {
    console.error('Ollama API error:', error);
    throw new Error(`Failed to generate response from Ollama: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}
|
||||
|
||||
/**
 * Read a file from disk and return its contents base64 encoded.
 *
 * @param imagePath - Path of the file to read
 * @returns The file contents as a base64 string
 */
export async function imageToBase64(imagePath: string): Promise<string> {
  return (await readFile(imagePath)).toString('base64');
}
|
||||
|
||||
/**
 * Probe the Ollama server by requesting `<OLLAMA_URL>/api/tags`
 * (`http://localhost:11434` when `OLLAMA_URL` is empty).
 *
 * @returns `true` if the server answered with a successful status, `false`
 *   if it answered with an error status or the request itself failed
 */
export async function checkOllamaHealth(): Promise<boolean> {
  const baseUrl = OLLAMA_URL || 'http://localhost:11434';
  let reachable: boolean;

  try {
    const { ok } = await fetch(`${baseUrl}/api/tags`, { method: 'GET' });
    reachable = ok;
  } catch {
    // Any failure to complete the request counts as "not available".
    reachable = false;
  }

  return reachable;
}
|
||||
|
||||
/**
 * List available models on Ollama server.
 *
 * Requests `<OLLAMA_URL>/api/tags` (`http://localhost:11434` when
 * `OLLAMA_URL` is empty) and collects the `name` of every entry in the
 * reply's `models` array. Entries without a string `name` are skipped.
 *
 * @returns The model names, or an empty array if the request fails, the
 *   server answers with an error status, or the reply has no `models` array
 */
export async function listOllamaModels(): Promise<string[]> {
  const ollamaUrl = OLLAMA_URL || 'http://localhost:11434';

  try {
    const response = await fetch(`${ollamaUrl}/api/tags`, {
      method: 'GET',
    });

    if (!response.ok) {
      throw new Error('Failed to fetch models');
    }

    // The payload comes from the network, so treat it as unknown and narrow
    // it instead of trusting its shape.
    const payload: unknown = await response.json();
    const models = (payload as { models?: unknown } | null)?.models;
    if (!Array.isArray(models)) {
      return [];
    }

    return models
      .map((entry: unknown) => (entry as { name?: unknown } | null)?.name)
      .filter((name): name is string => typeof name === 'string');
  } catch (error) {
    console.error('Failed to list Ollama models:', error);
    return [];
  }
}
|
||||
Reference in New Issue
Block a user