Files
homepage/src/lib/server/nutritionMatcher.ts
T
Alexander d2a0411937 recipes: nutrition calculator with BLS/USDA matching, manual overwrites, and skip
Dual-source nutrition system using BLS (German, primary) and USDA (English, fallback)
with ML embedding matching (multilingual-e5-small / all-MiniLM-L6-v2), hybrid
substring-first search, and position-aware scoring heuristics.

Includes per-recipe and global manual ingredient overwrites, ingredient skip/exclude,
referenced recipe nutrition (base refs + anchor tags), section-name dedup,
amino acid tracking, and reactive client-side calculator with NutritionSummary component.
2026-04-01 13:00:55 +02:00

817 lines
27 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Dual-source embedding-based ingredient matching engine.
* Priority: global overwrite → alias → BLS (German, primary) → USDA (English, fallback) → none
*
* BLS uses multilingual-e5-small for German ingredient names.
* USDA uses all-MiniLM-L6-v2 for English ingredient names.
*/
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { readFileSync } from 'fs';
import { resolve } from 'path';
import { NUTRITION_DB, type NutritionEntry } from '$lib/data/nutritionDb';
import { BLS_DB, type BlsEntry } from '$lib/data/blsDb';
import { lookupAlias } from '$lib/data/ingredientAliases';
import { canonicalizeUnit, resolveGramsPerUnit } from '$lib/data/unitConversions';
import { resolveDefaultAmount } from '$lib/data/defaultAmounts';
import type { NutritionMapping, NutritionPer100g } from '$types/types';
import { NutritionOverwrite } from '$models/NutritionOverwrite';
/** Model id handed to `pipeline('feature-extraction', …)` for English (USDA) ingredient names. */
const USDA_MODEL = 'Xenova/all-MiniLM-L6-v2';
/** Model id handed to `pipeline('feature-extraction', …)` for German (BLS) ingredient names. */
const BLS_MODEL = 'Xenova/multilingual-e5-small';
/** Location of the USDA embedding index JSON; the relative path is resolved against the process working directory. */
const USDA_EMBEDDINGS_PATH = resolve('src/lib/data/nutritionEmbeddings.json');
/** Location of the BLS embedding index JSON; the relative path is resolved against the process working directory. */
const BLS_EMBEDDINGS_PATH = resolve('src/lib/data/blsEmbeddings.json');
/** Minimum cosine similarity for accepting a USDA hit found by the embedding-only fallback search. */
const CONFIDENCE_THRESHOLD = 0.45;
// Lazy-loaded singletons — USDA
// Each variable starts as null and is filled on first use by its getter below.
/** Feature-extraction pipeline for USDA_MODEL. */
let usdaEmbedder: FeatureExtractionPipeline | null = null;
/** Embedding rows read from USDA_EMBEDDINGS_PATH (one vector per USDA entry name). */
let usdaEmbeddingIndex: { fdcId: number; name: string; vector: number[] }[] | null = null;
/** NUTRITION_DB entries keyed by fdcId. */
let nutritionByFdcId: Map<number, NutritionEntry> | null = null;
/** NUTRITION_DB entries keyed by entry name (used for alias lookups). */
let nutritionByName: Map<string, NutritionEntry> | null = null;
// Lazy-loaded singletons — BLS
/** Feature-extraction pipeline for BLS_MODEL. */
let blsEmbedder: FeatureExtractionPipeline | null = null;
/** Embedding rows read from BLS_EMBEDDINGS_PATH (one vector per BLS entry name). */
let blsEmbeddingIndex: { blsCode: string; name: string; vector: number[] }[] | null = null;
/** BLS_DB entries keyed by blsCode. */
let blsByCode: Map<string, BlsEntry> | null = null;
/**
 * English preparation/size/quality words that carry no nutritional meaning.
 * `normalizeIngredientName` removes every whole-word occurrence of each entry
 * (case-insensitively) before the name is matched against the databases.
 * Entries are interpolated into a regular expression verbatim, so they must
 * not contain regex metacharacters.
 */
const STRIP_MODIFIERS = [
'warm', 'cold', 'hot', 'room temperature', 'lukewarm',
'fresh', 'freshly', 'dried', 'dry',
'finely', 'coarsely', 'roughly', 'thinly',
'chopped', 'diced', 'minced', 'sliced', 'grated', 'shredded',
'crushed', 'ground', 'whole', 'halved', 'quartered',
'peeled', 'unpeeled', 'pitted', 'seeded', 'deseeded',
'melted', 'softened', 'frozen', 'thawed', 'chilled',
'toasted', 'roasted', 'blanched', 'boiled', 'steamed',
'sifted', 'packed', 'loosely packed', 'firmly packed',
'small', 'medium', 'large', 'extra-large',
'organic', 'free-range', 'grass-fed',
'optional', 'to taste', 'as needed', 'for garnish', 'for serving',
// NOTE: 'roughly' also appears above; the duplicate is harmless.
'about', 'approximately', 'roughly',
];
/**
 * German counterpart of the English modifier list: preparation/size/quality
 * words that `normalizeIngredientNameDe` removes (case-insensitively, as whole
 * words) before a German ingredient name is matched.
 * Only the base forms are listed; inflected forms such as "gehackte" are not covered.
 * Entries are interpolated into a regular expression verbatim, so they must
 * not contain regex metacharacters.
 */
const STRIP_MODIFIERS_DE = [
'warm', 'kalt', 'heiß', 'lauwarm', 'zimmerwarm',
'frisch', 'getrocknet', 'trocken',
'fein', 'grob', 'dünn',
'gehackt', 'gewürfelt', 'geschnitten', 'gerieben', 'geraspelt',
'gemahlen', 'ganz', 'halbiert', 'geviertelt',
'geschält', 'entkernt', 'entsteint',
'geschmolzen', 'weich', 'gefroren', 'aufgetaut', 'gekühlt',
'geröstet', 'blanchiert', 'gekocht', 'gedämpft',
'gesiebt',
'klein', 'mittel', 'groß',
'bio', 'optional', 'nach Geschmack', 'nach Bedarf', 'zum Garnieren',
'etwa', 'ungefähr', 'ca',
];
// ── USDA helpers ──
/**
 * Return the USDA lookup table keyed by entry name, building it from
 * NUTRITION_DB on the first call and reusing it afterwards.
 * When two entries share a name, the later one wins.
 */
function getNutritionByName(): Map<string, NutritionEntry> {
  if (nutritionByName) return nutritionByName;
  const table = (nutritionByName = new Map<string, NutritionEntry>());
  for (const row of NUTRITION_DB) {
    table.set(row.name, row);
  }
  return table;
}
/**
 * Return the USDA lookup table keyed by fdcId, building it from
 * NUTRITION_DB on the first call and reusing it afterwards.
 */
function getNutritionByFdcId(): Map<number, NutritionEntry> {
  if (nutritionByFdcId) return nutritionByFdcId;
  const table = (nutritionByFdcId = new Map<number, NutritionEntry>());
  for (const row of NUTRITION_DB) {
    table.set(row.fdcId, row);
  }
  return table;
}
/**
 * Return the shared feature-extraction pipeline for USDA_MODEL (8-bit
 * quantised weights), creating it on first use.
 */
async function getUsdaEmbedder(): Promise<FeatureExtractionPipeline> {
  if (usdaEmbedder) return usdaEmbedder;
  usdaEmbedder = await pipeline('feature-extraction', USDA_MODEL, { dtype: 'q8' });
  return usdaEmbedder;
}
/**
 * Return the USDA embedding index, reading and parsing
 * USDA_EMBEDDINGS_PATH synchronously on the first call.
 * Read or parse failures propagate to the caller.
 */
function getUsdaEmbeddingIndex() {
  if (usdaEmbeddingIndex) return usdaEmbeddingIndex;
  const fileText = readFileSync(USDA_EMBEDDINGS_PATH, 'utf-8');
  const parsed = JSON.parse(fileText);
  usdaEmbeddingIndex = parsed.entries;
  return usdaEmbeddingIndex!;
}
// ── BLS helpers ──
/**
 * Return the BLS lookup table keyed by blsCode, building it from BLS_DB
 * on the first call and reusing it afterwards.
 */
function getBlsByCode(): Map<string, BlsEntry> {
  if (blsByCode) return blsByCode;
  const table = (blsByCode = new Map<string, BlsEntry>());
  for (const row of BLS_DB) {
    table.set(row.blsCode, row);
  }
  return table;
}
/**
 * Return the shared feature-extraction pipeline for BLS_MODEL (8-bit
 * quantised weights), creating it on first use.
 */
async function getBlsEmbedder(): Promise<FeatureExtractionPipeline> {
  if (blsEmbedder) return blsEmbedder;
  blsEmbedder = await pipeline('feature-extraction', BLS_MODEL, { dtype: 'q8' });
  return blsEmbedder;
}
/**
 * Return the BLS embedding index, loading it from BLS_EMBEDDINGS_PATH on the
 * first call.
 *
 * Loading is best-effort: if the file is missing, unreadable, or does not
 * contain an `entries` array, an empty index is cached and BLS embedding
 * matching is effectively disabled. A missing file is expected (embeddings not
 * generated yet) and stays silent; every other failure is logged so a corrupt
 * file does not go unnoticed.
 */
function getBlsEmbeddingIndex() {
  if (!blsEmbeddingIndex) {
    let entries: unknown;
    try {
      entries = JSON.parse(readFileSync(BLS_EMBEDDINGS_PATH, 'utf-8'))?.entries;
      if (!Array.isArray(entries)) {
        console.warn(`[nutritionMatcher] ${BLS_EMBEDDINGS_PATH} has no "entries" array; BLS embedding matching is disabled`);
      }
    } catch (err) {
      // ENOENT: BLS embeddings not yet generated — skip quietly.
      if ((err as { code?: unknown } | null)?.code !== 'ENOENT') {
        console.warn(`[nutritionMatcher] could not load BLS embeddings from ${BLS_EMBEDDINGS_PATH}:`, err);
      }
    }
    // Always cache an array so callers can rely on `.length` and iteration.
    blsEmbeddingIndex = Array.isArray(entries) ? entries : [];
  }
  return blsEmbeddingIndex!;
}
// ── Shared ──
/** Compiled strip patterns for STRIP_MODIFIERS, built once on first use. */
let englishModifierPatterns: RegExp[] | null = null;

/**
 * Normalize an English ingredient name for matching.
 *
 * Lower-cases the name, drops parenthesised remarks, removes every whole-word
 * occurrence of a STRIP_MODIFIERS entry (plus a directly following comma and
 * whitespace), then collapses whitespace and removes a trailing comma.
 *
 * @param name Raw ingredient name as written in the recipe.
 * @returns The cleaned name; may be empty if the name consisted only of modifiers.
 */
export function normalizeIngredientName(name: string): string {
  if (!englishModifierPatterns) {
    // Longest modifiers first: otherwise "packed" / "large" are stripped before
    // "firmly packed" / "extra-large" get a chance, leaving "firmly" / "extra-" behind.
    englishModifierPatterns = [...new Set(STRIP_MODIFIERS)]
      .sort((a, b) => b.length - a.length)
      .map((mod) => new RegExp(`\\b${mod}\\b,?\\s*`, 'gi'));
  }
  let normalized = name.toLowerCase().trim();
  normalized = normalized.replace(/\(.*?\)/g, '').trim();
  for (const pattern of englishModifierPatterns) {
    normalized = normalized.replace(pattern, '').trim();
  }
  normalized = normalized.replace(/\s+/g, ' ').replace(/,\s*$/, '').trim();
  return normalized;
}
/** Compiled strip patterns for STRIP_MODIFIERS_DE, built once on first use. */
let germanModifierPatterns: RegExp[] | null = null;

/**
 * Normalize a German ingredient name for matching.
 *
 * Lower-cases the name, drops parenthesised remarks, removes every whole-word
 * occurrence of a STRIP_MODIFIERS_DE entry (plus a directly following comma
 * and whitespace), then collapses whitespace and removes a trailing comma.
 *
 * Word boundaries are Unicode-aware. JavaScript's `\b` only knows ASCII word
 * characters, so with `\b` a modifier ending in "ß" (heiß, groß) is never
 * stripped as a standalone word, while "heiße Milch" is mangled to "e milch".
 *
 * @param name Raw German ingredient name as written in the recipe.
 * @returns The cleaned name; may be empty if the name consisted only of modifiers.
 */
export function normalizeIngredientNameDe(name: string): string {
  if (!germanModifierPatterns) {
    germanModifierPatterns = [...new Set(STRIP_MODIFIERS_DE)]
      // Longest first so a multi-word modifier is never pre-empted by a shorter one.
      .sort((a, b) => b.length - a.length)
      .map((mod) => new RegExp(`(?<![\\p{L}\\p{N}_])${mod}(?![\\p{L}\\p{N}_]),?\\s*`, 'giu'));
  }
  let normalized = name.toLowerCase().trim();
  normalized = normalized.replace(/\(.*?\)/g, '').trim();
  for (const pattern of germanModifierPatterns) {
    normalized = normalized.replace(pattern, '').trim();
  }
  normalized = normalized.replace(/\s+/g, ' ').replace(/,\s*$/, '').trim();
  return normalized;
}
/**
 * Cosine similarity of two vectors.
 *
 * @param a First vector; its length drives the iteration.
 * @param b Second vector, expected to have the same length as `a`.
 * @returns A value in [-1, 1], or 0 when the similarity is undefined
 *          (either vector has zero norm, or the lengths do not match and the
 *          arithmetic produced NaN). Returning 0 keeps NaN from leaking into
 *          the score arithmetic of callers.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const similarity = dot / (Math.sqrt(normA) * Math.sqrt(normB));
  return Number.isFinite(similarity) ? similarity : 0;
}
/**
 * Fold lower-case German umlauts and ß to plain ASCII (ä→a, ö→o, ü→u, ß→ss)
 * so substring comparisons tolerate spelling variants.
 * Upper-case umlauts are left untouched; callers lower-case first.
 */
function deUmlaut(s: string): string {
  const asciiFor: Record<string, string> = { 'ä': 'a', 'ö': 'o', 'ü': 'u', 'ß': 'ss' };
  return s.replace(/[äöüß]/g, (ch) => asciiFor[ch]);
}
/**
 * Produce candidate singular/stem spellings of a German word for substring
 * matching. This is a handful of plural-suffix rules, not a real stemmer.
 *
 * The result always contains the word itself and its de-umlauted spelling;
 * candidates shorter than two characters are dropped.
 */
function germanSingulars(word: string): string[] {
  const ascii = deUmlaut(word);
  const candidates = new Set<string>([word, ascii]);
  // [ending, number of trailing characters to cut off for each candidate]
  const rules: ReadonlyArray<readonly [string, readonly number[]]> = [
    ['n', [1]],      // Tomaten→Tomate, Kartoffeln→Kartoffel
    ['en', [2, 1]],  // Bohnen→Bohn (and Bohne)
    ['er', [2]],     // Eier→Ei
    ['e', [1]],      // Birne→Birn (for compound matching)
    ['s', [1]],      // loanword plurals
  ];
  for (const [ending, cuts] of rules) {
    if (!ascii.endsWith(ending)) continue;
    for (const cut of cuts) {
      candidates.add(ascii.slice(0, -cut));
    }
  }
  return Array.from(candidates).filter((candidate) => candidate.length >= 2);
}
/**
 * BLS categories that hold prepared dishes, baked goods and supplements.
 * `blsEmbeddingMatch` skips entries in these categories in both its
 * substring-ranked pass and its embedding-only fallback.
 */
const EXCLUDED_BLS_CATEGORIES = new Set([
'Gerichte und Rezepte', 'Backwaren', 'Supplemente',
]);
/**
 * Generate lower-case search forms for an ingredient name: the full query and
 * its singular candidates, every individual word (3+ characters) with its
 * singular candidates, and — for German compounds — the stem left after
 * removing a common trailing component.
 * "Zitronenschale" → ["zitronenschale", "zitronen", "zitrone", ...]
 * "cinnamon stick" → ["cinnamon stick", "cinnamon", "stick", ...]
 *
 * @param query Ingredient name (any case).
 * @returns De-duplicated forms, each at least three characters long.
 */
function searchForms(query: string): string[] {
  const lowered = query.toLowerCase();
  const forms = new Set(germanSingulars(lowered));
  // Add individual words from multi-word queries
  for (const word of lowered.split(/\s+/)) {
    if (word.length < 3) continue;
    forms.add(word);
    forms.add(deUmlaut(word));
    for (const s of germanSingulars(word)) forms.add(s);
  }
  // Try splitting common German compound suffixes
  const compoundSuffixes = [
    'schale', 'saft', 'stange', 'stück', 'pulver', 'blatt', 'blätter',
    'korn', 'körner', 'mehl', 'öl', 'flocken', 'creme', 'mark',
    'wasser', 'milch', 'sahne', 'butter', 'käse', 'soße',
  ];
  const base = deUmlaut(lowered);
  // The base is de-umlauted, so the suffixes must be too — otherwise
  // "stück", "blätter" and "körner" could never match.
  for (const suffix of new Set(compoundSuffixes.map(deUmlaut))) {
    if (base.endsWith(suffix) && base.length > suffix.length + 2) {
      const stem = base.slice(0, -suffix.length);
      forms.add(stem);
      for (const s of germanSingulars(stem)) forms.add(s);
    }
  }
  return [...forms].filter(f => f.length >= 3);
}
/**
 * Lexical pre-filter: return the positions of all entries whose de-umlauted,
 * lower-cased name contains at least one search form of the query.
 *
 * @param query Ingredient name to look for.
 * @param entries Candidate list; only `name` is inspected.
 * @returns Matching positions in ascending order.
 */
function findSubstringMatches(
  query: string,
  entries: { name: string }[],
): number[] {
  const needles = searchForms(query);
  const hits: number[] = [];
  for (const [position, candidate] of entries.entries()) {
    const haystack = deUmlaut(candidate.name.toLowerCase());
    if (needles.some((needle) => haystack.includes(needle))) {
      hits.push(position);
    }
  }
  return hits;
}
/**
 * Score a substring match by adjusting the embedding similarity with
 * positional and length heuristics:
 * - exactly one positional adjustment, by priority: name starts with a query
 *   form (+0.2), a form occurs within the first 15 characters (+0.12), a form
 *   occurs as a delimited word (+0.05), otherwise a penalty (-0.05)
 * - shorter names are preferred (more likely base ingredients)
 * - names containing the word "roh" (raw) get a bonus when the query starts
 *   the name or matches it as a delimited word
 *
 * @param embeddingScore Cosine similarity between query and entry.
 * @param entryName Database entry name.
 * @param queryForms Search forms of the query (plain text, not regex syntax).
 * @returns The adjusted score; not clamped to [0, 1].
 */
function substringMatchScore(
  embeddingScore: number,
  entryName: string,
  queryForms: string[],
): number {
  let score = embeddingScore;
  const nameLower = deUmlaut(entryName.toLowerCase());
  // Check how the query matches: word-start vs mid-compound vs trailing mention
  let hasStartMatch = false;
  let hasEarlyMatch = false; // within first 15 chars
  let hasWordBoundaryMatch = false;
  for (const form of queryForms) {
    // Start match: name begins with the query
    if (nameLower.startsWith(form + ' ') || nameLower.startsWith(form + ',') || nameLower === form) {
      hasStartMatch = true;
    }
    // Early match: appears within first ~15 chars (likely the main ingredient)
    const pos = nameLower.indexOf(form);
    if (pos >= 0 && pos < 15) hasEarlyMatch = true;
    // Word-boundary match. Forms come from user-entered ingredient names, so
    // escape regex metacharacters: an unbalanced "(" or "[" would otherwise
    // throw, and "+" / "." would silently change what is matched.
    const escapedForm = form.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const wordBoundary = new RegExp(`(^|[\\s,/])${escapedForm}([\\s,/]|$)`);
    if (wordBoundary.test(nameLower)) hasWordBoundaryMatch = true;
  }
  // Strong bonus for name starting with query form
  if (hasStartMatch) score += 0.2;
  // Moderate bonus for early appearance in name
  else if (hasEarlyMatch) score += 0.12;
  // Small bonus for word-boundary match
  else if (hasWordBoundaryMatch) score += 0.05;
  // Penalty for late/trailing mentions (e.g., "mit Zimt" at end of a dish name)
  else score -= 0.05;
  // Length adjustment: +0.008 per character below 20, -0.008 per character
  // above, with the penalty capped at -0.1 (base ingredients like
  // "Apfel roh" vs long dish names)
  score += Math.max(-0.1, (20 - nameLower.length) * 0.008);
  // Bonus for "roh" (raw) — only when the query starts the name or matches as
  // a delimited word (avoid boosting unrelated raw items)
  if (/\broh\b/.test(nameLower) && (hasStartMatch || hasWordBoundaryMatch)) score += 0.1;
  return score;
}
/**
 * Find the best BLS match for a German ingredient name (substring-first hybrid).
 * 1. Collect BLS entries whose name contains a search form of the ingredient.
 * 2. Rank them by embedding similarity plus heuristics; accept the winner only
 *    if a query form starts its name or occurs within its first 12 characters.
 *    Accepted matches report a confidence clamped to [0.7, 1.0].
 * 3. Otherwise fall back to a pure embedding search over the whole index with
 *    a strict similarity threshold.
 *
 * Index items that have no row in the BLS database are ignored in both passes.
 * If they could win, the result would be discarded afterwards and mask valid
 * lower-ranked candidates.
 *
 * @param ingredientNameDe Normalized German ingredient name.
 * @returns The matched entry with a confidence, or null when nothing qualifies
 *          (including when no BLS embeddings are available).
 */
async function blsEmbeddingMatch(
  ingredientNameDe: string
): Promise<{ entry: BlsEntry; confidence: number } | null> {
  const index = getBlsEmbeddingIndex();
  if (index.length === 0) return null;
  const emb = await getBlsEmbedder();
  // The query text is sent to the embedder with a "query: " prefix.
  const result = await emb(`query: ${ingredientNameDe}`, { pooling: 'mean', normalize: true });
  const queryVector = Array.from(result.data as Float32Array);
  const queryForms = searchForms(ingredientNameDe);
  const byCode = getBlsByCode();

  // Pass 1: lexical candidates, re-ranked by embedding + heuristics.
  let bestLexicalScore = -1;
  let bestLexical: { name: string; entry: BlsEntry } | null = null;
  for (const idx of findSubstringMatches(ingredientNameDe, index)) {
    const item = index[idx];
    const entry = byCode.get(item.blsCode);
    if (!entry || EXCLUDED_BLS_CATEGORIES.has(entry.category)) continue;
    const embScore = cosineSimilarity(queryVector, item.vector);
    const score = substringMatchScore(embScore, item.name, queryForms);
    if (score > bestLexicalScore) {
      bestLexicalScore = score;
      bestLexical = { name: item.name, entry };
    }
  }
  if (bestLexical) {
    const nameNorm = deUmlaut(bestLexical.name.toLowerCase());
    // Direct hit: a query form starts the name or occurs very early in it.
    const isDirectMatch = queryForms.some(f =>
      nameNorm.startsWith(f + ' ') || nameNorm.startsWith(f + ',') ||
      nameNorm.startsWith(f + '/') || nameNorm === f ||
      (nameNorm.indexOf(f) >= 0 && nameNorm.indexOf(f) < 12)
    );
    // Only use the substring match if it is a direct hit — otherwise the query
    // word is a minor component of a longer name and we fall through to the
    // full search (and ultimately to USDA).
    if (isDirectMatch) {
      return { entry: bestLexical.entry, confidence: Math.min(Math.max(bestLexicalScore, 0.7), 1.0) };
    }
  }

  // Pass 2: full embedding search (excluding prepared dishes).
  // Use a higher threshold for pure embedding — short German words produce unreliable scores.
  const EMBEDDING_ONLY_THRESHOLD = 0.85;
  let bestScore = -1;
  let bestEntry: BlsEntry | null = null;
  for (const item of index) {
    const entry = byCode.get(item.blsCode);
    if (!entry || EXCLUDED_BLS_CATEGORIES.has(entry.category)) continue;
    const score = cosineSimilarity(queryVector, item.vector);
    if (score > bestScore) {
      bestScore = score;
      bestEntry = entry;
    }
  }
  if (!bestEntry || bestScore < EMBEDDING_ONLY_THRESHOLD) return null;
  return { entry: bestEntry, confidence: bestScore };
}
/**
 * USDA categories that hold prepared dishes. `usdaEmbeddingMatch` skips
 * entries in these categories in both its substring-ranked pass and its
 * embedding-only fallback.
 */
const EXCLUDED_USDA_CATEGORIES = new Set(['Restaurant Foods']);
/**
 * Heuristically adjusted score for a USDA entry that lexically matched the
 * query; the USDA counterpart of the BLS substring scorer.
 *
 * Adjustments, applied in this order on top of the embedding similarity:
 * position of the query in the name, name length, the word "raw", and a
 * leading category word such as "Spices," or "Oil,".
 *
 * @param embeddingScore Cosine similarity between query and entry.
 * @param entryName USDA entry name.
 * @param query Normalized English ingredient name.
 * @returns The adjusted score; not clamped to [0, 1].
 */
function usdaSubstringMatchScore(
  embeddingScore: number,
  entryName: string,
  query: string,
): number {
  const candidate = entryName.toLowerCase();
  const forms = searchForms(query);

  // Where does the query show up in the entry name?
  const startsWithQuery = forms.some(
    (f) => candidate.startsWith(f + ',') || candidate.startsWith(f + ' ') || candidate === f,
  );
  const appearsEarly = forms.some((f) => {
    const at = candidate.indexOf(f);
    return at >= 0 && at < 15;
  });

  let total = embeddingScore;
  if (startsWithQuery) {
    total += 0.2;
  } else if (appearsEarly) {
    total += 0.1;
  } else {
    total -= 0.05;
  }

  // Mild preference for short names — kept moderate so "Bread, X" does not
  // beat "Spices, X, ground".
  total += Math.max(-0.1, (25 - candidate.length) * 0.003);

  // "raw" marks a base ingredient, but only counts for direct matches.
  const mentionsRaw = /\braw\b/.test(candidate);
  if (mentionsRaw && (startsWithQuery || appearsEarly)) {
    total += 0.1;
  }

  // Category-style entries ("Spices, X" / "Seeds, X" / "Oil, X").
  if (/^(spices|seeds|oil|nuts|fish|cheese|milk|cream|butter|flour|sugar),/i.test(candidate)) {
    total += 0.08;
  }
  return total;
}
/**
 * Find the best USDA match for an English ingredient name (substring-first hybrid).
 * 1. Collect USDA entries whose name contains a search form of the ingredient.
 * 2. Rank them by embedding similarity plus heuristics; accept the winner only
 *    if a query form starts its name or occurs within its first 15 characters.
 *    Accepted matches report a confidence clamped to [0.7, 1.0].
 * 3. Otherwise fall back to a pure embedding search over the whole index,
 *    accepted at CONFIDENCE_THRESHOLD or above.
 *
 * Index items that have no row in the USDA database are ignored in both
 * passes. If they could win, the result would be discarded afterwards and mask
 * valid lower-ranked candidates.
 *
 * @param ingredientNameEn Normalized English ingredient name.
 * @returns The matched entry with a confidence, or null when nothing qualifies.
 */
async function usdaEmbeddingMatch(
  ingredientNameEn: string
): Promise<{ entry: NutritionEntry; confidence: number } | null> {
  const emb = await getUsdaEmbedder();
  const index = getUsdaEmbeddingIndex();
  const result = await emb(ingredientNameEn, { pooling: 'mean', normalize: true });
  const queryVector = Array.from(result.data as Float32Array);
  const byFdcId = getNutritionByFdcId();

  // Pass 1: lexical candidates, re-ranked by embedding + heuristics.
  let bestLexicalScore = -1;
  let bestLexical: { name: string; entry: NutritionEntry } | null = null;
  for (const idx of findSubstringMatches(ingredientNameEn, index)) {
    const item = index[idx];
    const entry = byFdcId.get(item.fdcId);
    if (!entry || EXCLUDED_USDA_CATEGORIES.has(entry.category)) continue;
    const embScore = cosineSimilarity(queryVector, item.vector);
    const score = usdaSubstringMatchScore(embScore, item.name, ingredientNameEn);
    if (score > bestLexicalScore) {
      bestLexicalScore = score;
      bestLexical = { name: item.name, entry };
    }
  }
  if (bestLexical) {
    const nameNorm = bestLexical.name.toLowerCase();
    const forms = searchForms(ingredientNameEn);
    // Direct hit: a query form starts the name or occurs early in it.
    const isDirectMatch = forms.some(f =>
      nameNorm.startsWith(f + ',') || nameNorm.startsWith(f + ' ') ||
      nameNorm === f || (nameNorm.indexOf(f) >= 0 && nameNorm.indexOf(f) < 15)
    );
    if (isDirectMatch) {
      return { entry: bestLexical.entry, confidence: Math.min(Math.max(bestLexicalScore, 0.7), 1.0) };
    }
  }

  // Pass 2: full embedding search fallback (excluding restaurant foods).
  let bestScore = -1;
  let bestEntry: NutritionEntry | null = null;
  for (const item of index) {
    const entry = byFdcId.get(item.fdcId);
    if (!entry || EXCLUDED_USDA_CATEGORIES.has(entry.category)) continue;
    const score = cosineSimilarity(queryVector, item.vector);
    if (score > bestScore) {
      bestScore = score;
      bestEntry = entry;
    }
  }
  if (!bestEntry || bestScore < CONFIDENCE_THRESHOLD) return null;
  return { entry: bestEntry, confidence: bestScore };
}
/** Numeric value of the single-character (vulgar) fractions recipes commonly use. */
const VULGAR_FRACTIONS: Readonly<Record<string, number>> = {
  '½': 1 / 2, '⅓': 1 / 3, '⅔': 2 / 3, '¼': 1 / 4, '¾': 3 / 4, '⅛': 1 / 8,
};

/**
 * Parse a recipe amount string to a number.
 *
 * Supported notations (decimal comma or point):
 * - plain numbers: "200", "1,5"
 * - ranges, averaged: "2-3", "2–3" (hyphen, en dash or em dash)
 * - fractions and mixed numbers: "1/2", "1 1/2"
 * - vulgar fraction characters, optionally after a whole number: "½", "1½"
 *
 * A fraction with a zero denominator contributes 0 instead of Infinity.
 *
 * @param amount Amount text as written in the recipe.
 * @returns The parsed amount, or 0 when the text is empty or not numeric.
 */
export function parseAmount(amount: string): number {
  if (!amount || !amount.trim()) return 0;
  const s = amount.trim().replace(/,/g, '.');
  const ratio = (numerator: string, denominator: string): number => {
    const d = parseInt(denominator, 10);
    return d === 0 ? 0 : parseInt(numerator, 10) / d;
  };

  const rangeMatch = s.match(/^(\d+(?:\.\d+)?)\s*[-\u2013\u2014]\s*(\d+(?:\.\d+)?)$/);
  if (rangeMatch) return (parseFloat(rangeMatch[1]) + parseFloat(rangeMatch[2])) / 2;

  const fractionMatch = s.match(/^(\d+)\s*\/\s*(\d+)$/);
  if (fractionMatch) return ratio(fractionMatch[1], fractionMatch[2]);

  const mixedMatch = s.match(/^(\d+)\s+(\d+)\s*\/\s*(\d+)$/);
  if (mixedMatch) return parseInt(mixedMatch[1], 10) + ratio(mixedMatch[2], mixedMatch[3]);

  const vulgarMatch = s.match(/^(\d+)?\s*([½⅓⅔¼¾⅛])$/);
  if (vulgarMatch) {
    const whole = vulgarMatch[1] ? parseInt(vulgarMatch[1], 10) : 0;
    return whole + VULGAR_FRACTIONS[vulgarMatch[2]];
  }

  // Fallback: leading number, ignoring any trailing text.
  const parsed = parseFloat(s);
  return Number.isNaN(parsed) ? 0 : parsed;
}
// ── Global overwrite cache ──
/** All global overwrites keyed by their stored `ingredientNameDe`; null until loaded or after invalidation. */
let overwriteCache: Map<string, any> | null = null;
/** Timestamp (ms since epoch) of the last load attempt. */
let overwriteCacheTime = 0;
const OVERWRITE_CACHE_TTL = 60_000; // 1 minute

/**
 * Look up the global (DB-stored) manual overwrite for an ingredient.
 *
 * All overwrites are loaded in one query and cached for OVERWRITE_CACHE_TTL.
 * If a reload fails, the error is logged and the previously loaded overwrites
 * stay in effect until the next attempt. Falling back to an empty cache would
 * silently disable manual mappings and exclusions. Only when nothing has been
 * loaded yet does a failure yield an empty cache.
 *
 * NOTE(review): the cache key is the stored `ingredientNameDe` verbatim, while
 * callers pass a normalized name — this presumably relies on overwrites being
 * saved in normalized form; confirm against the write path.
 *
 * @param normalizedNameDe Normalized German ingredient name.
 * @returns The overwrite document, or null if none exists.
 */
async function lookupGlobalOverwrite(normalizedNameDe: string): Promise<any | null> {
  const now = Date.now();
  if (!overwriteCache || now - overwriteCacheTime > OVERWRITE_CACHE_TTL) {
    try {
      const all = await NutritionOverwrite.find({}).lean();
      overwriteCache = new Map(all.map((o: any) => [o.ingredientNameDe, o]));
    } catch (err) {
      console.error('[nutritionMatcher] failed to load global nutrition overwrites:', err);
      if (!overwriteCache) overwriteCache = new Map();
    }
    // Stamp failures too, so a broken DB is retried once per TTL, not on every lookup.
    overwriteCacheTime = now;
  }
  return overwriteCache.get(normalizedNameDe) || null;
}
/**
 * Invalidate the global overwrite cache so the next lookup reloads all
 * overwrites from the database. Call after creating, updating or deleting
 * an overwrite.
 */
export function invalidateOverwriteCache() {
// Dropping the map is enough: the lookup reloads whenever the cache is null.
overwriteCache = null;
}
/**
 * Resolve one recipe ingredient to a nutrition database entry and a
 * grams-per-unit conversion.
 *
 * Sources are tried in order and the first hit wins:
 * global overwrite → English alias table (USDA) → BLS embedding match on the
 * German name → USDA embedding match on the English name.
 * An overwrite marked as excluded/skip short-circuits with an excluded mapping.
 *
 * @param ingredientNameDe German ingredient name as written in the recipe.
 * @param ingredientNameEn English translation, if available.
 * @param unit Unit text from the recipe.
 * @param amount Amount text from the recipe.
 * @param sectionIndex Position of the ingredient's section in the recipe.
 * @param ingredientIndex Position of the ingredient within its section.
 * @returns The mapping; `matchMethod` is 'none' when no source produced a hit.
 */
export async function matchIngredient(
  ingredientNameDe: string,
  ingredientNameEn: string | undefined,
  unit: string,
  amount: string,
  sectionIndex: number,
  ingredientIndex: number,
): Promise<NutritionMapping> {
  /** Everything a successful lookup contributes to the final mapping. */
  type Hit = {
    source: 'bls' | 'usda';
    fdcId?: number;
    blsCode?: string;
    nutritionDbName?: string;
    matchMethod: NutritionMapping['matchMethod'];
    confidence: number;
    portions: { description: string; grams: number }[];
    category: string;
  };

  const queryEn = ingredientNameEn ? normalizeIngredientName(ingredientNameEn) : '';
  const queryDe = normalizeIngredientNameDe(ingredientNameDe);
  const displayName = ingredientNameEn || ingredientNameDe;
  let hit: Hit | null = null;

  // 0. Global overwrites (DB-stored manual mappings)
  const overwrite = await lookupGlobalOverwrite(queryDe);
  if (overwrite) {
    if (overwrite.excluded || overwrite.source === 'skip') {
      return {
        sectionIndex, ingredientIndex,
        ingredientName: displayName,
        ingredientNameDe,
        source: 'usda', matchMethod: 'manual', matchConfidence: 1,
        gramsPerUnit: 0, defaultAmountUsed: false,
        unitConversionSource: 'none', manuallyEdited: false, excluded: true,
      };
    }
    if (overwrite.source === 'bls' && overwrite.blsCode) {
      const blsEntry = getBlsByCode().get(overwrite.blsCode);
      if (blsEntry) {
        hit = {
          source: 'bls', blsCode: overwrite.blsCode,
          nutritionDbName: blsEntry.nameDe, matchMethod: 'exact',
          confidence: 1.0, portions: [], category: blsEntry.category,
        };
      }
    } else if (overwrite.source === 'usda' && overwrite.fdcId) {
      const usdaEntry = getNutritionByFdcId().get(overwrite.fdcId);
      if (usdaEntry) {
        hit = {
          source: 'usda', fdcId: overwrite.fdcId,
          nutritionDbName: usdaEntry.name, matchMethod: 'exact',
          confidence: 1.0, portions: usdaEntry.portions, category: usdaEntry.category,
        };
      }
    }
  }

  // 1. Alias table (English, fast path → USDA)
  if (!hit && queryEn) {
    const aliasTarget = lookupAlias(queryEn);
    const aliased = aliasTarget ? getNutritionByName().get(aliasTarget) : undefined;
    if (aliased) {
      hit = {
        source: 'usda', fdcId: aliased.fdcId,
        nutritionDbName: aliased.name, matchMethod: 'exact',
        confidence: 1.0, portions: aliased.portions, category: aliased.category,
      };
    }
  }

  // 2. BLS embedding match (German name, primary)
  if (!hit && queryDe) {
    const bls = await blsEmbeddingMatch(queryDe);
    if (bls) {
      // BLS has no portion data — unit conversion tables are used instead.
      hit = {
        source: 'bls', blsCode: bls.entry.blsCode,
        nutritionDbName: bls.entry.nameDe, matchMethod: 'embedding',
        confidence: bls.confidence, portions: [], category: bls.entry.category,
      };
    }
  }

  // 3. USDA embedding match (English name, fallback)
  if (!hit && queryEn) {
    const usda = await usdaEmbeddingMatch(queryEn);
    if (usda) {
      hit = {
        source: 'usda', fdcId: usda.entry.fdcId,
        nutritionDbName: usda.entry.name, matchMethod: 'embedding',
        confidence: usda.confidence, portions: usda.entry.portions, category: usda.entry.category,
      };
    }
  }

  const resolved = hit;
  const portions = resolved?.portions ?? [];

  /** Assemble the final mapping around a unit resolution. */
  const toMapping = (
    resolution: ReturnType<typeof resolveGramsPerUnit>,
    defaultAmountUsed: boolean,
    excluded: boolean,
  ): NutritionMapping => ({
    sectionIndex, ingredientIndex,
    ingredientName: displayName,
    ingredientNameDe,
    source: resolved?.source ?? 'usda',
    fdcId: resolved?.fdcId,
    blsCode: resolved?.blsCode,
    nutritionDbName: resolved?.nutritionDbName,
    matchConfidence: resolved?.confidence ?? 0,
    matchMethod: resolved?.matchMethod ?? 'none',
    gramsPerUnit: resolution.grams,
    defaultAmountUsed,
    unitConversionSource: resolution.source,
    manuallyEdited: false,
    excluded,
  });

  // Resolve unit conversion
  const canonicalUnit = canonicalizeUnit(unit);
  const hasAmount = Boolean(parseAmount(amount));

  // No usable amount in the recipe: fall back to a default amount, if one is
  // known for this ingredient/category. Only done for matched ingredients.
  if (!hasAmount && resolved) {
    const defaultAmt = resolveDefaultAmount(queryEn || queryDe, resolved.category);
    if (defaultAmt) {
      // NOTE(review): only the default *unit* feeds into gramsPerUnit here; the
      // default amount itself is used solely for the `excluded` flag — confirm
      // that downstream totals account for it.
      const defaultResolution = resolveGramsPerUnit(canonicalizeUnit(defaultAmt.unit), portions);
      return toMapping(defaultResolution, true, defaultAmt.amount === 0);
    }
  }

  return toMapping(resolveGramsPerUnit(canonicalUnit, portions), false, false);
}
/**
 * Build nutrition mappings for every ingredient of a recipe.
 *
 * German names drive BLS matching; the translated list supplies the English
 * name for the ingredient at the same section/item position, when present.
 * Sections of type 'reference' and sections without a `list` are skipped.
 * Ingredients are matched one after another, in recipe order.
 *
 * @param ingredients Recipe ingredient sections (German).
 * @param translatedIngredients Translated sections, position-aligned with `ingredients`.
 * @returns One mapping per processed ingredient, in recipe order.
 */
export async function generateNutritionMappings(
  ingredients: any[],
  translatedIngredients?: any[],
): Promise<NutritionMapping[]> {
  const collected: NutritionMapping[] = [];
  for (const [sectionIdx, sectionDe] of ingredients.entries()) {
    if (sectionDe.type === 'reference' || !sectionDe.list) continue;
    const translatedList = translatedIngredients?.[sectionIdx]?.list;
    for (let itemIdx = 0; itemIdx < sectionDe.list.length; itemIdx++) {
      const itemDe = sectionDe.list[itemIdx];
      const englishName = translatedList?.[itemIdx]?.name || undefined;
      collected.push(
        await matchIngredient(
          itemDe.name,
          englishName,
          itemDe.unit || '',
          itemDe.amount || '',
          sectionIdx,
          itemIdx,
        ),
      );
    }
  }
  return collected;
}
/**
 * Look up a USDA entry by its fdcId.
 * @returns The entry, or undefined when the id is unknown.
 */
export function getNutritionEntryByFdcId(fdcId: number): NutritionEntry | undefined {
  const usdaTable = getNutritionByFdcId();
  return usdaTable.get(fdcId);
}
/**
 * Look up a BLS entry by its code.
 * @returns The entry, or undefined when the code is unknown.
 */
export function getBlsEntryByCode(code: string): BlsEntry | undefined {
  const blsTable = getBlsByCode();
  return blsTable.get(code);
}
/**
 * Resolve the per-100g nutrient record a mapping points to.
 *
 * A mapping with source 'bls' and a blsCode is resolved against BLS only (no
 * USDA fallback if the code is unknown); any other mapping is resolved via
 * its fdcId.
 *
 * @returns The nutrient record, or null when the mapping has no usable
 *          reference or the referenced entry does not exist.
 */
export function resolvePer100g(mapping: any): NutritionPer100g | null {
  const pointsToBls = mapping.blsCode && mapping.source === 'bls';
  if (pointsToBls) {
    return getBlsByCode().get(mapping.blsCode)?.per100g ?? null;
  }
  if (!mapping.fdcId) {
    return null;
  }
  return getNutritionByFdcId().get(mapping.fdcId)?.per100g ?? null;
}
/**
 * Sum absolute nutrient totals (not per-100g values) over a recipe's
 * ingredients, using the stored nutrition mappings.
 *
 * Skipped: reference sections and sections without a list, ingredients whose
 * name contains an anchor tag (links to other recipes, handled elsewhere),
 * ingredients whose name equals the name of a different section (that
 * section's own ingredients are counted instead), and ingredients that are
 * unmapped, unmatched or excluded.
 *
 * @param ingredients Recipe ingredient sections.
 * @param nutritionMappings Mappings addressed by sectionIndex/ingredientIndex.
 * @param multiplier Scale factor applied to every amount (default 1).
 * @returns Totals keyed by nutrient name; only numeric per-100g fields are summed.
 */
export function computeRecipeNutritionTotals(
  ingredients: any[],
  nutritionMappings: any[],
  multiplier = 1,
): Record<string, number> {
  const plainLower = (text: string): string => stripHtml(text).toLowerCase().trim();

  const mappingAt = new Map<string, any>();
  for (const m of nutritionMappings || []) {
    mappingAt.set(`${m.sectionIndex}-${m.ingredientIndex}`, m);
  }

  // Names of all sections (of any type), used for the section-name dedup below.
  const knownSections = new Set<string>();
  for (const section of ingredients) {
    if (section.name) knownSections.add(plainLower(section.name));
  }

  const totals: Record<string, number> = {};
  ingredients.forEach((section, si) => {
    if (section.type === 'reference' || !section.list) return;
    const ownName = section.name ? plainLower(section.name) : '';

    for (let ii = 0; ii < section.list.length; ii++) {
      const item = section.list[ii];
      const rawName = item.name || '';
      const itemName = plainLower(rawName);

      // Anchor-tag references to other recipes are handled separately.
      if (/<a\s/i.test(rawName)) continue;
      // The ingredient names a DIFFERENT section of this recipe.
      if (itemName && knownSections.has(itemName) && itemName !== ownName) continue;

      const mapping = mappingAt.get(`${si}-${ii}`);
      if (!mapping || mapping.matchMethod === 'none' || mapping.excluded) continue;

      const per100g = resolvePer100g(mapping);
      if (!per100g) continue;

      // Without a parsable amount, count one unit if a default amount was applied.
      const amount = parseAmount(item.amount || '') || (mapping.defaultAmountUsed ? 1 : 0);
      const grams = amount * multiplier * (mapping.gramsPerUnit || 0);
      const factor = grams / 100;

      for (const [nutrient, per100] of Object.entries(per100g)) {
        if (typeof per100 !== 'number') continue;
        totals[nutrient] = (totals[nutrient] || 0) + factor * per100;
      }
    }
  });
  return totals;
}
/**
 * Remove everything that looks like an HTML tag (`<…>`) from a string.
 * Text between tags is kept as-is; entities are not decoded.
 */
function stripHtml(html: string): string {
  const textPieces = html.split(/<[^>]*>/);
  return textPieces.join('');
}