recipes: nutrition calculator with BLS/USDA matching, manual overwrites, and skip

Dual-source nutrition system using BLS (German, primary) and USDA (English, fallback)
with ML embedding matching (multilingual-e5-small / all-MiniLM-L6-v2), hybrid
substring-first search, and position-aware scoring heuristics.

Includes per-recipe and global manual ingredient overwrites, ingredient skip/exclude,
referenced recipe nutrition (base refs + anchor tags), section-name dedup,
amino acid tracking, and reactive client-side calculator with NutritionSummary component.
This commit is contained in:
2026-04-01 13:00:52 +02:00
parent 3cafe8955a
commit 7e1181461e
30 changed files with 722384 additions and 12 deletions

61
scripts/embed-bls-db.ts Normal file
View File

@@ -0,0 +1,61 @@
/**
* Pre-compute sentence embeddings for BLS German food names.
* Uses multilingual-e5-small for good German language understanding.
*
* Run: pnpm exec vite-node scripts/embed-bls-db.ts
*/
import { pipeline } from '@huggingface/transformers';
import { writeFileSync } from 'fs';
import { resolve } from 'path';
// Dynamic import of blsDb (generated file)
// Dynamic import of blsDb (generated file)
const { BLS_DB } = await import('../src/lib/data/blsDb');
// Multilingual model — chosen for German food-name understanding (see header).
const MODEL_NAME = 'Xenova/multilingual-e5-small';
// Destination for the pre-computed embedding JSON.
const OUTPUT_FILE = resolve('src/lib/data/blsEmbeddings.json');
/**
 * Embed every BLS entry name with the e5 model and write the vectors to
 * OUTPUT_FILE as JSON: { model, dimensions, count, entries }.
 * Vectors are mean-pooled, L2-normalized, and rounded to 4 decimal places
 * to keep the payload small.
 */
async function main() {
  console.log(`Loading model ${MODEL_NAME}...`);
  const embedder = await pipeline('feature-extraction', MODEL_NAME, {
    dtype: 'q8', // quantized weights — smaller download, faster CPU inference
  });
  console.log(`Embedding ${BLS_DB.length} BLS entries...`);
  const entries: { blsCode: string; name: string; vector: number[] }[] = [];
  const PROGRESS_EVERY = 500; // log roughly every 500 entries
  let nextProgress = PROGRESS_EVERY;
  for (let i = 0; i < BLS_DB.length; i++) {
    const entry = BLS_DB[i];
    // e5 models require the "passage: " prefix for documents
    // (the matching query side must use "query: ").
    const result = await embedder(`passage: ${entry.nameDe}`, { pooling: 'mean', normalize: true });
    const vector = Array.from(result.data as Float32Array).map(v => Math.round(v * 10000) / 10000);
    entries.push({ blsCode: entry.blsCode, name: entry.nameDe, vector });
    if (i + 1 >= nextProgress || i + 1 === BLS_DB.length) {
      console.log(` ${i + 1}/${BLS_DB.length}`);
      nextProgress += PROGRESS_EVERY;
    }
  }
  const output = {
    model: MODEL_NAME,
    // Derive from the data; 384 is the e5-small dimension fallback for an
    // empty DB. `??` (not `||`) so only a truly missing value falls back.
    dimensions: entries[0]?.vector.length ?? 384,
    count: entries.length,
    entries,
  };
  const json = JSON.stringify(output);
  writeFileSync(OUTPUT_FILE, json, 'utf-8');
  console.log(`Written ${OUTPUT_FILE} (${(json.length / 1024 / 1024).toFixed(1)}MB, ${entries.length} entries)`);
}
main().catch(err => {
  // Fix: exit non-zero on failure (was catch(console.error), which exited 0
  // and hid failures from CI) — consistent with the other scripts here.
  console.error(err);
  process.exit(1);
});

View File

@@ -0,0 +1,60 @@
/**
* Pre-computes sentence embeddings for all USDA nutrition DB entries using
* all-MiniLM-L6-v2 via @huggingface/transformers.
*
* Run with: pnpm exec vite-node scripts/embed-nutrition-db.ts
*
* Outputs: src/lib/data/nutritionEmbeddings.json
* Format: { entries: [{ fdcId, name, vector: number[384] }] }
*/
import { writeFileSync } from 'fs';
import { resolve } from 'path';
import { pipeline } from '@huggingface/transformers';
import { NUTRITION_DB } from '../src/lib/data/nutritionDb';
// Destination for the pre-computed embedding JSON (format in header comment).
const OUTPUT_PATH = resolve('src/lib/data/nutritionEmbeddings.json');
// English sentence-embedding model — USDA descriptions are English.
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2';
// Progress-reporting granularity (entries are embedded one at a time below).
const BATCH_SIZE = 64;
/**
 * Embed every USDA nutrition DB entry name and write the vectors to
 * OUTPUT_PATH as JSON: { model, dimensions, count, entries }.
 * Vectors are mean-pooled, normalized, and rounded to 4 decimal places.
 */
async function main() {
  console.log('=== Nutrition DB Embedding Generation ===\n');
  console.log(`Entries to embed: ${NUTRITION_DB.length}`);
  console.log(`Model: ${MODEL_NAME}`);
  console.log(`Loading model (first run downloads ~23MB)...\n`);
  const embedder = await pipeline('feature-extraction', MODEL_NAME, {
    dtype: 'q8', // quantized weights — faster CPU inference
  });
  const entries: { fdcId: number; name: string; vector: number[] }[] = [];
  const totalBatches = Math.ceil(NUTRITION_DB.length / BATCH_SIZE);
  for (let i = 0; i < NUTRITION_DB.length; i += BATCH_SIZE) {
    const batch = NUTRITION_DB.slice(i, i + BATCH_SIZE);
    const batchNum = Math.floor(i / BATCH_SIZE) + 1;
    process.stdout.write(`\r Batch ${batchNum}/${totalBatches} (${i + batch.length}/${NUTRITION_DB.length})`);
    // Embed all names in this batch (one model call per name).
    for (const item of batch) {
      const result = await embedder(item.name, { pooling: 'mean', normalize: true });
      // result.data is a Float32Array — truncate to 4 decimal places to save space
      const vector = Array.from(result.data as Float32Array).map(v => Math.round(v * 10000) / 10000);
      entries.push({ fdcId: item.fdcId, name: item.name, vector });
    }
  }
  console.log('\n\nWriting embeddings...');
  const output = {
    model: MODEL_NAME,
    // Fix: derive dimensions from the data instead of hard-coding 384
    // (consistent with the BLS embedding script).
    dimensions: entries[0]?.vector.length ?? 384,
    count: entries.length,
    entries,
  };
  // Fix: serialize once — the payload is huge and was previously
  // JSON.stringify'd twice (once to write, once to measure).
  const json = JSON.stringify(output);
  writeFileSync(OUTPUT_PATH, json, 'utf-8');
  const fileSizeMB = (Buffer.byteLength(json) / 1024 / 1024).toFixed(1);
  console.log(`Written ${entries.length} embeddings to ${OUTPUT_PATH} (${fileSizeMB}MB)`);
  await embedder.dispose();
}
main().catch(err => {
  console.error('Embedding generation failed:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,180 @@
/**
* Import BLS 4.0 (Bundeslebensmittelschlüssel) nutrition data from CSV.
* Pre-convert the xlsx to CSV first (one-time):
* node -e "const X=require('xlsx');const w=X.readFile('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.xlsx');
* require('fs').writeFileSync('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.csv',X.utils.sheet_to_csv(w.Sheets[w.SheetNames[0]]))"
*
* Run: pnpm exec vite-node scripts/import-bls-nutrition.ts
*/
import { readFileSync, writeFileSync } from 'fs';
import { resolve } from 'path';
/**
 * Parse CSV text into rows of fields.
 *
 * Handles double-quoted fields (embedded commas, "" escaped quotes, and
 * newlines inside quotes) and — fix — CRLF line endings: previously a
 * trailing `\r` leaked into the last field of every row of a CRLF file.
 * Empty rows are dropped.
 */
function parseCSV(text: string): string[][] {
  const rows: string[][] = [];
  let i = 0;
  while (i < text.length) {
    const row: string[] = [];
    // A row ends at '\n' or at the '\r' of a '\r\n' pair.
    while (i < text.length && text[i] !== '\n' && !(text[i] === '\r' && text[i + 1] === '\n')) {
      if (text[i] === '"') {
        i++; // skip opening quote
        let field = '';
        while (i < text.length) {
          if (text[i] === '"') {
            if (text[i + 1] === '"') { field += '"'; i += 2; } // escaped quote
            else { i++; break; } // closing quote
          } else { field += text[i]; i++; }
        }
        row.push(field);
        if (text[i] === ',') i++;
      } else {
        const next = text.indexOf(',', i);
        const nl = text.indexOf('\n', i);
        const end = (next === -1 || (nl !== -1 && nl < next)) ? (nl === -1 ? text.length : nl) : next;
        let field = text.substring(i, end);
        // Fix: strip the '\r' of a CRLF terminator from the field.
        if (field.endsWith('\r')) field = field.slice(0, -1);
        row.push(field);
        i = end;
        if (text[i] === ',') i++;
      }
    }
    if (text[i] === '\r' && text[i + 1] === '\n') i++; // consume '\r' of CRLF
    if (text[i] === '\n') i++;
    if (row.length > 0) rows.push(row);
  }
  return rows;
}
// Pre-converted CSV input (see header for the one-time xlsx → csv command).
const BLS_CSV = resolve('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.csv');
// Generated TypeScript module holding the full BLS database.
const OUTPUT_FILE = resolve('src/lib/data/blsDb.ts');
// BLS nutrient code → our per100g field name
// Values are taken from the CSV as-is (per 100 g); an optional `divisor`
// converts units where BLS and our schema differ.
const NUTRIENT_MAP: Record<string, { field: string; divisor?: number }> = {
  // Energy & macronutrients
  ENERCC: { field: 'calories' },
  PROT625: { field: 'protein' },
  FAT: { field: 'fat' },
  FASAT: { field: 'saturatedFat' },
  CHO: { field: 'carbs' },
  FIBT: { field: 'fiber' },
  SUGAR: { field: 'sugars' },
  // Minerals
  CA: { field: 'calcium' },
  FE: { field: 'iron' },
  MG: { field: 'magnesium' },
  P: { field: 'phosphorus' },
  K: { field: 'potassium' },
  NA: { field: 'sodium' },
  ZN: { field: 'zinc' },
  // Vitamins
  VITA: { field: 'vitaminA' },
  VITC: { field: 'vitaminC' },
  VITD: { field: 'vitaminD' },
  VITE: { field: 'vitaminE' },
  VITK: { field: 'vitaminK' },
  THIA: { field: 'thiamin' },
  RIBF: { field: 'riboflavin' },
  NIA: { field: 'niacin' },
  VITB6: { field: 'vitaminB6', divisor: 1000 }, // BLS: µg → mg
  VITB12: { field: 'vitaminB12' },
  FOL: { field: 'folate' },
  CHORL: { field: 'cholesterol' }, // NOTE(review): verify CHORL is the BLS cholesterol code
  // Amino acids (all g/100g)
  ILE: { field: 'isoleucine' },
  LEU: { field: 'leucine' },
  LYS: { field: 'lysine' },
  MET: { field: 'methionine' },
  PHE: { field: 'phenylalanine' },
  THR: { field: 'threonine' },
  TRP: { field: 'tryptophan' },
  VAL: { field: 'valine' },
  HIS: { field: 'histidine' },
  ALA: { field: 'alanine' },
  ARG: { field: 'arginine' },
  ASP: { field: 'asparticAcid' },
  CYSTE: { field: 'cysteine' },
  GLU: { field: 'glutamicAcid' },
  GLY: { field: 'glycine' },
  PRO: { field: 'proline' },
  SER: { field: 'serine' },
  TYR: { field: 'tyrosine' },
};
// BLS code first letter → category (BLS 4.0 Hauptgruppen)
// NOTE(review): the letter 'I' has no entry — presumably unused by BLS; confirm.
// Unknown letters fall back to 'Sonstiges' in main().
const CATEGORY_MAP: Record<string, string> = {
  A: 'Getränke', B: 'Getreideprodukte', C: 'Getreide', D: 'Backwaren',
  E: 'Gemüse', F: 'Obst', G: 'Hülsenfrüchte',
  H: 'Gewürze und Kräuter', J: 'Fette und Öle', K: 'Milch und Milchprodukte',
  L: 'Eier', M: 'Fleisch', N: 'Wurstwaren', O: 'Wild', P: 'Geflügel',
  Q: 'Fisch und Meeresfrüchte', R: 'Süßwaren', S: 'Zucker und Honig',
  T: 'Gerichte und Rezepte', U: 'Pilze', V: 'Sonstiges', W: 'Algen',
  X: 'Fleischersatz', Y: 'Supplemente',
};
/**
 * Parse the BLS CSV, map nutrient columns to our per100g fields, and emit
 * src/lib/data/blsDb.ts as an auto-generated typed module.
 */
async function main() {
  console.log('Reading BLS CSV...');
  const csvText = readFileSync(BLS_CSV, 'utf-8');
  const rows: string[][] = parseCSV(csvText);
  const headers = rows[0];
  console.log(`Headers: ${headers.length} columns, ${rows.length - 1} data rows`);
  // Build column index: BLS nutrient code → column index of the value column.
  // NOTE(review): assumes nutrient columns start at index 3 in groups of 3
  // with the value column first — confirm against the CSV layout.
  const codeToCol = new Map<string, number>();
  for (let c = 3; c < headers.length; c += 3) {
    const code = headers[c]?.split(' ')[0];
    if (code) codeToCol.set(code, c);
  }
  // Fix: typed entries (was any[]) so misspelled fields fail at compile time.
  type BlsOutEntry = {
    blsCode: string;
    nameDe: string;
    nameEn: string;
    category: string;
    per100g: Record<string, number>;
  };
  const entries: BlsOutEntry[] = [];
  for (let r = 1; r < rows.length; r++) {
    const row = rows[r];
    const blsCode = row[0]?.trim();
    const nameDe = row[1]?.trim();
    const nameEn = row[2]?.trim() || '';
    if (!blsCode || !nameDe) continue; // skip blank/malformed rows
    const category = CATEGORY_MAP[blsCode[0]] || 'Sonstiges';
    const per100g: Record<string, number> = {};
    for (const [blsNutrientCode, mapping] of Object.entries(NUTRIENT_MAP)) {
      const col = codeToCol.get(blsNutrientCode);
      if (col === undefined) {
        // Column absent in this export — store an explicit 0 so every entry
        // carries the full field set.
        per100g[mapping.field] = 0;
        continue;
      }
      let value = parseFloat(row[col] || '0');
      if (isNaN(value)) value = 0;
      if (mapping.divisor) value /= mapping.divisor; // unit conversion (e.g. µg → mg)
      per100g[mapping.field] = Math.round(value * 1000) / 1000; // 3 decimals
    }
    entries.push({ blsCode, nameDe, nameEn, category, per100g });
  }
  console.log(`Parsed ${entries.length} BLS entries`);
  // Print a few sample entries as a sanity check.
  const sample = entries.slice(0, 3);
  for (const e of sample) {
    console.log(` ${e.blsCode} | ${e.nameDe} | ${e.per100g.calories} kcal | protein ${e.per100g.protein}g`);
  }
  const output = `// Auto-generated from BLS 4.0 (Bundeslebensmittelschlüssel)
// Generated: ${new Date().toISOString().split('T')[0]}
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-bls-nutrition.ts
import type { NutritionPer100g } from '$types/types';
export type BlsEntry = {
	blsCode: string;
	nameDe: string;
	nameEn: string;
	category: string;
	per100g: NutritionPer100g;
};
export const BLS_DB: BlsEntry[] = ${JSON.stringify(entries, null, 0)};
`;
  writeFileSync(OUTPUT_FILE, output, 'utf-8');
  console.log(`Written ${OUTPUT_FILE} (${(output.length / 1024 / 1024).toFixed(1)}MB, ${entries.length} entries)`);
}
main().catch(err => {
  // Fix: exit non-zero on failure (was catch(console.error), exiting 0),
  // consistent with the USDA import script.
  console.error('BLS import failed:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,371 @@
/**
* Imports USDA FoodData Central data (SR Legacy + Foundation Foods) and generates
* a typed nutrition database for the recipe calorie calculator.
*
* Run with: pnpm exec vite-node scripts/import-usda-nutrition.ts
*
* Downloads bulk CSV data from USDA FDC, filters to relevant food categories,
* extracts macro/micronutrient data per 100g, and outputs src/lib/data/nutritionDb.ts
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { resolve } from 'path';
// Local cache directory for downloaded/extracted USDA zips.
const DATA_DIR = resolve('data/usda');
// Generated TypeScript module consumed by the recipe calorie calculator.
const OUTPUT_PATH = resolve('src/lib/data/nutritionDb.ts');
// USDA FDC bulk download URLs
const USDA_URLS = {
  srLegacy: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_sr_legacy_food_csv_2018-04.zip',
  foundation: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2024-10-31.zip',
};
// Nutrient IDs we care about
// FDC `nutrient_id` (food_nutrient.csv) → our per100g field name.
// Rows with unmapped ids are dropped during import.
const NUTRIENT_IDS: Record<number, string> = {
  // Energy & macronutrients
  1008: 'calories',
  1003: 'protein',
  1004: 'fat',
  1258: 'saturatedFat',
  1005: 'carbs',
  1079: 'fiber',
  1063: 'sugars',
  // Minerals
  1087: 'calcium',
  1089: 'iron',
  1090: 'magnesium',
  1091: 'phosphorus',
  1092: 'potassium',
  1093: 'sodium',
  1095: 'zinc',
  // Vitamins
  1106: 'vitaminA', // RAE (mcg)
  1162: 'vitaminC',
  1114: 'vitaminD', // D2+D3 (mcg)
  1109: 'vitaminE',
  1185: 'vitaminK',
  1165: 'thiamin',
  1166: 'riboflavin',
  1167: 'niacin',
  1175: 'vitaminB6',
  1178: 'vitaminB12',
  1177: 'folate',
  // Other
  1253: 'cholesterol',
  // Amino acids (g/100g)
  1212: 'isoleucine',
  1213: 'leucine',
  1214: 'lysine',
  1215: 'methionine',
  1217: 'phenylalanine',
  1211: 'threonine',
  1210: 'tryptophan',
  1219: 'valine',
  1221: 'histidine',
  1222: 'alanine',
  1220: 'arginine',
  1223: 'asparticAcid',
  1216: 'cysteine',
  1224: 'glutamicAcid',
  1225: 'glycine',
  1226: 'proline',
  1227: 'serine',
  1218: 'tyrosine',
};
// Food categories to include (SR Legacy food_category_id descriptions)
// Foods whose category is not in this set are skipped during import.
const INCLUDED_CATEGORIES = new Set([
  'Dairy and Egg Products',
  'Spices and Herbs',
  'Baby Foods',
  'Fats and Oils',
  'Poultry Products',
  'Soups, Sauces, and Gravies',
  'Sausages and Luncheon Meats',
  'Breakfast Cereals',
  'Fruits and Fruit Juices',
  'Pork Products',
  'Vegetables and Vegetable Products',
  'Nut and Seed Products',
  'Beef Products',
  'Beverages',
  'Finfish and Shellfish Products',
  'Legumes and Legume Products',
  'Lamb, Veal, and Game Products',
  'Baked Products',
  'Sweets',
  'Cereal Grains and Pasta',
  'Snacks',
  'Restaurant Foods',
]);
// Field name → amount (per 100 g, per the header comment).
type NutrientData = Record<string, number>;
// One row of food.csv, restricted to the fields we keep.
interface RawFood {
  fdcId: number;
  description: string;
  categoryId: number;
  category: string;
}
// A household measure (e.g. "1 cup") with its gram weight.
interface Portion {
  description: string;
  grams: number;
}
/**
 * Split a single CSV line into fields.
 *
 * Supports double-quoted fields containing commas and "" as an escaped
 * quote. Callers split the file into lines beforehand, so embedded
 * newlines are out of scope here.
 */
function parseCSVLine(line: string): string[] {
  const out: string[] = [];
  let buf = '';
  let quoted = false;
  let idx = 0;
  while (idx < line.length) {
    const c = line[idx];
    if (c === '"') {
      if (quoted && line[idx + 1] === '"') {
        // "" inside a quoted field is a literal quote character.
        buf += '"';
        idx += 2;
        continue;
      }
      quoted = !quoted; // opening or closing quote — not emitted
    } else if (c === ',' && !quoted) {
      out.push(buf); // unquoted comma terminates the field
      buf = '';
    } else {
      buf += c;
    }
    idx++;
  }
  out.push(buf); // final field (possibly empty)
  return out;
}
/**
 * Read a CSV file into an array of header-keyed records.
 *
 * Returns [] (with a warning) when the file does not exist. Blank lines are
 * skipped; missing trailing fields become ''. Limitation: each record must
 * be on one line — quoted fields with embedded newlines are not supported.
 */
async function readCSV(filePath: string): Promise<Record<string, string>[]> {
  if (!existsSync(filePath)) {
    console.warn(` File not found: ${filePath}`);
    return [];
  }
  const content = readFileSync(filePath, 'utf-8');
  // Fix: split on \r?\n so CRLF files don't leak a trailing '\r' into the
  // last field of every row (previously split on '\n' only).
  const lines = content.split(/\r?\n/).filter(l => l.trim());
  if (lines.length === 0) return [];
  const headers = parseCSVLine(lines[0]);
  const rows: Record<string, string>[] = [];
  for (let i = 1; i < lines.length; i++) {
    const fields = parseCSVLine(lines[i]);
    const row: Record<string, string> = {};
    for (let j = 0; j < headers.length; j++) {
      row[headers[j]] = fields[j] || '';
    }
    rows.push(row);
  }
  return rows;
}
/**
 * Download a USDA zip (cached under DATA_DIR) and extract its files flat
 * into targetDir. A `.done` marker file makes extraction idempotent.
 */
async function downloadAndExtract(url: string, targetDir: string): Promise<void> {
  const zipName = url.split('/').pop()!;
  const zipPath = resolve(DATA_DIR, zipName);
  // Fix: check that the marker exists before reading it — previously a
  // partially-extracted directory (present but without .done) crashed with
  // ENOENT instead of resuming the extraction.
  const doneMarker = resolve(targetDir, '.done');
  if (existsSync(doneMarker) && readFileSync(doneMarker, 'utf-8').trim() === 'ok') {
    console.log(` Already extracted: ${targetDir}`);
    return;
  }
  mkdirSync(targetDir, { recursive: true });
  if (!existsSync(zipPath)) {
    console.log(` Downloading ${zipName}...`);
    const response = await fetch(url);
    if (!response.ok) throw new Error(`Download failed: ${response.status} ${response.statusText}`);
    const buffer = Buffer.from(await response.arrayBuffer());
    writeFileSync(zipPath, buffer);
    console.log(` Downloaded ${(buffer.length / 1024 / 1024).toFixed(1)}MB`);
  }
  console.log(` Extracting to ${targetDir}...`);
  // -o: overwrite without prompting; -j: flatten directory structure.
  const { execSync } = await import('child_process');
  execSync(`unzip -o -j "${zipPath}" -d "${targetDir}"`, { stdio: 'pipe' });
  writeFileSync(doneMarker, 'ok');
}
async function importDataset(datasetDir: string, label: string) {
console.log(`\nProcessing ${label}...`);
// Read category mapping
const categoryRows = await readCSV(resolve(datasetDir, 'food_category.csv'));
const categoryMap = new Map<string, string>();
for (const row of categoryRows) {
categoryMap.set(row['id'], row['description']);
}
// Read foods
const foodRows = await readCSV(resolve(datasetDir, 'food.csv'));
const foods = new Map<number, RawFood>();
for (const row of foodRows) {
const catId = parseInt(row['food_category_id'] || '0');
const category = categoryMap.get(row['food_category_id']) || '';
if (!INCLUDED_CATEGORIES.has(category)) continue;
const fdcId = parseInt(row['fdc_id']);
foods.set(fdcId, {
fdcId,
description: row['description'],
categoryId: catId,
category,
});
}
console.log(` Found ${foods.size} foods in included categories`);
// Read nutrients
const nutrientRows = await readCSV(resolve(datasetDir, 'food_nutrient.csv'));
const nutrients = new Map<number, NutrientData>();
for (const row of nutrientRows) {
const fdcId = parseInt(row['fdc_id']);
if (!foods.has(fdcId)) continue;
const nutrientId = parseInt(row['nutrient_id']);
const fieldName = NUTRIENT_IDS[nutrientId];
if (!fieldName) continue;
if (!nutrients.has(fdcId)) nutrients.set(fdcId, {});
const amount = parseFloat(row['amount'] || '0');
if (!isNaN(amount)) {
nutrients.get(fdcId)![fieldName] = amount;
}
}
console.log(` Loaded nutrients for ${nutrients.size} foods`);
// Read portions
const portionRows = await readCSV(resolve(datasetDir, 'food_portion.csv'));
const portions = new Map<number, Portion[]>();
for (const row of portionRows) {
const fdcId = parseInt(row['fdc_id']);
if (!foods.has(fdcId)) continue;
const gramWeight = parseFloat(row['gram_weight'] || '0');
if (!gramWeight || isNaN(gramWeight)) continue;
// Build description from amount + modifier/description
const amount = parseFloat(row['amount'] || '1');
const modifier = row['modifier'] || row['portion_description'] || '';
const desc = modifier
? (amount !== 1 ? `${amount} ${modifier}` : modifier)
: `${amount} unit`;
if (!portions.has(fdcId)) portions.set(fdcId, []);
portions.get(fdcId)!.push({ description: desc, grams: Math.round(gramWeight * 100) / 100 });
}
console.log(` Loaded portions for ${portions.size} foods`);
return { foods, nutrients, portions };
}
/**
 * Produce a complete nutrient record: every field from NUTRIENT_IDS is
 * present, missing values default to 0, and everything is rounded to
 * 2 decimal places.
 */
function buildNutrientRecord(data: NutrientData | undefined): Record<string, number> {
  const record: Record<string, number> = {};
  for (const field of Object.values(NUTRIENT_IDS)) {
    const raw = data?.[field] || 0;
    record[field] = Math.round(raw * 100) / 100;
  }
  return record;
}
/**
 * Orchestrate the import: download and extract both USDA datasets, merge
 * them (Foundation Foods overrides SR Legacy by lowercased name), and emit
 * the typed nutritionDb.ts module plus a category breakdown.
 */
async function main() {
  console.log('=== USDA Nutrition Database Import ===\n');
  mkdirSync(DATA_DIR, { recursive: true });
  // Download and extract datasets
  const srDir = resolve(DATA_DIR, 'sr_legacy');
  const foundationDir = resolve(DATA_DIR, 'foundation');
  await downloadAndExtract(USDA_URLS.srLegacy, srDir);
  await downloadAndExtract(USDA_URLS.foundation, foundationDir);
  // Import both datasets
  const sr = await importDataset(srDir, 'SR Legacy');
  const foundation = await importDataset(foundationDir, 'Foundation Foods');
  // Merge: Foundation Foods takes priority (more detailed), SR Legacy fills gaps.
  // Keyed by lowercased description so the same food from both datasets
  // collapses to a single entry.
  const merged = new Map<string, {
    fdcId: number;
    name: string;
    category: string;
    per100g: Record<string, number>;
    portions: Portion[];
  }>();
  // Shared merge step (was duplicated per dataset). Later calls overwrite
  // earlier ones on name collisions, so call order defines priority.
  const mergeDataset = (ds: Awaited<ReturnType<typeof importDataset>>) => {
    for (const [fdcId, food] of ds.foods) {
      // Skip entries with no nutrient data at all — fix: checked before
      // building the record (previously the record was built, then discarded).
      if (!ds.nutrients.has(fdcId)) continue;
      merged.set(food.description.toLowerCase(), {
        fdcId,
        name: food.description,
        category: food.category,
        per100g: buildNutrientRecord(ds.nutrients.get(fdcId)),
        portions: ds.portions.get(fdcId) || [],
      });
    }
  };
  mergeDataset(sr);         // SR Legacy first
  mergeDataset(foundation); // Foundation Foods overrides where names collide
  console.log(`\nMerged total: ${merged.size} unique foods`);
  // Sort by name for stable output
  const entries = [...merged.values()].sort((a, b) => a.name.localeCompare(b.name));
  // Generate TypeScript output
  const tsContent = `// Auto-generated from USDA FoodData Central (SR Legacy + Foundation Foods)
// Generated: ${new Date().toISOString().split('T')[0]}
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-usda-nutrition.ts
import type { NutritionPer100g } from '$types/types';
export type NutritionEntry = {
	fdcId: number;
	name: string;
	category: string;
	per100g: NutritionPer100g;
	portions: { description: string; grams: number }[];
};
export const NUTRITION_DB: NutritionEntry[] = ${JSON.stringify(entries, null, '\t')};
`;
  writeFileSync(OUTPUT_PATH, tsContent, 'utf-8');
  console.log(`\nWritten ${entries.length} entries to ${OUTPUT_PATH}`);
  // Print category breakdown
  const categoryCounts = new Map<string, number>();
  for (const entry of entries) {
    categoryCounts.set(entry.category, (categoryCounts.get(entry.category) || 0) + 1);
  }
  console.log('\nCategory breakdown:');
  for (const [cat, count] of [...categoryCounts.entries()].sort((a, b) => b[1] - a[1])) {
    console.log(` ${cat}: ${count}`);
  }
}
main().catch(err => {
  console.error('Import failed:', err);
  process.exit(1);
});