recipes: nutrition calculator with BLS/USDA matching, manual overwrites, and skip

Dual-source nutrition system using BLS (German, primary) and USDA (English, fallback)
with ML embedding matching (multilingual-e5-small / all-MiniLM-L6-v2), hybrid
substring-first search, and position-aware scoring heuristics.

Includes per-recipe and global manual ingredient overwrites, ingredient skip/exclude,
referenced recipe nutrition (base refs + anchor tags), section-name dedup,
amino acid tracking, and reactive client-side calculator with NutritionSummary component.
This commit is contained in:
2026-04-01 13:00:52 +02:00
parent 3cafe8955a
commit 7e1181461e
30 changed files with 722384 additions and 12 deletions

View File

@@ -0,0 +1,371 @@
/**
* Imports USDA FoodData Central data (SR Legacy + Foundation Foods) and generates
* a typed nutrition database for the recipe calorie calculator.
*
* Run with: pnpm exec vite-node scripts/import-usda-nutrition.ts
*
* Downloads bulk CSV data from USDA FDC, filters to relevant food categories,
* extracts macro/micronutrient data per 100g, and outputs src/lib/data/nutritionDb.ts
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { resolve } from 'path';
// Local cache directory for downloaded/extracted USDA zip archives.
const DATA_DIR = resolve('data/usda');
// Generated TypeScript module consumed by the recipe nutrition calculator.
const OUTPUT_PATH = resolve('src/lib/data/nutritionDb.ts');
// USDA FDC bulk download URLs
const USDA_URLS = {
srLegacy: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_sr_legacy_food_csv_2018-04.zip', // 2018-04 release
foundation: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2024-10-31.zip', // 2024-10-31 release
};
// Nutrient IDs we care about
// Maps USDA FDC nutrient ids (the food_nutrient.nutrient_id column) to the
// camelCase field names emitted in the generated database. Amounts are per
// 100g of food; units follow the USDA source data (see inline notes).
const NUTRIENT_IDS: Record<number, string> = {
1008: 'calories',
1003: 'protein',
1004: 'fat',
1258: 'saturatedFat',
1005: 'carbs',
1079: 'fiber',
1063: 'sugars',
// Minerals
1087: 'calcium',
1089: 'iron',
1090: 'magnesium',
1091: 'phosphorus',
1092: 'potassium',
1093: 'sodium',
1095: 'zinc',
// Vitamins
1106: 'vitaminA', // RAE (mcg)
1162: 'vitaminC',
1114: 'vitaminD', // D2+D3 (mcg)
1109: 'vitaminE',
1185: 'vitaminK',
1165: 'thiamin',
1166: 'riboflavin',
1167: 'niacin',
1175: 'vitaminB6',
1178: 'vitaminB12',
1177: 'folate',
// Other
1253: 'cholesterol',
// Amino acids (g/100g)
1212: 'isoleucine',
1213: 'leucine',
1214: 'lysine',
1215: 'methionine',
1217: 'phenylalanine',
1211: 'threonine',
1210: 'tryptophan',
1219: 'valine',
1221: 'histidine',
1222: 'alanine',
1220: 'arginine',
1223: 'asparticAcid',
1216: 'cysteine',
1224: 'glutamicAcid',
1225: 'glycine',
1226: 'proline',
1227: 'serine',
1218: 'tyrosine',
};
// Food categories to include (SR Legacy food_category_id descriptions)
// food.csv rows whose resolved category description is not in this set are
// dropped during import, keeping the generated database limited to foods
// relevant as recipe ingredients.
const INCLUDED_CATEGORIES = new Set([
'Dairy and Egg Products',
'Spices and Herbs',
'Baby Foods',
'Fats and Oils',
'Poultry Products',
'Soups, Sauces, and Gravies',
'Sausages and Luncheon Meats',
'Breakfast Cereals',
'Fruits and Fruit Juices',
'Pork Products',
'Vegetables and Vegetable Products',
'Nut and Seed Products',
'Beef Products',
'Beverages',
'Finfish and Shellfish Products',
'Legumes and Legume Products',
'Lamb, Veal, and Game Products',
'Baked Products',
'Sweets',
'Cereal Grains and Pasta',
'Snacks',
'Restaurant Foods',
]);
// Sparse map of nutrition field name -> amount per 100g. Only fields actually
// present in the source CSV are set; buildNutrientRecord fills gaps with 0.
type NutrientData = Record<string, number>;
// One row of food.csv after category filtering.
interface RawFood {
fdcId: number; // FDC id (food.fdc_id), the join key across all CSVs
description: string; // human-readable food name
categoryId: number; // numeric food_category_id
category: string; // category description resolved via food_category.csv
}
// Household measure from food_portion.csv with its gram weight.
interface Portion {
description: string; // e.g. an amount plus modifier text from the CSV
grams: number; // gram weight, rounded to 2 decimals at import time
}
/**
 * Splits one CSV line into its fields.
 *
 * Supports RFC-4180-style quoting: commas inside a double-quoted field are
 * literal, and a doubled quote ("") within a quoted field decodes to a single
 * quote character. Quote characters themselves are not included in the output.
 */
function parseCSVLine(line: string): string[] {
	const result: string[] = [];
	let buffer = '';
	let quoted = false;
	let pos = 0;
	while (pos < line.length) {
		const c = line[pos];
		if (c === '"') {
			if (quoted && line[pos + 1] === '"') {
				// Escaped quote inside a quoted field: emit one and skip the pair.
				buffer += '"';
				pos++;
			} else {
				quoted = !quoted;
			}
		} else if (c === ',' && !quoted) {
			// Unquoted comma terminates the current field.
			result.push(buffer);
			buffer = '';
		} else {
			buffer += c;
		}
		pos++;
	}
	result.push(buffer);
	return result;
}
/**
 * Reads a CSV file into an array of header-keyed row objects.
 *
 * Returns [] (with a warning) when the file is missing, so callers can treat
 * absent dataset files uniformly. Missing trailing fields default to ''.
 * Handles both LF and CRLF line endings.
 *
 * NOTE(review): lines are split before quote parsing, so quoted fields that
 * contain embedded newlines are not supported — assumed absent from the USDA
 * FDC columns this script reads; confirm if new columns are added.
 */
async function readCSV(filePath: string): Promise<Record<string, string>[]> {
	if (!existsSync(filePath)) {
		console.warn(` File not found: ${filePath}`);
		return [];
	}
	const content = readFileSync(filePath, 'utf-8');
	// Split on LF or CRLF. The previous '\n'-only split left a trailing '\r'
	// on the last header/field of every row in CRLF-encoded files, silently
	// corrupting the affected column's key and values.
	const lines = content.split(/\r?\n/).filter(l => l.trim());
	if (lines.length === 0) return [];
	const headers = parseCSVLine(lines[0]);
	const rows: Record<string, string>[] = [];
	for (let i = 1; i < lines.length; i++) {
		const fields = parseCSVLine(lines[i]);
		const row: Record<string, string> = {};
		for (let j = 0; j < headers.length; j++) {
			row[headers[j]] = fields[j] || '';
		}
		rows.push(row);
	}
	return rows;
}
/**
 * Downloads a USDA zip archive (if not already cached) and extracts it flat
 * into targetDir, writing a '.done' marker on success so reruns can skip it.
 *
 * @param url       USDA FDC bulk-download URL; the zip is cached in DATA_DIR.
 * @param targetDir directory to extract into (created if needed).
 * @throws Error when the HTTP download fails; execSync throws if unzip fails,
 *         in which case no '.done' marker is written and the next run retries.
 */
async function downloadAndExtract(url: string, targetDir: string): Promise<void> {
	const zipName = url.split('/').pop()!;
	const zipPath = resolve(DATA_DIR, zipName);
	const doneMarker = resolve(targetDir, '.done');
	// Skip only when a previous extraction completed. (Previously '.done' was
	// read unconditionally whenever targetDir existed, so an interrupted
	// extraction left a directory without the marker and the next run crashed
	// with ENOENT instead of re-extracting.)
	if (existsSync(doneMarker) && readFileSync(doneMarker, 'utf-8').trim() === 'ok') {
		console.log(` Already extracted: ${targetDir}`);
		return;
	}
	mkdirSync(targetDir, { recursive: true });
	if (!existsSync(zipPath)) {
		console.log(` Downloading ${zipName}...`);
		const response = await fetch(url);
		if (!response.ok) throw new Error(`Download failed: ${response.status} ${response.statusText}`);
		const buffer = Buffer.from(await response.arrayBuffer());
		writeFileSync(zipPath, buffer);
		console.log(` Downloaded ${(buffer.length / 1024 / 1024).toFixed(1)}MB`);
	}
	console.log(` Extracting to ${targetDir}...`);
	const { execSync } = await import('child_process');
	// -j flattens directory structure; -o overwrites without prompting.
	execSync(`unzip -o -j "${zipPath}" -d "${targetDir}"`, { stdio: 'pipe' });
	writeFileSync(doneMarker, 'ok');
}
/**
 * Loads one extracted USDA FDC dataset directory into memory.
 *
 * Reads food_category.csv, food.csv, food_nutrient.csv and food_portion.csv,
 * keeps only foods whose category description is in INCLUDED_CATEGORIES, and
 * returns three maps keyed by FDC id:
 *   - foods:     fdcId -> RawFood metadata
 *   - nutrients: fdcId -> sparse NutrientData (only NUTRIENT_IDS fields seen)
 *   - portions:  fdcId -> household portion descriptions with gram weights
 *
 * @param datasetDir directory containing the extracted CSV files
 * @param label      human-readable dataset name, used only for logging
 */
async function importDataset(datasetDir: string, label: string) {
console.log(`\nProcessing ${label}...`);
// Read category mapping
const categoryRows = await readCSV(resolve(datasetDir, 'food_category.csv'));
const categoryMap = new Map<string, string>();
for (const row of categoryRows) {
categoryMap.set(row['id'], row['description']);
}
// Read foods
const foodRows = await readCSV(resolve(datasetDir, 'food.csv'));
const foods = new Map<number, RawFood>();
for (const row of foodRows) {
const catId = parseInt(row['food_category_id'] || '0');
const category = categoryMap.get(row['food_category_id']) || '';
// Unknown/missing categories resolve to '' and are filtered out here too.
if (!INCLUDED_CATEGORIES.has(category)) continue;
const fdcId = parseInt(row['fdc_id']);
foods.set(fdcId, {
fdcId,
description: row['description'],
categoryId: catId,
category,
});
}
console.log(` Found ${foods.size} foods in included categories`);
// Read nutrients
const nutrientRows = await readCSV(resolve(datasetDir, 'food_nutrient.csv'));
const nutrients = new Map<number, NutrientData>();
for (const row of nutrientRows) {
const fdcId = parseInt(row['fdc_id']);
// Only keep nutrient rows for foods that passed the category filter.
if (!foods.has(fdcId)) continue;
const nutrientId = parseInt(row['nutrient_id']);
const fieldName = NUTRIENT_IDS[nutrientId];
if (!fieldName) continue; // nutrient not tracked by this importer
if (!nutrients.has(fdcId)) nutrients.set(fdcId, {});
const amount = parseFloat(row['amount'] || '0');
if (!isNaN(amount)) {
nutrients.get(fdcId)![fieldName] = amount;
}
}
console.log(` Loaded nutrients for ${nutrients.size} foods`);
// Read portions
const portionRows = await readCSV(resolve(datasetDir, 'food_portion.csv'));
const portions = new Map<number, Portion[]>();
for (const row of portionRows) {
const fdcId = parseInt(row['fdc_id']);
if (!foods.has(fdcId)) continue;
const gramWeight = parseFloat(row['gram_weight'] || '0');
// Zero-gram or unparseable portions are useless for unit conversion.
if (!gramWeight || isNaN(gramWeight)) continue;
// Build description from amount + modifier/description
const amount = parseFloat(row['amount'] || '1');
const modifier = row['modifier'] || row['portion_description'] || '';
// "1 cup"-style: omit the amount when it is exactly 1; fall back to
// "<amount> unit" when the CSV provides no textual modifier at all.
const desc = modifier
? (amount !== 1 ? `${amount} ${modifier}` : modifier)
: `${amount} unit`;
if (!portions.has(fdcId)) portions.set(fdcId, []);
portions.get(fdcId)!.push({ description: desc, grams: Math.round(gramWeight * 100) / 100 });
}
console.log(` Loaded portions for ${portions.size} foods`);
return { foods, nutrients, portions };
}
/**
 * Produces a dense per-100g nutrient record covering every field declared in
 * NUTRIENT_IDS: values missing from the sparse source data become 0, and all
 * amounts are rounded to 2 decimal places.
 */
function buildNutrientRecord(data: NutrientData | undefined): Record<string, number> {
	const record: Record<string, number> = {};
	for (const fieldName of Object.values(NUTRIENT_IDS)) {
		const amount = (data?.[fieldName] || 0) * 100;
		record[fieldName] = Math.round(amount) / 100;
	}
	return record;
}
/**
 * Orchestrates the full import: download/extract both USDA datasets, merge
 * them (Foundation Foods overriding SR Legacy by lowercased name), and write
 * the generated nutritionDb.ts module, then log a category breakdown.
 */
async function main() {
console.log('=== USDA Nutrition Database Import ===\n');
mkdirSync(DATA_DIR, { recursive: true });
// Download and extract datasets
const srDir = resolve(DATA_DIR, 'sr_legacy');
const foundationDir = resolve(DATA_DIR, 'foundation');
await downloadAndExtract(USDA_URLS.srLegacy, srDir);
await downloadAndExtract(USDA_URLS.foundation, foundationDir);
// Import both datasets
const sr = await importDataset(srDir, 'SR Legacy');
const foundation = await importDataset(foundationDir, 'Foundation Foods');
// Merge: Foundation Foods takes priority (more detailed), SR Legacy fills gaps
// Keyed by lowercased food description so identically named foods collide
// intentionally and the later (Foundation) insert wins.
const merged = new Map<string, {
fdcId: number;
name: string;
category: string;
per100g: Record<string, number>;
portions: Portion[];
}>();
// Add SR Legacy first
for (const [fdcId, food] of sr.foods) {
// NOTE(review): buildNutrientRecord is computed before the has() guard
// below, so foods skipped here pay for a wasted record build — harmless
// but avoidable.
const nutrientData = buildNutrientRecord(sr.nutrients.get(fdcId));
// Skip entries with no nutrient data at all
if (!sr.nutrients.has(fdcId)) continue;
merged.set(food.description.toLowerCase(), {
fdcId,
name: food.description,
category: food.category,
per100g: nutrientData,
portions: sr.portions.get(fdcId) || [],
});
}
// Override with Foundation Foods where available
for (const [fdcId, food] of foundation.foods) {
const nutrientData = buildNutrientRecord(foundation.nutrients.get(fdcId));
if (!foundation.nutrients.has(fdcId)) continue;
merged.set(food.description.toLowerCase(), {
fdcId,
name: food.description,
category: food.category,
per100g: nutrientData,
portions: foundation.portions.get(fdcId) || [],
});
}
console.log(`\nMerged total: ${merged.size} unique foods`);
// Sort by name for stable output
const entries = [...merged.values()].sort((a, b) => a.name.localeCompare(b.name));
// Generate TypeScript output
const tsContent = `// Auto-generated from USDA FoodData Central (SR Legacy + Foundation Foods)
// Generated: ${new Date().toISOString().split('T')[0]}
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-usda-nutrition.ts
import type { NutritionPer100g } from '$types/types';
export type NutritionEntry = {
fdcId: number;
name: string;
category: string;
per100g: NutritionPer100g;
portions: { description: string; grams: number }[];
};
export const NUTRITION_DB: NutritionEntry[] = ${JSON.stringify(entries, null, '\t')};
`;
writeFileSync(OUTPUT_PATH, tsContent, 'utf-8');
console.log(`\nWritten ${entries.length} entries to ${OUTPUT_PATH}`);
// Print category breakdown
const categoryCounts = new Map<string, number>();
for (const entry of entries) {
categoryCounts.set(entry.category, (categoryCounts.get(entry.category) || 0) + 1);
}
console.log('\nCategory breakdown:');
// Sorted by count, descending.
for (const [cat, count] of [...categoryCounts.entries()].sort((a, b) => b[1] - a[1])) {
console.log(` ${cat}: ${count}`);
}
}
// Entry point: run the import; any failure is logged and exits non-zero.
(async () => {
	try {
		await main();
	} catch (err) {
		console.error('Import failed:', err);
		process.exit(1);
	}
})();