recipes: nutrition calculator with BLS/USDA matching, manual overrides, and skip
Dual-source nutrition system using BLS (German, primary) and USDA (English, fallback) with ML embedding matching (multilingual-e5-small / all-MiniLM-L6-v2), hybrid substring-first search, and position-aware scoring heuristics. Includes per-recipe and global manual ingredient overrides, ingredient skip/exclude, referenced recipe nutrition (base refs + anchor tags), section-name dedup, amino acid tracking, and a reactive client-side calculator with NutritionSummary component.
This commit is contained in:
61
scripts/embed-bls-db.ts
Normal file
61
scripts/embed-bls-db.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* Pre-compute sentence embeddings for BLS German food names.
|
||||
* Uses multilingual-e5-small for good German language understanding.
|
||||
*
|
||||
* Run: pnpm exec vite-node scripts/embed-bls-db.ts
|
||||
*/
|
||||
import { pipeline } from '@huggingface/transformers';
|
||||
import { writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
|
||||
// Dynamic import of blsDb (generated file)
// (presumably dynamic because the file is generated by import-bls-nutrition.ts
// and may not exist on a fresh checkout — TODO confirm)
const { BLS_DB } = await import('../src/lib/data/blsDb');

// Multilingual embedding model, chosen for German food-name understanding
// (see header comment above).
const MODEL_NAME = 'Xenova/multilingual-e5-small';
// Destination for the pre-computed embedding JSON.
const OUTPUT_FILE = resolve('src/lib/data/blsEmbeddings.json');
|
||||
|
||||
async function main() {
|
||||
console.log(`Loading model ${MODEL_NAME}...`);
|
||||
const embedder = await pipeline('feature-extraction', MODEL_NAME, {
|
||||
dtype: 'q8',
|
||||
});
|
||||
|
||||
console.log(`Embedding ${BLS_DB.length} BLS entries...`);
|
||||
|
||||
const entries: { blsCode: string; name: string; vector: number[] }[] = [];
|
||||
const batchSize = 32;
|
||||
|
||||
for (let i = 0; i < BLS_DB.length; i += batchSize) {
|
||||
const batch = BLS_DB.slice(i, i + batchSize);
|
||||
// e5 models require "passage: " prefix for documents
|
||||
const texts = batch.map(e => `passage: ${e.nameDe}`);
|
||||
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
const result = await embedder(texts[j], { pooling: 'mean', normalize: true });
|
||||
const vector = Array.from(result.data as Float32Array).map(v => Math.round(v * 10000) / 10000);
|
||||
|
||||
entries.push({
|
||||
blsCode: batch[j].blsCode,
|
||||
name: batch[j].nameDe,
|
||||
vector,
|
||||
});
|
||||
}
|
||||
|
||||
if ((i + batchSize) % 500 < batchSize) {
|
||||
console.log(` ${Math.min(i + batchSize, BLS_DB.length)}/${BLS_DB.length}`);
|
||||
}
|
||||
}
|
||||
|
||||
const output = {
|
||||
model: MODEL_NAME,
|
||||
dimensions: entries[0]?.vector.length || 384,
|
||||
count: entries.length,
|
||||
entries,
|
||||
};
|
||||
|
||||
const json = JSON.stringify(output);
|
||||
writeFileSync(OUTPUT_FILE, json, 'utf-8');
|
||||
console.log(`Written ${OUTPUT_FILE} (${(json.length / 1024 / 1024).toFixed(1)}MB, ${entries.length} entries)`);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
60
scripts/embed-nutrition-db.ts
Normal file
60
scripts/embed-nutrition-db.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
/**
|
||||
* Pre-computes sentence embeddings for all USDA nutrition DB entries using
|
||||
* all-MiniLM-L6-v2 via @huggingface/transformers.
|
||||
*
|
||||
* Run with: pnpm exec vite-node scripts/embed-nutrition-db.ts
|
||||
*
|
||||
* Outputs: src/lib/data/nutritionEmbeddings.json
|
||||
* Format: { entries: [{ fdcId, name, vector: number[384] }] }
|
||||
*/
|
||||
import { writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
import { pipeline } from '@huggingface/transformers';
|
||||
import { NUTRITION_DB } from '../src/lib/data/nutritionDb';
|
||||
|
||||
// Destination for the pre-computed embedding JSON (see header comment).
const OUTPUT_PATH = resolve('src/lib/data/nutritionEmbeddings.json');
// English sentence-embedding model; produces 384-dim vectors (see header).
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2';
// Batch size for progress reporting; entries are still embedded one at a time.
const BATCH_SIZE = 64;
|
||||
|
||||
async function main() {
|
||||
console.log('=== Nutrition DB Embedding Generation ===\n');
|
||||
console.log(`Entries to embed: ${NUTRITION_DB.length}`);
|
||||
console.log(`Model: ${MODEL_NAME}`);
|
||||
console.log(`Loading model (first run downloads ~23MB)...\n`);
|
||||
|
||||
const embedder = await pipeline('feature-extraction', MODEL_NAME, {
|
||||
dtype: 'q8',
|
||||
});
|
||||
|
||||
const entries: { fdcId: number; name: string; vector: number[] }[] = [];
|
||||
const totalBatches = Math.ceil(NUTRITION_DB.length / BATCH_SIZE);
|
||||
|
||||
for (let i = 0; i < NUTRITION_DB.length; i += BATCH_SIZE) {
|
||||
const batch = NUTRITION_DB.slice(i, i + BATCH_SIZE);
|
||||
const batchNum = Math.floor(i / BATCH_SIZE) + 1;
|
||||
process.stdout.write(`\r Batch ${batchNum}/${totalBatches} (${i + batch.length}/${NUTRITION_DB.length})`);
|
||||
|
||||
// Embed all names in this batch
|
||||
for (const item of batch) {
|
||||
const result = await embedder(item.name, { pooling: 'mean', normalize: true });
|
||||
// result.data is a Float32Array — truncate to 4 decimal places to save space
|
||||
const vector = Array.from(result.data as Float32Array).map(v => Math.round(v * 10000) / 10000);
|
||||
entries.push({ fdcId: item.fdcId, name: item.name, vector });
|
||||
}
|
||||
}
|
||||
|
||||
console.log('\n\nWriting embeddings...');
|
||||
|
||||
const output = { model: MODEL_NAME, dimensions: 384, count: entries.length, entries };
|
||||
writeFileSync(OUTPUT_PATH, JSON.stringify(output), 'utf-8');
|
||||
|
||||
const fileSizeMB = (Buffer.byteLength(JSON.stringify(output)) / 1024 / 1024).toFixed(1);
|
||||
console.log(`Written ${entries.length} embeddings to ${OUTPUT_PATH} (${fileSizeMB}MB)`);
|
||||
|
||||
await embedder.dispose();
|
||||
}
|
||||
|
||||
main().catch(err => {
|
||||
console.error('Embedding generation failed:', err);
|
||||
process.exit(1);
|
||||
});
|
||||
180
scripts/import-bls-nutrition.ts
Normal file
180
scripts/import-bls-nutrition.ts
Normal file
@@ -0,0 +1,180 @@
|
||||
/**
|
||||
* Import BLS 4.0 (Bundeslebensmittelschlüssel) nutrition data from CSV.
|
||||
* Pre-convert the xlsx to CSV first (one-time):
|
||||
* node -e "const X=require('xlsx');const w=X.readFile('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.xlsx');
|
||||
* require('fs').writeFileSync('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.csv',X.utils.sheet_to_csv(w.Sheets[w.SheetNames[0]]))"
|
||||
*
|
||||
* Run: pnpm exec vite-node scripts/import-bls-nutrition.ts
|
||||
*/
|
||||
import { readFileSync, writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
|
||||
/** Parse CSV handling quoted fields with commas */
|
||||
function parseCSV(text: string): string[][] {
|
||||
const rows: string[][] = [];
|
||||
let i = 0;
|
||||
while (i < text.length) {
|
||||
const row: string[] = [];
|
||||
while (i < text.length && text[i] !== '\n') {
|
||||
if (text[i] === '"') {
|
||||
i++; // skip opening quote
|
||||
let field = '';
|
||||
while (i < text.length) {
|
||||
if (text[i] === '"') {
|
||||
if (text[i + 1] === '"') { field += '"'; i += 2; }
|
||||
else { i++; break; }
|
||||
} else { field += text[i]; i++; }
|
||||
}
|
||||
row.push(field);
|
||||
if (text[i] === ',') i++;
|
||||
} else {
|
||||
const next = text.indexOf(',', i);
|
||||
const nl = text.indexOf('\n', i);
|
||||
const end = (next === -1 || (nl !== -1 && nl < next)) ? (nl === -1 ? text.length : nl) : next;
|
||||
row.push(text.substring(i, end));
|
||||
i = end;
|
||||
if (text[i] === ',') i++;
|
||||
}
|
||||
}
|
||||
if (text[i] === '\n') i++;
|
||||
if (row.length > 0) rows.push(row);
|
||||
}
|
||||
return rows;
|
||||
}
|
||||
|
||||
// Input CSV (pre-converted from the official xlsx, see header comment).
const BLS_CSV = resolve('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.csv');
// Generated TypeScript module consumed by the nutrition calculator.
const OUTPUT_FILE = resolve('src/lib/data/blsDb.ts');

// BLS nutrient code → our per100g field name
// `divisor` converts the BLS unit into the unit our per100g field uses.
const NUTRIENT_MAP: Record<string, { field: string; divisor?: number }> = {
	ENERCC: { field: 'calories' },
	PROT625: { field: 'protein' },
	FAT: { field: 'fat' },
	FASAT: { field: 'saturatedFat' },
	CHO: { field: 'carbs' },
	FIBT: { field: 'fiber' },
	SUGAR: { field: 'sugars' },
	CA: { field: 'calcium' },
	FE: { field: 'iron' },
	MG: { field: 'magnesium' },
	P: { field: 'phosphorus' },
	K: { field: 'potassium' },
	NA: { field: 'sodium' },
	ZN: { field: 'zinc' },
	VITA: { field: 'vitaminA' },
	VITC: { field: 'vitaminC' },
	VITD: { field: 'vitaminD' },
	VITE: { field: 'vitaminE' },
	VITK: { field: 'vitaminK' },
	THIA: { field: 'thiamin' },
	RIBF: { field: 'riboflavin' },
	NIA: { field: 'niacin' },
	VITB6: { field: 'vitaminB6', divisor: 1000 }, // BLS: µg → mg
	VITB12: { field: 'vitaminB12' },
	FOL: { field: 'folate' },
	// NOTE(review): verify 'CHORL' is the actual BLS 4.0 cholesterol column code.
	CHORL: { field: 'cholesterol' },
	// Amino acids (all g/100g)
	ILE: { field: 'isoleucine' },
	LEU: { field: 'leucine' },
	LYS: { field: 'lysine' },
	MET: { field: 'methionine' },
	PHE: { field: 'phenylalanine' },
	THR: { field: 'threonine' },
	TRP: { field: 'tryptophan' },
	VAL: { field: 'valine' },
	HIS: { field: 'histidine' },
	ALA: { field: 'alanine' },
	ARG: { field: 'arginine' },
	ASP: { field: 'asparticAcid' },
	CYSTE: { field: 'cysteine' },
	GLU: { field: 'glutamicAcid' },
	GLY: { field: 'glycine' },
	PRO: { field: 'proline' },
	SER: { field: 'serine' },
	TYR: { field: 'tyrosine' },
};

// BLS code first letter → category (BLS 4.0 Hauptgruppen)
// NOTE(review): letter → group mapping assumed from the BLS 4.0 docs — verify.
const CATEGORY_MAP: Record<string, string> = {
	A: 'Getränke', B: 'Getreideprodukte', C: 'Getreide', D: 'Backwaren',
	E: 'Gemüse', F: 'Obst', G: 'Hülsenfrüchte',
	H: 'Gewürze und Kräuter', J: 'Fette und Öle', K: 'Milch und Milchprodukte',
	L: 'Eier', M: 'Fleisch', N: 'Wurstwaren', O: 'Wild', P: 'Geflügel',
	Q: 'Fisch und Meeresfrüchte', R: 'Süßwaren', S: 'Zucker und Honig',
	T: 'Gerichte und Rezepte', U: 'Pilze', V: 'Sonstiges', W: 'Algen',
	X: 'Fleischersatz', Y: 'Supplemente',
};
|
||||
|
||||
async function main() {
|
||||
console.log('Reading BLS CSV...');
|
||||
const csvText = readFileSync(BLS_CSV, 'utf-8');
|
||||
const rows: string[][] = parseCSV(csvText);
|
||||
|
||||
const headers = rows[0];
|
||||
console.log(`Headers: ${headers.length} columns, ${rows.length - 1} data rows`);
|
||||
|
||||
// Build column index: BLS nutrient code → column index of the value column
|
||||
const codeToCol = new Map<string, number>();
|
||||
for (let c = 3; c < headers.length; c += 3) {
|
||||
const code = headers[c]?.split(' ')[0];
|
||||
if (code) codeToCol.set(code, c);
|
||||
}
|
||||
|
||||
const entries: any[] = [];
|
||||
|
||||
for (let r = 1; r < rows.length; r++) {
|
||||
const row = rows[r];
|
||||
const blsCode = row[0]?.trim();
|
||||
const nameDe = row[1]?.trim();
|
||||
const nameEn = row[2]?.trim() || '';
|
||||
|
||||
if (!blsCode || !nameDe) continue;
|
||||
|
||||
const category = CATEGORY_MAP[blsCode[0]] || 'Sonstiges';
|
||||
const per100g: Record<string, number> = {};
|
||||
|
||||
for (const [blsNutrientCode, mapping] of Object.entries(NUTRIENT_MAP)) {
|
||||
const col = codeToCol.get(blsNutrientCode);
|
||||
if (col === undefined) {
|
||||
per100g[mapping.field] = 0;
|
||||
continue;
|
||||
}
|
||||
let value = parseFloat(row[col] || '0');
|
||||
if (isNaN(value)) value = 0;
|
||||
if (mapping.divisor) value /= mapping.divisor;
|
||||
per100g[mapping.field] = Math.round(value * 1000) / 1000;
|
||||
}
|
||||
|
||||
entries.push({ blsCode, nameDe, nameEn, category, per100g });
|
||||
}
|
||||
|
||||
console.log(`Parsed ${entries.length} BLS entries`);
|
||||
|
||||
// Sample entries
|
||||
const sample = entries.slice(0, 3);
|
||||
for (const e of sample) {
|
||||
console.log(` ${e.blsCode} | ${e.nameDe} | ${e.per100g.calories} kcal | protein ${e.per100g.protein}g`);
|
||||
}
|
||||
|
||||
const output = `// Auto-generated from BLS 4.0 (Bundeslebensmittelschlüssel)
|
||||
// Generated: ${new Date().toISOString().split('T')[0]}
|
||||
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-bls-nutrition.ts
|
||||
|
||||
import type { NutritionPer100g } from '$types/types';
|
||||
|
||||
export type BlsEntry = {
|
||||
blsCode: string;
|
||||
nameDe: string;
|
||||
nameEn: string;
|
||||
category: string;
|
||||
per100g: NutritionPer100g;
|
||||
};
|
||||
|
||||
export const BLS_DB: BlsEntry[] = ${JSON.stringify(entries, null, 0)};
|
||||
`;
|
||||
|
||||
writeFileSync(OUTPUT_FILE, output, 'utf-8');
|
||||
console.log(`Written ${OUTPUT_FILE} (${(output.length / 1024 / 1024).toFixed(1)}MB, ${entries.length} entries)`);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
371
scripts/import-usda-nutrition.ts
Normal file
371
scripts/import-usda-nutrition.ts
Normal file
@@ -0,0 +1,371 @@
|
||||
/**
|
||||
* Imports USDA FoodData Central data (SR Legacy + Foundation Foods) and generates
|
||||
* a typed nutrition database for the recipe calorie calculator.
|
||||
*
|
||||
* Run with: pnpm exec vite-node scripts/import-usda-nutrition.ts
|
||||
*
|
||||
* Downloads bulk CSV data from USDA FDC, filters to relevant food categories,
|
||||
* extracts macro/micronutrient data per 100g, and outputs src/lib/data/nutritionDb.ts
|
||||
*/
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
|
||||
// Where downloaded zips and extracted CSVs are cached between runs.
const DATA_DIR = resolve('data/usda');
// Generated TypeScript module consumed by the nutrition calculator.
const OUTPUT_PATH = resolve('src/lib/data/nutritionDb.ts');

// USDA FDC bulk download URLs
// NOTE(review): dataset snapshots are versioned by date in the filename —
// bump these URLs when refreshing the data.
const USDA_URLS = {
	srLegacy: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_sr_legacy_food_csv_2018-04.zip',
	foundation: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2024-10-31.zip',
};

// Nutrient IDs we care about: USDA FDC nutrient_id → our per100g field name.
const NUTRIENT_IDS: Record<number, string> = {
	1008: 'calories',
	1003: 'protein',
	1004: 'fat',
	1258: 'saturatedFat',
	1005: 'carbs',
	1079: 'fiber',
	1063: 'sugars',
	// Minerals
	1087: 'calcium',
	1089: 'iron',
	1090: 'magnesium',
	1091: 'phosphorus',
	1092: 'potassium',
	1093: 'sodium',
	1095: 'zinc',
	// Vitamins
	1106: 'vitaminA', // RAE (mcg)
	1162: 'vitaminC',
	1114: 'vitaminD', // D2+D3 (mcg)
	1109: 'vitaminE',
	1185: 'vitaminK',
	1165: 'thiamin',
	1166: 'riboflavin',
	1167: 'niacin',
	1175: 'vitaminB6',
	1178: 'vitaminB12',
	1177: 'folate',
	// Other
	1253: 'cholesterol',
	// Amino acids (g/100g)
	1212: 'isoleucine',
	1213: 'leucine',
	1214: 'lysine',
	1215: 'methionine',
	1217: 'phenylalanine',
	1211: 'threonine',
	1210: 'tryptophan',
	1219: 'valine',
	1221: 'histidine',
	1222: 'alanine',
	1220: 'arginine',
	1223: 'asparticAcid',
	1216: 'cysteine',
	1224: 'glutamicAcid',
	1225: 'glycine',
	1226: 'proline',
	1227: 'serine',
	1218: 'tyrosine',
};

// Food categories to include (SR Legacy food_category_id descriptions)
// Foods in any other category are dropped during import.
const INCLUDED_CATEGORIES = new Set([
	'Dairy and Egg Products',
	'Spices and Herbs',
	'Baby Foods',
	'Fats and Oils',
	'Poultry Products',
	'Soups, Sauces, and Gravies',
	'Sausages and Luncheon Meats',
	'Breakfast Cereals',
	'Fruits and Fruit Juices',
	'Pork Products',
	'Vegetables and Vegetable Products',
	'Nut and Seed Products',
	'Beef Products',
	'Beverages',
	'Finfish and Shellfish Products',
	'Legumes and Legume Products',
	'Lamb, Veal, and Game Products',
	'Baked Products',
	'Sweets',
	'Cereal Grains and Pasta',
	'Snacks',
	'Restaurant Foods',
]);

// Sparse map of per100g field name → amount, as read from food_nutrient.csv.
type NutrientData = Record<string, number>;

// One food row from food.csv that survived category filtering.
interface RawFood {
	fdcId: number;
	description: string;
	categoryId: number;
	category: string;
}

// A serving-size option ("modifier" text plus its gram weight).
interface Portion {
	description: string;
	grams: number;
}
|
||||
|
||||
// Simple CSV line parser that handles quoted fields
|
||||
function parseCSVLine(line: string): string[] {
|
||||
const fields: string[] = [];
|
||||
let current = '';
|
||||
let inQuotes = false;
|
||||
|
||||
for (let i = 0; i < line.length; i++) {
|
||||
const ch = line[i];
|
||||
if (ch === '"') {
|
||||
if (inQuotes && i + 1 < line.length && line[i + 1] === '"') {
|
||||
current += '"';
|
||||
i++;
|
||||
} else {
|
||||
inQuotes = !inQuotes;
|
||||
}
|
||||
} else if (ch === ',' && !inQuotes) {
|
||||
fields.push(current);
|
||||
current = '';
|
||||
} else {
|
||||
current += ch;
|
||||
}
|
||||
}
|
||||
fields.push(current);
|
||||
return fields;
|
||||
}
|
||||
|
||||
async function readCSV(filePath: string): Promise<Record<string, string>[]> {
|
||||
if (!existsSync(filePath)) {
|
||||
console.warn(` File not found: ${filePath}`);
|
||||
return [];
|
||||
}
|
||||
|
||||
const content = readFileSync(filePath, 'utf-8');
|
||||
const lines = content.split('\n').filter(l => l.trim());
|
||||
if (lines.length === 0) return [];
|
||||
|
||||
const headers = parseCSVLine(lines[0]);
|
||||
const rows: Record<string, string>[] = [];
|
||||
|
||||
for (let i = 1; i < lines.length; i++) {
|
||||
const fields = parseCSVLine(lines[i]);
|
||||
const row: Record<string, string> = {};
|
||||
for (let j = 0; j < headers.length; j++) {
|
||||
row[headers[j]] = fields[j] || '';
|
||||
}
|
||||
rows.push(row);
|
||||
}
|
||||
|
||||
return rows;
|
||||
}
|
||||
|
||||
async function downloadAndExtract(url: string, targetDir: string): Promise<void> {
|
||||
const zipName = url.split('/').pop()!;
|
||||
const zipPath = resolve(DATA_DIR, zipName);
|
||||
|
||||
if (existsSync(targetDir) && readFileSync(resolve(targetDir, '.done'), 'utf-8').trim() === 'ok') {
|
||||
console.log(` Already extracted: ${targetDir}`);
|
||||
return;
|
||||
}
|
||||
|
||||
mkdirSync(targetDir, { recursive: true });
|
||||
|
||||
if (!existsSync(zipPath)) {
|
||||
console.log(` Downloading ${zipName}...`);
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) throw new Error(`Download failed: ${response.status} ${response.statusText}`);
|
||||
|
||||
const buffer = Buffer.from(await response.arrayBuffer());
|
||||
writeFileSync(zipPath, buffer);
|
||||
console.log(` Downloaded ${(buffer.length / 1024 / 1024).toFixed(1)}MB`);
|
||||
}
|
||||
|
||||
console.log(` Extracting to ${targetDir}...`);
|
||||
const { execSync } = await import('child_process');
|
||||
execSync(`unzip -o -j "${zipPath}" -d "${targetDir}"`, { stdio: 'pipe' });
|
||||
writeFileSync(resolve(targetDir, '.done'), 'ok');
|
||||
}
|
||||
|
||||
/**
 * Parse one extracted USDA dataset directory into three maps keyed by fdc_id:
 * category-filtered foods, nutrient records (field → amount), and portions.
 */
async function importDataset(datasetDir: string, label: string) {
	console.log(`\nProcessing ${label}...`);

	// Read category mapping (category id → human-readable description)
	const categoryRows = await readCSV(resolve(datasetDir, 'food_category.csv'));
	const categoryMap = new Map<string, string>();
	for (const row of categoryRows) {
		categoryMap.set(row['id'], row['description']);
	}

	// Read foods, keeping only whitelisted categories
	const foodRows = await readCSV(resolve(datasetDir, 'food.csv'));
	const foods = new Map<number, RawFood>();

	for (const row of foodRows) {
		const catId = parseInt(row['food_category_id'] || '0');
		const category = categoryMap.get(row['food_category_id']) || '';

		if (!INCLUDED_CATEGORIES.has(category)) continue;

		const fdcId = parseInt(row['fdc_id']);
		foods.set(fdcId, {
			fdcId,
			description: row['description'],
			categoryId: catId,
			category,
		});
	}
	console.log(` Found ${foods.size} foods in included categories`);

	// Read nutrients for the retained foods only
	const nutrientRows = await readCSV(resolve(datasetDir, 'food_nutrient.csv'));
	const nutrients = new Map<number, NutrientData>();

	for (const row of nutrientRows) {
		const fdcId = parseInt(row['fdc_id']);
		if (!foods.has(fdcId)) continue; // food was filtered out above

		const nutrientId = parseInt(row['nutrient_id']);
		const fieldName = NUTRIENT_IDS[nutrientId];
		if (!fieldName) continue; // nutrient we don't track

		if (!nutrients.has(fdcId)) nutrients.set(fdcId, {});
		// NOTE(review): amounts are assumed per 100 g (FDC convention for
		// food_nutrient.csv) — confirm against the dataset documentation.
		const amount = parseFloat(row['amount'] || '0');
		if (!isNaN(amount)) {
			nutrients.get(fdcId)![fieldName] = amount;
		}
	}
	console.log(` Loaded nutrients for ${nutrients.size} foods`);

	// Read portions (serving-size options with gram weights)
	const portionRows = await readCSV(resolve(datasetDir, 'food_portion.csv'));
	const portions = new Map<number, Portion[]>();

	for (const row of portionRows) {
		const fdcId = parseInt(row['fdc_id']);
		if (!foods.has(fdcId)) continue;

		const gramWeight = parseFloat(row['gram_weight'] || '0');
		if (!gramWeight || isNaN(gramWeight)) continue; // unusable portion row

		// Build description from amount + modifier/description
		const amount = parseFloat(row['amount'] || '1');
		const modifier = row['modifier'] || row['portion_description'] || '';
		const desc = modifier
			? (amount !== 1 ? `${amount} ${modifier}` : modifier)
			: `${amount} unit`;

		if (!portions.has(fdcId)) portions.set(fdcId, []);
		// Round gram weight to 2 decimals to keep the generated file compact.
		portions.get(fdcId)!.push({ description: desc, grams: Math.round(gramWeight * 100) / 100 });
	}
	console.log(` Loaded portions for ${portions.size} foods`);

	return { foods, nutrients, portions };
}
|
||||
|
||||
function buildNutrientRecord(data: NutrientData | undefined): Record<string, number> {
|
||||
const allFields = Object.values(NUTRIENT_IDS);
|
||||
const result: Record<string, number> = {};
|
||||
for (const field of allFields) {
|
||||
result[field] = Math.round((data?.[field] || 0) * 100) / 100;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log('=== USDA Nutrition Database Import ===\n');
|
||||
|
||||
mkdirSync(DATA_DIR, { recursive: true });
|
||||
|
||||
// Download and extract datasets
|
||||
const srDir = resolve(DATA_DIR, 'sr_legacy');
|
||||
const foundationDir = resolve(DATA_DIR, 'foundation');
|
||||
|
||||
await downloadAndExtract(USDA_URLS.srLegacy, srDir);
|
||||
await downloadAndExtract(USDA_URLS.foundation, foundationDir);
|
||||
|
||||
// Import both datasets
|
||||
const sr = await importDataset(srDir, 'SR Legacy');
|
||||
const foundation = await importDataset(foundationDir, 'Foundation Foods');
|
||||
|
||||
// Merge: Foundation Foods takes priority (more detailed), SR Legacy fills gaps
|
||||
const merged = new Map<string, {
|
||||
fdcId: number;
|
||||
name: string;
|
||||
category: string;
|
||||
per100g: Record<string, number>;
|
||||
portions: Portion[];
|
||||
}>();
|
||||
|
||||
// Add SR Legacy first
|
||||
for (const [fdcId, food] of sr.foods) {
|
||||
const nutrientData = buildNutrientRecord(sr.nutrients.get(fdcId));
|
||||
// Skip entries with no nutrient data at all
|
||||
if (!sr.nutrients.has(fdcId)) continue;
|
||||
|
||||
merged.set(food.description.toLowerCase(), {
|
||||
fdcId,
|
||||
name: food.description,
|
||||
category: food.category,
|
||||
per100g: nutrientData,
|
||||
portions: sr.portions.get(fdcId) || [],
|
||||
});
|
||||
}
|
||||
|
||||
// Override with Foundation Foods where available
|
||||
for (const [fdcId, food] of foundation.foods) {
|
||||
const nutrientData = buildNutrientRecord(foundation.nutrients.get(fdcId));
|
||||
if (!foundation.nutrients.has(fdcId)) continue;
|
||||
|
||||
merged.set(food.description.toLowerCase(), {
|
||||
fdcId,
|
||||
name: food.description,
|
||||
category: food.category,
|
||||
per100g: nutrientData,
|
||||
portions: foundation.portions.get(fdcId) || [],
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`\nMerged total: ${merged.size} unique foods`);
|
||||
|
||||
// Sort by name for stable output
|
||||
const entries = [...merged.values()].sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
// Generate TypeScript output
|
||||
const tsContent = `// Auto-generated from USDA FoodData Central (SR Legacy + Foundation Foods)
|
||||
// Generated: ${new Date().toISOString().split('T')[0]}
|
||||
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-usda-nutrition.ts
|
||||
|
||||
import type { NutritionPer100g } from '$types/types';
|
||||
|
||||
export type NutritionEntry = {
|
||||
fdcId: number;
|
||||
name: string;
|
||||
category: string;
|
||||
per100g: NutritionPer100g;
|
||||
portions: { description: string; grams: number }[];
|
||||
};
|
||||
|
||||
export const NUTRITION_DB: NutritionEntry[] = ${JSON.stringify(entries, null, '\t')};
|
||||
`;
|
||||
|
||||
writeFileSync(OUTPUT_PATH, tsContent, 'utf-8');
|
||||
console.log(`\nWritten ${entries.length} entries to ${OUTPUT_PATH}`);
|
||||
|
||||
// Print category breakdown
|
||||
const categoryCounts = new Map<string, number>();
|
||||
for (const entry of entries) {
|
||||
categoryCounts.set(entry.category, (categoryCounts.get(entry.category) || 0) + 1);
|
||||
}
|
||||
console.log('\nCategory breakdown:');
|
||||
for (const [cat, count] of [...categoryCounts.entries()].sort((a, b) => b[1] - a[1])) {
|
||||
console.log(` ${cat}: ${count}`);
|
||||
}
|
||||
}
|
||||
|
||||
main().catch(err => {
|
||||
console.error('Import failed:', err);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user