recipes: nutrition calculator with BLS/USDA matching, manual overwrites, and skip
Dual-source nutrition system using BLS (German, primary) and USDA (English, fallback) with ML embedding matching (multilingual-e5-small / all-MiniLM-L6-v2), hybrid substring-first search, and position-aware scoring heuristics. Includes per-recipe and global manual ingredient overwrites, ingredient skip/exclude, referenced recipe nutrition (base refs + anchor tags), section-name dedup, amino acid tracking, and reactive client-side calculator with NutritionSummary component.
This commit is contained in:
371
scripts/import-usda-nutrition.ts
Normal file
371
scripts/import-usda-nutrition.ts
Normal file
@@ -0,0 +1,371 @@
|
||||
/**
|
||||
* Imports USDA FoodData Central data (SR Legacy + Foundation Foods) and generates
|
||||
* a typed nutrition database for the recipe calorie calculator.
|
||||
*
|
||||
* Run with: pnpm exec vite-node scripts/import-usda-nutrition.ts
|
||||
*
|
||||
* Downloads bulk CSV data from USDA FDC, filters to relevant food categories,
|
||||
* extracts macro/micronutrient data per 100g, and outputs src/lib/data/nutritionDb.ts
|
||||
*/
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
|
||||
const DATA_DIR = resolve('data/usda');
|
||||
const OUTPUT_PATH = resolve('src/lib/data/nutritionDb.ts');
|
||||
|
||||
// USDA FDC bulk download URLs
|
||||
const USDA_URLS = {
|
||||
srLegacy: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_sr_legacy_food_csv_2018-04.zip',
|
||||
foundation: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2024-10-31.zip',
|
||||
};
|
||||
|
||||
// Nutrient IDs we care about
|
||||
const NUTRIENT_IDS: Record<number, string> = {
|
||||
1008: 'calories',
|
||||
1003: 'protein',
|
||||
1004: 'fat',
|
||||
1258: 'saturatedFat',
|
||||
1005: 'carbs',
|
||||
1079: 'fiber',
|
||||
1063: 'sugars',
|
||||
// Minerals
|
||||
1087: 'calcium',
|
||||
1089: 'iron',
|
||||
1090: 'magnesium',
|
||||
1091: 'phosphorus',
|
||||
1092: 'potassium',
|
||||
1093: 'sodium',
|
||||
1095: 'zinc',
|
||||
// Vitamins
|
||||
1106: 'vitaminA', // RAE (mcg)
|
||||
1162: 'vitaminC',
|
||||
1114: 'vitaminD', // D2+D3 (mcg)
|
||||
1109: 'vitaminE',
|
||||
1185: 'vitaminK',
|
||||
1165: 'thiamin',
|
||||
1166: 'riboflavin',
|
||||
1167: 'niacin',
|
||||
1175: 'vitaminB6',
|
||||
1178: 'vitaminB12',
|
||||
1177: 'folate',
|
||||
// Other
|
||||
1253: 'cholesterol',
|
||||
// Amino acids (g/100g)
|
||||
1212: 'isoleucine',
|
||||
1213: 'leucine',
|
||||
1214: 'lysine',
|
||||
1215: 'methionine',
|
||||
1217: 'phenylalanine',
|
||||
1211: 'threonine',
|
||||
1210: 'tryptophan',
|
||||
1219: 'valine',
|
||||
1221: 'histidine',
|
||||
1222: 'alanine',
|
||||
1220: 'arginine',
|
||||
1223: 'asparticAcid',
|
||||
1216: 'cysteine',
|
||||
1224: 'glutamicAcid',
|
||||
1225: 'glycine',
|
||||
1226: 'proline',
|
||||
1227: 'serine',
|
||||
1218: 'tyrosine',
|
||||
};
|
||||
|
||||
// Food categories to include (SR Legacy food_category_id descriptions)
|
||||
const INCLUDED_CATEGORIES = new Set([
|
||||
'Dairy and Egg Products',
|
||||
'Spices and Herbs',
|
||||
'Baby Foods',
|
||||
'Fats and Oils',
|
||||
'Poultry Products',
|
||||
'Soups, Sauces, and Gravies',
|
||||
'Sausages and Luncheon Meats',
|
||||
'Breakfast Cereals',
|
||||
'Fruits and Fruit Juices',
|
||||
'Pork Products',
|
||||
'Vegetables and Vegetable Products',
|
||||
'Nut and Seed Products',
|
||||
'Beef Products',
|
||||
'Beverages',
|
||||
'Finfish and Shellfish Products',
|
||||
'Legumes and Legume Products',
|
||||
'Lamb, Veal, and Game Products',
|
||||
'Baked Products',
|
||||
'Sweets',
|
||||
'Cereal Grains and Pasta',
|
||||
'Snacks',
|
||||
'Restaurant Foods',
|
||||
]);
|
||||
|
||||
type NutrientData = Record<string, number>;
|
||||
|
||||
interface RawFood {
|
||||
fdcId: number;
|
||||
description: string;
|
||||
categoryId: number;
|
||||
category: string;
|
||||
}
|
||||
|
||||
interface Portion {
|
||||
description: string;
|
||||
grams: number;
|
||||
}
|
||||
|
||||
// Simple CSV line parser that handles quoted fields
|
||||
function parseCSVLine(line: string): string[] {
|
||||
const fields: string[] = [];
|
||||
let current = '';
|
||||
let inQuotes = false;
|
||||
|
||||
for (let i = 0; i < line.length; i++) {
|
||||
const ch = line[i];
|
||||
if (ch === '"') {
|
||||
if (inQuotes && i + 1 < line.length && line[i + 1] === '"') {
|
||||
current += '"';
|
||||
i++;
|
||||
} else {
|
||||
inQuotes = !inQuotes;
|
||||
}
|
||||
} else if (ch === ',' && !inQuotes) {
|
||||
fields.push(current);
|
||||
current = '';
|
||||
} else {
|
||||
current += ch;
|
||||
}
|
||||
}
|
||||
fields.push(current);
|
||||
return fields;
|
||||
}
|
||||
|
||||
async function readCSV(filePath: string): Promise<Record<string, string>[]> {
|
||||
if (!existsSync(filePath)) {
|
||||
console.warn(` File not found: ${filePath}`);
|
||||
return [];
|
||||
}
|
||||
|
||||
const content = readFileSync(filePath, 'utf-8');
|
||||
const lines = content.split('\n').filter(l => l.trim());
|
||||
if (lines.length === 0) return [];
|
||||
|
||||
const headers = parseCSVLine(lines[0]);
|
||||
const rows: Record<string, string>[] = [];
|
||||
|
||||
for (let i = 1; i < lines.length; i++) {
|
||||
const fields = parseCSVLine(lines[i]);
|
||||
const row: Record<string, string> = {};
|
||||
for (let j = 0; j < headers.length; j++) {
|
||||
row[headers[j]] = fields[j] || '';
|
||||
}
|
||||
rows.push(row);
|
||||
}
|
||||
|
||||
return rows;
|
||||
}
|
||||
|
||||
async function downloadAndExtract(url: string, targetDir: string): Promise<void> {
|
||||
const zipName = url.split('/').pop()!;
|
||||
const zipPath = resolve(DATA_DIR, zipName);
|
||||
|
||||
if (existsSync(targetDir) && readFileSync(resolve(targetDir, '.done'), 'utf-8').trim() === 'ok') {
|
||||
console.log(` Already extracted: ${targetDir}`);
|
||||
return;
|
||||
}
|
||||
|
||||
mkdirSync(targetDir, { recursive: true });
|
||||
|
||||
if (!existsSync(zipPath)) {
|
||||
console.log(` Downloading ${zipName}...`);
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) throw new Error(`Download failed: ${response.status} ${response.statusText}`);
|
||||
|
||||
const buffer = Buffer.from(await response.arrayBuffer());
|
||||
writeFileSync(zipPath, buffer);
|
||||
console.log(` Downloaded ${(buffer.length / 1024 / 1024).toFixed(1)}MB`);
|
||||
}
|
||||
|
||||
console.log(` Extracting to ${targetDir}...`);
|
||||
const { execSync } = await import('child_process');
|
||||
execSync(`unzip -o -j "${zipPath}" -d "${targetDir}"`, { stdio: 'pipe' });
|
||||
writeFileSync(resolve(targetDir, '.done'), 'ok');
|
||||
}
|
||||
|
||||
async function importDataset(datasetDir: string, label: string) {
|
||||
console.log(`\nProcessing ${label}...`);
|
||||
|
||||
// Read category mapping
|
||||
const categoryRows = await readCSV(resolve(datasetDir, 'food_category.csv'));
|
||||
const categoryMap = new Map<string, string>();
|
||||
for (const row of categoryRows) {
|
||||
categoryMap.set(row['id'], row['description']);
|
||||
}
|
||||
|
||||
// Read foods
|
||||
const foodRows = await readCSV(resolve(datasetDir, 'food.csv'));
|
||||
const foods = new Map<number, RawFood>();
|
||||
|
||||
for (const row of foodRows) {
|
||||
const catId = parseInt(row['food_category_id'] || '0');
|
||||
const category = categoryMap.get(row['food_category_id']) || '';
|
||||
|
||||
if (!INCLUDED_CATEGORIES.has(category)) continue;
|
||||
|
||||
const fdcId = parseInt(row['fdc_id']);
|
||||
foods.set(fdcId, {
|
||||
fdcId,
|
||||
description: row['description'],
|
||||
categoryId: catId,
|
||||
category,
|
||||
});
|
||||
}
|
||||
console.log(` Found ${foods.size} foods in included categories`);
|
||||
|
||||
// Read nutrients
|
||||
const nutrientRows = await readCSV(resolve(datasetDir, 'food_nutrient.csv'));
|
||||
const nutrients = new Map<number, NutrientData>();
|
||||
|
||||
for (const row of nutrientRows) {
|
||||
const fdcId = parseInt(row['fdc_id']);
|
||||
if (!foods.has(fdcId)) continue;
|
||||
|
||||
const nutrientId = parseInt(row['nutrient_id']);
|
||||
const fieldName = NUTRIENT_IDS[nutrientId];
|
||||
if (!fieldName) continue;
|
||||
|
||||
if (!nutrients.has(fdcId)) nutrients.set(fdcId, {});
|
||||
const amount = parseFloat(row['amount'] || '0');
|
||||
if (!isNaN(amount)) {
|
||||
nutrients.get(fdcId)![fieldName] = amount;
|
||||
}
|
||||
}
|
||||
console.log(` Loaded nutrients for ${nutrients.size} foods`);
|
||||
|
||||
// Read portions
|
||||
const portionRows = await readCSV(resolve(datasetDir, 'food_portion.csv'));
|
||||
const portions = new Map<number, Portion[]>();
|
||||
|
||||
for (const row of portionRows) {
|
||||
const fdcId = parseInt(row['fdc_id']);
|
||||
if (!foods.has(fdcId)) continue;
|
||||
|
||||
const gramWeight = parseFloat(row['gram_weight'] || '0');
|
||||
if (!gramWeight || isNaN(gramWeight)) continue;
|
||||
|
||||
// Build description from amount + modifier/description
|
||||
const amount = parseFloat(row['amount'] || '1');
|
||||
const modifier = row['modifier'] || row['portion_description'] || '';
|
||||
const desc = modifier
|
||||
? (amount !== 1 ? `${amount} ${modifier}` : modifier)
|
||||
: `${amount} unit`;
|
||||
|
||||
if (!portions.has(fdcId)) portions.set(fdcId, []);
|
||||
portions.get(fdcId)!.push({ description: desc, grams: Math.round(gramWeight * 100) / 100 });
|
||||
}
|
||||
console.log(` Loaded portions for ${portions.size} foods`);
|
||||
|
||||
return { foods, nutrients, portions };
|
||||
}
|
||||
|
||||
function buildNutrientRecord(data: NutrientData | undefined): Record<string, number> {
|
||||
const allFields = Object.values(NUTRIENT_IDS);
|
||||
const result: Record<string, number> = {};
|
||||
for (const field of allFields) {
|
||||
result[field] = Math.round((data?.[field] || 0) * 100) / 100;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log('=== USDA Nutrition Database Import ===\n');
|
||||
|
||||
mkdirSync(DATA_DIR, { recursive: true });
|
||||
|
||||
// Download and extract datasets
|
||||
const srDir = resolve(DATA_DIR, 'sr_legacy');
|
||||
const foundationDir = resolve(DATA_DIR, 'foundation');
|
||||
|
||||
await downloadAndExtract(USDA_URLS.srLegacy, srDir);
|
||||
await downloadAndExtract(USDA_URLS.foundation, foundationDir);
|
||||
|
||||
// Import both datasets
|
||||
const sr = await importDataset(srDir, 'SR Legacy');
|
||||
const foundation = await importDataset(foundationDir, 'Foundation Foods');
|
||||
|
||||
// Merge: Foundation Foods takes priority (more detailed), SR Legacy fills gaps
|
||||
const merged = new Map<string, {
|
||||
fdcId: number;
|
||||
name: string;
|
||||
category: string;
|
||||
per100g: Record<string, number>;
|
||||
portions: Portion[];
|
||||
}>();
|
||||
|
||||
// Add SR Legacy first
|
||||
for (const [fdcId, food] of sr.foods) {
|
||||
const nutrientData = buildNutrientRecord(sr.nutrients.get(fdcId));
|
||||
// Skip entries with no nutrient data at all
|
||||
if (!sr.nutrients.has(fdcId)) continue;
|
||||
|
||||
merged.set(food.description.toLowerCase(), {
|
||||
fdcId,
|
||||
name: food.description,
|
||||
category: food.category,
|
||||
per100g: nutrientData,
|
||||
portions: sr.portions.get(fdcId) || [],
|
||||
});
|
||||
}
|
||||
|
||||
// Override with Foundation Foods where available
|
||||
for (const [fdcId, food] of foundation.foods) {
|
||||
const nutrientData = buildNutrientRecord(foundation.nutrients.get(fdcId));
|
||||
if (!foundation.nutrients.has(fdcId)) continue;
|
||||
|
||||
merged.set(food.description.toLowerCase(), {
|
||||
fdcId,
|
||||
name: food.description,
|
||||
category: food.category,
|
||||
per100g: nutrientData,
|
||||
portions: foundation.portions.get(fdcId) || [],
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`\nMerged total: ${merged.size} unique foods`);
|
||||
|
||||
// Sort by name for stable output
|
||||
const entries = [...merged.values()].sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
// Generate TypeScript output
|
||||
const tsContent = `// Auto-generated from USDA FoodData Central (SR Legacy + Foundation Foods)
|
||||
// Generated: ${new Date().toISOString().split('T')[0]}
|
||||
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-usda-nutrition.ts
|
||||
|
||||
import type { NutritionPer100g } from '$types/types';
|
||||
|
||||
export type NutritionEntry = {
|
||||
fdcId: number;
|
||||
name: string;
|
||||
category: string;
|
||||
per100g: NutritionPer100g;
|
||||
portions: { description: string; grams: number }[];
|
||||
};
|
||||
|
||||
export const NUTRITION_DB: NutritionEntry[] = ${JSON.stringify(entries, null, '\t')};
|
||||
`;
|
||||
|
||||
writeFileSync(OUTPUT_PATH, tsContent, 'utf-8');
|
||||
console.log(`\nWritten ${entries.length} entries to ${OUTPUT_PATH}`);
|
||||
|
||||
// Print category breakdown
|
||||
const categoryCounts = new Map<string, number>();
|
||||
for (const entry of entries) {
|
||||
categoryCounts.set(entry.category, (categoryCounts.get(entry.category) || 0) + 1);
|
||||
}
|
||||
console.log('\nCategory breakdown:');
|
||||
for (const [cat, count] of [...categoryCounts.entries()].sort((a, b) => b[1] - a[1])) {
|
||||
console.log(` ${cat}: ${count}`);
|
||||
}
|
||||
}
|
||||
|
||||
main().catch(err => {
|
||||
console.error('Import failed:', err);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user