recipes: nutrition calculator with BLS/USDA matching, manual overrides, and skip
Dual-source nutrition system using BLS (German, primary) and USDA (English, fallback) with ML embedding matching (multilingual-e5-small / all-MiniLM-L6-v2), hybrid substring-first search, and position-aware scoring heuristics. Includes per-recipe and global manual ingredient overrides, ingredient skip/exclude, referenced recipe nutrition (base refs + anchor tags), section-name dedup, amino acid tracking, and a reactive client-side calculator with NutritionSummary component.
This commit is contained in:
61
scripts/embed-bls-db.ts
Normal file
61
scripts/embed-bls-db.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* Pre-compute sentence embeddings for BLS German food names.
|
||||
* Uses multilingual-e5-small for good German language understanding.
|
||||
*
|
||||
* Run: pnpm exec vite-node scripts/embed-bls-db.ts
|
||||
*/
|
||||
import { pipeline } from '@huggingface/transformers';
|
||||
import { writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
|
||||
// Dynamic import of blsDb (generated file)
// (presumably dynamic because the file is generated by import-bls-nutrition.ts
// and may not exist on a fresh checkout — TODO confirm)
const { BLS_DB } = await import('../src/lib/data/blsDb');

// Multilingual embedding model, chosen for German food-name understanding
// (see header comment above).
const MODEL_NAME = 'Xenova/multilingual-e5-small';
// Destination for the pre-computed embedding JSON.
const OUTPUT_FILE = resolve('src/lib/data/blsEmbeddings.json');
|
||||
|
||||
async function main() {
|
||||
console.log(`Loading model ${MODEL_NAME}...`);
|
||||
const embedder = await pipeline('feature-extraction', MODEL_NAME, {
|
||||
dtype: 'q8',
|
||||
});
|
||||
|
||||
console.log(`Embedding ${BLS_DB.length} BLS entries...`);
|
||||
|
||||
const entries: { blsCode: string; name: string; vector: number[] }[] = [];
|
||||
const batchSize = 32;
|
||||
|
||||
for (let i = 0; i < BLS_DB.length; i += batchSize) {
|
||||
const batch = BLS_DB.slice(i, i + batchSize);
|
||||
// e5 models require "passage: " prefix for documents
|
||||
const texts = batch.map(e => `passage: ${e.nameDe}`);
|
||||
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
const result = await embedder(texts[j], { pooling: 'mean', normalize: true });
|
||||
const vector = Array.from(result.data as Float32Array).map(v => Math.round(v * 10000) / 10000);
|
||||
|
||||
entries.push({
|
||||
blsCode: batch[j].blsCode,
|
||||
name: batch[j].nameDe,
|
||||
vector,
|
||||
});
|
||||
}
|
||||
|
||||
if ((i + batchSize) % 500 < batchSize) {
|
||||
console.log(` ${Math.min(i + batchSize, BLS_DB.length)}/${BLS_DB.length}`);
|
||||
}
|
||||
}
|
||||
|
||||
const output = {
|
||||
model: MODEL_NAME,
|
||||
dimensions: entries[0]?.vector.length || 384,
|
||||
count: entries.length,
|
||||
entries,
|
||||
};
|
||||
|
||||
const json = JSON.stringify(output);
|
||||
writeFileSync(OUTPUT_FILE, json, 'utf-8');
|
||||
console.log(`Written ${OUTPUT_FILE} (${(json.length / 1024 / 1024).toFixed(1)}MB, ${entries.length} entries)`);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
60
scripts/embed-nutrition-db.ts
Normal file
60
scripts/embed-nutrition-db.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
/**
|
||||
* Pre-computes sentence embeddings for all USDA nutrition DB entries using
|
||||
* all-MiniLM-L6-v2 via @huggingface/transformers.
|
||||
*
|
||||
* Run with: pnpm exec vite-node scripts/embed-nutrition-db.ts
|
||||
*
|
||||
* Outputs: src/lib/data/nutritionEmbeddings.json
|
||||
* Format: { entries: [{ fdcId, name, vector: number[384] }] }
|
||||
*/
|
||||
import { writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
import { pipeline } from '@huggingface/transformers';
|
||||
import { NUTRITION_DB } from '../src/lib/data/nutritionDb';
|
||||
|
||||
// Destination for the pre-computed embedding JSON (see header comment).
const OUTPUT_PATH = resolve('src/lib/data/nutritionEmbeddings.json');
// English sentence-embedding model; produces 384-dim vectors (see header).
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2';
// Batch size for progress reporting; entries are still embedded one at a time.
const BATCH_SIZE = 64;
|
||||
|
||||
async function main() {
|
||||
console.log('=== Nutrition DB Embedding Generation ===\n');
|
||||
console.log(`Entries to embed: ${NUTRITION_DB.length}`);
|
||||
console.log(`Model: ${MODEL_NAME}`);
|
||||
console.log(`Loading model (first run downloads ~23MB)...\n`);
|
||||
|
||||
const embedder = await pipeline('feature-extraction', MODEL_NAME, {
|
||||
dtype: 'q8',
|
||||
});
|
||||
|
||||
const entries: { fdcId: number; name: string; vector: number[] }[] = [];
|
||||
const totalBatches = Math.ceil(NUTRITION_DB.length / BATCH_SIZE);
|
||||
|
||||
for (let i = 0; i < NUTRITION_DB.length; i += BATCH_SIZE) {
|
||||
const batch = NUTRITION_DB.slice(i, i + BATCH_SIZE);
|
||||
const batchNum = Math.floor(i / BATCH_SIZE) + 1;
|
||||
process.stdout.write(`\r Batch ${batchNum}/${totalBatches} (${i + batch.length}/${NUTRITION_DB.length})`);
|
||||
|
||||
// Embed all names in this batch
|
||||
for (const item of batch) {
|
||||
const result = await embedder(item.name, { pooling: 'mean', normalize: true });
|
||||
// result.data is a Float32Array — truncate to 4 decimal places to save space
|
||||
const vector = Array.from(result.data as Float32Array).map(v => Math.round(v * 10000) / 10000);
|
||||
entries.push({ fdcId: item.fdcId, name: item.name, vector });
|
||||
}
|
||||
}
|
||||
|
||||
console.log('\n\nWriting embeddings...');
|
||||
|
||||
const output = { model: MODEL_NAME, dimensions: 384, count: entries.length, entries };
|
||||
writeFileSync(OUTPUT_PATH, JSON.stringify(output), 'utf-8');
|
||||
|
||||
const fileSizeMB = (Buffer.byteLength(JSON.stringify(output)) / 1024 / 1024).toFixed(1);
|
||||
console.log(`Written ${entries.length} embeddings to ${OUTPUT_PATH} (${fileSizeMB}MB)`);
|
||||
|
||||
await embedder.dispose();
|
||||
}
|
||||
|
||||
main().catch(err => {
|
||||
console.error('Embedding generation failed:', err);
|
||||
process.exit(1);
|
||||
});
|
||||
180
scripts/import-bls-nutrition.ts
Normal file
180
scripts/import-bls-nutrition.ts
Normal file
@@ -0,0 +1,180 @@
|
||||
/**
|
||||
* Import BLS 4.0 (Bundeslebensmittelschlüssel) nutrition data from CSV.
|
||||
* Pre-convert the xlsx to CSV first (one-time):
|
||||
* node -e "const X=require('xlsx');const w=X.readFile('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.xlsx');
|
||||
* require('fs').writeFileSync('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.csv',X.utils.sheet_to_csv(w.Sheets[w.SheetNames[0]]))"
|
||||
*
|
||||
* Run: pnpm exec vite-node scripts/import-bls-nutrition.ts
|
||||
*/
|
||||
import { readFileSync, writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
|
||||
/** Parse CSV handling quoted fields with commas */
|
||||
function parseCSV(text: string): string[][] {
|
||||
const rows: string[][] = [];
|
||||
let i = 0;
|
||||
while (i < text.length) {
|
||||
const row: string[] = [];
|
||||
while (i < text.length && text[i] !== '\n') {
|
||||
if (text[i] === '"') {
|
||||
i++; // skip opening quote
|
||||
let field = '';
|
||||
while (i < text.length) {
|
||||
if (text[i] === '"') {
|
||||
if (text[i + 1] === '"') { field += '"'; i += 2; }
|
||||
else { i++; break; }
|
||||
} else { field += text[i]; i++; }
|
||||
}
|
||||
row.push(field);
|
||||
if (text[i] === ',') i++;
|
||||
} else {
|
||||
const next = text.indexOf(',', i);
|
||||
const nl = text.indexOf('\n', i);
|
||||
const end = (next === -1 || (nl !== -1 && nl < next)) ? (nl === -1 ? text.length : nl) : next;
|
||||
row.push(text.substring(i, end));
|
||||
i = end;
|
||||
if (text[i] === ',') i++;
|
||||
}
|
||||
}
|
||||
if (text[i] === '\n') i++;
|
||||
if (row.length > 0) rows.push(row);
|
||||
}
|
||||
return rows;
|
||||
}
|
||||
|
||||
// Input CSV (pre-converted from the official xlsx, see header comment).
const BLS_CSV = resolve('BLS_4_0_2025_DE/BLS_4_0_Daten_2025_DE.csv');
// Generated TypeScript module consumed by the nutrition calculator.
const OUTPUT_FILE = resolve('src/lib/data/blsDb.ts');

// BLS nutrient code → our per100g field name
// `divisor` converts the BLS unit into the unit our per100g field uses.
const NUTRIENT_MAP: Record<string, { field: string; divisor?: number }> = {
	ENERCC: { field: 'calories' },
	PROT625: { field: 'protein' },
	FAT: { field: 'fat' },
	FASAT: { field: 'saturatedFat' },
	CHO: { field: 'carbs' },
	FIBT: { field: 'fiber' },
	SUGAR: { field: 'sugars' },
	CA: { field: 'calcium' },
	FE: { field: 'iron' },
	MG: { field: 'magnesium' },
	P: { field: 'phosphorus' },
	K: { field: 'potassium' },
	NA: { field: 'sodium' },
	ZN: { field: 'zinc' },
	VITA: { field: 'vitaminA' },
	VITC: { field: 'vitaminC' },
	VITD: { field: 'vitaminD' },
	VITE: { field: 'vitaminE' },
	VITK: { field: 'vitaminK' },
	THIA: { field: 'thiamin' },
	RIBF: { field: 'riboflavin' },
	NIA: { field: 'niacin' },
	VITB6: { field: 'vitaminB6', divisor: 1000 }, // BLS: µg → mg
	VITB12: { field: 'vitaminB12' },
	FOL: { field: 'folate' },
	// NOTE(review): verify 'CHORL' is the actual BLS 4.0 cholesterol column code.
	CHORL: { field: 'cholesterol' },
	// Amino acids (all g/100g)
	ILE: { field: 'isoleucine' },
	LEU: { field: 'leucine' },
	LYS: { field: 'lysine' },
	MET: { field: 'methionine' },
	PHE: { field: 'phenylalanine' },
	THR: { field: 'threonine' },
	TRP: { field: 'tryptophan' },
	VAL: { field: 'valine' },
	HIS: { field: 'histidine' },
	ALA: { field: 'alanine' },
	ARG: { field: 'arginine' },
	ASP: { field: 'asparticAcid' },
	CYSTE: { field: 'cysteine' },
	GLU: { field: 'glutamicAcid' },
	GLY: { field: 'glycine' },
	PRO: { field: 'proline' },
	SER: { field: 'serine' },
	TYR: { field: 'tyrosine' },
};

// BLS code first letter → category (BLS 4.0 Hauptgruppen)
// NOTE(review): letter → group mapping assumed from the BLS 4.0 docs — verify.
const CATEGORY_MAP: Record<string, string> = {
	A: 'Getränke', B: 'Getreideprodukte', C: 'Getreide', D: 'Backwaren',
	E: 'Gemüse', F: 'Obst', G: 'Hülsenfrüchte',
	H: 'Gewürze und Kräuter', J: 'Fette und Öle', K: 'Milch und Milchprodukte',
	L: 'Eier', M: 'Fleisch', N: 'Wurstwaren', O: 'Wild', P: 'Geflügel',
	Q: 'Fisch und Meeresfrüchte', R: 'Süßwaren', S: 'Zucker und Honig',
	T: 'Gerichte und Rezepte', U: 'Pilze', V: 'Sonstiges', W: 'Algen',
	X: 'Fleischersatz', Y: 'Supplemente',
};
|
||||
|
||||
async function main() {
|
||||
console.log('Reading BLS CSV...');
|
||||
const csvText = readFileSync(BLS_CSV, 'utf-8');
|
||||
const rows: string[][] = parseCSV(csvText);
|
||||
|
||||
const headers = rows[0];
|
||||
console.log(`Headers: ${headers.length} columns, ${rows.length - 1} data rows`);
|
||||
|
||||
// Build column index: BLS nutrient code → column index of the value column
|
||||
const codeToCol = new Map<string, number>();
|
||||
for (let c = 3; c < headers.length; c += 3) {
|
||||
const code = headers[c]?.split(' ')[0];
|
||||
if (code) codeToCol.set(code, c);
|
||||
}
|
||||
|
||||
const entries: any[] = [];
|
||||
|
||||
for (let r = 1; r < rows.length; r++) {
|
||||
const row = rows[r];
|
||||
const blsCode = row[0]?.trim();
|
||||
const nameDe = row[1]?.trim();
|
||||
const nameEn = row[2]?.trim() || '';
|
||||
|
||||
if (!blsCode || !nameDe) continue;
|
||||
|
||||
const category = CATEGORY_MAP[blsCode[0]] || 'Sonstiges';
|
||||
const per100g: Record<string, number> = {};
|
||||
|
||||
for (const [blsNutrientCode, mapping] of Object.entries(NUTRIENT_MAP)) {
|
||||
const col = codeToCol.get(blsNutrientCode);
|
||||
if (col === undefined) {
|
||||
per100g[mapping.field] = 0;
|
||||
continue;
|
||||
}
|
||||
let value = parseFloat(row[col] || '0');
|
||||
if (isNaN(value)) value = 0;
|
||||
if (mapping.divisor) value /= mapping.divisor;
|
||||
per100g[mapping.field] = Math.round(value * 1000) / 1000;
|
||||
}
|
||||
|
||||
entries.push({ blsCode, nameDe, nameEn, category, per100g });
|
||||
}
|
||||
|
||||
console.log(`Parsed ${entries.length} BLS entries`);
|
||||
|
||||
// Sample entries
|
||||
const sample = entries.slice(0, 3);
|
||||
for (const e of sample) {
|
||||
console.log(` ${e.blsCode} | ${e.nameDe} | ${e.per100g.calories} kcal | protein ${e.per100g.protein}g`);
|
||||
}
|
||||
|
||||
const output = `// Auto-generated from BLS 4.0 (Bundeslebensmittelschlüssel)
|
||||
// Generated: ${new Date().toISOString().split('T')[0]}
|
||||
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-bls-nutrition.ts
|
||||
|
||||
import type { NutritionPer100g } from '$types/types';
|
||||
|
||||
export type BlsEntry = {
|
||||
blsCode: string;
|
||||
nameDe: string;
|
||||
nameEn: string;
|
||||
category: string;
|
||||
per100g: NutritionPer100g;
|
||||
};
|
||||
|
||||
export const BLS_DB: BlsEntry[] = ${JSON.stringify(entries, null, 0)};
|
||||
`;
|
||||
|
||||
writeFileSync(OUTPUT_FILE, output, 'utf-8');
|
||||
console.log(`Written ${OUTPUT_FILE} (${(output.length / 1024 / 1024).toFixed(1)}MB, ${entries.length} entries)`);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
371
scripts/import-usda-nutrition.ts
Normal file
371
scripts/import-usda-nutrition.ts
Normal file
@@ -0,0 +1,371 @@
|
||||
/**
|
||||
* Imports USDA FoodData Central data (SR Legacy + Foundation Foods) and generates
|
||||
* a typed nutrition database for the recipe calorie calculator.
|
||||
*
|
||||
* Run with: pnpm exec vite-node scripts/import-usda-nutrition.ts
|
||||
*
|
||||
* Downloads bulk CSV data from USDA FDC, filters to relevant food categories,
|
||||
* extracts macro/micronutrient data per 100g, and outputs src/lib/data/nutritionDb.ts
|
||||
*/
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
|
||||
// Where downloaded zips and extracted CSVs are cached between runs.
const DATA_DIR = resolve('data/usda');
// Generated TypeScript module consumed by the nutrition calculator.
const OUTPUT_PATH = resolve('src/lib/data/nutritionDb.ts');

// USDA FDC bulk download URLs
// NOTE(review): dataset snapshots are versioned by date in the filename —
// bump these URLs when refreshing the data.
const USDA_URLS = {
	srLegacy: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_sr_legacy_food_csv_2018-04.zip',
	foundation: 'https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2024-10-31.zip',
};

// Nutrient IDs we care about: USDA FDC nutrient_id → our per100g field name.
const NUTRIENT_IDS: Record<number, string> = {
	1008: 'calories',
	1003: 'protein',
	1004: 'fat',
	1258: 'saturatedFat',
	1005: 'carbs',
	1079: 'fiber',
	1063: 'sugars',
	// Minerals
	1087: 'calcium',
	1089: 'iron',
	1090: 'magnesium',
	1091: 'phosphorus',
	1092: 'potassium',
	1093: 'sodium',
	1095: 'zinc',
	// Vitamins
	1106: 'vitaminA', // RAE (mcg)
	1162: 'vitaminC',
	1114: 'vitaminD', // D2+D3 (mcg)
	1109: 'vitaminE',
	1185: 'vitaminK',
	1165: 'thiamin',
	1166: 'riboflavin',
	1167: 'niacin',
	1175: 'vitaminB6',
	1178: 'vitaminB12',
	1177: 'folate',
	// Other
	1253: 'cholesterol',
	// Amino acids (g/100g)
	1212: 'isoleucine',
	1213: 'leucine',
	1214: 'lysine',
	1215: 'methionine',
	1217: 'phenylalanine',
	1211: 'threonine',
	1210: 'tryptophan',
	1219: 'valine',
	1221: 'histidine',
	1222: 'alanine',
	1220: 'arginine',
	1223: 'asparticAcid',
	1216: 'cysteine',
	1224: 'glutamicAcid',
	1225: 'glycine',
	1226: 'proline',
	1227: 'serine',
	1218: 'tyrosine',
};

// Food categories to include (SR Legacy food_category_id descriptions)
// Foods in any other category are dropped during import.
const INCLUDED_CATEGORIES = new Set([
	'Dairy and Egg Products',
	'Spices and Herbs',
	'Baby Foods',
	'Fats and Oils',
	'Poultry Products',
	'Soups, Sauces, and Gravies',
	'Sausages and Luncheon Meats',
	'Breakfast Cereals',
	'Fruits and Fruit Juices',
	'Pork Products',
	'Vegetables and Vegetable Products',
	'Nut and Seed Products',
	'Beef Products',
	'Beverages',
	'Finfish and Shellfish Products',
	'Legumes and Legume Products',
	'Lamb, Veal, and Game Products',
	'Baked Products',
	'Sweets',
	'Cereal Grains and Pasta',
	'Snacks',
	'Restaurant Foods',
]);

// Sparse map of per100g field name → amount, as read from food_nutrient.csv.
type NutrientData = Record<string, number>;

// One food row from food.csv that survived category filtering.
interface RawFood {
	fdcId: number;
	description: string;
	categoryId: number;
	category: string;
}

// A serving-size option ("modifier" text plus its gram weight).
interface Portion {
	description: string;
	grams: number;
}
|
||||
|
||||
// Simple CSV line parser that handles quoted fields
|
||||
function parseCSVLine(line: string): string[] {
|
||||
const fields: string[] = [];
|
||||
let current = '';
|
||||
let inQuotes = false;
|
||||
|
||||
for (let i = 0; i < line.length; i++) {
|
||||
const ch = line[i];
|
||||
if (ch === '"') {
|
||||
if (inQuotes && i + 1 < line.length && line[i + 1] === '"') {
|
||||
current += '"';
|
||||
i++;
|
||||
} else {
|
||||
inQuotes = !inQuotes;
|
||||
}
|
||||
} else if (ch === ',' && !inQuotes) {
|
||||
fields.push(current);
|
||||
current = '';
|
||||
} else {
|
||||
current += ch;
|
||||
}
|
||||
}
|
||||
fields.push(current);
|
||||
return fields;
|
||||
}
|
||||
|
||||
async function readCSV(filePath: string): Promise<Record<string, string>[]> {
|
||||
if (!existsSync(filePath)) {
|
||||
console.warn(` File not found: ${filePath}`);
|
||||
return [];
|
||||
}
|
||||
|
||||
const content = readFileSync(filePath, 'utf-8');
|
||||
const lines = content.split('\n').filter(l => l.trim());
|
||||
if (lines.length === 0) return [];
|
||||
|
||||
const headers = parseCSVLine(lines[0]);
|
||||
const rows: Record<string, string>[] = [];
|
||||
|
||||
for (let i = 1; i < lines.length; i++) {
|
||||
const fields = parseCSVLine(lines[i]);
|
||||
const row: Record<string, string> = {};
|
||||
for (let j = 0; j < headers.length; j++) {
|
||||
row[headers[j]] = fields[j] || '';
|
||||
}
|
||||
rows.push(row);
|
||||
}
|
||||
|
||||
return rows;
|
||||
}
|
||||
|
||||
async function downloadAndExtract(url: string, targetDir: string): Promise<void> {
|
||||
const zipName = url.split('/').pop()!;
|
||||
const zipPath = resolve(DATA_DIR, zipName);
|
||||
|
||||
if (existsSync(targetDir) && readFileSync(resolve(targetDir, '.done'), 'utf-8').trim() === 'ok') {
|
||||
console.log(` Already extracted: ${targetDir}`);
|
||||
return;
|
||||
}
|
||||
|
||||
mkdirSync(targetDir, { recursive: true });
|
||||
|
||||
if (!existsSync(zipPath)) {
|
||||
console.log(` Downloading ${zipName}...`);
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) throw new Error(`Download failed: ${response.status} ${response.statusText}`);
|
||||
|
||||
const buffer = Buffer.from(await response.arrayBuffer());
|
||||
writeFileSync(zipPath, buffer);
|
||||
console.log(` Downloaded ${(buffer.length / 1024 / 1024).toFixed(1)}MB`);
|
||||
}
|
||||
|
||||
console.log(` Extracting to ${targetDir}...`);
|
||||
const { execSync } = await import('child_process');
|
||||
execSync(`unzip -o -j "${zipPath}" -d "${targetDir}"`, { stdio: 'pipe' });
|
||||
writeFileSync(resolve(targetDir, '.done'), 'ok');
|
||||
}
|
||||
|
||||
/**
 * Parse one extracted USDA dataset directory into three maps keyed by fdc_id:
 * category-filtered foods, nutrient records (field → amount), and portions.
 */
async function importDataset(datasetDir: string, label: string) {
	console.log(`\nProcessing ${label}...`);

	// Read category mapping (category id → human-readable description)
	const categoryRows = await readCSV(resolve(datasetDir, 'food_category.csv'));
	const categoryMap = new Map<string, string>();
	for (const row of categoryRows) {
		categoryMap.set(row['id'], row['description']);
	}

	// Read foods, keeping only whitelisted categories
	const foodRows = await readCSV(resolve(datasetDir, 'food.csv'));
	const foods = new Map<number, RawFood>();

	for (const row of foodRows) {
		const catId = parseInt(row['food_category_id'] || '0');
		const category = categoryMap.get(row['food_category_id']) || '';

		if (!INCLUDED_CATEGORIES.has(category)) continue;

		const fdcId = parseInt(row['fdc_id']);
		foods.set(fdcId, {
			fdcId,
			description: row['description'],
			categoryId: catId,
			category,
		});
	}
	console.log(` Found ${foods.size} foods in included categories`);

	// Read nutrients for the retained foods only
	const nutrientRows = await readCSV(resolve(datasetDir, 'food_nutrient.csv'));
	const nutrients = new Map<number, NutrientData>();

	for (const row of nutrientRows) {
		const fdcId = parseInt(row['fdc_id']);
		if (!foods.has(fdcId)) continue; // food was filtered out above

		const nutrientId = parseInt(row['nutrient_id']);
		const fieldName = NUTRIENT_IDS[nutrientId];
		if (!fieldName) continue; // nutrient we don't track

		if (!nutrients.has(fdcId)) nutrients.set(fdcId, {});
		// NOTE(review): amounts are assumed per 100 g (FDC convention for
		// food_nutrient.csv) — confirm against the dataset documentation.
		const amount = parseFloat(row['amount'] || '0');
		if (!isNaN(amount)) {
			nutrients.get(fdcId)![fieldName] = amount;
		}
	}
	console.log(` Loaded nutrients for ${nutrients.size} foods`);

	// Read portions (serving-size options with gram weights)
	const portionRows = await readCSV(resolve(datasetDir, 'food_portion.csv'));
	const portions = new Map<number, Portion[]>();

	for (const row of portionRows) {
		const fdcId = parseInt(row['fdc_id']);
		if (!foods.has(fdcId)) continue;

		const gramWeight = parseFloat(row['gram_weight'] || '0');
		if (!gramWeight || isNaN(gramWeight)) continue; // unusable portion row

		// Build description from amount + modifier/description
		const amount = parseFloat(row['amount'] || '1');
		const modifier = row['modifier'] || row['portion_description'] || '';
		const desc = modifier
			? (amount !== 1 ? `${amount} ${modifier}` : modifier)
			: `${amount} unit`;

		if (!portions.has(fdcId)) portions.set(fdcId, []);
		// Round gram weight to 2 decimals to keep the generated file compact.
		portions.get(fdcId)!.push({ description: desc, grams: Math.round(gramWeight * 100) / 100 });
	}
	console.log(` Loaded portions for ${portions.size} foods`);

	return { foods, nutrients, portions };
}
|
||||
|
||||
function buildNutrientRecord(data: NutrientData | undefined): Record<string, number> {
|
||||
const allFields = Object.values(NUTRIENT_IDS);
|
||||
const result: Record<string, number> = {};
|
||||
for (const field of allFields) {
|
||||
result[field] = Math.round((data?.[field] || 0) * 100) / 100;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log('=== USDA Nutrition Database Import ===\n');
|
||||
|
||||
mkdirSync(DATA_DIR, { recursive: true });
|
||||
|
||||
// Download and extract datasets
|
||||
const srDir = resolve(DATA_DIR, 'sr_legacy');
|
||||
const foundationDir = resolve(DATA_DIR, 'foundation');
|
||||
|
||||
await downloadAndExtract(USDA_URLS.srLegacy, srDir);
|
||||
await downloadAndExtract(USDA_URLS.foundation, foundationDir);
|
||||
|
||||
// Import both datasets
|
||||
const sr = await importDataset(srDir, 'SR Legacy');
|
||||
const foundation = await importDataset(foundationDir, 'Foundation Foods');
|
||||
|
||||
// Merge: Foundation Foods takes priority (more detailed), SR Legacy fills gaps
|
||||
const merged = new Map<string, {
|
||||
fdcId: number;
|
||||
name: string;
|
||||
category: string;
|
||||
per100g: Record<string, number>;
|
||||
portions: Portion[];
|
||||
}>();
|
||||
|
||||
// Add SR Legacy first
|
||||
for (const [fdcId, food] of sr.foods) {
|
||||
const nutrientData = buildNutrientRecord(sr.nutrients.get(fdcId));
|
||||
// Skip entries with no nutrient data at all
|
||||
if (!sr.nutrients.has(fdcId)) continue;
|
||||
|
||||
merged.set(food.description.toLowerCase(), {
|
||||
fdcId,
|
||||
name: food.description,
|
||||
category: food.category,
|
||||
per100g: nutrientData,
|
||||
portions: sr.portions.get(fdcId) || [],
|
||||
});
|
||||
}
|
||||
|
||||
// Override with Foundation Foods where available
|
||||
for (const [fdcId, food] of foundation.foods) {
|
||||
const nutrientData = buildNutrientRecord(foundation.nutrients.get(fdcId));
|
||||
if (!foundation.nutrients.has(fdcId)) continue;
|
||||
|
||||
merged.set(food.description.toLowerCase(), {
|
||||
fdcId,
|
||||
name: food.description,
|
||||
category: food.category,
|
||||
per100g: nutrientData,
|
||||
portions: foundation.portions.get(fdcId) || [],
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`\nMerged total: ${merged.size} unique foods`);
|
||||
|
||||
// Sort by name for stable output
|
||||
const entries = [...merged.values()].sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
// Generate TypeScript output
|
||||
const tsContent = `// Auto-generated from USDA FoodData Central (SR Legacy + Foundation Foods)
|
||||
// Generated: ${new Date().toISOString().split('T')[0]}
|
||||
// Do not edit manually — regenerate with: pnpm exec vite-node scripts/import-usda-nutrition.ts
|
||||
|
||||
import type { NutritionPer100g } from '$types/types';
|
||||
|
||||
export type NutritionEntry = {
|
||||
fdcId: number;
|
||||
name: string;
|
||||
category: string;
|
||||
per100g: NutritionPer100g;
|
||||
portions: { description: string; grams: number }[];
|
||||
};
|
||||
|
||||
export const NUTRITION_DB: NutritionEntry[] = ${JSON.stringify(entries, null, '\t')};
|
||||
`;
|
||||
|
||||
writeFileSync(OUTPUT_PATH, tsContent, 'utf-8');
|
||||
console.log(`\nWritten ${entries.length} entries to ${OUTPUT_PATH}`);
|
||||
|
||||
// Print category breakdown
|
||||
const categoryCounts = new Map<string, number>();
|
||||
for (const entry of entries) {
|
||||
categoryCounts.set(entry.category, (categoryCounts.get(entry.category) || 0) + 1);
|
||||
}
|
||||
console.log('\nCategory breakdown:');
|
||||
for (const [cat, count] of [...categoryCounts.entries()].sort((a, b) => b[1] - a[1])) {
|
||||
console.log(` ${cat}: ${count}`);
|
||||
}
|
||||
}
|
||||
|
||||
main().catch(err => {
|
||||
console.error('Import failed:', err);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user