nutrition: use SvelteKit read() for embedding files instead of fs
Some checks failed
CI / update (push) Has been cancelled

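Replace the fragile CWD-based readFileSync path resolution with SvelteKit's
read() plus Vite `?url` asset imports. The build system now manages the
embedding files as hashed immutable assets, which fixes the ENOENT errors seen
in production when the working directory did not match expectations. The
postbuild step that copied the embedding JSON files into dist/data is removed,
and the embedding index loaders become async because they now await read().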
This commit is contained in:
2026-04-03 08:43:10 +02:00
parent f386032716
commit 8a14230d00
2 changed files with 9 additions and 15 deletions

View File

@@ -7,7 +7,6 @@
"dev": "vite dev", "dev": "vite dev",
"prebuild": "bash scripts/subset-emoji-font.sh && pnpm exec vite-node scripts/generate-mystery-verses.ts && pnpm exec vite-node scripts/download-models.ts", "prebuild": "bash scripts/subset-emoji-font.sh && pnpm exec vite-node scripts/generate-mystery-verses.ts && pnpm exec vite-node scripts/download-models.ts",
"build": "vite build", "build": "vite build",
"postbuild": "mkdir -p dist/data && cp src/lib/data/nutritionEmbeddings.json src/lib/data/blsEmbeddings.json dist/data/",
"preview": "vite preview", "preview": "vite preview",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch", "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",

View File

@@ -6,8 +6,7 @@
* USDA uses all-MiniLM-L6-v2 for English ingredient names. * USDA uses all-MiniLM-L6-v2 for English ingredient names.
*/ */
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers'; import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { readFileSync, existsSync } from 'fs'; import { read } from '$app/server';
import { resolve } from 'path';
import { NUTRITION_DB, type NutritionEntry } from '$lib/data/nutritionDb'; import { NUTRITION_DB, type NutritionEntry } from '$lib/data/nutritionDb';
import { BLS_DB, type BlsEntry } from '$lib/data/blsDb'; import { BLS_DB, type BlsEntry } from '$lib/data/blsDb';
import { lookupAlias } from '$lib/data/ingredientAliases'; import { lookupAlias } from '$lib/data/ingredientAliases';
@@ -15,15 +14,11 @@ import { canonicalizeUnit, resolveGramsPerUnit } from '$lib/data/unitConversions
import { resolveDefaultAmount } from '$lib/data/defaultAmounts'; import { resolveDefaultAmount } from '$lib/data/defaultAmounts';
import type { NutritionMapping, NutritionPer100g } from '$types/types'; import type { NutritionMapping, NutritionPer100g } from '$types/types';
import { NutritionOverwrite } from '$models/NutritionOverwrite'; import { NutritionOverwrite } from '$models/NutritionOverwrite';
import usdaEmbeddingsUrl from '$lib/data/nutritionEmbeddings.json?url';
import blsEmbeddingsUrl from '$lib/data/blsEmbeddings.json?url';
const USDA_MODEL = 'Xenova/all-MiniLM-L6-v2'; const USDA_MODEL = 'Xenova/all-MiniLM-L6-v2';
const BLS_MODEL = 'Xenova/multilingual-e5-small'; const BLS_MODEL = 'Xenova/multilingual-e5-small';
// In dev CWD is project root; in production (adapter-node) CWD is dist/
const DATA_DIR = existsSync(resolve('src/lib/data/nutritionEmbeddings.json'))
? resolve('src/lib/data')
: resolve('data');
const USDA_EMBEDDINGS_PATH = `${DATA_DIR}/nutritionEmbeddings.json`;
const BLS_EMBEDDINGS_PATH = `${DATA_DIR}/blsEmbeddings.json`;
const CONFIDENCE_THRESHOLD = 0.45; const CONFIDENCE_THRESHOLD = 0.45;
// Lazy-loaded singletons — USDA // Lazy-loaded singletons — USDA
@@ -95,9 +90,9 @@ async function getUsdaEmbedder(): Promise<FeatureExtractionPipeline> {
return usdaEmbedder; return usdaEmbedder;
} }
function getUsdaEmbeddingIndex() { async function getUsdaEmbeddingIndex() {
if (!usdaEmbeddingIndex) { if (!usdaEmbeddingIndex) {
const raw = JSON.parse(readFileSync(USDA_EMBEDDINGS_PATH, 'utf-8')); const raw = await read(usdaEmbeddingsUrl).json();
usdaEmbeddingIndex = raw.entries; usdaEmbeddingIndex = raw.entries;
} }
return usdaEmbeddingIndex!; return usdaEmbeddingIndex!;
@@ -120,10 +115,10 @@ async function getBlsEmbedder(): Promise<FeatureExtractionPipeline> {
return blsEmbedder; return blsEmbedder;
} }
function getBlsEmbeddingIndex() { async function getBlsEmbeddingIndex() {
if (!blsEmbeddingIndex) { if (!blsEmbeddingIndex) {
try { try {
const raw = JSON.parse(readFileSync(BLS_EMBEDDINGS_PATH, 'utf-8')); const raw = await read(blsEmbeddingsUrl).json();
blsEmbeddingIndex = raw.entries; blsEmbeddingIndex = raw.entries;
} catch { } catch {
// BLS embeddings not yet generated — skip // BLS embeddings not yet generated — skip
@@ -317,7 +312,7 @@ function substringMatchScore(
async function blsEmbeddingMatch( async function blsEmbeddingMatch(
ingredientNameDe: string ingredientNameDe: string
): Promise<{ entry: BlsEntry; confidence: number } | null> { ): Promise<{ entry: BlsEntry; confidence: number } | null> {
const index = getBlsEmbeddingIndex(); const index = await getBlsEmbeddingIndex();
if (index.length === 0) return null; if (index.length === 0) return null;
const emb = await getBlsEmbedder(); const emb = await getBlsEmbedder();
@@ -439,7 +434,7 @@ async function usdaEmbeddingMatch(
ingredientNameEn: string ingredientNameEn: string
): Promise<{ entry: NutritionEntry; confidence: number } | null> { ): Promise<{ entry: NutritionEntry; confidence: number } | null> {
const emb = await getUsdaEmbedder(); const emb = await getUsdaEmbedder();
const index = getUsdaEmbeddingIndex(); const index = await getUsdaEmbeddingIndex();
const result = await emb(ingredientNameEn, { pooling: 'mean', normalize: true }); const result = await emb(ingredientNameEn, { pooling: 'mean', normalize: true });
const queryVector = Array.from(result.data as Float32Array); const queryVector = Array.from(result.data as Float32Array);