Files
kochwas/src/lib/server/parsers/ingredient.ts
hsiegeln 72816d6b35 feat(schema): ingredient.section_heading (Migration 012 + Type)
Fuegt das nullable Feld section_heading zur ingredient-Tabelle hinzu
(Migration 012), erweitert den Ingredient-Typ und aktualisiert alle drei
Return-Stellen in parseIngredient. Downstream-Sites (repository, Editor,
Tests) bleiben rot – werden in Task 2+ behoben.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 14:49:42 +02:00

121 lines
3.7 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { Ingredient } from '$lib/types';
/**
 * Unit tokens the parser accepts directly after a quantity.
 *
 * Lookup is an exact string match, so spelling, case and a trailing dot
 * (as in "Pck.") all matter.
 */
const UNITS = new Set<string>([
  // Weights and volumes
  'g', 'kg', 'ml', 'l', 'cl', 'dl',
  // Spoon measures and pinch
  'TL', 'EL', 'Prise',
  // Packaging and counting units
  'Pck.', 'Pkg', 'Becher', 'Stk', 'Stück', 'Bund', 'Tasse', 'Dose'
]);
/**
 * Numeric values for the ASCII fraction spellings the parser knows by heart.
 * Keys are the exact text ("numerator/denominator", no spaces).
 */
const FRACTION_MAP: Record<string, number> = {
  '1/2': 1 / 2,
  '1/3': 1 / 3,
  '2/3': 2 / 3,
  '1/4': 1 / 4,
  '3/4': 3 / 4
};
// Vulgar-fraction code points. They show up regularly in German-language
// recipe sources (Chefkoch et al. deliver them occasionally; Apple's Food
// App, Fork etc. do so more often).
// Each key is a single character, each value the number it stands for.
const UNICODE_FRACTION_MAP: Record<string, number> = {
  // Latin-1 Supplement
  '\u00BD': 1 / 2, // ½
  '\u00BC': 1 / 4, // ¼
  '\u00BE': 3 / 4, // ¾
  // Number Forms
  '\u2150': 1 / 7, // ⅐
  '\u2151': 1 / 9, // ⅑
  '\u2152': 1 / 10, // ⅒
  '\u2153': 1 / 3, // ⅓
  '\u2154': 2 / 3, // ⅔
  '\u2155': 1 / 5, // ⅕
  '\u2156': 2 / 5, // ⅖
  '\u2157': 3 / 5, // ⅗
  '\u2158': 4 / 5, // ⅘
  '\u2159': 1 / 6, // ⅙
  '\u215A': 5 / 6, // ⅚
  '\u215B': 1 / 8, // ⅛
  '\u215C': 3 / 8, // ⅜
  '\u215D': 5 / 8, // ⅝
  '\u215E': 7 / 8 // ⅞
};
// Quantities outside this range are almost certainly parse garbage (e.g. a
// microformat date or a phone number that ended up in a JSON-LD quantity
// field). Such values are turned into null; raw_text is kept for the UI.
const MAX_REASONABLE_QTY = 10000;

/**
 * Filters out implausible quantities.
 *
 * @param n Candidate quantity, or null when nothing was parsed.
 * @returns `n` unchanged when it is a finite number greater than 0 and at
 *   most MAX_REASONABLE_QTY; otherwise null.
 */
function clampQuantity(n: number | null): number | null {
  const plausible =
    n !== null && Number.isFinite(n) && n > 0 && n <= MAX_REASONABLE_QTY;
  return plausible ? n : null;
}
/**
 * Converts the quantity text at the start of an ingredient line to a number.
 *
 * Recognised spellings, checked in this order:
 * 1. ASCII fractions "numerator/denominator" such as "1/2" or "3/8".
 * 2. Ranges such as "2-3" or "2 – 3" (hyphen, en dash or em dash); the lower
 *    bound is returned.
 * 3. Plain integers or decimals, with "." or "," as the decimal separator.
 *
 * @param raw Quantity text; surrounding whitespace is ignored.
 * @returns The parsed number, or null when no finite number can be derived
 *   (this includes fractions with a zero denominator).
 */
function parseQuantity(raw: string): number | null {
  const trimmed = raw.trim();

  // Well-known fractions come straight from the table. The typeof check also
  // keeps inherited object members (e.g. "constructor") from being returned.
  const known = FRACTION_MAP[trimmed];
  if (typeof known === 'number') return known;

  // Any other fraction is evaluated. Without this step parseFloat would stop
  // at the "/" and report only the numerator ("1/8" would become 1).
  const fractionMatch = /^(\d+)\/(\d+)$/.exec(trimmed);
  if (fractionMatch) {
    const numerator = Number(fractionMatch[1]);
    const denominator = Number(fractionMatch[2]);
    if (denominator === 0) return null;
    const value = numerator / denominator;
    return Number.isFinite(value) ? value : null;
  }

  // Range: keep the lower bound.
  const rangeMatch = /^(\d+[.,]?\d*)\s*[-\u2013\u2014]\s*\d+[.,]?\d*$/.exec(trimmed);
  if (rangeMatch) {
    return parseFloat(rangeMatch[1].replace(',', '.'));
  }

  const num = parseFloat(trimmed.replace(',', '.'));
  return Number.isFinite(num) ? num : null;
}
/**
 * Separates a leading unit token from the ingredient name.
 *
 * "TL Salz" yields unit "TL" and name "Salz"; "Zitrone" yields unit null and
 * name "Zitrone". The first whitespace-delimited token counts as a unit only
 * when it is a member of UNITS and is followed by further text.
 *
 * @param rest Ingredient text with the quantity already removed.
 * @returns The unit (or null) and the trimmed name.
 */
function splitUnitAndName(rest: string): { unit: string | null; name: string } {
  const text = rest.trim();
  const parts = /^(\S+)\s+(.+)$/.exec(text);

  // A single token (or text the pattern cannot split) is all name.
  if (parts === null) {
    return { unit: null, name: text };
  }

  const [, leadingToken, remainder] = parts;
  return UNITS.has(leadingToken)
    ? { unit: leadingToken, name: remainder.trim() }
    : { unit: null, name: text };
}
/**
 * Parses one free-text ingredient line into a structured Ingredient.
 *
 * Processing steps:
 * 1. The first parenthesised group, if any, is removed from the text and
 *    stored as `note`.
 * 2. A quantity is looked for at the start of the remaining text. Supported
 *    forms: a single Unicode fraction ("½ TL Salz"), a whole number followed
 *    by a Unicode fraction ("1½ TL Salz", "1 ½ TL Salz"), a whole number
 *    followed by an ASCII fraction ("1 1/2 TL Zucker"), a number, a range
 *    ("2-3", "2–3") or an ASCII fraction followed by whitespace.
 * 3. The text after the quantity is split into unit and name.
 *
 * When no quantity is found, the whole remaining text becomes the name and
 * quantity/unit are null. Quantities rejected by clampQuantity are reported
 * as null as well. `section_heading` is always null here.
 *
 * @param raw Ingredient line as found in the source.
 * @param position Value copied into the result's `position` field.
 * @returns The parsed ingredient; `raw_text` holds the trimmed input.
 */
export function parseIngredient(raw: string, position = 0): Ingredient {
  const rawText = raw.trim();
  let working = rawText;
  let note: string | null = null;

  const noteMatch = /\(([^)]+)\)/.exec(working);
  if (noteMatch) {
    note = noteMatch[1].trim();
    working = (
      working.slice(0, noteMatch.index) + working.slice(noteMatch.index + noteMatch[0].length)
    ).trim();
  }

  // Shared result builder for every branch that found a quantity.
  const build = (quantity: number | null, rest: string): Ingredient => {
    const { unit, name } = splitUnitAndName(rest);
    return { position, quantity, unit, name, note, raw_text: rawText, section_heading: null };
  };

  // Unicode fraction as the very first character: take that one character as
  // the quantity and split the rest into unit + name as usual.
  const leadingFraction = UNICODE_FRACTION_MAP[working.charAt(0)];
  if (leadingFraction !== undefined) {
    const tail = working.slice(1).trimStart();
    if (tail.length > 0) {
      return build(clampQuantity(leadingFraction), tail);
    }
  }

  // Mixed number with a Unicode fraction ("1½ TL Salz", "1 ½ TL Salz").
  // The pattern accepts any single character after the digits; the map
  // lookup decides whether it really is a fraction.
  const mixedUnicode = /^(\d+)\s*(\S)\s*(\S.*)$/.exec(working);
  if (mixedUnicode) {
    const fraction = UNICODE_FRACTION_MAP[mixedUnicode[2]];
    if (fraction !== undefined) {
      return build(clampQuantity(Number(mixedUnicode[1]) + fraction), mixedUnicode[3]);
    }
  }

  // Mixed number with an ASCII fraction ("1 1/2 TL Zucker"). A zero
  // denominator is not a number; such text falls through to the generic path.
  const mixedAscii = /^(\d+)\s+(\d+)\/(\d+)\s+(.+)$/.exec(working);
  if (mixedAscii) {
    const denominator = Number(mixedAscii[3]);
    if (denominator !== 0) {
      const value = Number(mixedAscii[1]) + Number(mixedAscii[2]) / denominator;
      return build(clampQuantity(value), mixedAscii[4]);
    }
  }

  // Generic path: number, range (hyphen, en dash or em dash) or ASCII
  // fraction, followed by whitespace and the rest of the line.
  const qtyPattern =
    /^((?:\d+[.,]?\d*(?:\s*[-\u2013\u2014]\s*\d+[.,]?\d*)?)|(?:\d+\/\d+))\s+(.+)$/;
  const qtyMatch = qtyPattern.exec(working);
  if (!qtyMatch) {
    return {
      position,
      quantity: null,
      unit: null,
      name: working,
      note,
      raw_text: rawText,
      section_heading: null
    };
  }

  return build(clampQuantity(parseQuantity(qtyMatch[1])), qtyMatch[2]);
}