import type { Ingredient } from '$lib/types';

/**
 * Unit tokens recognised directly after a quantity. Lookup is an exact,
 * case-sensitive `Set.has` match on the first whitespace-delimited token.
 */
const UNITS = new Set([
  'g',
  'kg',
  'ml',
  'l',
  'cl',
  'dl',
  'TL',
  'EL',
  'Prise',
  'Pck.',
  'Pkg',
  'Becher',
  'Stk',
  'Stück',
  'Bund',
  'Tasse',
  'Dose'
]);

/** Pre-computed values for the most common ASCII fractions. */
const FRACTION_MAP: Record<string, number> = {
  '1/2': 0.5,
  '1/3': 1 / 3,
  '2/3': 2 / 3,
  '1/4': 0.25,
  '3/4': 0.75
};

// Vulgar-fraction code points — these show up regularly in German-language
// recipe sources (Chefkoch et al. emit them occasionally, Apple's Food app,
// Fork etc. more often).
const UNICODE_FRACTION_MAP: Record<string, number> = {
  '\u00BD': 0.5, // ½
  '\u00BC': 0.25, // ¼
  '\u00BE': 0.75, // ¾
  '\u2150': 1 / 7, // ⅐
  '\u2151': 1 / 9, // ⅑
  '\u2152': 1 / 10, // ⅒
  '\u2153': 1 / 3, // ⅓
  '\u2154': 2 / 3, // ⅔
  '\u2155': 0.2, // ⅕
  '\u2156': 0.4, // ⅖
  '\u2157': 0.6, // ⅗
  '\u2158': 0.8, // ⅘
  '\u2159': 1 / 6, // ⅙
  '\u215A': 5 / 6, // ⅚
  '\u215B': 0.125, // ⅛
  '\u215C': 0.375, // ⅜
  '\u215D': 0.625, // ⅝
  '\u215E': 0.875 // ⅞
};

// Quantities outside this range are almost certainly parse garbage (e.g. a
// microformat date or a phone number inside a JSON-LD quantity field). We
// return null for them; raw_text is still kept for the UI.
const MAX_REASONABLE_QTY = 10000;

/**
 * Rejects implausible quantities.
 *
 * @param n Parsed quantity, or null when nothing could be parsed.
 * @returns `n` when it is finite and in the range (0, MAX_REASONABLE_QTY];
 *   null for null, NaN, ±Infinity, zero, negative or too-large values.
 */
function clampQuantity(n: number | null): number | null {
  if (n === null || !Number.isFinite(n)) return null;
  if (n <= 0) return null;
  if (n > MAX_REASONABLE_QTY) return null;
  return n;
}

/**
 * Converts a quantity token into a number.
 *
 * Supported forms:
 * - ASCII fractions: "1/2", "3/8" (any numerator/denominator)
 * - ranges with hyphen or en dash: "2-3", "1,5 – 2" (the lower bound is used)
 * - decimals with either "." or "," as separator: "1.5", "1,5"
 *
 * @param raw Quantity token; surrounding whitespace is ignored.
 * @returns The numeric value, or null when the token is not a number or the
 *   fraction has a zero denominator.
 */
function parseQuantity(raw: string): number | null {
  const trimmed = raw.trim();

  // Own-property check: a plain index lookup would also hit inherited keys
  // such as "constructor" and return a non-number.
  if (Object.prototype.hasOwnProperty.call(FRACTION_MAP, trimmed)) {
    return FRACTION_MAP[trimmed] ?? null;
  }

  // Any other fraction. parseFloat("1/8") would stop at the slash and yield
  // the numerator (1), so fractions have to be evaluated explicitly.
  const fractionMatch = /^(\d+)\s*\/\s*(\d+)$/.exec(trimmed);
  if (fractionMatch) {
    const numerator = Number(fractionMatch[1]);
    const denominator = Number(fractionMatch[2]);
    return denominator === 0 ? null : numerator / denominator;
  }

  // Range: use the lower bound.
  const rangeMatch = /^(\d+[.,]?\d*)\s*[-–]\s*\d+[.,]?\d*$/.exec(trimmed);
  if (rangeMatch) {
    return parseFloat((rangeMatch[1] ?? '').replace(',', '.'));
  }

  const num = parseFloat(trimmed.replace(',', '.'));
  return Number.isFinite(num) ? num : null;
}

// Splits "TL Salz" → unit "TL", name "Salz"; "Zitrone" → unit null, name "Zitrone".
/**
 * Splits the text that follows a quantity into a unit and an ingredient name.
 *
 * The first whitespace-delimited token is treated as the unit only when it is
 * a member of `UNITS` and at least one further token follows; otherwise the
 * whole (trimmed) text is the name.
 *
 * @param rest Text after the quantity, e.g. "TL Salz" or "Zitrone".
 * @returns `{ unit, name }`; `unit` is null when no known unit was found.
 */
function splitUnitAndName(rest: string): { unit: string | null; name: string } {
  const trimmed = rest.trim();
  const match = /^(\S+)\s+(.+)$/.exec(trimmed);
  const token = match?.[1];
  const remainder = match?.[2];
  if (token !== undefined && remainder !== undefined && UNITS.has(token)) {
    return { unit: token, name: remainder.trim() };
  }
  return { unit: null, name: trimmed };
}

/**
 * Reads a quantity written with a Unicode vulgar fraction at the start of
 * `text`: either the fraction alone ("½ TL Salz") or a whole number followed
 * by the fraction, with or without a space ("1½ TL Salz", "1 ½ TL Salz").
 *
 * @returns The numeric value and the remaining text, or null when `text` does
 *   not start that way or nothing follows the fraction.
 */
function readUnicodeQuantity(text: string): { quantity: number; rest: string } | null {
  const wholeMatch = /^(\d+)\s*/.exec(text);
  const offset = wholeMatch ? wholeMatch[0].length : 0;
  const fraction = UNICODE_FRACTION_MAP[text.charAt(offset)];
  if (fraction === undefined) return null;

  const rest = text.slice(offset + 1).trimStart();
  // A bare fraction with nothing after it is left to the generic path.
  if (rest.length === 0) return null;

  const whole = wholeMatch ? Number(wholeMatch[1]) : 0;
  return { quantity: whole + fraction, rest };
}

/**
 * Parses one free-text ingredient line into its structured parts.
 *
 * Steps:
 * 1. The first parenthesised group becomes `note` and is removed from the text.
 * 2. A leading quantity is recognised in these forms: Unicode fraction
 *    ("½", "1½", "1 ½"), ASCII mixed number ("1 1/2"), plain number, decimal,
 *    range ("2-3") or ASCII fraction ("1/2").
 * 3. The remainder is split into unit and name via `splitUnitAndName`.
 *
 * When no quantity is found, the whole text (minus the note) is the name and
 * `quantity`/`unit` are null. Quantities are passed through `clampQuantity`,
 * so implausible values become null. `raw_text` always holds the trimmed input.
 *
 * @param raw Ingredient line as found in the source.
 * @param position Value stored in the result's `position` field.
 * @returns The parsed ingredient; `section_heading` is always null.
 */
export function parseIngredient(raw: string, position = 0): Ingredient {
  const rawText = raw.trim();
  let working = rawText;
  let note: string | null = null;

  const noteMatch = /\(([^)]+)\)/.exec(working);
  if (noteMatch) {
    note = (noteMatch[1] ?? '').trim();
    const before = working.slice(0, noteMatch.index);
    const after = working.slice(noteMatch.index + noteMatch[0].length);
    // Cutting "(…)" out of the middle would otherwise leave the whitespace
    // from both sides behind ("Mehl  gesiebt"); keep exactly one space.
    const seam = /\s$/.test(before) || /^\s/.test(after) ? ' ' : '';
    working = (before.trimEnd() + seam + after.trimStart()).trim();
  }

  const build = (quantity: number | null, unit: string | null, name: string): Ingredient => ({
    position,
    quantity,
    unit,
    name,
    note,
    raw_text: rawText,
    section_heading: null
  });

  // Unicode fraction at the start (optionally after a whole number)? Take it
  // as the quantity and split the rest into unit + name as usual.
  const unicodeQuantity = readUnicodeQuantity(working);
  if (unicodeQuantity) {
    const { unit, name } = splitUnitAndName(unicodeQuantity.rest);
    return build(clampQuantity(unicodeQuantity.quantity), unit, name);
  }

  // ASCII mixed number such as "1 1/2 TL Salz". Only accepted when the second
  // part is a proper fraction (0 < f < 1); anything else falls through to the
  // generic pattern below.
  const mixedMatch = /^(\d+)\s+(\d+\/\d+)\s+(.+)$/.exec(working);
  if (mixedMatch) {
    const whole = parseQuantity(mixedMatch[1] ?? '');
    const fraction = parseQuantity(mixedMatch[2] ?? '');
    if (whole !== null && fraction !== null && fraction > 0 && fraction < 1) {
      const { unit, name } = splitUnitAndName(mixedMatch[3] ?? '');
      return build(clampQuantity(whole + fraction), unit, name);
    }
  }

  const qtyPattern = /^((?:\d+[.,]?\d*(?:\s*[-–]\s*\d+[.,]?\d*)?)|(?:\d+\/\d+))\s+(.+)$/;
  const qtyMatch = qtyPattern.exec(working);
  if (!qtyMatch) {
    return build(null, null, working);
  }

  const { unit, name } = splitUnitAndName(qtyMatch[2] ?? '');
  return build(clampQuantity(parseQuantity(qtyMatch[1] ?? '')), unit, name);
}