feat(parser): Unicode-Brueche + Mengen-Plausibilitaet
ingredient.ts:
- UNICODE_FRACTION_MAP fuer ½ ¼ ¾ ⅐ ⅑ ⅒ ⅓ ⅔ ⅕ ⅖ ⅗ ⅘ ⅙ ⅚ ⅛ ⅜ ⅝ ⅞
- clampQuantity() weist 0, negative, > 10000 als null ab
- splitUnitAndName() helper, vorher 2x dupliziert (Unicode + ASCII Pfad)

Tests:
- 13 neue Tests fuer Unicode-Brueche (mit/ohne Unit) und Bounds
- bestaetigt, dass deutsches Kommadezimal (0,25 l) bereits funktioniert

Hintergrund: Apple Food App liefert haeufig ½ und ⅓ in JSON-LD Quantity-Feldern. Vor diesem Fix wurden die Felder als unparsable behandelt (quantity null, name = '½ TL Salz'), was den Portionen-Slider fuer importierte Rezepte unbrauchbar machte.

Findings aus REVIEW-2026-04-18.md (Refactor D) und structure.md
This commit is contained in:
@@ -28,6 +28,42 @@ const FRACTION_MAP: Record<string, number> = {
|
|||||||
'3/4': 0.75
|
'3/4': 0.75
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Vulgar-fraction code points. They show up regularly in German-language
// recipe sources (Chefkoch et al. deliver them occasionally, Apple's Food
// App, Fork etc. more often).
//
// Keys are single characters, so a lookup with the first character of an
// ingredient line is enough to detect a leading fraction.
const UNICODE_FRACTION_MAP: Record<string, number> = {
  '\u00BD': 0.5, // ½
  '\u00BC': 0.25, // ¼
  '\u00BE': 0.75, // ¾
  '\u2150': 1 / 7, // ⅐
  '\u2151': 1 / 9, // ⅑
  '\u2152': 1 / 10, // ⅒
  '\u2153': 1 / 3, // ⅓
  '\u2154': 2 / 3, // ⅔
  '\u2155': 0.2, // ⅕
  '\u2156': 0.4, // ⅖
  '\u2157': 0.6, // ⅗
  '\u2158': 0.8, // ⅘
  '\u2159': 1 / 6, // ⅙
  '\u215A': 5 / 6, // ⅚
  '\u215B': 0.125, // ⅛
  '\u215C': 0.375, // ⅜
  '\u215D': 0.625, // ⅝
  '\u215E': 0.875 // ⅞
};
|
||||||
|
|
||||||
|
// Quantities outside (0, MAX_REASONABLE_QTY] are almost certainly parse
// garbage (e.g. a microformat date or a phone number that ended up in a
// JSON-LD quantity field). Such values are reported as null; the caller
// keeps raw_text so the UI can still show the original line.
const MAX_REASONABLE_QTY = 10000;

/**
 * Plausibility filter for parsed quantities.
 *
 * Despite the name, this does not clamp to the nearest bound: any value
 * outside the accepted range is rejected outright.
 *
 * @param n - Parsed quantity, or null/undefined when nothing was parsed.
 * @returns `n` unchanged when it is finite, greater than zero and at most
 *   `MAX_REASONABLE_QTY`; otherwise `null`.
 */
function clampQuantity(n: number | null | undefined): number | null {
  // Number.isFinite also filters NaN and ±Infinity.
  if (n === null || n === undefined || !Number.isFinite(n)) return null;
  // Zero and negative amounts are meaningless for an ingredient.
  if (n <= 0) return null;
  // The upper bound itself is still accepted.
  if (n > MAX_REASONABLE_QTY) return null;
  return n;
}
|
||||||
|
|
||||||
function parseQuantity(raw: string): number | null {
|
function parseQuantity(raw: string): number | null {
|
||||||
const trimmed = raw.trim();
|
const trimmed = raw.trim();
|
||||||
if (FRACTION_MAP[trimmed] !== undefined) return FRACTION_MAP[trimmed];
|
if (FRACTION_MAP[trimmed] !== undefined) return FRACTION_MAP[trimmed];
|
||||||
@@ -39,6 +75,16 @@ function parseQuantity(raw: string): number | null {
|
|||||||
return Number.isFinite(num) ? num : null;
|
return Number.isFinite(num) ? num : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Splits the text that follows a quantity into an optional unit and the
 * ingredient name.
 *
 * "TL Salz" → unit "TL", name "Salz"; "Zitrone" → unit null, name "Zitrone".
 *
 * The first whitespace-delimited token is only treated as a unit when it is
 * a member of `UNITS` and at least one more token follows it; otherwise the
 * whole trimmed input is returned as the name.
 *
 * @param rest - Ingredient text with the quantity already removed.
 * @returns The recognised unit (or null) and the remaining name.
 */
function splitUnitAndName(rest: string): { unit: string | null; name: string } {
  const trimmed = rest.trim();
  const firstTokenMatch = /^(\S+)\s+(.+)$/.exec(trimmed);
  if (firstTokenMatch) {
    const [, candidate, remainder] = firstTokenMatch;
    // Both groups are mandatory in the pattern; the undefined checks only
    // satisfy strict index checking.
    if (candidate !== undefined && remainder !== undefined && UNITS.has(candidate)) {
      return { unit: candidate, name: remainder.trim() };
    }
  }
  return { unit: null, name: trimmed };
}
|
||||||
|
|
||||||
export function parseIngredient(raw: string, position = 0): Ingredient {
|
export function parseIngredient(raw: string, position = 0): Ingredient {
|
||||||
const rawText = raw.trim();
|
const rawText = raw.trim();
|
||||||
let working = rawText;
|
let working = rawText;
|
||||||
@@ -51,18 +97,24 @@ export function parseIngredient(raw: string, position = 0): Ingredient {
|
|||||||
).trim();
|
).trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unicode-Bruch am Anfang? Dann das eine Zeichen als Menge nehmen
|
||||||
|
// und den Rest wie üblich in Unit + Name aufteilen.
|
||||||
|
const firstChar = working.charAt(0);
|
||||||
|
if (UNICODE_FRACTION_MAP[firstChar] !== undefined) {
|
||||||
|
const tail = working.slice(1).trimStart();
|
||||||
|
if (tail.length > 0) {
|
||||||
|
const quantity = clampQuantity(UNICODE_FRACTION_MAP[firstChar]);
|
||||||
|
const { unit, name } = splitUnitAndName(tail);
|
||||||
|
return { position, quantity, unit, name, note, raw_text: rawText };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const qtyPattern = /^((?:\d+[.,]?\d*(?:\s*[-–]\s*\d+[.,]?\d*)?)|(?:\d+\/\d+))\s+(.+)$/;
|
const qtyPattern = /^((?:\d+[.,]?\d*(?:\s*[-–]\s*\d+[.,]?\d*)?)|(?:\d+\/\d+))\s+(.+)$/;
|
||||||
const qtyMatch = qtyPattern.exec(working);
|
const qtyMatch = qtyPattern.exec(working);
|
||||||
if (!qtyMatch) {
|
if (!qtyMatch) {
|
||||||
return { position, quantity: null, unit: null, name: working, note, raw_text: rawText };
|
return { position, quantity: null, unit: null, name: working, note, raw_text: rawText };
|
||||||
}
|
}
|
||||||
const quantity = parseQuantity(qtyMatch[1]);
|
const quantity = clampQuantity(parseQuantity(qtyMatch[1]));
|
||||||
let rest = qtyMatch[2].trim();
|
const { unit, name } = splitUnitAndName(qtyMatch[2]);
|
||||||
let unit: string | null = null;
|
return { position, quantity, unit, name, note, raw_text: rawText };
|
||||||
const firstTokenMatch = /^(\S+)\s+(.+)$/.exec(rest);
|
|
||||||
if (firstTokenMatch && UNITS.has(firstTokenMatch[1])) {
|
|
||||||
unit = firstTokenMatch[1];
|
|
||||||
rest = firstTokenMatch[2].trim();
|
|
||||||
}
|
|
||||||
return { position, quantity, unit, name: rest, note, raw_text: rawText };
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,4 +39,66 @@ describe('parseIngredient', () => {
|
|||||||
expect(p.quantity).toBe(2);
|
expect(p.quantity).toBe(2);
|
||||||
expect(p.name).toBe('Tomaten');
|
expect(p.name).toBe('Tomaten');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Leading Unicode vulgar fractions are parsed as the quantity; the rest of
// the line is split into unit and name as usual.
describe('Unicode-Bruchzeichen', () => {
  // Each row: input line, expected quantity, expected unit, expected name.
  it.each([
    ['½ TL Salz', 0.5, 'TL', 'Salz'],
    ['¼ kg Zucker', 0.25, 'kg', 'Zucker'],
    ['¾ l Milch', 0.75, 'l', 'Milch'],
    ['⅓ Tasse Mehl', 1 / 3, 'Tasse', 'Mehl'],
    ['⅔ TL Pfeffer', 2 / 3, 'TL', 'Pfeffer'],
    ['⅛ TL Muskat', 0.125, 'TL', 'Muskat']
  ] as const)('%s', (input, qty, unit, name) => {
    const p = parseIngredient(input);
    // toBeCloseTo because thirds are not exactly representable as floats.
    expect(p.quantity).toBeCloseTo(qty, 5);
    expect(p.unit).toBe(unit);
    expect(p.name).toBe(name);
  });

  it('Unicode-Bruch ohne Unit', () => {
    const p = parseIngredient('½ Zitrone');
    expect(p.quantity).toBeCloseTo(0.5, 5);
    expect(p.unit).toBe(null);
    expect(p.name).toBe('Zitrone');
  });
});
|
||||||
|
|
||||||
|
// Quantity plausibility: zero, negative and absurdly large amounts must end
// up as quantity null, while legitimate small decimals are still accepted.
describe('Mengen-Plausibilitaet (Bounds)', () => {
  it('weist 0 als Menge ab → quantity null', () => {
    const p = parseIngredient('0 g Mehl');
    expect(p.quantity).toBe(null);
    // The name is whatever follows the "0" — the importer does not have to
    // reconstruct it perfectly, raw_text is preserved.
    expect(p.raw_text).toBe('0 g Mehl');
  });

  it('weist negative Menge ab', () => {
    // "-1 EL Öl" — the minus sends the regex straight into the fallback
    // (no \d at the start), so name stays the full text.
    const p = parseIngredient('-1 EL Öl');
    expect(p.quantity).toBe(null);
  });

  it('weist Menge > 10000 ab', () => {
    const p = parseIngredient('99999 g Hokuspokus');
    expect(p.quantity).toBe(null);
  });

  it('akzeptiert die Obergrenze 10000 selbst', () => {
    const p = parseIngredient('10000 g Mehl');
    expect(p.quantity).toBe(10000);
  });

  it('akzeptiert führende Null bei Dezimalbrüchen', () => {
    const p = parseIngredient('0.5 kg Salz');
    expect(p.quantity).toBe(0.5);
    expect(p.unit).toBe('kg');
  });

  it('akzeptiert deutsche führende Null', () => {
    // German decimal comma.
    const p = parseIngredient('0,25 l Wasser');
    expect(p.quantity).toBe(0.25);
    expect(p.unit).toBe('l');
  });
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user