diff --git a/src/lib/server/parsers/json-ld-recipe.ts b/src/lib/server/parsers/json-ld-recipe.ts index 908ae8a..43aaeb9 100644 --- a/src/lib/server/parsers/json-ld-recipe.ts +++ b/src/lib/server/parsers/json-ld-recipe.ts @@ -125,9 +125,152 @@ export function hasRecipeJsonLd(html: string): boolean { return hasRecipeMarkup(html); } +/** Returns the trimmed value of a microdata property element: the content attribute when present, otherwise a tag-specific attribute (href, src, data, value, or datetime with text content as fallback for time), and the text content for every other tag. A missing attribute yields an empty string. */ function microdataValueOf(el: Element): string { + if (el.hasAttribute('content')) return (el.getAttribute('content') ?? '').trim(); + const tag = el.tagName.toLowerCase(); + if (tag === 'meta') return (el.getAttribute('content') ?? '').trim(); + if (tag === 'a' || tag === 'link' || tag === 'area') + return (el.getAttribute('href') ?? '').trim(); + if ( + tag === 'img' || + tag === 'source' || + tag === 'video' || + tag === 'audio' || + tag === 'embed' || + tag === 'iframe' || + tag === 'track' + ) + return (el.getAttribute('src') ?? '').trim(); + if (tag === 'object') return (el.getAttribute('data') ?? '').trim(); + if (tag === 'data' || tag === 'meter') + return (el.getAttribute('value') ?? '').trim(); + if (tag === 'time') + return (el.getAttribute('datetime') ?? el.textContent ?? '').trim(); + return (el.textContent ?? '').trim(); +} + +/* Property name mapped to the elements carrying it. NOTE(review): the type arguments of Map look lost in this view; usage below suggests a Map from string to Element[] — confirm against the repository. */ type MicroProps = Map; + +/** Groups the itemprop elements below scope by property name; an itemprop attribute listing several whitespace-separated names registers the element under each of them. */ function gatherMicrodataProps(scope: Element): MicroProps { + // Collect all itemprop descendants, but do not descend into nested + // itemscopes (otherwise e.g. HowToStep.text would end up in the main scope). + const map: MicroProps = new Map(); + function walk(el: Element) { + for (const child of Array.from(el.children) as Element[]) { + const hasProp = child.hasAttribute('itemprop'); + const hasScope = child.hasAttribute('itemscope'); + if (hasProp) { + const names = (child.getAttribute('itemprop') ?? '') + .split(/\s+/) + .filter(Boolean); + for (const name of names) { + const arr = map.get(name) ??
[]; + arr.push(child); + map.set(name, arr); + } + } + if (!hasScope) walk(child); + } + } + walk(scope); + return map; +} + +/** Value of the first element stored under name, or null when there is no such element or its value is empty. */ function microText(map: MicroProps, name: string): string | null { + const els = map.get(name); + if (!els || els.length === 0) return null; + const v = microdataValueOf(els[0]); + return v || null; +} + +/** Values of all elements stored under name, with empty values dropped. */ function microAllTexts(map: MicroProps, name: string): string[] { + const els = map.get(name) ?? []; + return els.map(microdataValueOf).filter((v) => v !== ''); +} + +/** Collects steps from every descendant of scope with itemprop=recipeInstructions: an element that has itemscope yields one step from its first itemprop=text descendant (or its own text), any other element yields one step per li descendant, or a single step from its whole text when it has no li. Empty texts are skipped and positions count up from 1. */ function microSteps(scope: Element): Step[] { + const out: Step[] = []; + let pos = 1; + const nodes = Array.from(scope.querySelectorAll('[itemprop="recipeInstructions"]')); + for (const el of nodes) { + if (el.hasAttribute('itemscope')) { + const textEl = el.querySelector('[itemprop="text"]'); + const t = (textEl?.textContent ?? el.textContent ?? '').trim(); + if (t) out.push({ position: pos++, text: t }); + } else { + const lis = el.querySelectorAll('li'); + if (lis.length > 0) { + for (const li of Array.from(lis)) { + const t = (li.textContent ?? '').trim(); + if (t) out.push({ position: pos++, text: t }); + } + } else { + const t = (el.textContent ?? '').trim(); + if (t) out.push({ position: pos++, text: t }); + } + } + } + return out; +} + +/** Builds a Recipe from the first element whose itemtype contains schema.org/Recipe (matched case-insensitively). Returns null when parseHTML throws, no such element is found, or the scope has no non-empty name property. */ export function extractRecipeFromMicrodata(html: string): Recipe | null { + let document: Document; + try { + ({ document } = parseHTML(html)); + } catch { + return null; + } + const scope = document.querySelector( + '[itemtype*="schema.org/Recipe" i]' + ); + if (!scope) return null; + const props = gatherMicrodataProps(scope); + + const title = microText(props, 'name'); + if (!title) return null; + + const ingredients = microAllTexts(props, 'recipeIngredient') + .map((raw, i) => parseIngredient(raw, i + 1)) + /* NOTE(review): NonNullable seems to have lost its type argument in this view — confirm against the repository. */ .filter((x): x is NonNullable => x !== null); + + const steps = microSteps(scope); + /* prepTime, cookTime and totalTime go through parseIso8601Duration; a missing property is passed as undefined. */ const prep = parseIso8601Duration(microText(props, 'prepTime') ??
undefined); + const cook = parseIso8601Duration(microText(props, 'cookTime') ?? undefined); + const total = parseIso8601Duration(microText(props, 'totalTime') ?? undefined); + + /* A Set removes duplicate tags across category, cuisine and keywords. */ const tags = new Set([ + ...microAllTexts(props, 'recipeCategory'), + ...microAllTexts(props, 'recipeCuisine'), + ...microAllTexts(props, 'keywords') + ]); + + return { + id: null, + title, + description: microText(props, 'description'), + source_url: microText(props, 'url'), + source_domain: null, + image_path: microText(props, 'image'), + servings_default: toServings(microText(props, 'recipeYield')), + servings_unit: null, + prep_time_min: prep, + cook_time_min: cook, + total_time_min: total, + cuisine: microText(props, 'recipeCuisine'), + category: microText(props, 'recipeCategory'), + ingredients, + steps, + tags: [...tags] + }; +} + export function extractRecipeFromHtml(html: string): Recipe | null { const node = findRecipeNode(html); - if (!node) return null; + if (!node) { + // Fall back to Microdata — rezeptwelt.de & other SSR sites use it + // instead of application/ld+json. + return extractRecipeFromMicrodata(html); + } const title = toText(node.name) ?? ''; if (!title) return null; diff --git a/tests/unit/json-ld-recipe.test.ts b/tests/unit/json-ld-recipe.test.ts index f6f8240..f3443c8 100644 --- a/tests/unit/json-ld-recipe.test.ts +++ b/tests/unit/json-ld-recipe.test.ts @@ -46,6 +46,83 @@ describe('extractRecipeFromHtml', () => { }); }); +describe('extractRecipeFromHtml — Microdata fallback', () => { + it('extracts title, ingredients and HowToStep instructions', () => { + const html = ` +
+

Königsberger Klopse

+ +

Klassische Königsberger Klopse.

+ + + 4 + Ostpreußisch +
    +
  • 500 g Hackfleisch gemischt
  • +
  • 1 Zwiebel, fein gewürfelt
  • +
  • 2 EL Kapern
  • +
+
    +
  1. + Hackfleisch und Zwiebel vermengen. +
  2. +
  3. + Klopse formen und in Salzwasser garen. +
  4. +
+
+ `; + const r = extractRecipeFromHtml(html); + expect(r).not.toBeNull(); + expect(r!.title).toBe('Königsberger Klopse'); + expect(r!.ingredients.length).toBe(3); + expect(r!.ingredients[0].raw_text).toContain('Hackfleisch'); + expect(r!.steps.length).toBe(2); + expect(r!.steps[1].text).toContain('Klopse formen'); + expect(r!.prep_time_min).toBe(20); + expect(r!.cook_time_min).toBe(25); + expect(r!.servings_default).toBe(4); + expect(r!.cuisine).toBe('Ostpreußisch'); + expect(r!.image_path).toBe('/img/klopse.jpg'); + }); + + it('handles plain-text recipeInstructions without HowToStep', () => { + const html = ` +
+ Test + 1 Apfel +
+
    +
  1. Schälen.
  2. +
  3. Essen.
  4. +
+
+
+ `; + const r = extractRecipeFromHtml(html); + expect(r).not.toBeNull(); + expect(r!.steps.length).toBe(2); + expect(r!.steps[0].text).toBe('Schälen.'); + }); + + it('prefers JSON-LD when both are present', () => { + const html = ` + + +
+ From Microdata +
+ `; + const r = extractRecipeFromHtml(html); + expect(r?.title).toBe('From JSON-LD'); + }); +}); + describe('hasRecipeMarkup', () => { it('detects JSON-LD Recipe', () => { const html = `