import { parseHTML } from 'linkedom';
import { parseIso8601Duration } from './iso8601-duration';
import { parseIngredient } from './ingredient';
import type { Recipe, Step } from '$lib/types';

/** A loosely-typed JSON-LD object; every property must be narrowed before use. */
type JsonLdNode = Record<string, unknown>;

/**
 * Flattens a parsed JSON-LD payload into a list of plain nodes.
 * Arrays are flattened recursively and `@graph` containers are replaced by
 * their contents; non-object values yield an empty list.
 */
function unwrapGraph(node: unknown): JsonLdNode[] {
  if (Array.isArray(node)) return node.flatMap(unwrapGraph);
  if (node && typeof node === 'object') {
    const obj = node as JsonLdNode;
    if (obj['@graph']) return unwrapGraph(obj['@graph']);
    return [obj];
  }
  return [];
}

/**
 * Returns true when a JSON-LD `@type` value names the given schema type,
 * either bare (`Recipe`), as an IRI ending in `/Recipe`, or as an array
 * containing such a value.
 */
function isSchemaType(t: unknown, name: string): boolean {
  if (typeof t === 'string') return t === name || t.endsWith(`/${name}`);
  if (Array.isArray(t)) return t.some((x) => isSchemaType(x, name));
  return false;
}

/** Returns true when a JSON-LD `@type` value denotes a Recipe. */
function isRecipeType(t: unknown): boolean {
  return isSchemaType(t, 'Recipe');
}

/**
 * Coerces a JSON-LD value to trimmed text.
 * Strings are trimmed, arrays use their first element, and objects use
 * `name` (preferred) or `text`. Blank results are reported as `null` in
 * every branch so callers never receive an empty string.
 */
function toText(v: unknown): string | null {
  if (typeof v === 'string') return v.trim() || null;
  if (Array.isArray(v) && v.length > 0) return toText(v[0]);
  if (v && typeof v === 'object') {
    const o = v as JsonLdNode;
    if (typeof o.name === 'string') return o.name.trim() || null;
    if (typeof o.text === 'string') return o.text.trim() || null;
  }
  return null;
}

/**
 * Extracts an image URL from a JSON-LD `image` value: a plain string, the
 * first element of an array, or the `url` property of an object.
 */
function toImageUrl(v: unknown): string | null {
  if (typeof v === 'string') return v;
  if (Array.isArray(v) && v.length > 0) return toImageUrl(v[0]);
  if (v && typeof v === 'object') {
    const o = v as JsonLdNode;
    if (typeof o.url === 'string') return o.url;
  }
  return null;
}

/**
 * Normalizes a JSON-LD value to a list of strings.
 * Arrays are mapped through {@link toText} (dropping nulls); a single string
 * is split on commas with blank entries removed.
 */
function toStringArray(v: unknown): string[] {
  if (Array.isArray(v)) {
    return v.map((x) => toText(x)).filter((x): x is string => x !== null);
  }
  if (typeof v === 'string') {
    return v
      .split(',')
      .map((s) => s.trim())
      .filter(Boolean);
  }
  return [];
}

/**
 * Flattens `recipeInstructions` into numbered steps.
 * Accepts strings, arrays, `HowToSection` objects (whose `itemListElement`
 * is walked recursively) and any object carrying a string `text`.
 * Positions are 1-based and assigned in encounter order.
 */
function toSteps(v: unknown): Step[] {
  const out: Step[] = [];
  const push = (raw: string): void => {
    const text = raw.trim();
    if (text) out.push({ position: out.length + 1, text });
  };
  const walk = (x: unknown): void => {
    if (Array.isArray(x)) {
      for (const item of x) walk(item);
      return;
    }
    if (typeof x === 'string') {
      push(x);
      return;
    }
    if (x && typeof x === 'object') {
      const obj = x as JsonLdNode;
      // `@type` may be a bare name, an IRI or an array, so match it the same
      // way Recipe nodes are matched instead of comparing with `===`.
      if (obj.itemListElement && isSchemaType(obj['@type'], 'HowToSection')) {
        walk(obj.itemListElement);
        return;
      }
      // HowToStep and any other object with a `text` string land here.
      if (typeof obj.text === 'string') push(obj.text);
    }
  };
  walk(v);
  return out;
}

/**
 * Parses a `recipeYield` value into an integer serving count.
 * Finite numbers are truncated, strings contribute their first run of
 * digits, and arrays use their first element; anything else gives `null`.
 */
function toServings(v: unknown): number | null {
  if (typeof v === 'number' && Number.isFinite(v)) return Math.trunc(v);
  if (typeof v === 'string') {
    const m = /(\d+)/.exec(v);
    if (m) return parseInt(m[1], 10);
  }
  if (Array.isArray(v) && v.length > 0) return toServings(v[0]);
  return null;
}

/**
 * Scans every `<script type="application/ld+json">` in the HTML and returns
 * the first node whose `@type` denotes a Recipe, or `null` if none is found.
 * Scripts containing malformed JSON are skipped.
 */
function findRecipeNode(html: string): JsonLdNode | null {
  const { document } = parseHTML(html);
  const scripts = document.querySelectorAll('script[type="application/ld+json"]');
  for (const script of scripts) {
    const raw = script.textContent;
    if (!raw) continue;
    try {
      const parsed: unknown = JSON.parse(raw);
      for (const node of unwrapGraph(parsed)) {
        if (isRecipeType(node['@type'])) return node;
      }
    } catch {
      // malformed JSON-LD, keep scanning
    }
  }
  return null;
}

// Microdata alternative to JSON-LD: many SSR sites (including rezeptwelt.de)
// mark recipes up with itemtype="https://schema.org/Recipe" attributes
// instead of application/ld+json.
// A simple regex is enough here: we only need the flag, not the data.
const MICRODATA_RECIPE = /itemtype\s*=\s*["']https?:\/\/schema\.org\/Recipe["']/i;

/**
 * Returns true when the HTML carries recipe markup, either as schema.org
 * microdata (detected by regex) or as a JSON-LD Recipe node.
 * Errors raised while looking for JSON-LD are reported as `false`.
 */
export function hasRecipeMarkup(html: string): boolean {
  if (MICRODATA_RECIPE.test(html)) return true;
  try {
    return findRecipeNode(html) !== null;
  } catch {
    return false;
  }
}

/** @deprecated Use {@link hasRecipeMarkup}. */
export function hasRecipeJsonLd(html: string): boolean {
  return hasRecipeMarkup(html);
}

/**
 * Reads the value of a microdata property element.
 * An explicit `content` attribute wins; otherwise the attribute that carries
 * the value depends on the tag (`href`, `src`, `data`, `value`, `datetime`),
 * falling back to the trimmed text content.
 */
function microdataValueOf(el: Element): string {
  if (el.hasAttribute('content')) return (el.getAttribute('content') ?? '').trim();
  const tag = el.tagName.toLowerCase();
  if (tag === 'meta') return (el.getAttribute('content') ?? '').trim();
  if (tag === 'a' || tag === 'link' || tag === 'area') {
    return (el.getAttribute('href') ?? '').trim();
  }
  if (
    tag === 'img' ||
    tag === 'source' ||
    tag === 'video' ||
    tag === 'audio' ||
    tag === 'embed' ||
    tag === 'iframe' ||
    tag === 'track'
  ) {
    return (el.getAttribute('src') ?? '').trim();
  }
  if (tag === 'object') return (el.getAttribute('data') ?? '').trim();
  if (tag === 'data' || tag === 'meter') return (el.getAttribute('value') ?? '').trim();
  if (tag === 'time') return (el.getAttribute('datetime') ?? el.textContent ?? '').trim();
  return (el.textContent ?? '').trim();
}

/** Property name -> elements declaring that `itemprop`, in document order. */
type MicroProps = Map<string, Element[]>;

/**
 * Collects all `itemprop` descendants of `scope`, keyed by property name.
 * An element with several whitespace-separated names is registered under
 * each of them.
 */
function gatherMicrodataProps(scope: Element): MicroProps {
  // Collect every itemprop descendant, but do not descend into nested
  // itemscopes (otherwise e.g. HowToStep.text would land in the main scope).
  const map: MicroProps = new Map();
  function walk(el: Element): void {
    for (const child of Array.from(el.children) as Element[]) {
      const hasProp = child.hasAttribute('itemprop');
      const hasScope = child.hasAttribute('itemscope');
      if (hasProp) {
        const names = (child.getAttribute('itemprop') ?? '').split(/\s+/).filter(Boolean);
        for (const name of names) {
          const arr = map.get(name) ?? [];
          arr.push(child);
          map.set(name, arr);
        }
      }
      if (!hasScope) walk(child);
    }
  }
  walk(scope);
  return map;
}

/** Value of the first element registered for `name`, or `null` if absent or blank. */
function microText(map: MicroProps, name: string): string | null {
  const els = map.get(name);
  if (!els || els.length === 0) return null;
  const v = microdataValueOf(els[0]);
  return v || null;
}

/** Non-empty values of every element registered for `name`. */
function microAllTexts(map: MicroProps, name: string): string[] {
  const els = map.get(name) ?? [];
  return els.map(microdataValueOf).filter((v) => v !== '');
}

/**
 * Extracts numbered steps from microdata `recipeInstructions` elements.
 * A nested itemscope (e.g. HowToStep) contributes its `text` property or,
 * failing that, its own text; a plain element contributes one step per
 * `<li>`, or its whole text when it has no list items.
 */
function microSteps(scope: Element): Step[] {
  const out: Step[] = [];
  let pos = 1;
  // `~=` matches one token of a whitespace-separated itemprop list, which
  // agrees with how gatherMicrodataProps splits multi-valued itemprops.
  const nodes = Array.from(scope.querySelectorAll('[itemprop~="recipeInstructions"]'));
  for (const el of nodes) {
    if (el.hasAttribute('itemscope')) {
      const textEl = el.querySelector('[itemprop="text"]');
      const t = (textEl?.textContent ?? el.textContent ?? '').trim();
      if (t) out.push({ position: pos++, text: t });
      continue;
    }
    const lis = el.querySelectorAll('li');
    if (lis.length > 0) {
      for (const li of Array.from(lis)) {
        const t = (li.textContent ?? '').trim();
        if (t) out.push({ position: pos++, text: t });
      }
    } else {
      const t = (el.textContent ?? '').trim();
      if (t) out.push({ position: pos++, text: t });
    }
  }
  return out;
}

/**
 * Builds a Recipe from schema.org microdata.
 * Returns `null` when the HTML cannot be parsed, no element has an
 * `itemtype` containing `schema.org/Recipe`, or the recipe has no `name`.
 */
export function extractRecipeFromMicrodata(html: string): Recipe | null {
  let document: Document;
  try {
    ({ document } = parseHTML(html));
  } catch {
    return null;
  }
  const scope = document.querySelector('[itemtype*="schema.org/Recipe" i]');
  if (!scope) return null;

  const props = gatherMicrodataProps(scope);
  const title = microText(props, 'name');
  if (!title) return null;

  const ingredients = microAllTexts(props, 'recipeIngredient')
    .map((raw, i) => parseIngredient(raw, i + 1))
    .filter((x): x is NonNullable<typeof x> => x !== null);
  const steps = microSteps(scope);

  const prep = parseIso8601Duration(microText(props, 'prepTime') ?? undefined);
  const cook = parseIso8601Duration(microText(props, 'cookTime') ?? undefined);
  const total = parseIso8601Duration(microText(props, 'totalTime') ?? undefined);

  // Keywords are commonly one comma-separated string; split them so the
  // resulting tags match what the JSON-LD path produces.
  const keywords = microAllTexts(props, 'keywords')
    .flatMap((k) => k.split(','))
    .map((k) => k.trim())
    .filter(Boolean);
  const tags = new Set<string>([
    ...microAllTexts(props, 'recipeCategory'),
    ...microAllTexts(props, 'recipeCuisine'),
    ...keywords
  ]);

  return {
    id: null,
    title,
    description: microText(props, 'description'),
    source_url: microText(props, 'url'),
    source_domain: null,
    image_path: microText(props, 'image'),
    servings_default: toServings(microText(props, 'recipeYield')),
    servings_unit: null,
    prep_time_min: prep,
    cook_time_min: cook,
    total_time_min: total,
    cuisine: microText(props, 'recipeCuisine'),
    category: microText(props, 'recipeCategory'),
    ingredients,
    steps,
    tags: [...tags]
  };
}

/**
 * Builds a Recipe from an HTML page.
 * Prefers a JSON-LD Recipe node and falls back to microdata when the page
 * has none. Returns `null` when no recipe is found or it has no title.
 */
export function extractRecipeFromHtml(html: string): Recipe | null {
  const node = findRecipeNode(html);
  if (!node) {
    // Fall back to microdata: rezeptwelt.de and other SSR sites use it
    // instead of application/ld+json.
    return extractRecipeFromMicrodata(html);
  }

  const title = toText(node.name) ?? '';
  if (!title) return null;

  // JSON-LD allows a single value where a list is expected, so a lone
  // string is treated as a one-element ingredient list.
  const rawIngredients: unknown[] = Array.isArray(node.recipeIngredient)
    ? node.recipeIngredient
    : typeof node.recipeIngredient === 'string'
      ? [node.recipeIngredient]
      : [];
  const ingredients = rawIngredients
    .map((x, i) => (typeof x === 'string' ? parseIngredient(x, i + 1) : null))
    .filter((x): x is NonNullable<typeof x> => x !== null);

  const steps = toSteps(node.recipeInstructions);
  const imageUrl = toImageUrl(node.image);

  const prep = parseIso8601Duration(
    typeof node.prepTime === 'string' ? node.prepTime : undefined
  );
  const cook = parseIso8601Duration(
    typeof node.cookTime === 'string' ? node.cookTime : undefined
  );
  const total = parseIso8601Duration(
    typeof node.totalTime === 'string' ? node.totalTime : undefined
  );

  const tags = new Set<string>([
    ...toStringArray(node.recipeCategory),
    ...toStringArray(node.recipeCuisine),
    ...toStringArray(node.keywords)
  ]);

  return {
    id: null,
    title,
    description: toText(node.description),
    source_url: typeof node.url === 'string' ? node.url : null,
    source_domain: null,
    image_path: imageUrl,
    servings_default: toServings(node.recipeYield),
    servings_unit: null,
    prep_time_min: prep,
    cook_time_min: cook,
    total_time_min: total,
    cuisine: toText(node.recipeCuisine),
    category: toText(node.recipeCategory),
    ingredients,
    steps,
    tags: [...tags]
  };
}