import { parseHTML } from 'linkedom';
import { parseIso8601Duration } from './iso8601-duration';
import { parseIngredient } from './ingredient';
import type { Recipe, Step } from '$lib/types';

/** A loosely typed JSON-LD node: arbitrary keys whose values must be narrowed before use. */
type JsonLdNode = Record<string, unknown>;

/**
 * Flattens a parsed JSON-LD payload into a flat list of object nodes.
 *
 * Arrays are flattened recursively, and an object carrying a truthy `@graph`
 * is replaced by the nodes found inside that graph.
 *
 * @param node - Any parsed JSON value.
 * @returns The object nodes found; empty for primitives and `null`.
 */
function unwrapGraph(node: unknown): JsonLdNode[] {
  if (Array.isArray(node)) return node.flatMap(unwrapGraph);
  if (node && typeof node === 'object') {
    const obj = node as JsonLdNode;
    if (obj['@graph']) return unwrapGraph(obj['@graph']);
    return [obj];
  }
  return [];
}

/**
 * Tells whether a JSON-LD `@type` value denotes a recipe.
 *
 * Accepts the bare name `Recipe`, any string ending in `/Recipe`, or an array
 * containing such a value.
 */
function isRecipeType(t: unknown): boolean {
  if (typeof t === 'string') return t === 'Recipe' || t.endsWith('/Recipe');
  if (Array.isArray(t)) return t.some(isRecipeType);
  return false;
}

/**
 * Extracts a trimmed, non-empty text from a JSON-LD value.
 *
 * Strings are trimmed, arrays contribute their first element, and objects
 * contribute their `name` or, failing that, their `text`.
 *
 * @returns The text, or `null` when nothing non-empty can be extracted.
 */
function toText(v: unknown): string | null {
  if (typeof v === 'string') return v.trim() || null;
  if (Array.isArray(v) && v.length > 0) return toText(v[0]);
  if (v && typeof v === 'object') {
    const o = v as JsonLdNode;
    // Empty or whitespace-only fields count as absent, exactly like the plain
    // string case above, so callers never receive '' instead of null.
    for (const candidate of [o.name, o.text]) {
      if (typeof candidate === 'string') {
        const trimmed = candidate.trim();
        if (trimmed) return trimmed;
      }
    }
  }
  return null;
}

/**
 * Extracts an image URL from a JSON-LD `image` value.
 *
 * Accepts a plain string, an array (first element wins) or an object with a
 * string `url` property.
 *
 * @returns The trimmed URL, or `null` when none is present or it is empty.
 */
function toImageUrl(v: unknown): string | null {
  if (typeof v === 'string') return v.trim() || null;
  if (Array.isArray(v) && v.length > 0) return toImageUrl(v[0]);
  if (v && typeof v === 'object') {
    const o = v as JsonLdNode;
    if (typeof o.url === 'string') return o.url.trim() || null;
  }
  return null;
}

/**
 * Normalizes a JSON-LD value into a list of non-empty strings.
 *
 * Arrays are mapped element-wise through `toText`; a single string is split on
 * commas. Anything else yields an empty list.
 */
function toStringArray(v: unknown): string[] {
  if (Array.isArray(v)) {
    return v.map((x) => toText(x)).filter((x): x is string => x !== null);
  }
  if (typeof v === 'string') {
    return v
      .split(',')
      .map((s) => s.trim())
      .filter(Boolean);
  }
  return [];
}

/**
 * Converts a JSON-LD instructions value into an ordered list of steps.
 *
 * Strings become steps directly, arrays are walked in order, a `HowToSection`
 * with an `itemListElement` is expanded into its children, and any other
 * object contributes its string `text`. Blank texts are skipped. Positions are
 * 1-based and follow the order of discovery.
 */
function toSteps(v: unknown): Step[] {
  const out: Step[] = [];

  const pushText = (raw: string): void => {
    const text = raw.trim();
    if (text) out.push({ position: out.length + 1, text });
  };

  const walk = (x: unknown): void => {
    if (Array.isArray(x)) {
      for (const item of x) walk(item);
      return;
    }
    if (typeof x === 'string') {
      pushText(x);
      return;
    }
    if (x && typeof x === 'object') {
      const obj = x as JsonLdNode;
      if (obj['@type'] === 'HowToSection' && obj.itemListElement) {
        walk(obj.itemListElement);
        return;
      }
      // HowToStep and untyped objects are handled alike: take `text` if present.
      if (typeof obj.text === 'string') pushText(obj.text);
    }
  };

  walk(v);
  return out;
}

/**
 * Extracts a serving count from a JSON-LD yield value.
 *
 * Finite numbers are truncated to an integer, strings contribute their first
 * run of digits, and arrays contribute their first element.
 *
 * @returns The serving count, or `null` when none can be determined.
 */
function toServings(v: unknown): number | null {
  if (typeof v === 'number' && Number.isFinite(v)) return Math.trunc(v);
  if (typeof v === 'string') {
    const m = /(\d+)/.exec(v);
    if (m) return parseInt(m[1], 10);
  }
  if (Array.isArray(v) && v.length > 0) return toServings(v[0]);
  return null;
}

/**
 * Finds the first JSON-LD node of type Recipe in an HTML document.
 *
 * Every `<script type="application/ld+json">` element is parsed in document
 * order; scripts with empty or malformed content are skipped.
 *
 * @param html - Raw HTML source of the page.
 * @returns The first recipe node, or `null` when none is found.
 */
function findRecipeNode(html: string): JsonLdNode | null {
  const { document } = parseHTML(html);
  const scripts = document.querySelectorAll('script[type="application/ld+json"]');
  for (const script of scripts) {
    const raw = script.textContent;
    if (!raw) continue;
    try {
      const parsed: unknown = JSON.parse(raw);
      for (const node of unwrapGraph(parsed)) {
        if (isRecipeType(node['@type'])) return node;
      }
    } catch {
      // Malformed JSON-LD in this script tag; keep scanning the remaining ones.
    }
  }
  return null;
}

// Microdata alternative to JSON-LD: many SSR sites (including rezeptwelt.de)
// use itemtype="https://schema.org/Recipe" markup instead of application/ld+json.
// A simple regex is enough here: only the flag is needed, not the data.
const MICRODATA_RECIPE = /itemtype\s*=\s*["']https?:\/\/schema\.org\/Recipe["']/i;

/**
 * Tells whether the HTML contains recipe markup, either as schema.org
 * microdata or as a JSON-LD Recipe node.
 *
 * Never throws: any failure while scanning for JSON-LD is reported as `false`.
 */
export function hasRecipeMarkup(html: string): boolean {
  if (MICRODATA_RECIPE.test(html)) return true;
  try {
    return findRecipeNode(html) !== null;
  } catch {
    return false;
  }
}

/**
 * @deprecated Use {@link hasRecipeMarkup} instead.
 */
export function hasRecipeJsonLd(html: string): boolean {
  return hasRecipeMarkup(html);
}

/**
 * Reads the value of a microdata property element.
 *
 * An explicit `content` attribute always wins. Otherwise the value comes from
 * the attribute that matches the element's tag (`href`, `src`, `data`,
 * `value`, `datetime`), falling back to the element's text content.
 *
 * @returns The trimmed value; an empty string when nothing is available.
 */
function microdataValueOf(el: Element): string {
  if (el.hasAttribute('content')) return (el.getAttribute('content') ?? '').trim();

  switch (el.tagName.toLowerCase()) {
    case 'meta':
      return (el.getAttribute('content') ?? '').trim();
    case 'a':
    case 'link':
    case 'area':
      return (el.getAttribute('href') ?? '').trim();
    case 'img':
    case 'source':
    case 'video':
    case 'audio':
    case 'embed':
    case 'iframe':
    case 'track':
      return (el.getAttribute('src') ?? '').trim();
    case 'object':
      return (el.getAttribute('data') ?? '').trim();
    case 'data':
    case 'meter':
      return (el.getAttribute('value') ?? '').trim();
    case 'time':
      return (el.getAttribute('datetime') ?? el.textContent ?? '').trim();
    default:
      return (el.textContent ?? '').trim();
  }
}

/** Maps each `itemprop` name to the elements that declare it, in document order. */
type MicroProps = Map<string, Element[]>;

/**
 * Collects the `itemprop` elements that belong to a microdata scope.
 *
 * All descendants carrying `itemprop` are gathered, but nested `itemscope`
 * elements are not entered; otherwise properties of nested items (for example
 * HowToStep.text) would end up in the main scope. An element listing several
 * space-separated names is registered under each of them.
 */
function gatherMicrodataProps(scope: Element): MicroProps {
  const map: MicroProps = new Map();

  function walk(el: Element): void {
    for (const child of Array.from(el.children) as Element[]) {
      const hasProp = child.hasAttribute('itemprop');
      const hasScope = child.hasAttribute('itemscope');
      if (hasProp) {
        const names = (child.getAttribute('itemprop') ?? '').split(/\s+/).filter(Boolean);
        for (const name of names) {
          const arr = map.get(name) ?? [];
          arr.push(child);
          map.set(name, arr);
        }
      }
      // The nested item itself is recorded above, but its own properties are not.
      if (!hasScope) walk(child);
    }
  }

  walk(scope);
  return map;
}

/**
 * Returns the value of the first element registered under `name`.
 *
 * @returns The value, or `null` when the property is absent or its value is empty.
 */
function microText(map: MicroProps, name: string): string | null {
  const first = map.get(name)?.[0];
  if (!first) return null;
  return microdataValueOf(first) || null;
}

/** Returns the non-empty values of all elements registered under `name`. */
function microAllTexts(map: MicroProps, name: string): string[] {
  const els = map.get(name) ?? [];
  return els.map(microdataValueOf).filter((v) => v !== '');
}

// Extract text while preserving line breaks —
// <br> becomes \n, and block-level elements (<p>, <div>, <li>, …) are also
// given newline boundaries.