import { parseHTML } from 'linkedom';
import { parseIso8601Duration } from './iso8601-duration';
import { parseIngredient } from './ingredient';
import type { Recipe, Step } from '$lib/types';

/** A loosely-typed JSON-LD object; every property must be narrowed before use. */
type JsonLdNode = Record<string, unknown>;

/** A non-null result of `parseIngredient`. */
type ParsedIngredient = NonNullable<ReturnType<typeof parseIngredient>>;

/**
 * Flattens a parsed JSON-LD value into a list of candidate nodes.
 *
 * Arrays are flattened recursively. An object carrying a truthy `@graph`
 * is replaced by the nodes inside that graph (the wrapper itself is dropped);
 * any other object is returned as a single node. Non-objects yield `[]`.
 */
function unwrapGraph(node: unknown): JsonLdNode[] {
  if (Array.isArray(node)) return node.flatMap((item) => unwrapGraph(item));
  if (node && typeof node === 'object') {
    const obj = node as JsonLdNode;
    if (obj['@graph']) return unwrapGraph(obj['@graph']);
    return [obj];
  }
  return [];
}

/**
 * Reports whether a JSON-LD `@type` value designates a recipe.
 *
 * Accepts the bare name `Recipe`, any string ending in `/Recipe`
 * (e.g. a full IRI), or an array in which at least one entry matches.
 */
function isRecipeType(t: unknown): boolean {
  if (typeof t === 'string') return t === 'Recipe' || t.endsWith('/Recipe');
  if (Array.isArray(t)) return t.some((entry) => isRecipeType(entry));
  return false;
}

/**
 * Coerces a JSON-LD value to a trimmed, non-empty string.
 *
 * - string: trimmed; blank becomes `null`
 * - non-empty array: only the first element is considered
 * - object: the first non-blank of its `name`, then `text`, string properties
 *
 * @returns the text, or `null` when nothing usable is present (never `''`).
 */
function toText(v: unknown): string | null {
  if (typeof v === 'string') return v.trim() || null;
  if (Array.isArray(v) && v.length > 0) return toText(v[0]);
  if (v && typeof v === 'object') {
    const o = v as JsonLdNode;
    for (const key of ['name', 'text']) {
      const candidate = o[key];
      if (typeof candidate === 'string') {
        const trimmed = candidate.trim();
        // A blank `name` must not mask a usable `text`, and must not leak out as ''.
        if (trimmed) return trimmed;
      }
    }
  }
  return null;
}

/**
 * Extracts an image URL from a JSON-LD `image` value.
 *
 * A string is returned as-is (not trimmed), a non-empty array defers to its
 * first element, and an object contributes its string `url` property.
 */
function toImageUrl(v: unknown): string | null {
  if (typeof v === 'string') return v;
  if (Array.isArray(v) && v.length > 0) return toImageUrl(v[0]);
  if (v && typeof v === 'object') {
    const o = v as JsonLdNode;
    if (typeof o.url === 'string') return o.url;
  }
  return null;
}

/**
 * Normalises a value to a list of non-empty strings.
 *
 * Array entries are converted with `toText` and unusable ones dropped;
 * a plain string is treated as a comma-separated list.
 */
function toStringArray(v: unknown): string[] {
  if (Array.isArray(v)) {
    return v.map((x) => toText(x)).filter((x): x is string => x !== null);
  }
  if (typeof v === 'string') {
    return v
      .split(',')
      .map((s) => s.trim())
      .filter(Boolean);
  }
  return [];
}

/**
 * Flattens `recipeInstructions` into an ordered list of steps.
 *
 * Handles plain strings, arrays (recursively), `HowToSection` objects
 * (their `itemListElement` is walked), and any object with a non-blank
 * string `text`. Positions are 1-based and assigned in encounter order.
 */
function toSteps(v: unknown): Step[] {
  const out: Step[] = [];

  const push = (text: string): void => {
    const trimmed = text.trim();
    if (trimmed) out.push({ position: out.length + 1, text: trimmed });
  };

  const walk = (x: unknown): void => {
    if (Array.isArray(x)) {
      for (const item of x) walk(item);
      return;
    }
    if (typeof x === 'string') {
      push(x);
      return;
    }
    if (x && typeof x === 'object') {
      const obj = x as JsonLdNode;
      if (obj['@type'] === 'HowToSection' && obj.itemListElement) {
        walk(obj.itemListElement);
        return;
      }
      // Covers HowToStep as well as any other node that carries step text.
      if (typeof obj.text === 'string') push(obj.text);
    }
  };

  walk(v);
  return out;
}

/**
 * Derives a serving count from a `recipeYield` value.
 *
 * A finite number is truncated toward zero; a string contributes its first
 * run of digits (so "4-6 servings" yields 4); a non-empty array defers to
 * its first element.
 */
function toServings(v: unknown): number | null {
  if (typeof v === 'number' && Number.isFinite(v)) return Math.trunc(v);
  if (typeof v === 'string') {
    const m = /(\d+)/.exec(v);
    if (m) return parseInt(m[1], 10);
  }
  if (Array.isArray(v) && v.length > 0) return toServings(v[0]);
  return null;
}

/**
 * Finds the first JSON-LD node typed as a recipe in an HTML document.
 *
 * Every `<script type="application/ld+json">` element is scanned in document
 * order. Scripts whose content is not valid JSON are skipped.
 */
function findRecipeNode(html: string): JsonLdNode | null {
  const { document } = parseHTML(html);
  const scripts = document.querySelectorAll('script[type="application/ld+json"]');
  for (const script of scripts) {
    const raw = script.textContent;
    if (!raw) continue;
    try {
      const parsed: unknown = JSON.parse(raw);
      for (const node of unwrapGraph(parsed)) {
        if (isRecipeType(node['@type'])) return node;
      }
    } catch {
      // Malformed JSON-LD: deliberately ignored so later scripts are still scanned.
    }
  }
  return null;
}

/**
 * Builds a `Recipe` from the JSON-LD recipe metadata embedded in a page.
 *
 * @param html - full HTML source of the page
 * @returns the extracted recipe, or `null` when the page has no recipe node
 *          or the node has no usable `name`
 *
 * `id`, `source_domain` and `servings_unit` are always `null` here.
 * Non-string `recipeIngredient` entries are dropped, but the position passed
 * to `parseIngredient` is the entry's 1-based index in the original array.
 */
export function extractRecipeFromHtml(html: string): Recipe | null {
  const node = findRecipeNode(html);
  if (!node) return null;

  const title = toText(node.name);
  if (!title) return null;

  const ingredients = Array.isArray(node.recipeIngredient)
    ? (node.recipeIngredient as unknown[])
        .map((x, i) => (typeof x === 'string' ? parseIngredient(x, i + 1) : null))
        .filter((x): x is ParsedIngredient => x !== null)
    : [];

  // Only string durations are forwarded; anything else is passed as undefined.
  // NOTE(review): result presumably is minutes (fields are *_time_min) — confirm
  // against parseIso8601Duration.
  const duration = (value: unknown) =>
    parseIso8601Duration(typeof value === 'string' ? value : undefined);

  // A Set de-duplicates tags while keeping first-seen order.
  const tags = new Set<string>([
    ...toStringArray(node.recipeCategory),
    ...toStringArray(node.recipeCuisine),
    ...toStringArray(node.keywords)
  ]);

  return {
    id: null,
    title,
    description: toText(node.description),
    source_url: typeof node.url === 'string' ? node.url : null,
    source_domain: null,
    image_path: toImageUrl(node.image),
    servings_default: toServings(node.recipeYield),
    servings_unit: null,
    prep_time_min: duration(node.prepTime),
    cook_time_min: duration(node.cookTime),
    total_time_min: duration(node.totalTime),
    cuisine: toText(node.recipeCuisine),
    category: toText(node.recipeCategory),
    ingredients,
    steps: toSteps(node.recipeInstructions),
    tags: [...tags]
  };
}