2026-04-17 15:04:05 +02:00
|
|
|
import { describe, it, expect } from 'vitest';
|
|
|
|
|
import { readFileSync } from 'node:fs';
|
|
|
|
|
import { dirname, join } from 'node:path';
|
|
|
|
|
import { fileURLToPath } from 'node:url';
|
2026-04-18 08:32:18 +02:00
|
|
|
import {
|
|
|
|
|
extractRecipeFromHtml,
|
|
|
|
|
hasRecipeMarkup
|
|
|
|
|
} from '../../src/lib/server/parsers/json-ld-recipe';
|
2026-04-17 15:04:05 +02:00
|
|
|
|
|
|
|
|
// Directory containing this test file. ES modules have no `__dirname`, so it
// is derived from the module URL instead.
const thisFilePath = fileURLToPath(import.meta.url);
const here = dirname(thisFilePath);
|
|
|
|
|
|
|
|
|
|
/**
 * Reads a fixture file as UTF-8 text.
 *
 * @param name - File name resolved against the `../fixtures` directory
 *   relative to this test file.
 * @returns The file contents decoded as UTF-8.
 */
function load(name: string): string {
  const fixturePath = join(here, '../fixtures', name);
  return readFileSync(fixturePath, 'utf8');
}
|
|
|
|
|
|
|
|
|
|
// JSON-LD path of extractRecipeFromHtml: two saved real-world pages plus two
// inline documents that must yield no recipe at all.
describe('extractRecipeFromHtml', () => {
  it('extracts a recipe from Chefkoch HTML', () => {
    const recipe = extractRecipeFromHtml(load('chefkoch-schupfnudeln.html'));

    expect(recipe).not.toBeNull();
    const { title, ingredients, steps } = recipe!;
    expect(title.toLowerCase()).toContain('schupfnudel');
    expect(ingredients.length).toBeGreaterThan(2);
    expect(steps.length).toBeGreaterThan(0);
  });

  it('extracts a recipe from Emmi kocht einfach HTML', () => {
    const recipe = extractRecipeFromHtml(load('emmi-bolognese.html'));

    expect(recipe).not.toBeNull();
    const { title, ingredients, steps } = recipe!;
    expect(title.toLowerCase()).toContain('bolognese');
    expect(ingredients.length).toBeGreaterThan(0);
    expect(steps.length).toBeGreaterThan(0);
  });

  it('returns null when no Recipe JSON-LD present', () => {
    const plainPage = '<html><body><p>no recipe</p></body></html>';
    const result = extractRecipeFromHtml(plainPage);
    expect(result).toBeNull();
  });

  it('returns null when JSON-LD has only non-Recipe types', () => {
    // The page carries structured data, but only of @type "Organization".
    const organizationOnlyPage = `
      <html><head>
        <script type="application/ld+json">{"@context":"https://schema.org","@type":"Organization","name":"Foo"}</script>
      </head></html>`;
    const result = extractRecipeFromHtml(organizationOnlyPage);
    expect(result).toBeNull();
  });
});
|
2026-04-18 08:32:18 +02:00
|
|
|
|
2026-04-18 08:52:00 +02:00
|
|
|
// Microdata path of extractRecipeFromHtml: pages that describe the recipe
// with itemscope/itemprop attributes instead of a JSON-LD <script> block.
// Every test asserts a non-null result before dereferencing it, so a failed
// extraction reports a readable assertion failure rather than a TypeError.
describe('extractRecipeFromHtml — Microdata fallback', () => {
  it('extracts title, ingredients and HowToStep instructions', () => {
    const html = `<!doctype html><html><body>
      <article itemscope itemtype="https://schema.org/Recipe">
        <h1 itemprop="name">Königsberger Klopse</h1>
        <img itemprop="image" src="/img/klopse.jpg" />
        <p itemprop="description">Klassische Königsberger Klopse.</p>
        <meta itemprop="prepTime" content="PT20M" />
        <meta itemprop="cookTime" content="PT25M" />
        <span itemprop="recipeYield">4</span>
        <span itemprop="recipeCuisine">Ostpreußisch</span>
        <ul>
          <li itemprop="recipeIngredient">500 g Hackfleisch gemischt</li>
          <li itemprop="recipeIngredient">1 Zwiebel, fein gewürfelt</li>
          <li itemprop="recipeIngredient">2 EL Kapern</li>
        </ul>
        <ol>
          <li itemprop="recipeInstructions" itemscope itemtype="https://schema.org/HowToStep">
            <span itemprop="text">Hackfleisch und Zwiebel vermengen.</span>
          </li>
          <li itemprop="recipeInstructions" itemscope itemtype="https://schema.org/HowToStep">
            <span itemprop="text">Klopse formen und in Salzwasser garen.</span>
          </li>
        </ol>
      </article>
    </body></html>`;
    const r = extractRecipeFromHtml(html);
    expect(r).not.toBeNull();
    expect(r!.title).toBe('Königsberger Klopse');
    expect(r!.ingredients.length).toBe(3);
    expect(r!.ingredients[0].raw_text).toContain('Hackfleisch');
    expect(r!.steps.length).toBe(2);
    expect(r!.steps[1].text).toContain('Klopse formen');
    // The ISO-8601 durations PT20M / PT25M are expected back as whole minutes.
    expect(r!.prep_time_min).toBe(20);
    expect(r!.cook_time_min).toBe(25);
    expect(r!.servings_default).toBe(4);
    expect(r!.cuisine).toBe('Ostpreußisch');
    expect(r!.image_path).toBe('/img/klopse.jpg');
  });

  it('handles plain-text recipeInstructions without HowToStep', () => {
    // Uses the http:// form of the itemtype and a bare <ol> for the steps.
    const html = `<html><body>
      <div itemscope itemtype="http://schema.org/Recipe">
        <span itemprop="name">Test</span>
        <span itemprop="recipeIngredient">1 Apfel</span>
        <div itemprop="recipeInstructions">
          <ol>
            <li>Schälen.</li>
            <li>Essen.</li>
          </ol>
        </div>
      </div>
    </body></html>`;
    const r = extractRecipeFromHtml(html);
    expect(r).not.toBeNull();
    expect(r!.steps.length).toBe(2);
    expect(r!.steps[0].text).toBe('Schälen.');
  });

  it('splits a single HowToStep containing "1.<br>2.<br>3." into separate steps', () => {
    const html = `<html><body>
      <div itemscope itemtype="https://schema.org/Recipe">
        <span itemprop="name">Multi-step</span>
        <span itemprop="recipeIngredient">x</span>
        <div itemprop="recipeInstructions" itemscope itemtype="https://schema.org/HowToStep">
          <p itemprop="text">1. Teig kneten.<br>2. Gehen lassen.<br>3. Backen.</p>
        </div>
      </div>
    </body></html>`;
    const r = extractRecipeFromHtml(html);
    expect(r).not.toBeNull();
    expect(r!.steps.length).toBe(3);
    // The leading "1." / "2." / "3." markers must not appear in the step text.
    expect(r!.steps[0].text).toBe('Teig kneten.');
    expect(r!.steps[1].text).toBe('Gehen lassen.');
    expect(r!.steps[2].text).toBe('Backen.');
  });

  it('handles HowToSection wrapping multiple HowToStep itemListElements', () => {
    const html = `<html><body>
      <div itemscope itemtype="https://schema.org/Recipe">
        <span itemprop="name">Sections</span>
        <span itemprop="recipeIngredient">x</span>
        <div itemprop="recipeInstructions" itemscope itemtype="https://schema.org/HowToSection">
          <div itemprop="itemListElement" itemscope itemtype="https://schema.org/HowToStep">
            <span itemprop="text">Erst schneiden.</span>
          </div>
          <div itemprop="itemListElement" itemscope itemtype="https://schema.org/HowToStep">
            <span itemprop="text">Dann kochen.</span>
          </div>
        </div>
      </div>
    </body></html>`;
    const r = extractRecipeFromHtml(html);
    // Guard first so a null result fails here, not as a TypeError below.
    expect(r).not.toBeNull();
    expect(r!.steps.length).toBe(2);
    expect(r!.steps[0].text).toBe('Erst schneiden.');
    expect(r!.steps[1].text).toBe('Dann kochen.');
  });

  it('ignores <img> alt/title content in step text', () => {
    const html = `<html><body>
      <div itemscope itemtype="https://schema.org/Recipe">
        <span itemprop="name">WithIcon</span>
        <span itemprop="recipeIngredient">x</span>
        <div itemprop="recipeInstructions" itemscope itemtype="https://schema.org/HowToStep">
          <span itemprop="text">Teig <img alt="Icon Teig kneten" src="/x.png"> verarbeiten.</span>
        </div>
      </div>
    </body></html>`;
    const r = extractRecipeFromHtml(html);
    // Guard first so a null result fails here, not as a TypeError below.
    expect(r).not.toBeNull();
    expect(r!.steps[0].text).not.toMatch(/Icon Teig kneten/);
    expect(r!.steps[0].text).toMatch(/Teig.*verarbeiten/);
  });

  it('prefers JSON-LD when both are present', () => {
    // Both markups name the recipe differently, so the title shows which won.
    const html = `<html><head>
      <script type="application/ld+json">${JSON.stringify({
        '@type': 'Recipe',
        name: 'From JSON-LD',
        recipeIngredient: ['x'],
        recipeInstructions: ['y']
      })}</script>
    </head><body>
      <div itemscope itemtype="https://schema.org/Recipe">
        <span itemprop="name">From Microdata</span>
      </div>
    </body></html>`;
    const r = extractRecipeFromHtml(html);
    expect(r?.title).toBe('From JSON-LD');
  });
});
|
|
|
|
|
|
2026-04-18 08:32:18 +02:00
|
|
|
// hasRecipeMarkup: checks that JSON-LD and both schema.org microdata URL
// schemes are detected, and that a page without recipe markup is not.
describe('hasRecipeMarkup', () => {
  it('detects JSON-LD Recipe', () => {
    const jsonLdPage = `<html><head>
      <script type="application/ld+json">{"@type":"Recipe","name":"x"}</script>
    </head></html>`;
    const detected = hasRecipeMarkup(jsonLdPage);
    expect(detected).toBe(true);
  });

  it('detects schema.org/Recipe microdata', () => {
    const microdataPage = `<html><body>
      <div itemscope itemtype="https://schema.org/Recipe">
        <span itemprop="name">Königsberger Klopse</span>
      </div>
    </body></html>`;
    const detected = hasRecipeMarkup(microdataPage);
    expect(detected).toBe(true);
  });

  it('also detects http:// variant of schema.org/Recipe', () => {
    const legacySchemeFragment = `<div itemscope itemtype="http://schema.org/Recipe"></div>`;
    const detected = hasRecipeMarkup(legacySchemeFragment);
    expect(detected).toBe(true);
  });

  it('returns false for pages without any recipe markup', () => {
    const plainPage = '<html><body><p>nothing here</p></body></html>';
    const detected = hasRecipeMarkup(plainPage);
    expect(detected).toBe(false);
  });
});
|