feat(parser): add JSON-LD schema.org/Recipe extractor

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-17 15:04:05 +02:00
parent 789af122f4
commit 2f3248c9a3
4 changed files with 2266 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
import { describe, it, expect } from 'vitest';
import { readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { extractRecipeFromHtml } from '../../src/lib/server/parsers/json-ld-recipe';
// Directory containing this test module, derived from the module's own URL.
const here = dirname(fileURLToPath(import.meta.url));

/**
 * Reads a fixture file and returns its contents as text.
 *
 * @param name - File name, resolved against the `../fixtures` directory
 *   relative to this module's directory.
 * @returns The file contents decoded as UTF-8.
 */
function load(name: string): string {
  const fixturePath = join(here, '../fixtures', name);
  return readFileSync(fixturePath, 'utf8');
}
describe('extractRecipeFromHtml', () => {
  // Fixture-backed cases. Each saved page must yield a non-null recipe whose
  // lower-cased title contains `titleFragment`, whose ingredient count is
  // strictly greater than `ingredientFloor`, and which has at least one step.
  const fixtureCases = [
    {
      label: 'extracts a recipe from Chefkoch HTML',
      fixture: 'chefkoch-schupfnudeln.html',
      titleFragment: 'schupfnudel',
      ingredientFloor: 2
    },
    {
      label: 'extracts a recipe from Emmi kocht einfach HTML',
      fixture: 'emmi-bolognese.html',
      titleFragment: 'bolognese',
      ingredientFloor: 0
    }
  ];

  for (const { label, fixture, titleFragment, ingredientFloor } of fixtureCases) {
    it(label, () => {
      const recipe = extractRecipeFromHtml(load(fixture));

      // The null check runs first; the non-null assertions below rely on it.
      expect(recipe).not.toBeNull();
      expect(recipe!.title.toLowerCase()).toContain(titleFragment);
      expect(recipe!.ingredients.length).toBeGreaterThan(ingredientFloor);
      expect(recipe!.steps.length).toBeGreaterThan(0);
    });
  }

  // Negative case: a page without any JSON-LD script at all.
  it('returns null when no Recipe JSON-LD present', () => {
    const plainPage = '<html><body><p>no recipe</p></body></html>';
    const result = extractRecipeFromHtml(plainPage);
    expect(result).toBeNull();
  });

  // Negative case: JSON-LD is present but describes an Organization only.
  it('returns null when JSON-LD has only non-Recipe types', () => {
    const organizationOnlyPage = `
<html><head>
<script type="application/ld+json">{"@context":"https://schema.org","@type":"Organization","name":"Foo"}</script>
</head></html>`;
    const result = extractRecipeFromHtml(organizationOnlyPage);
    expect(result).toBeNull();
  });
});