All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 1m15s
Aus dem Log (q="Königsberger klopse"): 11 rezeptwelt-Treffer kamen durch alle URL-Filter, wurden aber von hasRecipeJsonLd als non-recipe gedroppt. Ursache: rezeptwelt.de nutzt Microdata (itemtype=schema.org/Recipe) statt application/ld+json. - hasRecipeJsonLd → hasRecipeMarkup: prüft jetzt zusätzlich per Regex auf itemtype=(https?://)schema.org/Recipe. Alter Export bleibt als Deprecated-Weiterleitung erhalten. - Log zeigt jetzt auch die ersten 3 gedroppten URLs als dropped samples, damit neue Problem-Domains einfach zu diagnostizieren sind. - Migration 010 räumt alle thumbnail_cache-Einträge mit has_recipe=0 aus — die waren mit dem alten Check falsch-negativ und müssen neu klassifiziert werden. Tests: 4 neue Cases für hasRecipeMarkup (JSON-LD, http/https Microdata, Negativ-Fall). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
76 lines
2.5 KiB
TypeScript
76 lines
2.5 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
|
import { readFileSync } from 'node:fs';
|
|
import { dirname, join } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import {
|
|
extractRecipeFromHtml,
|
|
hasRecipeMarkup
|
|
} from '../../src/lib/server/parsers/json-ld-recipe';
|
|
|
|
// Directory containing this test module, derived from `import.meta.url`;
// `load` resolves fixture paths relative to it.
const here = dirname(fileURLToPath(import.meta.url));
|
|
|
|
/**
 * Reads a fixture file as UTF-8 text.
 *
 * @param name - File name inside the `fixtures` directory that sits one level
 *   above this module's directory.
 * @returns The file's contents as a string.
 */
function load(name: string): string {
  const fixturePath = join(here, '../fixtures', name);
  return readFileSync(fixturePath, 'utf8');
}
|
|
|
|
/**
 * Covers `extractRecipeFromHtml`: two fixture pages that must yield a recipe,
 * and two inputs without a Recipe JSON-LD node that must yield `null`.
 */
describe('extractRecipeFromHtml', () => {
  it('extracts a recipe from Chefkoch HTML', () => {
    const recipe = extractRecipeFromHtml(load('chefkoch-schupfnudeln.html'));

    expect(recipe).not.toBeNull();
    // The assertion above has already failed the test if `recipe` is null.
    const { title, ingredients, steps } = recipe!;
    expect(title.toLowerCase()).toContain('schupfnudel');
    expect(ingredients.length).toBeGreaterThan(2);
    expect(steps.length).toBeGreaterThan(0);
  });

  it('extracts a recipe from Emmi kocht einfach HTML', () => {
    const recipe = extractRecipeFromHtml(load('emmi-bolognese.html'));

    expect(recipe).not.toBeNull();
    // The assertion above has already failed the test if `recipe` is null.
    const { title, ingredients, steps } = recipe!;
    expect(title.toLowerCase()).toContain('bolognese');
    expect(ingredients.length).toBeGreaterThan(0);
    expect(steps.length).toBeGreaterThan(0);
  });

  it('returns null when no Recipe JSON-LD present', () => {
    const plainPage = '<html><body><p>no recipe</p></body></html>';
    const result = extractRecipeFromHtml(plainPage);
    expect(result).toBeNull();
  });

  it('returns null when JSON-LD has only non-Recipe types', () => {
    // The only JSON-LD node is an Organization. Markup lines stay flush-left
    // so the literal's content is not padded with extra whitespace.
    const organizationOnly = `
<html><head>
<script type="application/ld+json">{"@context":"https://schema.org","@type":"Organization","name":"Foo"}</script>
</head></html>`;
    const result = extractRecipeFromHtml(organizationOnly);
    expect(result).toBeNull();
  });
});
|
|
|
|
/**
 * Covers `hasRecipeMarkup`: JSON-LD and microdata inputs (both the https and
 * http `itemtype` spellings) must be detected; a page with neither must not.
 */
describe('hasRecipeMarkup', () => {
  it('detects JSON-LD Recipe', () => {
    // Markup lines stay flush-left so the literal's content is not padded.
    const jsonLdPage = `<html><head>
<script type="application/ld+json">{"@type":"Recipe","name":"x"}</script>
</head></html>`;
    const detected = hasRecipeMarkup(jsonLdPage);
    expect(detected).toBe(true);
  });

  it('detects schema.org/Recipe microdata', () => {
    // Markup lines stay flush-left so the literal's content is not padded.
    const microdataPage = `<html><body>
<div itemscope itemtype="https://schema.org/Recipe">
<span itemprop="name">Königsberger Klopse</span>
</div>
</body></html>`;
    const detected = hasRecipeMarkup(microdataPage);
    expect(detected).toBe(true);
  });

  it('also detects http:// variant of schema.org/Recipe', () => {
    const httpMicrodata = '<div itemscope itemtype="http://schema.org/Recipe"></div>';
    const detected = hasRecipeMarkup(httpMicrodata);
    expect(detected).toBe(true);
  });

  it('returns false for pages without any recipe markup', () => {
    const plainPage = '<html><body><p>nothing here</p></body></html>';
    const detected = hasRecipeMarkup(plainPage);
    expect(detected).toBe(false);
  });
});
|