diff --git a/src/lib/server/db/migrations/010_thumbnail_cache_rerun_negatives.sql b/src/lib/server/db/migrations/010_thumbnail_cache_rerun_negatives.sql
new file mode 100644
index 0000000..5e5fac0
--- /dev/null
+++ b/src/lib/server/db/migrations/010_thumbnail_cache_rerun_negatives.sql
@@ -0,0 +1,6 @@
+-- Der Recipe-Detektor prüft ab jetzt zusätzlich zu JSON-LD auch Microdata
+-- (itemtype=schema.org/Recipe). Der Cache kann has_recipe=0-Einträge
+-- enthalten, die mit dem alten Check falsch-negativ waren (z.B. rezeptwelt.de,
+-- das Microdata statt JSON-LD nutzt). Einmalig wegräumen, damit die Seiten
+-- beim nächsten Search neu klassifiziert werden. Reiner Cache-Flush.
+DELETE FROM thumbnail_cache WHERE has_recipe = 0;
diff --git a/src/lib/server/parsers/json-ld-recipe.ts b/src/lib/server/parsers/json-ld-recipe.ts
index 1827bb0..908ae8a 100644
--- a/src/lib/server/parsers/json-ld-recipe.ts
+++ b/src/lib/server/parsers/json-ld-recipe.ts
@@ -106,7 +106,13 @@ function findRecipeNode(html: string): JsonLdNode | null {
return null;
}
-export function hasRecipeJsonLd(html: string): boolean {
+// Microdata-Alternative zum JSON-LD: viele SSR-Sites (inkl. rezeptwelt.de)
+// nutzen <div itemscope itemtype="https://schema.org/Recipe"> statt
+// application/ld+json.
+// Ein einfacher Regex reicht — wir brauchen nur das Flag, nicht die Daten.
+const MICRODATA_RECIPE = /itemtype\s*=\s*["']https?:\/\/schema\.org\/Recipe["']/i;
+
+export function hasRecipeMarkup(html: string): boolean {
+ if (MICRODATA_RECIPE.test(html)) return true;
try {
return findRecipeNode(html) !== null;
} catch {
@@ -114,6 +120,11 @@ export function hasRecipeJsonLd(html: string): boolean {
}
}
+/** @deprecated Use {@link hasRecipeMarkup} instead. */
+export function hasRecipeJsonLd(html: string): boolean {
+ return hasRecipeMarkup(html);
+}
+
export function extractRecipeFromHtml(html: string): Recipe | null {
const node = findRecipeNode(html);
if (!node) return null;
diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts
index e6dc587..dbdcf23 100644
--- a/src/lib/server/search/searxng.ts
+++ b/src/lib/server/search/searxng.ts
@@ -2,7 +2,7 @@ import type Database from 'better-sqlite3';
import { parseHTML } from 'linkedom';
import { listDomains, normalizeDomain } from '../domains/repository';
import { fetchText } from '../http';
-import { hasRecipeJsonLd } from '../parsers/json-ld-recipe';
+import { hasRecipeMarkup } from '../parsers/json-ld-recipe';
export type WebHit = {
url: string;
@@ -235,7 +235,7 @@ async function enrichPageMeta(
});
meta = {
image: extractPageImage(html, url),
- hasRecipe: hasRecipeJsonLd(html) ? 1 : 0
+ hasRecipe: hasRecipeMarkup(html) ? 1 : 0
};
} catch {
// Fetch failed — leave hasRecipe null (unknown) so we don't permanently
@@ -363,9 +363,19 @@ export async function searchWeb(
);
if (opts.enrichThumbnails !== false) {
const enriched = await enrichAndFilterHits(db, hits);
+ const droppedUrls = hits
+ .filter((h) => !enriched.find((e) => e.url === h.url))
+ .map((h) => h.url);
console.log(
- `[searxng] q=${JSON.stringify(trimmed)} pageno=${pageno} enrich=${hits.length} dropped_non_recipe=${hits.length - enriched.length} final=${enriched.length}`
+ `[searxng] q=${JSON.stringify(trimmed)} pageno=${pageno} enrich=${hits.length} dropped_non_recipe=${droppedUrls.length} final=${enriched.length}`
);
+ // Nur die ersten 3 URLs mitloggen, damit das Log nicht explodiert. Genug
+ // um eine Seite manuell zu analysieren („warum wurde die abgelehnt?").
+ if (droppedUrls.length > 0) {
+ console.log(
+ `[searxng] dropped samples: ${droppedUrls.slice(0, 3).join(' | ')}`
+ );
+ }
return enriched;
}
return hits;
diff --git a/tests/unit/json-ld-recipe.test.ts b/tests/unit/json-ld-recipe.test.ts
index fb62dd0..f6f8240 100644
--- a/tests/unit/json-ld-recipe.test.ts
+++ b/tests/unit/json-ld-recipe.test.ts
@@ -2,7 +2,10 @@ import { describe, it, expect } from 'vitest';
import { readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
-import { extractRecipeFromHtml } from '../../src/lib/server/parsers/json-ld-recipe';
+import {
+ extractRecipeFromHtml,
+ hasRecipeMarkup
+} from '../../src/lib/server/parsers/json-ld-recipe';
const here = dirname(fileURLToPath(import.meta.url));
@@ -42,3 +45,31 @@ describe('extractRecipeFromHtml', () => {
expect(extractRecipeFromHtml(html)).toBeNull();
});
});
+
+describe('hasRecipeMarkup', () => {
+ it('detects JSON-LD Recipe', () => {
+    const html = `
+      <script type="application/ld+json">{"@context":"https://schema.org","@type":"Recipe","name":"Testrezept"}</script>
+    `;
+ expect(hasRecipeMarkup(html)).toBe(true);
+ });
+
+ it('detects schema.org/Recipe microdata', () => {
+    const html = `
+      <div itemscope itemtype="https://schema.org/Recipe">
+        <span itemprop="name">Königsberger Klopse</span>
+      </div>
+    `;
+ expect(hasRecipeMarkup(html)).toBe(true);
+ });
+
+ it('also detects http:// variant of schema.org/Recipe', () => {
+    const html = `<div itemscope itemtype="http://schema.org/Recipe"></div>`;
+ expect(hasRecipeMarkup(html)).toBe(true);
+ });
+
+ it('returns false for pages without any recipe markup', () => {
+    const html = '<main><h1>Blog</h1><p>nothing here</p></main>';
+ expect(hasRecipeMarkup(html)).toBe(false);
+ });
+});