feat(search): Treffer ohne Recipe-JSON-LD rausfiltern
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 1m17s

Wir fetchen die Trefferseite sowieso schon fürs Thumbnail — prüfen
jetzt in der gleichen HTML-Parse-Runde, ob überhaupt ein
schema.org/Recipe JSON-LD vorhanden ist. Fehlt es, wird der Treffer
aus der Liste entfernt, weil der Importer auf dieser Seite später
sowieso mit „Diese Seite enthält kein Rezept“ scheitern würde.

- Migration 007: thumbnail_cache.has_recipe (NULL=unbekannt, 0=nein, 1=ja).
- Fetch-Fehler hinterlassen NULL → Treffer bleibt konservativ sichtbar.
- Neuer Export `hasRecipeJsonLd(html)` in json-ld-recipe.ts.
- Alle Cache-Reads/Writes nehmen den neuen Wert mit.

Tests: +2 für Filter/Failover, bestehende Thumbnail-Tests mit
Recipe-JSON-LD-Stub ergänzt, damit sie nicht selber rausgefiltert
werden.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-17 22:20:22 +02:00
parent dbc9646caa
commit 342ea0efc8
4 changed files with 125 additions and 33 deletions

View File

@@ -100,11 +100,62 @@ describe('searchWeb', () => {
expect(receivedPageno).toBe(null);
});
it('drops hits whose page lacks a Recipe JSON-LD', async () => {
  // Fixture bodies: one page carrying a Recipe JSON-LD block, one plain
  // forum page without any JSON-LD at all.
  const recipePage = `<html><head>
<script type="application/ld+json">${JSON.stringify({
    '@type': 'Recipe',
    name: 'Pie',
    image: 'https://cdn.example/pie.jpg'
  })}</script>
</head></html>`;
  const forumPage = '<html><head><title>Forum</title></head><body>Diskussion</body></html>';

  // Only /with-recipe serves the recipe page; every other path gets the forum page.
  const pageServer = createServer((req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(req.url === '/with-recipe' ? recipePage : forumPage);
  });
  await new Promise<void>((resolve) => {
    // Port 0 asks the OS for any free port.
    pageServer.listen(0, '127.0.0.1', () => resolve());
  });
  const { port } = pageServer.address() as AddressInfo;
  const origin = `http://127.0.0.1:${port}`;

  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([
      { url: `${origin}/with-recipe`, title: 'Recipe', content: '' },
      { url: `${origin}/forum-thread`, title: 'Forum', content: '' }
    ]);

    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });

    // Only the hit whose page has a Recipe JSON-LD is expected to remain.
    expect(hits.length).toBe(1);
    expect(hits[0].url.endsWith('/with-recipe')).toBe(true);
  } finally {
    // Always shut the fixture server down, even when an assertion throws.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
it('keeps hit when page fetch fails (unknown recipe status)', async () => {
  // Nothing listens on port 1, so fetching this page fails and the
  // recipe status of the hit stays unknown.
  const unreachableUrl = 'http://127.0.0.1:1/unreachable';

  const db = openInMemoryForTest();
  addDomain(db, '127.0.0.1');
  respondWith([{ url: unreachableUrl, title: 'Unreachable', content: '' }]);

  const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });

  // The hit must remain in the result list despite the failed fetch.
  expect(hits.length).toBe(1);
});
// Smallest possible Recipe JSON-LD <script> tag. Thumbnail-focused tests embed
// it in their fixture pages so enrichAndFilterHits does not discard those hits
// as non-recipe pages.
const RECIPE_LD =
  '<script type="application/ld+json">' +
  JSON.stringify({ '@type': 'Recipe', name: 'stub' }) +
  '</script>';
it('enriches missing thumbnails from og:image', async () => {
const pageServer = createServer((_req, res) => {
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
res.end(
'<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" /></head><body></body></html>'
`<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" />${RECIPE_LD}</head><body></body></html>`
);
});
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
@@ -151,7 +202,7 @@ describe('searchWeb', () => {
const pageServer = createServer((_req, res) => {
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
res.end(
'<html><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>'
`<html><head>${RECIPE_LD}</head><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>`
);
});
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
@@ -172,7 +223,7 @@ describe('searchWeb', () => {
const pageServer = createServer((_req, res) => {
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
res.end(
'<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" /></head></html>'
`<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" />${RECIPE_LD}</head></html>`
);
});
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
@@ -194,7 +245,7 @@ describe('searchWeb', () => {
it('keeps SearXNG thumbnail when page has no image', async () => {
const pageServer = createServer((_req, res) => {
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
res.end('<html><head></head><body>no images here</body></html>');
res.end(`<html><head>${RECIPE_LD}</head><body>no images here</body></html>`);
});
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
const addr = pageServer.address() as AddressInfo;
@@ -217,7 +268,7 @@ describe('searchWeb', () => {
const pageServer = createServer((_req, res) => {
pageHits += 1;
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
res.end('<html><head><meta property="og:image" content="https://cdn.example/c.jpg"></head></html>');
res.end(`<html><head><meta property="og:image" content="https://cdn.example/c.jpg">${RECIPE_LD}</head></html>`);
});
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
const addr = pageServer.address() as AddressInfo;