feat(search): Treffer ohne Recipe-JSON-LD rausfiltern
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 1m17s
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 1m17s
Wir fetchen die Trefferseite sowieso schon fürs Thumbnail — prüfen jetzt in der gleichen HTML-Parse-Runde, ob überhaupt ein schema.org/Recipe-JSON-LD vorhanden ist. Fehlt es, wird der Treffer aus der Liste entfernt, weil der Importer auf dieser Seite später sowieso mit „Diese Seite enthält kein Rezept“ scheitern würde.

- Migration 007: thumbnail_cache.has_recipe (NULL=unbekannt, 0=nein, 1=ja).
- Fetch-Fehler hinterlassen NULL → Treffer bleibt konservativ sichtbar.
- Neuer Export `hasRecipeJsonLd(html)` in json-ld-recipe.ts.
- Alle Cache-Reads/Writes nehmen den neuen Wert mit.

Tests: +2 für Filter/Failover; bestehende Thumbnail-Tests mit Recipe-JSON-LD-Stub ergänzt, damit sie nicht selber rausgefiltert werden.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -100,11 +100,62 @@ describe('searchWeb', () => {
|
||||
expect(receivedPageno).toBe(null);
|
||||
});
|
||||
|
||||
// SearXNG reports two hits served by a local page server; only the page that
// embeds a Recipe JSON-LD is expected to remain in the result list.
it('drops hits whose page lacks a Recipe JSON-LD', async () => {
  const recipeJson = JSON.stringify({
    '@type': 'Recipe',
    name: 'Pie',
    image: 'https://cdn.example/pie.jpg'
  });
  const pageServer = createServer((req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    if (req.url !== '/with-recipe') {
      // Any other path behaves like a forum page: HTML without Recipe JSON-LD.
      res.end('<html><head><title>Forum</title></head><body>Diskussion</body></html>');
      return;
    }
    res.end(`<html><head>
<script type="application/ld+json">${recipeJson}</script>
</head></html>`);
  });
  // Port 0 lets the OS pick a free port; read it back once listening.
  await new Promise<void>((resolve) => pageServer.listen(0, '127.0.0.1', resolve));
  const { port } = pageServer.address() as AddressInfo;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([
      { url: `http://127.0.0.1:${port}/with-recipe`, title: 'Recipe', content: '' },
      { url: `http://127.0.0.1:${port}/forum-thread`, title: 'Forum', content: '' }
    ]);
    const results = await searchWeb(db, 'x', { searxngUrl: baseUrl });
    expect(results.length).toBe(1);
    expect(results[0].url.endsWith('/with-recipe')).toBe(true);
  } finally {
    // Shut the page server down even when an assertion above throws.
    await new Promise<void>((done) => pageServer.close(() => done()));
  }
});
|
||||
|
||||
// A hit whose page cannot be fetched must stay in the result list.
it('keeps hit when page fetch fails (unknown recipe status)', async () => {
  // Nobody listens on port 1, so fetching this page fails.
  const unreachableHit = {
    url: 'http://127.0.0.1:1/unreachable',
    title: 'Unreachable',
    content: ''
  };
  const db = openInMemoryForTest();
  addDomain(db, '127.0.0.1');
  respondWith([unreachableHit]);
  const results = await searchWeb(db, 'x', { searxngUrl: baseUrl });
  expect(results.length).toBe(1);
});
|
||||
|
||||
// Bare-bones Recipe JSON-LD <script> tag. Tests that focus on thumbnail
// extraction embed it in their pages so that enrichAndFilterHits does not
// drop their hits as non-recipe pages.
const RECIPE_LD =
  '<script type="application/ld+json">' +
  JSON.stringify({ '@type': 'Recipe', name: 'stub' }) +
  '</script>';
|
||||
|
||||
it('enriches missing thumbnails from og:image', async () => {
|
||||
const pageServer = createServer((_req, res) => {
|
||||
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
|
||||
res.end(
|
||||
'<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" /></head><body></body></html>'
|
||||
`<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" />${RECIPE_LD}</head><body></body></html>`
|
||||
);
|
||||
});
|
||||
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
|
||||
@@ -151,7 +202,7 @@ describe('searchWeb', () => {
|
||||
const pageServer = createServer((_req, res) => {
|
||||
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
|
||||
res.end(
|
||||
'<html><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>'
|
||||
`<html><head>${RECIPE_LD}</head><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>`
|
||||
);
|
||||
});
|
||||
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
|
||||
@@ -172,7 +223,7 @@ describe('searchWeb', () => {
|
||||
const pageServer = createServer((_req, res) => {
|
||||
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
|
||||
res.end(
|
||||
'<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" /></head></html>'
|
||||
`<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" />${RECIPE_LD}</head></html>`
|
||||
);
|
||||
});
|
||||
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
|
||||
@@ -194,7 +245,7 @@ describe('searchWeb', () => {
|
||||
it('keeps SearXNG thumbnail when page has no image', async () => {
|
||||
const pageServer = createServer((_req, res) => {
|
||||
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
|
||||
res.end('<html><head></head><body>no images here</body></html>');
|
||||
res.end(`<html><head>${RECIPE_LD}</head><body>no images here</body></html>`);
|
||||
});
|
||||
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
|
||||
const addr = pageServer.address() as AddressInfo;
|
||||
@@ -217,7 +268,7 @@ describe('searchWeb', () => {
|
||||
const pageServer = createServer((_req, res) => {
|
||||
pageHits += 1;
|
||||
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
|
||||
res.end('<html><head><meta property="og:image" content="https://cdn.example/c.jpg"></head></html>');
|
||||
res.end(`<html><head><meta property="og:image" content="https://cdn.example/c.jpg">${RECIPE_LD}</head></html>`);
|
||||
});
|
||||
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
|
||||
const addr = pageServer.address() as AddressInfo;
|
||||
|
||||
Reference in New Issue
Block a user