feat(search): HQ-Thumbnails durch immer aktive og:image-Extraktion
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 54s
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 54s
Vorher: Nur Treffer ohne SearXNG-Thumbnail wurden mit dem Seiten-Bild
angereichert. Treffer mit Thumbnail behielten das kleine, 150–200 px
große Bildchen aus dem Suchmaschinen-Index.
Jetzt: Alle Treffer durchlaufen die Enrichment-Pipeline. Wenn die Seite
ein og:image/JSON-LD/Content-Bild hat (und das hat sie bei Rezept-Seiten
praktisch immer), wird das kleine SearXNG-Thumbnail damit überschrieben.
Wenn die Seite kein Bild liefert, bleibt das SearXNG-Thumbnail als
Fallback erhalten.
Das ist das gleiche Bild, das auch die Vorschau anzeigt — Suchergebnis
und Vorschau sind jetzt visuell konsistent.
Performance: Die erste Suche kann bis zu ~6 Sekunden länger dauern
(max. 6 parallele Abrufe, je 4 s Timeout). Dank des 30-Minuten-In-Memory-
Caches werden Wiederholsuchen sofort beantwortet.
Tests (98/98):
- Neu: SearXNG-Thumbnail wird durch og:image ersetzt.
- Neu: SearXNG-Thumbnail bleibt erhalten, wenn Seite kein Bild hat.
- Entfernt: alter Test "leaves existing thumbnails untouched" — das
Verhalten wurde bewusst umgekehrt.
This commit is contained in:
@@ -198,9 +198,13 @@ async function enrichThumbnail(url: string): Promise<string | null> {
|
|||||||
return image;
|
return image;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function enrichMissingThumbnails(hits: WebHit[]): Promise<void> {
|
async function enrichAllThumbnails(hits: WebHit[]): Promise<void> {
|
||||||
const queue = hits.filter((h) => !h.thumbnail);
|
// Always fetch the page image even when SearXNG gave us a thumbnail —
|
||||||
if (queue.length === 0) return;
|
// the search engine's thumbnail is typically 150-200px, while og:image
|
||||||
|
// / JSON-LD image on the page is the full-resolution recipe photo.
|
||||||
|
// The 30-min URL cache makes repeat searches instant.
|
||||||
|
if (hits.length === 0) return;
|
||||||
|
const queue = [...hits];
|
||||||
const LIMIT = 6;
|
const LIMIT = 6;
|
||||||
const workers = Array.from({ length: Math.min(LIMIT, queue.length) }, async () => {
|
const workers = Array.from({ length: Math.min(LIMIT, queue.length) }, async () => {
|
||||||
while (queue.length > 0) {
|
while (queue.length > 0) {
|
||||||
@@ -268,7 +272,7 @@ export async function searchWeb(
|
|||||||
if (hits.length >= limit) break;
|
if (hits.length >= limit) break;
|
||||||
}
|
}
|
||||||
if (opts.enrichThumbnails !== false) {
|
if (opts.enrichThumbnails !== false) {
|
||||||
await enrichMissingThumbnails(hits);
|
await enrichAllThumbnails(hits);
|
||||||
}
|
}
|
||||||
return hits;
|
return hits;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -140,19 +140,48 @@ describe('searchWeb', () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('leaves existing thumbnails untouched (no enrichment fetch)', async () => {
|
it('upgrades low-res SearXNG thumbnail with HQ og:image from page', async () => {
|
||||||
const db = openInMemoryForTest();
|
const pageServer = createServer((_req, res) => {
|
||||||
addDomain(db, 'chefkoch.de');
|
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
|
||||||
respondWith([
|
res.end(
|
||||||
{
|
'<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" /></head></html>'
|
||||||
url: 'https://www.chefkoch.de/rezepte/1/x.html',
|
);
|
||||||
title: 'X',
|
});
|
||||||
thumbnail: 'https://cdn.chefkoch/x.jpg'
|
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
|
||||||
}
|
const addr = pageServer.address() as AddressInfo;
|
||||||
]);
|
const pageUrl = `http://127.0.0.1:${addr.port}/dish`;
|
||||||
// enrichment enabled, but thumbnail is set → no fetch expected, no hang
|
try {
|
||||||
const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
|
const db = openInMemoryForTest();
|
||||||
expect(hits[0].thumbnail).toBe('https://cdn.chefkoch/x.jpg');
|
addDomain(db, '127.0.0.1');
|
||||||
|
respondWith([
|
||||||
|
{ url: pageUrl, title: 'Dish', thumbnail: 'https://searxng-cdn/small-thumb.jpg' }
|
||||||
|
]);
|
||||||
|
const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });
|
||||||
|
expect(hits[0].thumbnail).toBe('https://cdn.example/hq.jpg');
|
||||||
|
} finally {
|
||||||
|
await new Promise<void>((r) => pageServer.close(() => r()));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps SearXNG thumbnail when page has no image', async () => {
|
||||||
|
const pageServer = createServer((_req, res) => {
|
||||||
|
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
|
||||||
|
res.end('<html><head></head><body>no images here</body></html>');
|
||||||
|
});
|
||||||
|
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
|
||||||
|
const addr = pageServer.address() as AddressInfo;
|
||||||
|
const pageUrl = `http://127.0.0.1:${addr.port}/noimg`;
|
||||||
|
try {
|
||||||
|
const db = openInMemoryForTest();
|
||||||
|
addDomain(db, '127.0.0.1');
|
||||||
|
respondWith([
|
||||||
|
{ url: pageUrl, title: 'X', thumbnail: 'https://searxng-cdn/fallback.jpg' }
|
||||||
|
]);
|
||||||
|
const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
|
||||||
|
expect(hits[0].thumbnail).toBe('https://searxng-cdn/fallback.jpg');
|
||||||
|
} finally {
|
||||||
|
await new Promise<void>((r) => pageServer.close(() => r()));
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('filters out forum/magazine/listing URLs', async () => {
|
it('filters out forum/magazine/listing URLs', async () => {
|
||||||
|
|||||||
Reference in New Issue
Block a user