diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts
index ca74508..9ec6a93 100644
--- a/src/lib/server/search/searxng.ts
+++ b/src/lib/server/search/searxng.ts
@@ -77,10 +77,61 @@ function looksLikeRecipePage(url: string): boolean {
}
}
+const OG_IMAGE_RE = // <meta property|name="og:image" (or "og:image:url") ... content="URL">; capture group 1 is the URL
+  /<meta[^>]+(?:property|name)=["']og:image(?::url)?["'][^>]+content=["']([^"']+)["']/i;
+const OG_IMAGE_RE_REV = // same tag with the attributes in reverse order: content="URL" ... property|name="og:image"
+  /<meta[^>]+content=["']([^"']+)["'][^>]+(?:property|name)=["']og:image(?::url)?["']/i;
+const TWITTER_IMAGE_RE = // fallback: <meta property|name="twitter:image" ... content="URL">
+  /<meta[^>]+(?:property|name)=["']twitter:image["'][^>]+content=["']([^"']+)["']/i;
+
+function extractOgImage(html: string): string | null { // Returns the first og:image (either attribute order) or twitter:image URL found in `html`, normalized through URL; null when absent or not an absolute URL.
+  const raw = (OG_IMAGE_RE.exec(html) ?? OG_IMAGE_RE_REV.exec(html) ?? TWITTER_IMAGE_RE.exec(html))?.[1];
+  if (!raw) return null;
+  try {
+    return new URL(raw.replace(/&(?:amp|#38|#x26);/gi, '&')).toString(); // attribute values are HTML-escaped: "&amp;" in the markup means "&" in the URL
+  } catch {
+    return null; // new URL() without a base throws for relative or malformed values
+  }
+}
+
+type ThumbCacheEntry = { image: string | null; expires: number }; // image: extracted URL, or null when none was found or the fetch failed; expires: Date.now()-based deadline in ms
+const thumbCache = new Map<string, ThumbCacheEntry>(); // module-level cache keyed by page URL
+const THUMB_TTL_MS = 30 * 60 * 1000; // 30 minutes
+
+async function enrichThumbnail(url: string): Promise<string | null> { // Fetches `url` and returns its og:image/twitter:image URL, or null; the outcome (including null) is cached per URL for THUMB_TTL_MS.
+  const now = Date.now();
+  const cached = thumbCache.get(url);
+  if (cached && cached.expires > now) return cached.image;
+  for (const [key, entry] of thumbCache) { if (entry.expires <= now) thumbCache.delete(key); } // evict stale entries on every miss so the cache cannot grow without bound
+  let image: string | null = null;
+  try {
+    image = extractOgImage(await fetchText(url, { timeoutMs: 4_000, maxBytes: 256 * 1024 }));
+  } catch {
+    // Best effort: any fetch error leaves `image` null, and that null is cached so the page is not retried until the entry expires.
+  }
+  thumbCache.set(url, { image, expires: now + THUMB_TTL_MS });
+  return image;
+}
+
+async function enrichMissingThumbnails(hits: WebHit[]): Promise<void> { // Sets `thumbnail` in place on every hit that lacks one, using the image advertised by the hit's own page.
+  const queue = hits.filter((h) => !h.thumbnail);
+  if (queue.length === 0) return;
+  const LIMIT = 6; // maximum number of pages fetched at the same time
+  const workers = Array.from({ length: Math.min(LIMIT, queue.length) }, async () => {
+    // Each worker drains the shared queue; shift() is synchronous, so no hit is processed twice.
+    for (let h = queue.shift(); h; h = queue.shift()) {
+      const image = await enrichThumbnail(h.url);
+      if (image) h.thumbnail = image; // hits stay unchanged when no image was found
+    }
+  });
+  // enrichThumbnail catches fetch failures itself, so one bad page does not abort the other workers.
+  await Promise.all(workers);
+}
+
export async function searchWeb(
db: Database.Database,
query: string,
- opts: { searxngUrl?: string; limit?: number } = {}
+ opts: { searxngUrl?: string; limit?: number; enrichThumbnails?: boolean } = {}
): Promise {
const trimmed = query.trim();
if (!trimmed) return [];
@@ -131,5 +182,8 @@ export async function searchWeb(
});
if (hits.length >= limit) break;
}
+ if (opts.enrichThumbnails !== false) {
+ await enrichMissingThumbnails(hits);
+ }
return hits;
}
diff --git a/tests/integration/searxng.test.ts b/tests/integration/searxng.test.ts
index 1d47ac0..2e164fc 100644
--- a/tests/integration/searxng.test.ts
+++ b/tests/integration/searxng.test.ts
@@ -42,7 +42,7 @@ describe('searchWeb', () => {
content: 'blocked'
}
]);
- const hits = await searchWeb(db, 'carbonara', { searxngUrl: baseUrl });
+ const hits = await searchWeb(db, 'carbonara', { searxngUrl: baseUrl, enrichThumbnails: false });
expect(hits.length).toBe(1);
expect(hits[0].domain).toBe('chefkoch.de');
expect(hits[0].title).toBe('Carbonara');
@@ -55,23 +55,60 @@ describe('searchWeb', () => {
{ url: 'https://www.chefkoch.de/a', title: 'A', content: '' },
{ url: 'https://www.chefkoch.de/a', title: 'A dup', content: '' }
]);
- const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl });
+ const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl, enrichThumbnails: false });
expect(hits.length).toBe(1);
});
it('returns empty list when no domains configured', async () => {
const db = openInMemoryForTest();
- const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
+ const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl, enrichThumbnails: false }); // opt out: searchWeb only runs the og:image enrichment pass when enrichThumbnails !== false
expect(hits).toEqual([]);
});
it('returns empty for empty query', async () => {
const db = openInMemoryForTest();
addDomain(db, 'chefkoch.de');
- const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl });
+ const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl, enrichThumbnails: false }); // opt out: searchWeb only runs the og:image enrichment pass when enrichThumbnails !== false
expect(hits).toEqual([]);
});
+ it('enriches missing thumbnails from og:image', async () => {
+   const pageServer = createServer((_req, res) => { // stand-in result page that advertises an og:image
+     res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
+     res.end(
+       '<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" /></head><body></body></html>'
+     );
+   });
+   await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r)); // port 0: let the OS pick a free port
+   const addr = pageServer.address() as AddressInfo;
+   const pageUrl = `http://127.0.0.1:${addr.port}/rezept`;
+   try {
+     const db = openInMemoryForTest();
+     addDomain(db, '127.0.0.1');
+     respondWith([{ url: pageUrl, title: 'Kuchen', content: '' }]); // the hit has no thumbnail, so enrichment has to fetch pageUrl
+     const hits = await searchWeb(db, 'kuchen', { searxngUrl: baseUrl });
+     expect(hits.length).toBe(1);
+     expect(hits[0].thumbnail).toBe('https://cdn.example/foo.jpg');
+   } finally {
+     await new Promise<void>((r) => pageServer.close(() => r())); // always release the listening socket
+   }
+ });
+
+ it('leaves existing thumbnails untouched (no enrichment fetch)', async () => {
+   const presetThumbnail = 'https://cdn.chefkoch/x.jpg';
+   const resultWithThumbnail = {
+     url: 'https://www.chefkoch.de/rezepte/1/x.html',
+     title: 'X',
+     thumbnail: presetThumbnail
+   };
+   const db = openInMemoryForTest();
+   addDomain(db, 'chefkoch.de');
+   respondWith([resultWithThumbnail]);
+   // Enrichment stays enabled (the default); a hit that already has a thumbnail is skipped, so no page fetch and no hang.
+   const [firstHit] = await searchWeb(db, 'x', { searxngUrl: baseUrl });
+   expect(firstHit.thumbnail).toBe(presetThumbnail);
+ });
+
it('filters out forum/magazine/listing URLs', async () => {
const db = openInMemoryForTest();
addDomain(db, 'chefkoch.de');
@@ -83,7 +120,7 @@ describe('searchWeb', () => {
{ url: 'https://www.chefkoch.de/themen/ravioli/', title: 'Themen' },
{ url: 'https://www.chefkoch.de/rezepte/', title: 'Rezepte Übersicht' }
]);
- const hits = await searchWeb(db, 'ravioli', { searxngUrl: baseUrl });
+ const hits = await searchWeb(db, 'ravioli', { searxngUrl: baseUrl, enrichThumbnails: false });
expect(hits.length).toBe(1);
expect(hits[0].title).toBe('Ravioli');
});