From 4d90d515012a3156ee05703aeb5d507331d19bb6 Mon Sep 17 00:00:00 2001
From: hsiegeln <37154749+hsiegeln@users.noreply.github.com>
Date: Fri, 17 Apr 2026 18:34:29 +0200
Subject: [PATCH] feat(search): persistenter Thumbnail-Cache in SQLite,
Default-TTL 30 Tage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Vorher: In-Memory-Map, TTL 30 Minuten. Container-Neustart verwarf den
kompletten Cache, also musste nach jedem Deploy jede Suche wieder alle
Seiten laden.
Jetzt:
- Neue Tabelle thumbnail_cache (url PK, image, expires_at)
- Default-TTL 30 Tage, per Env KOCHWAS_THUMB_TTL_DAYS konfigurierbar
(7, 365, was der User will — ist alles ok laut Nutzer)
- Negative Cache: Seiten ohne Bild werden mit image=NULL gespeichert,
damit wir nicht jede Suche die gleiche kaputte Seite wieder laden
- Lazy-Cleanup: pro searchWeb-Aufruf werden abgelaufene Zeilen via
DELETE ... WHERE expires_at <= now() weggeräumt (Index-Scan, billig)
Migration 003_thumbnail_cache.sql: nicht-destruktiv, nur neue Tabelle.
Bestehende DB bekommt sie beim nächsten Start automatisch dazu.
Tests (99/99):
- Neuer Cache-Test: zweiter searchWeb für dieselbe URL macht keinen
Page-Fetch mehr und liest die image-Spalte aus SQLite.
---
.../db/migrations/003_thumbnail_cache.sql | 10 ++++
src/lib/server/search/searxng.ts | 55 +++++++++++++++----
tests/integration/searxng.test.ts | 28 ++++++++++
3 files changed, 81 insertions(+), 12 deletions(-)
create mode 100644 src/lib/server/db/migrations/003_thumbnail_cache.sql
diff --git a/src/lib/server/db/migrations/003_thumbnail_cache.sql b/src/lib/server/db/migrations/003_thumbnail_cache.sql
new file mode 100644
index 0000000..c5e3f5b
--- /dev/null
+++ b/src/lib/server/db/migrations/003_thumbnail_cache.sql
@@ -0,0 +1,10 @@
+-- Long-term cache for page → image URL mappings extracted via og:image,
+-- JSON-LD, or first content <img>. Fetching every recipe page on every
+-- search is expensive; store the mapping with a 30-day default TTL.
+CREATE TABLE thumbnail_cache (
+ url TEXT PRIMARY KEY,
+ image TEXT, -- NULL = page has no image (cache the negative too)
+ expires_at TEXT NOT NULL -- ISO-8601 UTC
+);
+
+CREATE INDEX idx_thumbnail_cache_expires ON thumbnail_cache(expires_at);
diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts
index e88dfdb..f171e28 100644
--- a/src/lib/server/search/searxng.ts
+++ b/src/lib/server/search/searxng.ts
@@ -179,14 +179,38 @@ function extractPageImage(html: string, baseUrl: string): string | null {
}
}
-type ThumbCacheEntry = { image: string | null; expires: number };
-const thumbCache = new Map<string, ThumbCacheEntry>();
-const THUMB_TTL_MS = 30 * 60 * 1000;
+const THUMB_TTL_DAYS = Number(process.env.KOCHWAS_THUMB_TTL_DAYS ?? 30);
+const THUMB_TTL_MS = THUMB_TTL_DAYS * 24 * 60 * 60 * 1000;
-async function enrichThumbnail(url: string): Promise<string | null> {
- const now = Date.now();
- const cached = thumbCache.get(url);
- if (cached && cached.expires > now) return cached.image;
+function readCachedThumbnail(
+ db: Database.Database,
+ url: string
+): { image: string | null } | null {
+ const row = db
+ .prepare<[string, string], { image: string | null }>(
+ "SELECT image FROM thumbnail_cache WHERE url = ? AND expires_at > ?"
+ )
+ .get(url, new Date().toISOString());
+ return row ?? null;
+}
+
+function writeCachedThumbnail(
+ db: Database.Database,
+ url: string,
+ image: string | null
+): void {
+ const expiresAt = new Date(Date.now() + THUMB_TTL_MS).toISOString();
+ db.prepare(
+ 'INSERT OR REPLACE INTO thumbnail_cache (url, image, expires_at) VALUES (?, ?, ?)'
+ ).run(url, image, expiresAt);
+}
+
+async function enrichThumbnail(
+ db: Database.Database,
+ url: string
+): Promise<string | null> {
+ const cached = readCachedThumbnail(db, url);
+ if (cached) return cached.image;
let image: string | null = null;
try {
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 512 * 1024 });
@@ -194,23 +218,30 @@ async function enrichThumbnail(url: string): Promise<string | null> {
} catch {
image = null;
}
- thumbCache.set(url, { image, expires: now + THUMB_TTL_MS });
+ writeCachedThumbnail(db, url, image);
return image;
}
-async function enrichAllThumbnails(hits: WebHit[]): Promise<void> {
+async function enrichAllThumbnails(
+ db: Database.Database,
+ hits: WebHit[]
+): Promise<void> {
// Always fetch the page image even when SearXNG gave us a thumbnail —
// the search engine's thumbnail is typically 150-200px, while og:image
// / JSON-LD image on the page is the full-resolution recipe photo.
- // The 30-min URL cache makes repeat searches instant.
+ // The thumbnail_cache table (default 30-day TTL) makes repeat searches instant.
if (hits.length === 0) return;
+ // Lazy cleanup of expired entries — range scan on the expires_at index; cost grows only with the number of expired rows.
+ db.prepare('DELETE FROM thumbnail_cache WHERE expires_at <= ?').run(
+ new Date().toISOString()
+ );
const queue = [...hits];
const LIMIT = 6;
const workers = Array.from({ length: Math.min(LIMIT, queue.length) }, async () => {
while (queue.length > 0) {
const h = queue.shift();
if (!h) break;
- const image = await enrichThumbnail(h.url);
+ const image = await enrichThumbnail(db, h.url);
if (image) h.thumbnail = image;
}
});
@@ -272,7 +303,7 @@ export async function searchWeb(
if (hits.length >= limit) break;
}
if (opts.enrichThumbnails !== false) {
- await enrichAllThumbnails(hits);
+ await enrichAllThumbnails(db, hits);
}
return hits;
}
diff --git a/tests/integration/searxng.test.ts b/tests/integration/searxng.test.ts
index ccbc069..b7d3016 100644
--- a/tests/integration/searxng.test.ts
+++ b/tests/integration/searxng.test.ts
@@ -184,6 +184,34 @@ describe('searchWeb', () => {
}
});
+ it('SQLite cache: second search does not re-fetch the page', async () => {
+ let pageHits = 0;
+ const pageServer = createServer((_req, res) => {
+ pageHits += 1;
+ res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
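+ res.end('<html><head><meta property="og:image" content="https://cdn.example/c.jpg" /></head><body></body></html>');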
+ });
+ await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
+ const addr = pageServer.address() as AddressInfo;
+ const pageUrl = `http://127.0.0.1:${addr.port}/cached`;
+ try {
+ const db = openInMemoryForTest();
+ addDomain(db, '127.0.0.1');
+ respondWith([{ url: pageUrl, title: 'C', content: '' }]);
+ const first = await searchWeb(db, 'c', { searxngUrl: baseUrl });
+ const second = await searchWeb(db, 'c', { searxngUrl: baseUrl });
+ expect(first[0].thumbnail).toBe('https://cdn.example/c.jpg');
+ expect(second[0].thumbnail).toBe('https://cdn.example/c.jpg');
+ expect(pageHits).toBe(1); // second call read from SQLite cache
+ const row = db
+ .prepare('SELECT image FROM thumbnail_cache WHERE url = ?')
+ .get(pageUrl) as { image: string };
+ expect(row.image).toBe('https://cdn.example/c.jpg');
+ } finally {
+ await new Promise<void>((r) => pageServer.close(() => r()));
+ }
+ });
+
it('filters out forum/magazine/listing URLs', async () => {
const db = openInMemoryForTest();
addDomain(db, 'chefkoch.de');