feat(search): persistenter Thumbnail-Cache in SQLite, Default-TTL 30 Tage
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 54s

Vorher: In-Memory-Map, TTL 30 Minuten. Container-Neustart verwarf den
kompletten Cache, also musste nach jedem Deploy jede Suche wieder alle
Seiten laden.

Jetzt:
- Neue Tabelle thumbnail_cache (url PK, image, expires_at)
- Default-TTL 30 Tage, per Env KOCHWAS_THUMB_TTL_DAYS konfigurierbar
  (z. B. 7 oder 365 – laut Nutzer ist jeder Wert in Ordnung)
- Negative Cache: Seiten ohne Bild werden mit image=NULL gespeichert,
  damit wir nicht bei jeder Suche dieselbe kaputte Seite erneut laden
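- Lazy-Cleanup: pro searchWeb-Aufruf mit Thumbnail-Anreicherung und
  mindestens einem Treffer werden abgelaufene Zeilen via
  DELETE ... WHERE expires_at <= <aktueller ISO-Zeitstempel> weggeräumt
  (läuft über den Index auf expires_at, billig)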

Migration 003_thumbnail_cache.sql: nicht-destruktiv, nur neue Tabelle.
Bestehende DB bekommt sie beim nächsten Start automatisch dazu.

Tests (99/99):
- Neuer Cache-Test: zweiter searchWeb für dieselbe URL macht keinen
  Page-Fetch mehr und liest die image-Spalte aus SQLite.
This commit is contained in:
hsiegeln
2026-04-17 18:34:29 +02:00
parent 1712263fd1
commit 4d90d51501
3 changed files with 81 additions and 12 deletions

View File

@@ -0,0 +1,10 @@
-- thumbnail_cache: persistent mapping from a page URL to the image URL that
-- was extracted from that page (og:image, JSON-LD, or the first content
-- <img>). Re-fetching every recipe page on every search is expensive, so
-- each mapping is kept until it expires (default TTL: 30 days).
CREATE TABLE thumbnail_cache (
  -- Page URL the image was extracted from; one row per page.
  url        TEXT PRIMARY KEY,
  -- Extracted image URL. NULL means the page has no image; that negative
  -- result is cached as well.
  image      TEXT,
  -- Expiry timestamp, ISO-8601 in UTC.
  expires_at TEXT NOT NULL
);

-- Index on the expiry column, used when looking for expired rows.
CREATE INDEX idx_thumbnail_cache_expires
  ON thumbnail_cache (expires_at);

View File

@@ -179,14 +179,38 @@ function extractPageImage(html: string, baseUrl: string): string | null {
}
}
type ThumbCacheEntry = { image: string | null; expires: number };
const thumbCache = new Map<string, ThumbCacheEntry>();
const THUMB_TTL_MS = 30 * 60 * 1000;
/** TTL in days used when KOCHWAS_THUMB_TTL_DAYS is unset or unusable. */
const DEFAULT_THUMB_TTL_DAYS = 30;

/**
 * Upper bound for the configured TTL (roughly 100 years). Keeps
 * `Date.now() + THUMB_TTL_MS` inside the valid Date range and inside
 * four-digit years, so the ISO-8601 strings written to `expires_at` stay
 * lexicographically comparable.
 */
const MAX_THUMB_TTL_DAYS = 36500;

/**
 * Turns the raw KOCHWAS_THUMB_TTL_DAYS value into a usable number of days.
 *
 * @param raw - Raw environment value; `undefined` when the variable is unset.
 * @returns The parsed TTL in days, clamped to MAX_THUMB_TTL_DAYS. Falls back
 *   to DEFAULT_THUMB_TTL_DAYS when the value is missing, blank, not a finite
 *   number, or negative. `0` is accepted and makes every entry expire
 *   immediately.
 */
function parseThumbTtlDays(raw: string | undefined): number {
  // A variable that is set but empty (e.g. `KOCHWAS_THUMB_TTL_DAYS=`) would
  // otherwise parse as 0 via Number(''); treat it like "unset".
  if (raw === undefined || raw.trim() === '') return DEFAULT_THUMB_TTL_DAYS;
  const days = Number(raw);
  // NaN would make `new Date(...).toISOString()` throw a RangeError on every
  // cache write; negative values would write rows that are already expired.
  if (!Number.isFinite(days) || days < 0) return DEFAULT_THUMB_TTL_DAYS;
  return Math.min(days, MAX_THUMB_TTL_DAYS);
}

const THUMB_TTL_DAYS = parseThumbTtlDays(process.env.KOCHWAS_THUMB_TTL_DAYS);
const THUMB_TTL_MS = THUMB_TTL_DAYS * 24 * 60 * 60 * 1000;
async function enrichThumbnail(url: string): Promise<string | null> {
const now = Date.now();
const cached = thumbCache.get(url);
if (cached && cached.expires > now) return cached.image;
/**
 * Looks up a non-expired thumbnail_cache row for a page URL.
 *
 * @param db - Database handle used to prepare and run the lookup.
 * @param url - Page URL used as the cache key.
 * @returns The matching row (its `image` may be `null`) when a row exists
 *   whose `expires_at` is later than the current time; `null` when the
 *   query yields no row.
 */
function readCachedThumbnail(
  db: Database.Database,
  url: string
): { image: string | null } | null {
  const lookup = db.prepare<[string, string], { image: string | null }>(
    "SELECT image FROM thumbnail_cache WHERE url = ? AND expires_at > ?"
  );
  // expires_at is compared as text against the current ISO timestamp.
  const nowIso = new Date().toISOString();
  const hit = lookup.get(url, nowIso);
  if (hit == null) return null;
  return hit;
}
/**
 * Stores the extraction result for a page URL in thumbnail_cache, replacing
 * any existing row for that URL.
 *
 * @param db - Database handle used to prepare and run the write.
 * @param url - Page URL used as the cache key.
 * @param image - Extracted image URL, or `null` when none was found.
 */
function writeCachedThumbnail(
  db: Database.Database,
  url: string,
  image: string | null
): void {
  // The row expires THUMB_TTL_MS milliseconds from now, stored as ISO text.
  const expiryMs = Date.now() + THUMB_TTL_MS;
  const expiresAt = new Date(expiryMs).toISOString();
  const upsert = db.prepare(
    'INSERT OR REPLACE INTO thumbnail_cache (url, image, expires_at) VALUES (?, ?, ?)'
  );
  upsert.run(url, image, expiresAt);
}
async function enrichThumbnail(
db: Database.Database,
url: string
): Promise<string | null> {
const cached = readCachedThumbnail(db, url);
if (cached) return cached.image;
let image: string | null = null;
try {
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 512 * 1024 });
@@ -194,23 +218,30 @@ async function enrichThumbnail(url: string): Promise<string | null> {
} catch {
image = null;
}
thumbCache.set(url, { image, expires: now + THUMB_TTL_MS });
writeCachedThumbnail(db, url, image);
return image;
}
async function enrichAllThumbnails(hits: WebHit[]): Promise<void> {
async function enrichAllThumbnails(
db: Database.Database,
hits: WebHit[]
): Promise<void> {
// Always fetch the page image even when SearXNG gave us a thumbnail —
// the search engine's thumbnail is typically 150-200px, while og:image
// / JSON-LD image on the page is the full-resolution recipe photo.
// The 30-min URL cache makes repeat searches instant.
// The thumbnail_cache table (default 30-day TTL) makes repeat searches instant.
if (hits.length === 0) return;
// Lazy cleanup of expired entries — O(log n) index scan, cheap.
db.prepare('DELETE FROM thumbnail_cache WHERE expires_at <= ?').run(
new Date().toISOString()
);
const queue = [...hits];
const LIMIT = 6;
const workers = Array.from({ length: Math.min(LIMIT, queue.length) }, async () => {
while (queue.length > 0) {
const h = queue.shift();
if (!h) break;
const image = await enrichThumbnail(h.url);
const image = await enrichThumbnail(db, h.url);
if (image) h.thumbnail = image;
}
});
@@ -272,7 +303,7 @@ export async function searchWeb(
if (hits.length >= limit) break;
}
if (opts.enrichThumbnails !== false) {
await enrichAllThumbnails(hits);
await enrichAllThumbnails(db, hits);
}
return hits;
}