From 6c2b24d06058f77f58dd6388fa0f371066d0863a Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Sat, 18 Apr 2026 08:20:35 +0200 Subject: [PATCH] =?UTF-8?q?feat(searxng):=20Suche-Pipeline=20loggen=20f?= =?UTF-8?q?=C3=BCr=20Diagnose?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Statt eine separate Debug-Seite zu bauen: bei jeder Web-Suche werden zwei kompakte Log-Zeilen nach stdout geschrieben, die den Filter-Verlust pro Pipeline-Schritt zeigen. In den Pi-Docker-Logs (docker compose logs kochwas) leicht über grep '[searxng]' zu finden. Format: [searxng] q="…" pageno=1 domains=3 raw=12 non_whitelist=2 non_recipe_url=4 dup=0 kept_pre_enrich=6 [searxng] q="…" pageno=1 enrich=6 dropped_non_recipe=3 final=3 Damit lässt sich gezielt sehen, ob rezeptwelt-Treffer am looksLikeRecipePage- Filter, am hasRecipe-Check oder schon bei SearXNG selbst verloren gehen. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lib/server/search/searxng.ts | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts index fad6aa8..e6dc587 100644 --- a/src/lib/server/search/searxng.ts +++ b/src/lib/server/search/searxng.ts @@ -331,11 +331,23 @@ export async function searchWeb( const allowed = new Set(domains); const seen = new Set(); const hits: WebHit[] = []; + let dropNonWhitelist = 0; + let dropNonRecipeUrl = 0; + let dropDup = 0; for (const r of results) { const host = hostnameFromUrl(r.url); - if (!host || !allowed.has(host)) continue; - if (!looksLikeRecipePage(r.url)) continue; - if (seen.has(r.url)) continue; + if (!host || !allowed.has(host)) { + dropNonWhitelist += 1; + continue; + } + if (!looksLikeRecipePage(r.url)) { + dropNonRecipeUrl += 1; + continue; + } + if (seen.has(r.url)) { + dropDup += 1; + continue; + } seen.add(r.url); hits.push({ url: r.url, @@ -346,8 +358,15 @@ export async function searchWeb( }); if (hits.length >= limit) break; } + console.log( + `[searxng] q=${JSON.stringify(trimmed)} pageno=${pageno} domains=${domains.length} raw=${results.length} non_whitelist=${dropNonWhitelist} non_recipe_url=${dropNonRecipeUrl} dup=${dropDup} kept_pre_enrich=${hits.length}` + ); if (opts.enrichThumbnails !== false) { - return await enrichAndFilterHits(db, hits); + const enriched = await enrichAndFilterHits(db, hits); + console.log( + `[searxng] q=${JSON.stringify(trimmed)} pageno=${pageno} enrich=${hits.length} dropped_non_recipe=${hits.length - enriched.length} final=${enriched.length}` + ); + return enriched; } return hits; }