diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts index fad6aa8..e6dc587 100644 --- a/src/lib/server/search/searxng.ts +++ b/src/lib/server/search/searxng.ts @@ -331,11 +331,23 @@ export async function searchWeb( const allowed = new Set(domains); const seen = new Set(); const hits: WebHit[] = []; + let dropNonWhitelist = 0; + let dropNonRecipeUrl = 0; + let dropDup = 0; for (const r of results) { const host = hostnameFromUrl(r.url); - if (!host || !allowed.has(host)) continue; - if (!looksLikeRecipePage(r.url)) continue; - if (seen.has(r.url)) continue; + if (!host || !allowed.has(host)) { + dropNonWhitelist += 1; + continue; + } + if (!looksLikeRecipePage(r.url)) { + dropNonRecipeUrl += 1; + continue; + } + if (seen.has(r.url)) { + dropDup += 1; + continue; + } seen.add(r.url); hits.push({ url: r.url, @@ -346,8 +358,15 @@ export async function searchWeb( }); if (hits.length >= limit) break; } + console.log( + `[searxng] q=${JSON.stringify(trimmed)} pageno=${pageno} domains=${domains.length} raw=${results.length} non_whitelist=${dropNonWhitelist} non_recipe_url=${dropNonRecipeUrl} dup=${dropDup} kept_pre_enrich=${hits.length}` + ); if (opts.enrichThumbnails !== false) { - return await enrichAndFilterHits(db, hits); + const enriched = await enrichAndFilterHits(db, hits); + console.log( + `[searxng] q=${JSON.stringify(trimmed)} pageno=${pageno} enrich=${hits.length} dropped_non_recipe=${hits.length - enriched.length} final=${enriched.length}` + ); + return enriched; } return hits; }