import type Database from 'better-sqlite3';
import { listDomains, normalizeDomain } from '../domains/repository';
import { fetchText } from '../http';

/** A single web search result restricted to one of the configured domains. */
export type WebHit = {
  url: string;
  title: string;
  /** Host of `url` after passing through `normalizeDomain`. */
  domain: string;
  snippet: string | null;
  thumbnail: string | null;
};

/** Fields read from one entry of the SearXNG JSON `results` array. */
type SearxngResult = {
  url: string;
  title: string;
  content?: string;
  thumbnail?: string;
  img_src?: string;
};

/** Expected top-level shape of the SearXNG JSON response. */
type SearxngResponse = {
  results?: SearxngResult[];
};

/**
 * Extracts the normalized hostname of `url`.
 *
 * @returns The normalized hostname, or `null` when `url` cannot be parsed
 *   (or `normalizeDomain` throws).
 */
function hostnameFromUrl(url: string): string | null {
  try {
    return normalizeDomain(new URL(url).hostname);
  } catch {
    // Best effort: an unparsable URL simply has no usable host.
    return null;
  }
}

/** Narrows an unknown value to a non-null object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Returns `value` when it is a string, otherwise `null`. */
function stringOrNull(value: unknown): string | null {
  return typeof value === 'string' ? value : null;
}

/**
 * Pulls the usable result entries out of a parsed SearXNG payload.
 *
 * The payload comes from an external service, so nothing about its shape is
 * trusted: a missing or non-array `results` yields an empty list, and entries
 * that are not objects or lack a string `url` are dropped.
 */
function extractResults(payload: unknown): SearxngResult[] {
  if (!isRecord(payload)) return [];
  const candidates: unknown = (payload as SearxngResponse).results;
  if (!Array.isArray(candidates)) return [];
  return candidates.filter(
    (entry): entry is SearxngResult => isRecord(entry) && typeof entry.url === 'string'
  );
}

/**
 * Searches the web through a SearXNG instance, restricted to the domains
 * returned by `listDomains(db)`.
 *
 * The query is extended with a `site:a OR site:b …` filter, and the results
 * are additionally filtered locally so that only hits whose normalized host is
 * one of the listed domains are returned. Duplicate URLs are dropped.
 *
 * @param db - Database handle passed to `listDomains`.
 * @param query - User query; surrounding whitespace is ignored.
 * @param opts.searxngUrl - Base URL of the SearXNG instance. Defaults to the
 *   `SEARXNG_URL` environment variable, then `http://localhost:8888`.
 * @param opts.limit - Maximum number of hits to return (default 20). A
 *   non-positive limit yields an empty list without contacting SearXNG.
 * @returns Up to `limit` hits in the order SearXNG returned them; an empty
 *   array when the query is blank or no domains are configured.
 * @throws Error when the response body is not valid JSON; errors from
 *   `fetchText` propagate unchanged.
 */
export async function searchWeb(
  db: Database.Database,
  query: string,
  opts: { searxngUrl?: string; limit?: number } = {}
): Promise<WebHit[]> {
  const trimmed = query.trim();
  if (!trimmed) return [];

  const domains = listDomains(db).map((d) => d.domain);
  if (domains.length === 0) return [];

  const limit = opts.limit ?? 20;
  // Nothing can be returned, so skip the network round trip entirely.
  if (limit <= 0) return [];

  const searxngUrl = opts.searxngUrl ?? process.env.SEARXNG_URL ?? 'http://localhost:8888';

  const siteFilter = domains.map((d) => `site:${d}`).join(' OR ');
  const q = `${trimmed} (${siteFilter})`;

  // Note: the absolute path '/search' replaces any path component of the base URL.
  const endpoint = new URL('/search', searxngUrl);
  endpoint.searchParams.set('q', q);
  endpoint.searchParams.set('format', 'json');
  endpoint.searchParams.set('language', 'de');

  const body = await fetchText(endpoint.toString(), { timeoutMs: 15_000 });

  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    throw new Error('SearXNG did not return JSON');
  }

  const allowed = new Set(domains);
  const seen = new Set<string>();
  const hits: WebHit[] = [];

  for (const r of extractResults(parsed)) {
    // The site: filter is only a hint to the engines; enforce the allow-list here.
    const host = hostnameFromUrl(r.url);
    if (!host || !allowed.has(host)) continue;

    if (seen.has(r.url)) continue;
    seen.add(r.url);

    hits.push({
      url: r.url,
      // Guard against entries without a title so WebHit.title is always a string.
      title: typeof r.title === 'string' ? r.title : '',
      domain: host,
      snippet: stringOrNull(r.content),
      thumbnail: stringOrNull(r.thumbnail) ?? stringOrNull(r.img_src),
    });

    if (hits.length >= limit) break;
  }

  return hits;
}