feat(search): add SearXNG client with whitelist-filtered web search
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
78
src/lib/server/search/searxng.ts
Normal file
78
src/lib/server/search/searxng.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import { listDomains, normalizeDomain } from '../domains/repository';
|
||||
import { fetchText } from '../http';
|
||||
|
||||
/**
 * One search hit returned by `searchWeb`.
 */
export type WebHit = {
  /** Result URL, copied verbatim from the SearXNG result. */
  url: string;
  /** Result title, copied verbatim from the SearXNG result. */
  title: string;
  /** Hostname of `url` after passing through `normalizeDomain`. */
  domain: string;
  /** The result's `content` field, or `null` when the result has none. */
  snippet: string | null;
  /** The result's `thumbnail`, else its `img_src`, else `null`. */
  thumbnail: string | null;
};
|
||||
|
||||
/**
 * The fields this module reads from one entry of the `results` array in a
 * SearXNG JSON response. Anything else in the entry is ignored.
 */
type SearxngResult = {
  url: string;
  title: string;
  /** Becomes `WebHit.snippet` when present. */
  content?: string;
  /** Preferred source for `WebHit.thumbnail`. */
  thumbnail?: string;
  /** Fallback source for `WebHit.thumbnail` when `thumbnail` is absent. */
  img_src?: string;
};
|
||||
|
||||
/**
 * Top-level shape of the SearXNG JSON response as far as this module reads
 * it. `results` is optional; `searchWeb` treats a missing list as empty.
 */
type SearxngResponse = {
  results?: SearxngResult[];
};
|
||||
|
||||
/**
 * Parses `url` and returns its hostname passed through `normalizeDomain`.
 *
 * @param url - URL string to parse; it is given to the `URL` constructor
 *   without a base, so it must be absolute.
 * @returns The normalized hostname, or `null` when the URL cannot be parsed
 *   or `normalizeDomain` throws.
 */
function hostnameFromUrl(url: string): string | null {
  try {
    return normalizeDomain(new URL(url).hostname);
  } catch {
    // An unparseable URL is simply "no hostname" for the caller.
    return null;
  }
}
|
||||
|
||||
/**
 * Runs a web search through a SearXNG instance, restricted to the domains
 * returned by `listDomains(db)`.
 *
 * The query sent to SearXNG carries a `site:` filter for every listed domain.
 * The response is then filtered again locally: a result is kept only when the
 * normalized hostname of its URL is one of the listed domains. Results with a
 * URL that was already emitted are dropped.
 *
 * @param db - Database handle handed to `listDomains` to load the whitelist.
 * @param query - Search terms; leading and trailing whitespace is trimmed.
 * @param opts - `searxngUrl` overrides the instance base URL (default: the
 *   `SEARXNG_URL` environment variable, then `http://localhost:8888`).
 *   `limit` caps the number of returned hits (default 20).
 * @returns Hits in the order SearXNG delivered them. Empty when the query is
 *   blank, no domains are listed, `limit` is zero or negative, or the
 *   response carries no usable `results` array.
 * @throws Error when the response body is not valid JSON. Rejections from
 *   `fetchText` propagate unchanged.
 */
export async function searchWeb(
  db: Database.Database,
  query: string,
  opts: { searxngUrl?: string; limit?: number } = {}
): Promise<WebHit[]> {
  const trimmed = query.trim();
  if (!trimmed) return [];

  const domains = listDomains(db).map((d) => d.domain);
  if (domains.length === 0) return [];

  const searxngUrl = opts.searxngUrl ?? process.env.SEARXNG_URL ?? 'http://localhost:8888';
  const limit = opts.limit ?? 20;
  // A non-positive limit can never be satisfied; bail out before the network
  // round trip. (Checking the cap only after a push used to return one hit
  // for `limit: 0`.)
  if (limit <= 0) return [];

  const siteFilter = domains.map((d) => `site:${d}`).join(' OR ');
  const q = `${trimmed} (${siteFilter})`;

  const endpoint = new URL('/search', searxngUrl);
  endpoint.searchParams.set('q', q);
  endpoint.searchParams.set('format', 'json');
  endpoint.searchParams.set('language', 'de');

  const body = await fetchText(endpoint.toString(), { timeoutMs: 15_000 });

  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    throw new Error('SearXNG did not return JSON');
  }

  // The payload is external input: narrow it instead of trusting the cast.
  // Anything that is not an object holding a `results` array means "no results".
  const rawResults =
    typeof parsed === 'object' && parsed !== null
      ? (parsed as SearxngResponse).results
      : undefined;
  const results: unknown[] = Array.isArray(rawResults) ? rawResults : [];

  const allowed = new Set(domains);
  const seen = new Set<string>();
  const hits: WebHit[] = [];

  for (const entry of results) {
    // Skip malformed entries rather than failing the whole search.
    if (typeof entry !== 'object' || entry === null) continue;
    const r = entry as SearxngResult;
    // `new URL` would coerce non-strings (e.g. a one-element array), so
    // require a real string before the URL is parsed or returned.
    if (typeof r.url !== 'string') continue;

    const host = hostnameFromUrl(r.url);
    // The `site:` filter is only a hint to the engines; enforce the whitelist here.
    if (!host || !allowed.has(host)) continue;

    if (seen.has(r.url)) continue;
    seen.add(r.url);

    hits.push({
      url: r.url,
      title: r.title,
      domain: host,
      snippet: r.content ?? null,
      thumbnail: r.thumbnail ?? r.img_src ?? null
    });
    if (hits.length >= limit) break;
  }

  return hits;
}
|
||||
Reference in New Issue
Block a user