feat(search): add SearXNG client with whitelist-filtered web search
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
78
src/lib/server/search/searxng.ts
Normal file
78
src/lib/server/search/searxng.ts
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
import type Database from 'better-sqlite3';
|
||||||
|
import { listDomains, normalizeDomain } from '../domains/repository';
|
||||||
|
import { fetchText } from '../http';
|
||||||
|
|
||||||
|
/**
 * A single web search result after whitelist filtering, as returned by
 * `searchWeb`.
 */
export type WebHit = {
  /** Result URL exactly as reported by SearXNG. */
  url: string;
  /** Result title as reported by SearXNG. */
  title: string;
  /** Normalised hostname of `url`; this is the value matched against the whitelist. */
  domain: string;
  /** Text excerpt (SearXNG `content`), or `null` when the result carries none. */
  snippet: string | null;
  /** Preview image URL (SearXNG `thumbnail`, else `img_src`), or `null` when absent. */
  thumbnail: string | null;
};
|
||||||
|
|
||||||
|
/**
 * The subset of a SearXNG JSON result entry that this module reads.
 * Only `url` and `title` are treated as always present.
 */
type SearxngResult = {
  url: string;
  title: string;
  /** Text excerpt; becomes `WebHit.snippet`. */
  content?: string;
  /** Preferred preview image; becomes `WebHit.thumbnail`. */
  thumbnail?: string;
  /** Fallback preview image, used when `thumbnail` is missing. */
  img_src?: string;
};

/** Top-level shape of the SearXNG `format=json` response, as far as it is used here. */
type SearxngResponse = {
  /** Result entries; a missing list is treated as "no results". */
  results?: SearxngResult[];
};
|
||||||
|
|
||||||
|
/**
 * Returns the normalised hostname of an http(s) URL, or `null` when the input
 * cannot be used for whitelist matching.
 *
 * Only `http:` and `https:` URLs are accepted. Other schemes can still carry a
 * host component (e.g. `javascript://chefkoch.de/...` or `ftp://chefkoch.de/`),
 * so without this check such a URL would match a whitelisted domain and be
 * handed to the client as a clickable hit.
 *
 * @param url - Candidate URL taken from an (untrusted) search result.
 * @returns The hostname passed through `normalizeDomain`, or `null` if the URL
 *   is unparseable, uses a non-web scheme, or normalisation throws.
 */
function hostnameFromUrl(url: string): string | null {
  try {
    const parsed = new URL(url);
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null;
    return normalizeDomain(parsed.hostname);
  } catch {
    // `new URL` throws on malformed input; treat that as "no usable host".
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Parses a SearXNG response body and returns its raw result entries.
 *
 * The entries are returned as `unknown` because the body comes from an
 * external service and has not been validated yet.
 *
 * @throws Error if the body is not JSON, is not a JSON object, or carries a
 *   `results` value that is not an array.
 */
function parseSearxngResults(body: string): unknown[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    throw new Error('SearXNG did not return JSON');
  }
  if (typeof parsed !== 'object' || parsed === null) {
    throw new Error('SearXNG returned an unexpected response shape');
  }
  const { results } = parsed as Partial<Record<keyof SearxngResponse, unknown>>;
  // A missing result list simply means "nothing found".
  if (results == null) return [];
  if (!Array.isArray(results)) {
    throw new Error('SearXNG returned an unexpected response shape');
  }
  return results;
}

/**
 * Runs a web search through SearXNG, restricted to the whitelisted domains.
 *
 * The query is sent with a `site:a OR site:b ...` filter built from the
 * whitelist. Because the engine may ignore that filter, every result is
 * checked against the whitelist again before it is returned. Results are
 * de-duplicated by exact URL; the first occurrence wins.
 *
 * @param db - Database the whitelist is read from (via `listDomains`).
 * @param query - User query; surrounding whitespace is ignored.
 * @param opts.searxngUrl - Base URL of the SearXNG instance. Defaults to the
 *   `SEARXNG_URL` environment variable, then `http://localhost:8888`.
 * @param opts.limit - Maximum number of hits to return (default 20). A value
 *   that is not greater than zero yields an empty list.
 * @returns Filtered hits in SearXNG's order. Empty when the query is blank,
 *   the limit is not positive, or no domains are whitelisted; in those cases
 *   no request is made.
 * @throws Error if the response is not JSON or does not have the expected
 *   shape. Errors raised by `fetchText` propagate unchanged.
 */
export async function searchWeb(
  db: Database.Database,
  query: string,
  opts: { searxngUrl?: string; limit?: number } = {}
): Promise<WebHit[]> {
  const trimmed = query.trim();
  if (!trimmed) return [];

  const limit = opts.limit ?? 20;
  // The cap below is only evaluated after a hit has been pushed, so a
  // non-positive (or NaN) limit has to be rejected up front.
  if (!(limit > 0)) return [];

  const domains = listDomains(db).map((d) => d.domain);
  if (domains.length === 0) return [];

  const searxngUrl = opts.searxngUrl ?? process.env.SEARXNG_URL ?? 'http://localhost:8888';
  const siteFilter = domains.map((d) => `site:${d}`).join(' OR ');
  const endpoint = new URL('/search', searxngUrl);
  endpoint.searchParams.set('q', `${trimmed} (${siteFilter})`);
  endpoint.searchParams.set('format', 'json');
  endpoint.searchParams.set('language', 'de');

  const body = await fetchText(endpoint.toString(), { timeoutMs: 15_000 });
  const results = parseSearxngResults(body);

  const allowed = new Set(domains);
  const seen = new Set<string>();
  const hits: WebHit[] = [];
  for (const entry of results) {
    // Entries are unvalidated external data: skip anything that is not an
    // object with a string URL instead of failing the whole search.
    if (typeof entry !== 'object' || entry === null) continue;
    const { url, title, content, thumbnail, img_src: imgSrc } = entry as Record<string, unknown>;
    if (typeof url !== 'string') continue;

    const host = hostnameFromUrl(url);
    if (!host || !allowed.has(host)) continue;
    if (seen.has(url)) continue;
    seen.add(url);

    hits.push({
      url,
      // Fall back to the URL so `title` is always a string, as WebHit promises.
      title: typeof title === 'string' ? title : url,
      domain: host,
      snippet: typeof content === 'string' ? content : null,
      thumbnail:
        typeof thumbnail === 'string' ? thumbnail : typeof imgSrc === 'string' ? imgSrc : null
    });
    if (hits.length >= limit) break;
  }
  return hits;
}
|
||||||
15
src/routes/api/recipes/search/web/+server.ts
Normal file
15
src/routes/api/recipes/search/web/+server.ts
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import type { RequestHandler } from './$types';
|
||||||
|
import { json, error } from '@sveltejs/kit';
|
||||||
|
import { getDb } from '$lib/server/db';
|
||||||
|
import { searchWeb } from '$lib/server/search/searxng';
|
||||||
|
|
||||||
|
/**
 * `GET ?q=<query>`: whitelist-filtered web search.
 *
 * Responds with `{ query, hits }` as JSON. A missing or blank `q` produces a
 * 400; any failure inside `searchWeb` is reported as a 502 carrying the
 * underlying error message.
 */
export const GET: RequestHandler = async ({ url }) => {
  const q = url.searchParams.get('q')?.trim() ?? '';
  // `error()` is relied on to throw here, so execution stops at this line.
  if (!q) error(400, { message: 'Missing ?q=' });
  try {
    const hits = await searchWeb(getDb(), q);
    return json({ query: q, hits });
  } catch (e) {
    // Anything can be thrown, so narrow before reading `.message`.
    const reason = e instanceof Error ? e.message : String(e);
    error(502, { message: `Web search unavailable: ${reason}` });
  }
};
|
||||||
74
tests/integration/searxng.test.ts
Normal file
74
tests/integration/searxng.test.ts
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||||
|
import { createServer, type Server } from 'node:http';
|
||||||
|
import type { AddressInfo } from 'node:net';
|
||||||
|
import { openInMemoryForTest } from '../../src/lib/server/db';
|
||||||
|
import { addDomain } from '../../src/lib/server/domains/repository';
|
||||||
|
import { searchWeb } from '../../src/lib/server/search/searxng';
|
||||||
|
|
||||||
|
// Stub SearXNG instance; a fresh one is created for every test in beforeEach.
let server: Server;
// Base URL of the stub server, passed to searchWeb as `searxngUrl`.
let baseUrl: string;

/**
 * Makes the stub server answer every request with status 200 and the JSON
 * body `{ results }`.
 *
 * Any responder installed by an earlier call is replaced. Stacking a second
 * `request` listener would make it write to a response that has already been
 * ended.
 *
 * @param results - Entries to return as the SearXNG `results` array.
 */
function respondWith(results: Record<string, unknown>[]) {
  server.removeAllListeners('request');
  server.on('request', (_req, res) => {
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results }));
  });
}
|
||||||
|
|
||||||
|
// Start a fresh stub server on an OS-assigned loopback port before each test.
beforeEach(async () => {
  server = createServer();
  await new Promise<void>((resolve, reject) => {
    // Without this, a failed listen() would leave the promise pending until
    // the test runner's hook timeout.
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.off('error', reject);
      resolve();
    });
  });
  const { port } = server.address() as AddressInfo;
  baseUrl = `http://127.0.0.1:${port}`;
});
|
||||||
|
|
||||||
|
// Shut the stub server down after each test so ports and sockets do not leak.
afterEach(async () => {
  await new Promise<void>((resolve) => {
    server.close(() => resolve());
    // close() only resolves once open connections have ended; keep-alive
    // sockets left behind by the HTTP client can delay that, so drop them.
    server.closeAllConnections();
  });
});
|
||||||
|
|
||||||
|
// Integration tests: searchWeb against the local stub server and an in-memory DB.
describe('searchWeb', () => {
  it('filters results by whitelist', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    respondWith([
      {
        url: 'https://www.chefkoch.de/rezepte/123/a.html',
        title: 'Carbonara',
        content: 'Pasta'
      },
      {
        url: 'https://fake.de/x',
        title: 'Not allowed',
        content: 'blocked'
      }
    ]);
    const hits = await searchWeb(db, 'carbonara', { searxngUrl: baseUrl });
    expect(hits.length).toBe(1);
    expect(hits[0].domain).toBe('chefkoch.de');
    expect(hits[0].title).toBe('Carbonara');
  });

  it('dedupes identical URLs', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    respondWith([
      { url: 'https://www.chefkoch.de/a', title: 'A', content: '' },
      { url: 'https://www.chefkoch.de/a', title: 'A dup', content: '' }
    ]);
    const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl });
    expect(hits.length).toBe(1);
    // The first occurrence of a URL is the one that is kept.
    expect(hits[0].title).toBe('A');
  });

  it('stops after the requested limit', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    respondWith([
      { url: 'https://www.chefkoch.de/a', title: 'A', content: '' },
      { url: 'https://www.chefkoch.de/b', title: 'B', content: '' }
    ]);
    const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl, limit: 1 });
    expect(hits.length).toBe(1);
    expect(hits[0].title).toBe('A');
  });

  it('returns empty list when no domains configured', async () => {
    // No responder is installed: searchWeb must return before issuing a request.
    const db = openInMemoryForTest();
    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
    expect(hits).toEqual([]);
  });

  it('returns empty for empty query', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl });
    expect(hits).toEqual([]);
  });
});
|
||||||
Reference in New Issue
Block a user