diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts
new file mode 100644
index 0000000..fb412c2
--- /dev/null
+++ b/src/lib/server/search/searxng.ts
@@ -0,0 +1,131 @@
+import type Database from 'better-sqlite3';
+import { listDomains, normalizeDomain } from '../domains/repository';
+import { fetchText } from '../http';
+
+/** A whitelisted web search result in the shape returned to API clients. */
+export type WebHit = {
+  url: string;
+  title: string;
+  domain: string;
+  snippet: string | null;
+  thumbnail: string | null;
+};
+
+/**
+ * One entry of the SearXNG `results` array. Only `url` is checked before an
+ * entry is used; every other field is untrusted JSON and narrowed on access.
+ */
+type SearxngResult = {
+  url: string;
+  title?: unknown;
+  content?: unknown;
+  thumbnail?: unknown;
+  img_src?: unknown;
+};
+
+/** Top-level JSON body of a SearXNG `format=json` response, before validation. */
+type SearxngResponse = {
+  results?: unknown;
+};
+
+const DEFAULT_SEARXNG_URL = 'http://localhost:8888';
+const DEFAULT_LIMIT = 20;
+
+/** Returns the hostname of `url` passed through `normalizeDomain`, or `null` if `url` cannot be parsed. */
+function hostnameFromUrl(url: string): string | null {
+  try {
+    return normalizeDomain(new URL(url).hostname);
+  } catch {
+    return null;
+  }
+}
+
+/** Narrows an arbitrary JSON value to a result entry that carries a string `url`. */
+function isSearxngResult(value: unknown): value is SearxngResult {
+  return (
+    typeof value === 'object' &&
+    value !== null &&
+    typeof (value as { url?: unknown }).url === 'string'
+  );
+}
+
+/** Returns `value` when it is a string, otherwise `null`. */
+function stringOrNull(value: unknown): string | null {
+  return typeof value === 'string' ? value : null;
+}
+
+/**
+ * Queries SearXNG for `query`, restricted to the domains stored in `db`.
+ *
+ * The domain list is applied twice: as `site:` filters in the outgoing query
+ * and again on the returned URLs, so hits from other hosts are dropped
+ * regardless of what the search backend returns.
+ *
+ * @param db - Database handle passed to `listDomains`.
+ * @param query - User search text; a blank query yields no hits.
+ * @param opts.searxngUrl - Base URL of the SearXNG instance. Falls back to the
+ *   `SEARXNG_URL` environment variable, then to `http://localhost:8888`.
+ * @param opts.limit - Maximum number of hits to return (default 20). Zero or
+ *   a negative value yields no hits and skips the request.
+ * @returns Hits in response order, de-duplicated by URL.
+ * @throws Error if the response body is not valid JSON or parses to a
+ *   non-object value. Errors raised by `fetchText` propagate unchanged.
+ */
+export async function searchWeb(
+  db: Database.Database,
+  query: string,
+  opts: { searxngUrl?: string; limit?: number } = {}
+): Promise<WebHit[]> {
+  const trimmed = query.trim();
+  if (!trimmed) return [];
+  const domains = listDomains(db).map((d) => d.domain);
+  if (domains.length === 0) return [];
+
+  const limit = opts.limit ?? DEFAULT_LIMIT;
+  if (limit <= 0) return [];
+
+  // `||` rather than `??`: an empty SEARXNG_URL should fall through to the
+  // default instead of making `new URL` throw.
+  const searxngUrl = opts.searxngUrl || process.env.SEARXNG_URL || DEFAULT_SEARXNG_URL;
+  const siteFilter = domains.map((d) => `site:${d}`).join(' OR ');
+  const endpoint = new URL('/search', searxngUrl);
+  endpoint.searchParams.set('q', `${trimmed} (${siteFilter})`);
+  endpoint.searchParams.set('format', 'json');
+  endpoint.searchParams.set('language', 'de');
+
+  const body = await fetchText(endpoint.toString(), { timeoutMs: 15_000 });
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(body);
+  } catch {
+    throw new Error('SearXNG did not return JSON');
+  }
+  if (typeof parsed !== 'object' || parsed === null) {
+    throw new Error('SearXNG returned an unexpected response');
+  }
+  const { results } = parsed as SearxngResponse;
+  if (!Array.isArray(results)) return [];
+
+  const allowed = new Set(domains);
+  const seen = new Set<string>();
+  const hits: WebHit[] = [];
+  for (const r of results as unknown[]) {
+    // Checked before pushing so the cap holds for every positive limit.
+    if (hits.length >= limit) break;
+    if (!isSearxngResult(r)) continue;
+    const host = hostnameFromUrl(r.url);
+    if (!host || !allowed.has(host)) continue;
+    if (seen.has(r.url)) continue;
+    seen.add(r.url);
+    hits.push({
+      url: r.url,
+      // A result without a usable title is still returned, labelled by its URL.
+      title: stringOrNull(r.title) || r.url,
+      domain: host,
+      snippet: stringOrNull(r.content),
+      // `||` so an empty `thumbnail` string falls through to `img_src`.
+      thumbnail: stringOrNull(r.thumbnail) || stringOrNull(r.img_src) || null
+    });
+  }
+  return hits;
+}
diff --git a/src/routes/api/recipes/search/web/+server.ts b/src/routes/api/recipes/search/web/+server.ts
new file mode 100644
index 0000000..f0ba7b7
--- /dev/null
+++ b/src/routes/api/recipes/search/web/+server.ts
@@ -0,0 +1,24 @@
+import type { RequestHandler } from './$types';
+import { json, error } from '@sveltejs/kit';
+import { getDb } from '$lib/server/db';
+import { searchWeb } from '$lib/server/search/searxng';
+
+/**
+ * `GET /api/recipes/search/web?q=...` — whitelisted web search.
+ *
+ * Responds with `{ query, hits }`, with 400 when `q` is missing or blank, and
+ * with 502 when the lookup throws.
+ */
+export const GET: RequestHandler = async ({ url }) => {
+  const q = url.searchParams.get('q')?.trim() ?? '';
+  if (!q) error(400, { message: 'Missing ?q=' });
+  try {
+    const hits = await searchWeb(getDb(), q);
+    return json({ query: q, hits });
+  } catch (e) {
+    // `e` is `unknown`; narrow instead of asserting so non-Error throws still
+    // produce a readable message.
+    const reason = e instanceof Error ? e.message : String(e);
+    error(502, { message: `Web search unavailable: ${reason}` });
+  }
+};
diff --git a/tests/integration/searxng.test.ts b/tests/integration/searxng.test.ts
new file mode 100644
index 0000000..5dd9ba3
--- /dev/null
+++ b/tests/integration/searxng.test.ts
@@ -0,0 +1,106 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { createServer, type Server } from 'node:http';
+import type { AddressInfo } from 'node:net';
+import { openInMemoryForTest } from '../../src/lib/server/db';
+import { addDomain } from '../../src/lib/server/domains/repository';
+import { searchWeb } from '../../src/lib/server/search/searxng';
+
+let server: Server;
+let baseUrl: string;
+
+/** Registers a handler on the stub server that answers every request with `{ results }` as JSON. */
+function respondWith(results: Record<string, unknown>[]) {
+  server.on('request', (_req, res) => {
+    res.writeHead(200, { 'content-type': 'application/json' });
+    res.end(JSON.stringify({ results }));
+  });
+}
+
+beforeEach(async () => {
+  server = createServer();
+  await new Promise<void>((r) => server.listen(0, '127.0.0.1', r));
+  const addr = server.address() as AddressInfo;
+  baseUrl = `http://127.0.0.1:${addr.port}`;
+});
+
+afterEach(async () => {
+  await new Promise<void>((r) => server.close(() => r()));
+});
+
+describe('searchWeb', () => {
+  it('filters results by whitelist', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, 'chefkoch.de');
+    respondWith([
+      {
+        url: 'https://www.chefkoch.de/rezepte/123/a.html',
+        title: 'Carbonara',
+        content: 'Pasta'
+      },
+      {
+        url: 'https://fake.de/x',
+        title: 'Not allowed',
+        content: 'blocked'
+      }
+    ]);
+    const hits = await searchWeb(db, 'carbonara', { searxngUrl: baseUrl });
+    expect(hits.length).toBe(1);
+    expect(hits[0].domain).toBe('chefkoch.de');
+    expect(hits[0].title).toBe('Carbonara');
+  });
+
+  it('dedupes identical URLs', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, 'chefkoch.de');
+    respondWith([
+      { url: 'https://www.chefkoch.de/a', title: 'A', content: '' },
+      { url: 'https://www.chefkoch.de/a', title: 'A dup', content: '' }
+    ]);
+    const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl });
+    expect(hits.length).toBe(1);
+  });
+
+  it('returns empty list when no domains configured', async () => {
+    const db = openInMemoryForTest();
+    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
+    expect(hits).toEqual([]);
+  });
+
+  it('returns empty for empty query', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, 'chefkoch.de');
+    const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl });
+    expect(hits).toEqual([]);
+  });
+
+  it('honours the limit, including zero', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, 'chefkoch.de');
+    respondWith([
+      { url: 'https://www.chefkoch.de/a', title: 'A', content: '' },
+      { url: 'https://www.chefkoch.de/b', title: 'B', content: '' }
+    ]);
+    const one = await searchWeb(db, 'a', { searxngUrl: baseUrl, limit: 1 });
+    expect(one.length).toBe(1);
+    const none = await searchWeb(db, 'a', { searxngUrl: baseUrl, limit: 0 });
+    expect(none).toEqual([]);
+  });
+
+  it('skips malformed entries and falls back to img_src', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, 'chefkoch.de');
+    respondWith([
+      { title: 'No url' },
+      {
+        url: 'https://www.chefkoch.de/a',
+        title: 'A',
+        thumbnail: '',
+        img_src: 'https://img.chefkoch.de/a.jpg'
+      }
+    ]);
+    const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl });
+    expect(hits.length).toBe(1);
+    expect(hits[0].thumbnail).toBe('https://img.chefkoch.de/a.jpg');
+    expect(hits[0].snippet).toBeNull();
+  });
+});