/**
 * Tests for `searchWeb`, run against throwaway local HTTP servers.
 *
 * A fresh stub server (`server` / `baseUrl`) is started before every test and
 * passed to `searchWeb` as `searxngUrl`. Tests that exercise page enrichment
 * start a second, short-lived "page server" that serves the HTML of the
 * result pages.
 */
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { createServer, type RequestListener, type Server } from 'node:http';
import type { AddressInfo } from 'node:net';
import { openInMemoryForTest } from '../../src/lib/server/db';
import { addDomain } from '../../src/lib/server/domains/repository';
import { searchWeb } from '../../src/lib/server/search/searxng';

/** Stub SearXNG server; recreated for every test in `beforeEach`. */
let server: Server;
/** Origin (`http://127.0.0.1:<port>`) of the stub server for the running test. */
let baseUrl: string;

// NOTE(review): the HTML inside the fixture strings of this file (JSON-LD
// <script>, og:image <meta>, <img>) was missing and has been reconstructed
// from the test names and the asserted values — confirm the markup matches
// what the extractor in src/lib/server/search expects.

// Minimal Recipe-JSON-LD stub so enrichAndFilterHits doesn't drop test hits
// as non-recipe pages. Used in tests that focus on thumbnail extraction.
const RECIPE_LD = `<script type="application/ld+json">${JSON.stringify({
  '@context': 'https://schema.org',
  '@type': 'Recipe',
  name: 'Test'
})}</script>`;

/** Resolves once `srv` is listening on an ephemeral port on 127.0.0.1. */
function listenOnLoopback(srv: Server): Promise<void> {
  return new Promise<void>((resolve) => srv.listen(0, '127.0.0.1', resolve));
}

/** Resolves once `srv` has been closed. */
function closeServer(srv: Server): Promise<void> {
  return new Promise<void>((resolve) => srv.close(() => resolve()));
}

/**
 * Makes the stub server answer every request with `{ results }` as JSON.
 *
 * @param results Raw result objects to return in the `results` array.
 */
function respondWith(results: Record<string, unknown>[]) {
  server.on('request', (_req, res) => {
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results }));
  });
}

/**
 * Makes the stub server answer every request with an empty result list while
 * recording one query-string parameter of the incoming request.
 *
 * @param param   Name of the search parameter to record.
 * @param initial Value reported when no request arrived; pass a sentinel to
 *                tell "parameter absent" (`null`) apart from "never called".
 * @returns A holder whose `value` is the parameter from the latest request.
 */
function recordSearchParam(param: string, initial: string | null): { value: string | null } {
  const seen = { value: initial };
  server.on('request', (req, res) => {
    const requestUrl = new URL(req.url ?? '/', 'http://localhost');
    seen.value = requestUrl.searchParams.get(param);
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results: [] }));
  });
  return seen;
}

/**
 * Starts a temporary page server on 127.0.0.1, runs `run` with its origin and
 * always closes the server afterwards, even when `run` throws.
 *
 * @param handler Request handler serving the result pages.
 * @param run     Test body; receives the origin `http://127.0.0.1:<port>`.
 */
async function withPageServer(
  handler: RequestListener,
  run: (origin: string) => Promise<void>
): Promise<void> {
  const pageServer = createServer(handler);
  await listenOnLoopback(pageServer);
  const { port } = pageServer.address() as AddressInfo;
  try {
    await run(`http://127.0.0.1:${port}`);
  } finally {
    await closeServer(pageServer);
  }
}

/** Request handler that serves the same HTML document for every URL. */
function serveHtml(html: string): RequestListener {
  return (_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(html);
  };
}

beforeEach(async () => {
  server = createServer();
  await listenOnLoopback(server);
  const addr = server.address() as AddressInfo;
  baseUrl = `http://127.0.0.1:${addr.port}`;
});

afterEach(async () => {
  await closeServer(server);
});

describe('searchWeb', () => {
  it('filters results by whitelist', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    respondWith([
      { url: 'https://www.chefkoch.de/rezepte/123/a.html', title: 'Carbonara', content: 'Pasta' },
      { url: 'https://fake.de/x', title: 'Not allowed', content: 'blocked' }
    ]);
    const hits = await searchWeb(db, 'carbonara', { searxngUrl: baseUrl, enrichThumbnails: false });
    expect(hits.length).toBe(1);
    expect(hits[0].domain).toBe('chefkoch.de');
    expect(hits[0].title).toBe('Carbonara');
  });

  it('dedupes identical URLs', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    respondWith([
      { url: 'https://www.chefkoch.de/a', title: 'A', content: '' },
      { url: 'https://www.chefkoch.de/a', title: 'A dup', content: '' }
    ]);
    const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl, enrichThumbnails: false });
    expect(hits.length).toBe(1);
  });

  it('returns empty list when no domains configured', async () => {
    const db = openInMemoryForTest();
    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl, enrichThumbnails: false });
    expect(hits).toEqual([]);
  });

  it('returns empty for empty query', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl, enrichThumbnails: false });
    expect(hits).toEqual([]);
  });

  it('domain filter restricts site:-query to supplied subset', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    addDomain(db, 'rezeptwelt.de');
    const receivedQ = recordSearchParam('q', null);
    await searchWeb(db, 'apfel', {
      searxngUrl: baseUrl,
      enrichThumbnails: false,
      domains: ['rezeptwelt.de']
    });
    expect(receivedQ.value).toMatch(/site:rezeptwelt\.de/);
    expect(receivedQ.value).not.toMatch(/site:chefkoch\.de/);
  });

  it('ignores domain filter entries that are not in whitelist', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    const receivedQ = recordSearchParam('q', null);
    // Only "evil.com" requested — not in whitelist → fall back to full whitelist.
    await searchWeb(db, 'x', {
      searxngUrl: baseUrl,
      enrichThumbnails: false,
      domains: ['evil.com']
    });
    expect(receivedQ.value).toMatch(/site:chefkoch\.de/);
    expect(receivedQ.value).not.toMatch(/site:evil\.com/);
  });

  it('passes pageno to SearXNG when > 1', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    const receivedPageno = recordSearchParam('pageno', 'not set');
    await searchWeb(db, 'x', { searxngUrl: baseUrl, enrichThumbnails: false, pageno: 3 });
    expect(receivedPageno.value).toBe('3');
  });

  it('omits pageno param when 1', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    // Sentinel start value: `null` afterwards proves a request arrived without pageno.
    const receivedPageno = recordSearchParam('pageno', 'not set');
    await searchWeb(db, 'x', { searxngUrl: baseUrl, enrichThumbnails: false });
    expect(receivedPageno.value).toBe(null);
  });

  it('drops hits whose page lacks a Recipe JSON-LD', async () => {
    const handler: RequestListener = (req, res) => {
      res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
      if (req.url === '/with-recipe') {
        res.end(`<html><head>${RECIPE_LD}</head><body><h1>Recipe</h1></body></html>`);
      } else {
        // forum page: no Recipe JSON-LD
        res.end('<html><body><h1>Forum</h1><p>Diskussion</p></body></html>');
      }
    };
    await withPageServer(handler, async (origin) => {
      const db = openInMemoryForTest();
      addDomain(db, '127.0.0.1');
      respondWith([
        { url: `${origin}/with-recipe`, title: 'Recipe', content: '' },
        { url: `${origin}/forum-thread`, title: 'Forum', content: '' }
      ]);
      const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
      expect(hits.length).toBe(1);
      expect(hits[0].url.endsWith('/with-recipe')).toBe(true);
    });
  });

  it('keeps hit when page fetch fails (unknown recipe status)', async () => {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    // URL points to a port nobody listens on → fetch fails
    respondWith([{ url: 'http://127.0.0.1:1/unreachable', title: 'Unreachable', content: '' }]);
    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
    expect(hits.length).toBe(1);
  });

  it('enriches missing thumbnails from og:image', async () => {
    const page = serveHtml(
      `<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" />${RECIPE_LD}</head><body></body></html>`
    );
    await withPageServer(page, async (origin) => {
      const pageUrl = `${origin}/rezept`;
      const db = openInMemoryForTest();
      addDomain(db, '127.0.0.1');
      respondWith([{ url: pageUrl, title: 'Kuchen', content: '' }]);
      const hits = await searchWeb(db, 'kuchen', { searxngUrl: baseUrl });
      expect(hits.length).toBe(1);
      expect(hits[0].thumbnail).toBe('https://cdn.example/foo.jpg');
    });
  });

  it('falls back to JSON-LD image when no og:image', async () => {
    // The Recipe node itself carries the image; there is no og:image meta tag.
    const recipeWithImage = JSON.stringify({
      '@context': 'https://schema.org',
      '@type': 'Recipe',
      name: 'Pie',
      image: 'https://cdn.example/pie.jpg'
    });
    const page = serveHtml(
      `<html><head><script type="application/ld+json">${recipeWithImage}</script></head><body></body></html>`
    );
    await withPageServer(page, async (origin) => {
      const pageUrl = `${origin}/pie`;
      const db = openInMemoryForTest();
      addDomain(db, '127.0.0.1');
      respondWith([{ url: pageUrl, title: 'Pie', content: '' }]);
      const hits = await searchWeb(db, 'pie', { searxngUrl: baseUrl });
      expect(hits[0].thumbnail).toBe('https://cdn.example/pie.jpg');
    });
  });

  it('falls back to first content image when no meta/JSON-LD image', async () => {
    // Root-relative src: the assertion expects it resolved against the page origin.
    const page = serveHtml(
      `<html><head>${RECIPE_LD}</head><body><article><img src="/uploads/dish.jpg" alt="Dish" /></article></body></html>`
    );
    await withPageServer(page, async (origin) => {
      const pageUrl = `${origin}/article`;
      const db = openInMemoryForTest();
      addDomain(db, '127.0.0.1');
      respondWith([{ url: pageUrl, title: 'Dish', content: '' }]);
      const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });
      expect(hits[0].thumbnail).toBe(`${origin}/uploads/dish.jpg`);
    });
  });

  it('upgrades low-res SearXNG thumbnail with HQ og:image from page', async () => {
    const page = serveHtml(
      `<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" />${RECIPE_LD}</head><body></body></html>`
    );
    await withPageServer(page, async (origin) => {
      const pageUrl = `${origin}/dish`;
      const db = openInMemoryForTest();
      addDomain(db, '127.0.0.1');
      respondWith([
        { url: pageUrl, title: 'Dish', thumbnail: 'https://searxng-cdn/small-thumb.jpg' }
      ]);
      const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });
      expect(hits[0].thumbnail).toBe('https://cdn.example/hq.jpg');
    });
  });

  it('keeps SearXNG thumbnail when page has no image', async () => {
    const page = serveHtml(
      `<html><head>${RECIPE_LD}</head><body><p>no images here</p></body></html>`
    );
    await withPageServer(page, async (origin) => {
      const pageUrl = `${origin}/noimg`;
      const db = openInMemoryForTest();
      addDomain(db, '127.0.0.1');
      respondWith([{ url: pageUrl, title: 'X', thumbnail: 'https://searxng-cdn/fallback.jpg' }]);
      const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
      expect(hits[0].thumbnail).toBe('https://searxng-cdn/fallback.jpg');
    });
  });

  it('SQLite cache: second search does not re-fetch the page', async () => {
    let pageHits = 0;
    const handler: RequestListener = (_req, res) => {
      pageHits += 1;
      res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
      res.end(
        `<html><head><meta property="og:image" content="https://cdn.example/c.jpg" />${RECIPE_LD}</head><body></body></html>`
      );
    };
    await withPageServer(handler, async (origin) => {
      const pageUrl = `${origin}/cached`;
      const db = openInMemoryForTest();
      addDomain(db, '127.0.0.1');
      respondWith([{ url: pageUrl, title: 'C', content: '' }]);
      const first = await searchWeb(db, 'c', { searxngUrl: baseUrl });
      const second = await searchWeb(db, 'c', { searxngUrl: baseUrl });
      expect(first[0].thumbnail).toBe('https://cdn.example/c.jpg');
      expect(second[0].thumbnail).toBe('https://cdn.example/c.jpg');
      expect(pageHits).toBe(1); // second call read from SQLite cache
      const row = db
        .prepare('SELECT image FROM thumbnail_cache WHERE url = ?')
        .get(pageUrl) as { image: string };
      expect(row.image).toBe('https://cdn.example/c.jpg');
    });
  });

  it('filters out forum/magazine/listing URLs', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    respondWith([
      { url: 'https://www.chefkoch.de/rezepte/123/Ravioli.html', title: 'Ravioli' },
      { url: 'https://www.chefkoch.de/forum/2,17,89865/ravioli.html', title: 'Forum Ravioli' },
      { url: 'https://www.chefkoch.de/magazin/artikel/x.html', title: 'Magazin' },
      { url: 'https://www.chefkoch.de/suche/ravioli', title: 'Suche' },
      { url: 'https://www.chefkoch.de/themen/ravioli/', title: 'Themen' },
      { url: 'https://www.chefkoch.de/rezepte/', title: 'Rezepte Übersicht' }
    ]);
    const hits = await searchWeb(db, 'ravioli', { searxngUrl: baseUrl, enrichThumbnails: false });
    expect(hits.length).toBe(1);
    expect(hits[0].title).toBe('Ravioli');
  });
});