/*
 * Search results are filtered to drop common non-recipe paths such as /forum/,
 * /magazin/, /suche/ and /themen/, as well as Chefkoch's /rs/s\d+/ search URLs
 * and /Rezepte.html listing pages.
 *
 * Before: a search for 'ravioli' returned forum threads and listing pages, which
 * triggered 'No schema.org/Recipe JSON-LD' on preview.
 * After: only real recipe URLs pass through.
 *
 * Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
 *
 * File info: 91 lines, 3.1 KiB, TypeScript
 */
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
import { createServer, type Server } from 'node:http';
|
|
import type { AddressInfo } from 'node:net';
|
|
import { openInMemoryForTest } from '../../src/lib/server/db';
|
|
import { addDomain } from '../../src/lib/server/domains/repository';
|
|
import { searchWeb } from '../../src/lib/server/search/searxng';
|
|
|
|
/** HTTP server the tests point `searchWeb` at; assigned anew before every test. */
let server: Server;

/** Base URL (`http://127.0.0.1:<port>`) of `server`, set once it is listening. */
let baseUrl: string;
|
|
|
|
/**
 * Makes the test server answer every request with HTTP 200 and a JSON body of
 * the form `{ results }`.
 *
 * Any `request` handler registered earlier is removed first, so calling this
 * more than once within a test replaces the canned response instead of
 * stacking handlers that would each try to write to the same response.
 *
 * @param results - Entries to return under the `results` key of the JSON body.
 */
function respondWith(results: Record<string, unknown>[]): void {
  // Last call wins: without this, a second call would leave two handlers
  // attached and the second one would write to an already finished response.
  server.removeAllListeners('request');
  server.on('request', (_req, res) => {
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results }));
  });
}
|
|
|
|
// Start a fresh HTTP server for each test on an OS-assigned port (port 0) bound
// to the loopback interface, then record the URL it can be reached at.
beforeEach(async () => {
  server = createServer();
  await new Promise<void>((listening) => {
    server.listen(0, '127.0.0.1', listening);
  });
  // For a TCP listener address() is an AddressInfo object, hence the cast.
  const { port } = server.address() as AddressInfo;
  baseUrl = `http://127.0.0.1:${port}`;
});
|
|
|
|
// Shut the per-test server down and keep the hook pending until close() has
// invoked its callback.
afterEach(async () => {
  await new Promise<void>((closed) => {
    server.close(() => {
      closed();
    });
  });
});
|
|
|
|
// Tests for searchWeb. Each case uses its own in-memory database and passes
// `baseUrl` (the local test server) as the `searxngUrl` option.
describe('searchWeb', () => {
  it('filters results by whitelist', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    // The allowed result uses the `www.` host while the whitelist entry and the
    // expected `domain` of the hit are the bare `chefkoch.de`.
    respondWith([
      {
        url: 'https://www.chefkoch.de/rezepte/123/a.html',
        title: 'Carbonara',
        content: 'Pasta'
      },
      {
        url: 'https://fake.de/x',
        title: 'Not allowed',
        content: 'blocked'
      }
    ]);
    const hits = await searchWeb(db, 'carbonara', { searxngUrl: baseUrl });
    expect(hits.length).toBe(1);
    expect(hits[0].domain).toBe('chefkoch.de');
    expect(hits[0].title).toBe('Carbonara');
  });

  it('dedupes identical URLs', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    // Same URL twice with different titles: only one hit may remain.
    respondWith([
      { url: 'https://www.chefkoch.de/a', title: 'A', content: '' },
      { url: 'https://www.chefkoch.de/a', title: 'A dup', content: '' }
    ]);
    const hits = await searchWeb(db, 'a', { searxngUrl: baseUrl });
    expect(hits.length).toBe(1);
  });

  it('returns empty list when no domains configured', async () => {
    // No domain is added and no canned response is registered for this case.
    const db = openInMemoryForTest();
    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
    expect(hits).toEqual([]);
  });

  it('returns empty for empty query', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    // The query is a single space; no canned response is registered here either.
    const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl });
    expect(hits).toEqual([]);
  });

  it('filters out forum/magazine/listing URLs', async () => {
    const db = openInMemoryForTest();
    addDomain(db, 'chefkoch.de');
    // Every entry is on the whitelisted domain; only the first is expected to
    // survive, the rest are non-recipe pages.
    respondWith([
      { url: 'https://www.chefkoch.de/rezepte/123/Ravioli.html', title: 'Ravioli' },
      { url: 'https://www.chefkoch.de/forum/2,17,89865/ravioli.html', title: 'Forum Ravioli' },
      { url: 'https://www.chefkoch.de/magazin/artikel/x.html', title: 'Magazin' },
      { url: 'https://www.chefkoch.de/suche/ravioli', title: 'Suche' },
      { url: 'https://www.chefkoch.de/themen/ravioli/', title: 'Themen' },
      { url: 'https://www.chefkoch.de/rezepte/', title: 'Rezepte Übersicht' },
      // Chefkoch search-result listing: contains an /rs/s<digits>/ segment and
      // ends in /Rezepte.html, so it must be dropped as well.
      { url: 'https://www.chefkoch.de/rs/s0/ravioli/Rezepte.html', title: 'Ravioli Rezepte' }
    ]);
    const hits = await searchWeb(db, 'ravioli', { searxngUrl: baseUrl });
    expect(hits.length).toBe(1);
    expect(hits[0].title).toBe('Ravioli');
  });
});
|