2026-04-17 15:33:21 +02:00
|
|
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
|
|
|
import { createServer, type Server } from 'node:http';
|
|
|
|
|
import type { AddressInfo } from 'node:net';
|
|
|
|
|
import { openInMemoryForTest } from '../../src/lib/server/db';
|
|
|
|
|
import { addDomain } from '../../src/lib/server/domains/repository';
|
|
|
|
|
import { searchWeb } from '../../src/lib/server/search/searxng';
|
|
|
|
|
|
|
|
|
|
let server: Server;
|
|
|
|
|
let baseUrl: string;
|
|
|
|
|
|
|
|
|
|
/**
 * Registers a `request` listener on the shared stub server that answers every
 * incoming request with HTTP 200 and a JSON body of the form `{ results }`.
 *
 * @param results - Raw result objects to place under the `results` key.
 */
function respondWith(results: Record<string, unknown>[]) {
  const payload = { results };
  server.on('request', (_req, res) => {
    // Serialize per request, exactly as the payload stands at that moment.
    res.writeHead(200, { 'content-type': 'application/json' }).end(JSON.stringify(payload));
  });
}
|
|
|
|
|
|
|
|
|
|
// Start a fresh stub HTTP server on an ephemeral loopback port before every
// test and publish its base URL through `baseUrl`.
beforeEach(async () => {
  server = createServer();
  await new Promise<void>((resolve) => {
    // Port 0 lets the OS choose a free port.
    server.listen(0, '127.0.0.1', resolve);
  });
  const { port } = server.address() as AddressInfo;
  baseUrl = `http://127.0.0.1:${port}`;
});
|
|
|
|
|
|
|
|
|
|
// Shut the stub server down after every test and wait for the close callback.
afterEach(async () => {
  await new Promise<void>((resolve) => {
    server.close(() => resolve());
  });
});
|
|
|
|
|
|
|
|
|
|
describe('searchWeb', () => {
|
|
|
|
|
it('filters results by whitelist', async () => {
  // Only chefkoch.de is added as a domain; the stub answers with one hit on
  // that domain and one on a domain that was never added.
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');
  const allowedHit = {
    url: 'https://www.chefkoch.de/rezepte/123/a.html',
    title: 'Carbonara',
    content: 'Pasta'
  };
  const foreignHit = {
    url: 'https://fake.de/x',
    title: 'Not allowed',
    content: 'blocked'
  };
  respondWith([allowedHit, foreignHit]);

  const found = await searchWeb(database, 'carbonara', {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(found).toHaveLength(1);
  expect(found[0].domain).toBe('chefkoch.de');
  expect(found[0].title).toBe('Carbonara');
});
|
|
|
|
|
|
|
|
|
|
it('dedupes identical URLs', async () => {
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');
  // The stub returns the same URL twice, differing only in the title.
  const sharedUrl = 'https://www.chefkoch.de/a';
  respondWith([
    { url: sharedUrl, title: 'A', content: '' },
    { url: sharedUrl, title: 'A dup', content: '' }
  ]);

  const found = await searchWeb(database, 'a', {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(found).toHaveLength(1);
});
|
|
|
|
|
|
|
|
|
|
it('returns empty list when no domains configured', async () => {
  // No addDomain call and no stub handler registered for this test.
  const database = openInMemoryForTest();
  const options = { searxngUrl: baseUrl, enrichThumbnails: false };

  const found = await searchWeb(database, 'x', options);

  expect(found).toEqual([]);
});
|
|
|
|
|
|
|
|
|
|
it('returns empty for empty query', async () => {
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');
  // The query consists of whitespace only.
  const blankQuery = ' ';

  const found = await searchWeb(database, blankQuery, {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(found).toEqual([]);
});
|
2026-04-18 08:13:33 +02:00
|
|
|
|
|
|
|
|
it('domain filter restricts site:-query to supplied subset', async () => {
  const database = openInMemoryForTest();
  for (const domain of ['chefkoch.de', 'rezeptwelt.de']) {
    addDomain(database, domain);
  }

  // Record the `q` parameter of the request the stub server receives.
  let capturedQuery: string | null = null;
  server.on('request', (req, res) => {
    capturedQuery = new URL(req.url ?? '/', 'http://localhost').searchParams.get('q');
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results: [] }));
  });

  // Both domains are registered, but only one is requested via `domains`.
  const options = {
    searxngUrl: baseUrl,
    enrichThumbnails: false,
    domains: ['rezeptwelt.de']
  };
  await searchWeb(database, 'apfel', options);

  expect(capturedQuery).toMatch(/site:rezeptwelt\.de/);
  expect(capturedQuery).not.toMatch(/site:chefkoch\.de/);
});
|
|
|
|
|
|
|
|
|
|
it('ignores domain filter entries that are not in whitelist', async () => {
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');

  // Record the `q` parameter of the request the stub server receives.
  let capturedQuery: string | null = null;
  server.on('request', (req, res) => {
    capturedQuery = new URL(req.url ?? '/', 'http://localhost').searchParams.get('q');
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results: [] }));
  });

  // The only requested domain, "evil.com", is not whitelisted, so the query
  // is expected to fall back to the full whitelist.
  const options = {
    searxngUrl: baseUrl,
    enrichThumbnails: false,
    domains: ['evil.com']
  };
  await searchWeb(database, 'x', options);

  expect(capturedQuery).toMatch(/site:chefkoch\.de/);
  expect(capturedQuery).not.toMatch(/site:evil\.com/);
});
|
2026-04-17 15:47:28 +02:00
|
|
|
|
2026-04-17 21:58:47 +02:00
|
|
|
it('passes pageno to SearXNG when > 1', async () => {
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');

  // Starts as a sentinel so that "handler never ran" is distinguishable from
  // "parameter absent" (which yields null).
  let capturedPageno: string | null = 'not set';
  server.on('request', (req, res) => {
    capturedPageno = new URL(req.url ?? '/', 'http://localhost').searchParams.get('pageno');
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results: [] }));
  });

  await searchWeb(database, 'x', {
    searxngUrl: baseUrl,
    enrichThumbnails: false,
    pageno: 3
  });

  expect(capturedPageno).toBe('3');
});
|
|
|
|
|
|
|
|
|
|
it('omits pageno param when 1', async () => {
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');

  // Starts as a sentinel; a null after the call means the handler ran and the
  // request carried no `pageno` parameter.
  let capturedPageno: string | null = 'not set';
  server.on('request', (req, res) => {
    capturedPageno = new URL(req.url ?? '/', 'http://localhost').searchParams.get('pageno');
    res.writeHead(200, { 'content-type': 'application/json' });
    res.end(JSON.stringify({ results: [] }));
  });

  // NOTE(review): `pageno` is left unset here rather than passed as 1;
  // presumably the default is page 1 — confirm against searchWeb.
  await searchWeb(database, 'x', {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(capturedPageno).toBeNull();
});
|
|
|
|
|
|
2026-04-17 22:20:22 +02:00
|
|
|
it('drops hits whose page lacks a Recipe JSON-LD', async () => {
  // Page served for /with-recipe: carries a Recipe JSON-LD script.
  const recipePage = [
    '<html><head>',
    `<script type="application/ld+json">${JSON.stringify({
      '@type': 'Recipe',
      name: 'Pie',
      image: 'https://cdn.example/pie.jpg'
    })}</script>`,
    '</head></html>'
  ].join('\n');
  // Page served for every other path: a forum page without Recipe JSON-LD.
  const forumPage = '<html><head><title>Forum</title></head><body>Diskussion</body></html>';

  const pageServer = createServer((req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(req.url === '/with-recipe' ? recipePage : forumPage);
  });
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', resolve);
  });
  const { port } = pageServer.address() as AddressInfo;
  const origin = `http://127.0.0.1:${port}`;

  try {
    const database = openInMemoryForTest();
    addDomain(database, '127.0.0.1');
    respondWith([
      { url: `${origin}/with-recipe`, title: 'Recipe', content: '' },
      { url: `${origin}/forum-thread`, title: 'Forum', content: '' }
    ]);

    // enrichThumbnails is not disabled here, unlike the earlier tests.
    const found = await searchWeb(database, 'x', { searxngUrl: baseUrl });

    expect(found).toHaveLength(1);
    expect(found[0].url.endsWith('/with-recipe')).toBe(true);
  } finally {
    // Always release the page server, even when an assertion throws.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
|
|
|
|
|
|
|
|
|
|
it('keeps hit when page fetch fails (unknown recipe status)', async () => {
  const database = openInMemoryForTest();
  addDomain(database, '127.0.0.1');
  // The hit points at a port that nothing is expected to listen on, so
  // fetching the page fails.
  const unreachableHit = {
    url: 'http://127.0.0.1:1/unreachable',
    title: 'Unreachable',
    content: ''
  };
  respondWith([unreachableHit]);

  const found = await searchWeb(database, 'x', { searxngUrl: baseUrl });

  expect(found).toHaveLength(1);
});
|
|
|
|
|
|
|
|
|
|
// Bare-bones Recipe JSON-LD <script> tag. Tests that focus on thumbnail
// extraction embed it in their stub pages so that enrichAndFilterHits does not
// drop the hit as a non-recipe page.
const RECIPE_LD = [
  '<script type="application/ld+json">',
  JSON.stringify({ '@type': 'Recipe', name: 'stub' }),
  '</script>'
].join('');
|
|
|
|
|
|
2026-04-17 17:55:53 +02:00
|
|
|
it('enriches missing thumbnails from og:image', async () => {
  // Stub recipe page exposing an og:image meta tag plus the Recipe JSON-LD stub.
  const pageServer = createServer((_req, res) => {
    const html = `<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" />${RECIPE_LD}</head><body></body></html>`;
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(html);
  });
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', resolve);
  });
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/rezept`;

  try {
    const database = openInMemoryForTest();
    addDomain(database, '127.0.0.1');
    // The SearXNG stub hit carries no thumbnail of its own.
    respondWith([{ url: pageUrl, title: 'Kuchen', content: '' }]);

    const found = await searchWeb(database, 'kuchen', { searxngUrl: baseUrl });

    expect(found).toHaveLength(1);
    expect(found[0].thumbnail).toBe('https://cdn.example/foo.jpg');
  } finally {
    // Always release the page server, even when an assertion throws.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
|
|
|
|
|
|
2026-04-17 18:04:59 +02:00
|
|
|
it('falls back to JSON-LD image when no og:image', async () => {
  // Stub recipe page: no og:image meta tag, only a Recipe JSON-LD with `image`.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(`<html><head>
<script type="application/ld+json">${JSON.stringify({
      '@type': 'Recipe',
      name: 'Pie',
      image: 'https://cdn.example/pie.jpg'
    })}</script>
</head><body></body></html>`);
  });
  await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
  const addr = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${addr.port}/pie`;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'Pie', content: '' }]);
    const hits = await searchWeb(db, 'pie', { searxngUrl: baseUrl });
    // Guard the index access: an empty result should fail as an assertion,
    // not as a TypeError on `hits[0]`.
    expect(hits.length).toBe(1);
    expect(hits[0].thumbnail).toBe('https://cdn.example/pie.jpg');
  } finally {
    // Always release the page server, even when an assertion throws.
    await new Promise<void>((r) => pageServer.close(() => r()));
  }
});
|
|
|
|
|
|
|
|
|
|
it('falls back to first content image when no meta/JSON-LD image', async () => {
  // Stub recipe page: the Recipe JSON-LD stub has no image and there is no
  // og:image; the only image is a root-relative <img> inside <article>.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(
      `<html><head>${RECIPE_LD}</head><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>`
    );
  });
  await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
  const addr = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${addr.port}/article`;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'Dish', content: '' }]);
    const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });
    // Guard the index access: an empty result should fail as an assertion,
    // not as a TypeError on `hits[0]`.
    expect(hits.length).toBe(1);
    // The relative src is expected back as an absolute URL on the page's origin.
    expect(hits[0].thumbnail).toBe(`http://127.0.0.1:${addr.port}/uploads/dish.jpg`);
  } finally {
    // Always release the page server, even when an assertion throws.
    await new Promise<void>((r) => pageServer.close(() => r()));
  }
});
|
|
|
|
|
|
2026-04-17 18:31:42 +02:00
|
|
|
it('upgrades low-res SearXNG thumbnail with HQ og:image from page', async () => {
  // Stub recipe page exposing an og:image plus the Recipe JSON-LD stub.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(
      `<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" />${RECIPE_LD}</head></html>`
    );
  });
  await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
  const addr = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${addr.port}/dish`;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    // The SearXNG stub hit already carries its own thumbnail.
    respondWith([
      { url: pageUrl, title: 'Dish', thumbnail: 'https://searxng-cdn/small-thumb.jpg' }
    ]);
    const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });
    // Guard the index access: an empty result should fail as an assertion,
    // not as a TypeError on `hits[0]`.
    expect(hits.length).toBe(1);
    // The page's og:image is expected to replace the SearXNG thumbnail.
    expect(hits[0].thumbnail).toBe('https://cdn.example/hq.jpg');
  } finally {
    // Always release the page server, even when an assertion throws.
    await new Promise<void>((r) => pageServer.close(() => r()));
  }
});
|
|
|
|
|
|
|
|
|
|
it('keeps SearXNG thumbnail when page has no image', async () => {
  // Stub recipe page with the Recipe JSON-LD stub and no image of any kind.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(`<html><head>${RECIPE_LD}</head><body>no images here</body></html>`);
  });
  await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
  const addr = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${addr.port}/noimg`;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([
      { url: pageUrl, title: 'X', thumbnail: 'https://searxng-cdn/fallback.jpg' }
    ]);
    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });
    // Guard the index access: an empty result should fail as an assertion,
    // not as a TypeError on `hits[0]`.
    expect(hits.length).toBe(1);
    // With nothing better on the page, the SearXNG thumbnail must survive.
    expect(hits[0].thumbnail).toBe('https://searxng-cdn/fallback.jpg');
  } finally {
    // Always release the page server, even when an assertion throws.
    await new Promise<void>((r) => pageServer.close(() => r()));
  }
});
|
|
|
|
|
|
feat(search): persistenter Thumbnail-Cache in SQLite, Default-TTL 30 Tage
Vorher: In-Memory-Map, TTL 30 Minuten. Container-Neustart verwarf den
kompletten Cache, also musste nach jedem Deploy jede Suche wieder alle
Seiten laden.
Jetzt:
- Neue Tabelle thumbnail_cache (url PK, image, expires_at)
- Default-TTL 30 Tage, per Env KOCHWAS_THUMB_TTL_DAYS konfigurierbar
(7, 365, was der User will — is alles ok laut Nutzer)
- Negative Cache: Seiten ohne Bild werden mit image=NULL gespeichert,
damit wir nicht jede Suche die gleiche kaputte Seite wieder laden
- Lazy-Cleanup: pro searchWeb-Aufruf werden abgelaufene Zeilen via
DELETE ... WHERE expires_at <= now() weggeräumt (Index-Scan, billig)
Migration 003_thumbnail_cache.sql: nicht-destruktiv, nur neue Tabelle.
Bestehende DB bekommt sie beim nächsten Start automatisch dazu.
Tests (99/99):
- Neuer Cache-Test: zweiter searchWeb für dieselbe URL macht keinen
Page-Fetch mehr und liest die image-Spalte aus SQLite.
2026-04-17 18:34:29 +02:00
|
|
|
it('SQLite cache: second search does not re-fetch the page', async () => {
  // Counts how often the stub recipe page is actually requested.
  let pageHits = 0;
  const pageServer = createServer((_req, res) => {
    pageHits += 1;
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(`<html><head><meta property="og:image" content="https://cdn.example/c.jpg">${RECIPE_LD}</head></html>`);
  });
  await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
  const addr = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${addr.port}/cached`;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'C', content: '' }]);
    // Same database and same URL for both searches.
    const first = await searchWeb(db, 'c', { searxngUrl: baseUrl });
    const second = await searchWeb(db, 'c', { searxngUrl: baseUrl });
    // Guard the index accesses: an empty result should fail as an assertion,
    // not as a TypeError on `first[0]` / `second[0]`.
    expect(first.length).toBe(1);
    expect(second.length).toBe(1);
    expect(first[0].thumbnail).toBe('https://cdn.example/c.jpg');
    expect(second[0].thumbnail).toBe('https://cdn.example/c.jpg');
    expect(pageHits).toBe(1); // second call read from SQLite cache
    // The row may be absent if nothing was cached; type it accordingly so a
    // missing row fails the assertion below instead of throwing.
    const row = db
      .prepare('SELECT image FROM thumbnail_cache WHERE url = ?')
      .get(pageUrl) as { image: string | null } | undefined;
    expect(row?.image).toBe('https://cdn.example/c.jpg');
  } finally {
    // Always release the page server, even when an assertion throws.
    await new Promise<void>((r) => pageServer.close(() => r()));
  }
});
|
|
|
|
|
|
2026-04-17 15:47:28 +02:00
|
|
|
it('filters out forum/magazine/listing URLs', async () => {
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');
  // Every URL is on the whitelisted domain; only the first one is expected
  // to be returned.
  const recipeHit = { url: 'https://www.chefkoch.de/rezepte/123/Ravioli.html', title: 'Ravioli' };
  const rejectedHits = [
    { url: 'https://www.chefkoch.de/forum/2,17,89865/ravioli.html', title: 'Forum Ravioli' },
    { url: 'https://www.chefkoch.de/magazin/artikel/x.html', title: 'Magazin' },
    { url: 'https://www.chefkoch.de/suche/ravioli', title: 'Suche' },
    { url: 'https://www.chefkoch.de/themen/ravioli/', title: 'Themen' },
    { url: 'https://www.chefkoch.de/rezepte/', title: 'Rezepte Übersicht' }
  ];
  respondWith([recipeHit, ...rejectedHits]);

  const found = await searchWeb(database, 'ravioli', {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(found).toHaveLength(1);
  expect(found[0].title).toBe('Ravioli');
});
|
2026-04-17 15:33:21 +02:00
|
|
|
});
|