Files
kochwas/tests/integration/searxng.test.ts
hsiegeln 1712263fd1
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 54s
feat(search): HQ-Thumbnails durch immer aktive og:image-Extraktion
Vorher: nur Treffer ohne SearXNG-Thumbnail wurden mit dem Seiten-Bild
angereichert. Treffer mit Thumbnail behielten das kleine 150-200 px-
Bildchen aus dem Such-Engine-Index.

Jetzt: Alle Treffer durchlaufen die Enrichment-Pipeline. Wenn die Seite
ein og:image/JSON-LD/Content-Bild hat (und das hat sie bei Rezept-Seiten
praktisch immer), wird das kleine SearXNG-Thumbnail damit überschrieben.
Wenn die Seite kein Bild liefert, bleibt das SearXNG-Thumbnail als
Fallback erhalten.

Das ist das gleiche Bild, das auch die Vorschau anzeigt — Suchergebnis
und Vorschau sind jetzt visuell konsistent.

Performance: Pro erster Suche bis zu ~6 Sekunden zusätzliche Latenz
(max 6 parallel, je 4 s Timeout). Der 30-min In-Memory-Cache macht
Wiederholsuchen instant.

Tests (98/98):
- Neu: SearXNG-Thumbnail wird durch og:image ersetzt.
- Neu: SearXNG-Thumbnail bleibt erhalten, wenn Seite kein Bild hat.
- Alt ("leaves existing thumbnails untouched") entfernt — Verhalten
  hat sich bewusst umgekehrt.
2026-04-17 18:31:42 +02:00

203 lines
7.9 KiB
TypeScript

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { createServer, type Server } from 'node:http';
import type { AddressInfo } from 'node:net';
import { openInMemoryForTest } from '../../src/lib/server/db';
import { addDomain } from '../../src/lib/server/domains/repository';
import { searchWeb } from '../../src/lib/server/search/searxng';
/** Stub standing in for SearXNG; `beforeEach` assigns a fresh instance. */
let server: Server;
/** Base URL of the stub server, handed to `searchWeb` as `searxngUrl`. */
let baseUrl: string;

/**
 * Registers a `request` listener on the stub server that answers every
 * request with HTTP 200 and the JSON body `{ results }`.
 *
 * @param results Raw result objects to place under the `results` key.
 */
function respondWith(results: Record<string, unknown>[]): void {
  server.on('request', (_request, response) => {
    response.writeHead(200, { 'content-type': 'application/json' });
    const payload = JSON.stringify({ results });
    response.end(payload);
  });
}
// Before each test: create a new stub server, bind it to 127.0.0.1 on
// port 0, and derive `baseUrl` from the port reported by `server.address()`.
beforeEach(async () => {
  server = createServer();
  await new Promise<void>((resolve) => {
    server.listen(0, '127.0.0.1', () => resolve());
  });
  const { port } = server.address() as AddressInfo;
  baseUrl = `http://127.0.0.1:${port}`;
});
// After each test: close the stub server and wait for the close callback.
afterEach(async () => {
  await new Promise<void>((resolve) => {
    server.close(() => {
      resolve();
    });
  });
});
describe('searchWeb', () => {
// With only chefkoch.de added as a domain, the fake.de result must be dropped.
it('filters results by whitelist', async () => {
  const allowed = {
    url: 'https://www.chefkoch.de/rezepte/123/a.html',
    title: 'Carbonara',
    content: 'Pasta'
  };
  const blocked = {
    url: 'https://fake.de/x',
    title: 'Not allowed',
    content: 'blocked'
  };

  const db = openInMemoryForTest();
  addDomain(db, 'chefkoch.de');
  respondWith([allowed, blocked]);

  const hits = await searchWeb(db, 'carbonara', {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(hits).toHaveLength(1);
  const [onlyHit] = hits;
  expect(onlyHit.domain).toBe('chefkoch.de');
  expect(onlyHit.title).toBe('Carbonara');
});
// Two results that share one URL are expected to collapse into a single hit.
it('dedupes identical URLs', async () => {
  const original = { url: 'https://www.chefkoch.de/a', title: 'A', content: '' };
  const duplicate = { url: 'https://www.chefkoch.de/a', title: 'A dup', content: '' };

  const db = openInMemoryForTest();
  addDomain(db, 'chefkoch.de');
  respondWith([original, duplicate]);

  const hits = await searchWeb(db, 'a', {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(hits).toHaveLength(1);
});
// No domain is added and no response is registered on the stub server;
// the search is expected to yield an empty list.
it('returns empty list when no domains configured', async () => {
  const emptyDb = openInMemoryForTest();
  const options = { searxngUrl: baseUrl, enrichThumbnails: false };

  const hits = await searchWeb(emptyDb, 'x', options);

  expect(hits).toEqual([]);
});
// A query consisting of a single space is expected to yield no hits,
// even though a domain is configured.
it('returns empty for empty query', async () => {
  const db = openInMemoryForTest();
  addDomain(db, 'chefkoch.de');
  const options = { searxngUrl: baseUrl, enrichThumbnails: false };

  const hits = await searchWeb(db, ' ', options);

  expect(hits).toEqual([]);
});
// The result carries no thumbnail; the page's og:image is expected to fill it.
it('enriches missing thumbnails from og:image', async () => {
  const html =
    '<html><head><meta property="og:image" content="https://cdn.example/foo.jpg" /></head><body></body></html>';
  // Local page server that plays the recipe page the search result points to.
  const pageServer = createServer((_request, response) => {
    response.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    response.end(html);
  });
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', () => resolve());
  });
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/rezept`;

  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'Kuchen', content: '' }]);

    const hits = await searchWeb(db, 'kuchen', { searxngUrl: baseUrl });

    expect(hits).toHaveLength(1);
    expect(hits[0].thumbnail).toBe('https://cdn.example/foo.jpg');
  } finally {
    // Always release the page server, even when an assertion fails.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
// The page has no og:image; the `image` field of its JSON-LD block is
// expected to become the thumbnail.
it('falls back to JSON-LD image when no og:image', async () => {
  const jsonLd = JSON.stringify({
    '@type': 'Recipe',
    name: 'Pie',
    image: 'https://cdn.example/pie.jpg'
  });
  const html = [
    '<html><head>',
    `<script type="application/ld+json">${jsonLd}</script>`,
    '</head><body></body></html>'
  ].join('\n');
  // Local page server that plays the recipe page the search result points to.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(html);
  });
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', () => resolve());
  });
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/pie`;

  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'Pie', content: '' }]);

    const hits = await searchWeb(db, 'pie', { searxngUrl: baseUrl });

    // Guard the index access: an empty result should fail as a clear
    // assertion rather than as a TypeError on `undefined.thumbnail`.
    expect(hits).toHaveLength(1);
    expect(hits[0].thumbnail).toBe('https://cdn.example/pie.jpg');
  } finally {
    // Always release the page server, even when an assertion fails.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
// The page has neither og:image nor JSON-LD; the first <img> in the content
// is expected as thumbnail, with its relative src made absolute on the
// page's own origin.
it('falls back to first content image when no meta/JSON-LD image', async () => {
  const html =
    '<html><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>';
  // Local page server that plays the article the search result points to.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(html);
  });
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', () => resolve());
  });
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/article`;

  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'Dish', content: '' }]);

    const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });

    // Guard the index access: an empty result should fail as a clear
    // assertion rather than as a TypeError on `undefined.thumbnail`.
    expect(hits).toHaveLength(1);
    expect(hits[0].thumbnail).toBe(`http://127.0.0.1:${port}/uploads/dish.jpg`);
  } finally {
    // Always release the page server, even when an assertion fails.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
// The result already carries a SearXNG thumbnail; the page's og:image is
// expected to replace it.
it('upgrades low-res SearXNG thumbnail with HQ og:image from page', async () => {
  const html =
    '<html><head><meta property="og:image" content="https://cdn.example/hq.jpg" /></head></html>';
  // Local page server that plays the recipe page the search result points to.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(html);
  });
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', () => resolve());
  });
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/dish`;

  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([
      { url: pageUrl, title: 'Dish', thumbnail: 'https://searxng-cdn/small-thumb.jpg' }
    ]);

    const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });

    // Guard the index access: an empty result should fail as a clear
    // assertion rather than as a TypeError on `undefined.thumbnail`.
    expect(hits).toHaveLength(1);
    expect(hits[0].thumbnail).toBe('https://cdn.example/hq.jpg');
  } finally {
    // Always release the page server, even when an assertion fails.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
// The page offers no image at all; the thumbnail delivered by SearXNG is
// expected to survive unchanged.
it('keeps SearXNG thumbnail when page has no image', async () => {
  const html = '<html><head></head><body>no images here</body></html>';
  // Local page server that plays the image-less page the result points to.
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(html);
  });
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', () => resolve());
  });
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/noimg`;

  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([
      { url: pageUrl, title: 'X', thumbnail: 'https://searxng-cdn/fallback.jpg' }
    ]);

    const hits = await searchWeb(db, 'x', { searxngUrl: baseUrl });

    // Guard the index access: an empty result should fail as a clear
    // assertion rather than as a TypeError on `undefined.thumbnail`.
    expect(hits).toHaveLength(1);
    expect(hits[0].thumbnail).toBe('https://searxng-cdn/fallback.jpg');
  } finally {
    // Always release the page server, even when an assertion fails.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
// Of the six chefkoch.de URLs served, only the first one (title 'Ravioli')
// is expected to come back as a hit.
it('filters out forum/magazine/listing URLs', async () => {
  const candidates = [
    { url: 'https://www.chefkoch.de/rezepte/123/Ravioli.html', title: 'Ravioli' },
    { url: 'https://www.chefkoch.de/forum/2,17,89865/ravioli.html', title: 'Forum Ravioli' },
    { url: 'https://www.chefkoch.de/magazin/artikel/x.html', title: 'Magazin' },
    { url: 'https://www.chefkoch.de/suche/ravioli', title: 'Suche' },
    { url: 'https://www.chefkoch.de/themen/ravioli/', title: 'Themen' },
    { url: 'https://www.chefkoch.de/rezepte/', title: 'Rezepte Übersicht' }
  ];

  const db = openInMemoryForTest();
  addDomain(db, 'chefkoch.de');
  respondWith(candidates);

  const hits = await searchWeb(db, 'ravioli', {
    searxngUrl: baseUrl,
    enrichThumbnails: false
  });

  expect(hits).toHaveLength(1);
  const [survivor] = hits;
  expect(survivor.title).toBe('Ravioli');
});
});