fix(search): filter forum/magazin/listing URLs from web search results

Blocks common non-recipe paths such as /forum/, /magazin/, /suche/ and
/themen/, as well as Chefkoch's /rs/s\d+/ search URLs and /Rezepte.html
listing pages.

Before: 'ravioli' search returned forum threads and listing pages that
triggered 'No schema.org/Recipe JSON-LD' on preview.
After: only real recipe URLs pass through.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-17 15:47:28 +02:00
parent bce9e87095
commit 041ef12582
2 changed files with 64 additions and 0 deletions

View File

@@ -71,4 +71,20 @@ describe('searchWeb', () => {
const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl });
expect(hits).toEqual([]);
});
it('filters out forum/magazine/listing URLs', async () => {
  // The single entry whose path points at a concrete recipe page.
  const recipeEntry = {
    url: 'https://www.chefkoch.de/rezepte/123/Ravioli.html',
    title: 'Ravioli'
  };
  // Forum, magazine, search, topic and bare listing paths on the same
  // domain; the assertions below expect none of these to be returned.
  const nonRecipeEntries = [
    { url: 'https://www.chefkoch.de/forum/2,17,89865/ravioli.html', title: 'Forum Ravioli' },
    { url: 'https://www.chefkoch.de/magazin/artikel/x.html', title: 'Magazin' },
    { url: 'https://www.chefkoch.de/suche/ravioli', title: 'Suche' },
    { url: 'https://www.chefkoch.de/themen/ravioli/', title: 'Themen' },
    { url: 'https://www.chefkoch.de/rezepte/', title: 'Rezepte Übersicht' }
  ];

  // Arrange: fresh database with chefkoch.de registered, then queue the
  // mixed result list (presumably the stubbed SearXNG reply — confirm
  // against the respondWith helper).
  const database = openInMemoryForTest();
  addDomain(database, 'chefkoch.de');
  respondWith([recipeEntry, ...nonRecipeEntries]);

  // Act
  const results = await searchWeb(database, 'ravioli', { searxngUrl: baseUrl });

  // Assert: exactly one hit remains and it is the recipe page.
  expect(results.length).toBe(1);
  expect(results[0].title).toBe('Ravioli');
});
});