fix(search): filter forum/magazin/listing URLs from web search results
Blocks common non-recipe paths such as /forum/, /magazin/, /suche/, and /themen/, as well as Chefkoch's /rs/s\d+/ search URLs and /Rezepte.html listing pages.

Before: a search for 'ravioli' returned forum threads and listing pages, which triggered 'No schema.org/Recipe JSON-LD' on preview.
After: only real recipe URLs pass through.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -71,4 +71,20 @@ describe('searchWeb', () => {
|
||||
const hits = await searchWeb(db, ' ', { searxngUrl: baseUrl });
|
||||
expect(hits).toEqual([]);
|
||||
});
|
||||
|
||||
// Regression test: given six stubbed search results on the same domain, only
// the actual recipe page is expected to come back from searchWeb.
it('filters out forum/magazine/listing URLs', async () => {
|
||||
const db = openInMemoryForTest();
|
||||
// Register chefkoch.de so all stubbed URLs below share a registered domain
// (presumably a domain allow-list; confirm against addDomain/searchWeb).
addDomain(db, 'chefkoch.de');
|
||||
// Stubbed results: the first entry is a recipe page; the remaining five are
// forum, magazine, search, topic, and recipe-index URLs that should be dropped.
respondWith([
|
||||
{ url: 'https://www.chefkoch.de/rezepte/123/Ravioli.html', title: 'Ravioli' },
|
||||
{ url: 'https://www.chefkoch.de/forum/2,17,89865/ravioli.html', title: 'Forum Ravioli' },
|
||||
{ url: 'https://www.chefkoch.de/magazin/artikel/x.html', title: 'Magazin' },
|
||||
{ url: 'https://www.chefkoch.de/suche/ravioli', title: 'Suche' },
|
||||
{ url: 'https://www.chefkoch.de/themen/ravioli/', title: 'Themen' },
|
||||
{ url: 'https://www.chefkoch.de/rezepte/', title: 'Rezepte Übersicht' }
|
||||
]);
|
||||
const hits = await searchWeb(db, 'ravioli', { searxngUrl: baseUrl });
|
||||
// Exactly one hit must survive, and it must be the recipe page.
expect(hits.length).toBe(1);
|
||||
expect(hits[0].title).toBe('Ravioli');
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user