From 2807dd1cab4eccfdb41fb3da0256c128ec22d144 Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Sat, 18 Apr 2026 15:18:46 +0200 Subject: [PATCH] feat(import): manuelle URL-Importe von allen Domains zulassen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Der User pastet bewusst eine URL und erwartet, dass der Import klappt — die Whitelist-Prüfung (DOMAIN_BLOCKED) im previewRecipe war da nur Reibung. Die Whitelist bleibt für die Web-Suche relevant (dort muss das Crawl-Feld eingeschränkt werden), für Imports nicht mehr. Dropped: isDomainAllowed + whitelist.ts, DOMAIN_BLOCKED-Code in ImporterError, die zugehörige Branch in mapImporterError. Tests entsprechend angepasst: statt "DOMAIN_BLOCKED wenn nicht whitelisted" prüft der Preview-Test jetzt "klappt auch ohne Whitelist-Eintrag". Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lib/server/domains/whitelist.ts | 16 ---------------- src/lib/server/errors.ts | 6 ++---- src/lib/server/recipes/importer.ts | 11 +++++------ tests/integration/importer.test.ts | 26 +++++++++++--------------- tests/integration/whitelist.test.ts | 13 ++----------- 5 files changed, 20 insertions(+), 52 deletions(-) delete mode 100644 src/lib/server/domains/whitelist.ts diff --git a/src/lib/server/domains/whitelist.ts b/src/lib/server/domains/whitelist.ts deleted file mode 100644 index 7db9746..0000000 --- a/src/lib/server/domains/whitelist.ts +++ /dev/null @@ -1,16 +0,0 @@ -import type Database from 'better-sqlite3'; -import { normalizeDomain } from './repository'; - -export function isDomainAllowed(db: Database.Database, urlString: string): boolean { - let host: string; - try { - host = new URL(urlString).hostname; - } catch { - return false; - } - const normalized = normalizeDomain(host); - const row = db - .prepare('SELECT 1 AS ok FROM allowed_domain WHERE domain = ? 
LIMIT 1') - .get(normalized); - return row !== undefined; -} diff --git a/src/lib/server/errors.ts b/src/lib/server/errors.ts index 2cfecea..a489db7 100644 --- a/src/lib/server/errors.ts +++ b/src/lib/server/errors.ts @@ -4,10 +4,8 @@ import { ImporterError } from './recipes/importer'; export function mapImporterError(e: unknown): never { if (e instanceof ImporterError) { const status = - e.code === 'INVALID_URL' || e.code === 'DOMAIN_BLOCKED' - ? e.code === 'DOMAIN_BLOCKED' - ? 403 - : 400 + e.code === 'INVALID_URL' + ? 400 : e.code === 'NO_RECIPE_FOUND' ? 422 : 502; // FETCH_FAILED diff --git a/src/lib/server/recipes/importer.ts b/src/lib/server/recipes/importer.ts index 711db94..4ab62f0 100644 --- a/src/lib/server/recipes/importer.ts +++ b/src/lib/server/recipes/importer.ts @@ -2,7 +2,6 @@ import type Database from 'better-sqlite3'; import type { Recipe } from '$lib/types'; import { fetchText } from '../http'; import { extractRecipeFromHtml } from '../parsers/json-ld-recipe'; -import { isDomainAllowed } from '../domains/whitelist'; import { downloadImage } from '../images/image-downloader'; import { getRecipeById, @@ -14,7 +13,6 @@ export class ImporterError extends Error { constructor( public readonly code: | 'INVALID_URL' - | 'DOMAIN_BLOCKED' | 'FETCH_FAILED' | 'NO_RECIPE_FOUND', message: string @@ -32,11 +30,12 @@ function hostnameOrThrow(url: string): string { } } -export async function previewRecipe(db: Database.Database, url: string): Promise<Recipe> { +// Manuelle URL-Importe sind absichtlich NICHT mehr auf die allowed_domain- +// Whitelist beschränkt — der User pastet bewusst eine URL und erwartet, +// dass der Import klappt. Die Whitelist bleibt für die Web-Suche (searxng) +// relevant, weil dort ein breites Crawl-Feld eingeschränkt werden soll. 
+export async function previewRecipe(_db: Database.Database, url: string): Promise<Recipe> { const host = hostnameOrThrow(url); - if (!isDomainAllowed(db, url)) { - throw new ImporterError('DOMAIN_BLOCKED', `Domain not allowed: ${host}`); - } let html: string; try { html = await fetchText(url); diff --git a/tests/integration/importer.test.ts b/tests/integration/importer.test.ts index aab6894..35700f8 100644 --- a/tests/integration/importer.test.ts +++ b/tests/integration/importer.test.ts @@ -7,7 +7,6 @@ import { tmpdir } from 'node:os'; import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { openInMemoryForTest } from '../../src/lib/server/db'; -import { addDomain } from '../../src/lib/server/domains/repository'; import { importRecipe, previewRecipe, ImporterError } from '../../src/lib/server/recipes/importer'; const here = dirname(fileURLToPath(import.meta.url)); @@ -61,17 +60,9 @@ afterEach(async () => { }); describe('previewRecipe', () => { - it('throws DOMAIN_BLOCKED if host not whitelisted', async () => { + it('accepts any domain — manuelle URL-Importe sind nicht auf die Whitelist beschränkt', async () => { const db = openInMemoryForTest(); - // note: no domain added - await expect(previewRecipe(db, `${baseUrl}/recipe`)).rejects.toMatchObject({ - code: 'DOMAIN_BLOCKED' - }); - }); - - it('returns parsed recipe for whitelisted domain', async () => { - const db = openInMemoryForTest(); - addDomain(db, '127.0.0.1'); + // keine Domain in der Whitelist — preview muss trotzdem klappen const r = await previewRecipe(db, `${baseUrl}/recipe`); expect(r.title.toLowerCase()).toContain('schupfnudel'); expect(r.source_url).toBe(`${baseUrl}/recipe`); @@ -80,17 +71,22 @@ describe('previewRecipe', () => { it('throws NO_RECIPE_FOUND when HTML has no Recipe JSON-LD', async () => { const db = openInMemoryForTest(); - addDomain(db, '127.0.0.1'); await expect(previewRecipe(db, `${baseUrl}/bare`)).rejects.toMatchObject({ code: 'NO_RECIPE_FOUND' }); }); 
+ + it('throws INVALID_URL for malformed input', async () => { + const db = openInMemoryForTest(); + await expect(previewRecipe(db, 'not a url')).rejects.toMatchObject({ + code: 'INVALID_URL' + }); + }); }); describe('importRecipe', () => { it('imports, persists, and is idempotent', async () => { const db = openInMemoryForTest(); - addDomain(db, '127.0.0.1'); const first = await importRecipe(db, imgDir, `${baseUrl}/recipe`); expect(first.duplicate).toBe(false); expect(first.id).toBeGreaterThan(0); @@ -104,9 +100,9 @@ describe('importRecipe', () => { expect(second.id).toBe(first.id); }); - it('surfaces ImporterError type', async () => { + it('surfaces ImporterError type when no recipe on page', async () => { const db = openInMemoryForTest(); - await expect(importRecipe(db, imgDir, `${baseUrl}/recipe`)).rejects.toBeInstanceOf( + await expect(importRecipe(db, imgDir, `${baseUrl}/bare`)).rejects.toBeInstanceOf( ImporterError ); }); diff --git a/tests/integration/whitelist.test.ts b/tests/integration/whitelist.test.ts index 0ed1db6..2d54ffa 100644 --- a/tests/integration/whitelist.test.ts +++ b/tests/integration/whitelist.test.ts @@ -8,7 +8,6 @@ import { updateDomain, getDomainById } from '../../src/lib/server/domains/repository'; -import { isDomainAllowed } from '../../src/lib/server/domains/whitelist'; describe('allowed domains', () => { it('round-trips domains', () => { @@ -19,18 +18,10 @@ describe('allowed domains', () => { expect(all.map((d) => d.domain).sort()).toEqual(['chefkoch.de', 'emmikochteinfach.de']); }); - it('normalizes www. and case', () => { + it('normalizes www. 
and case via addDomain', () => { const db = openInMemoryForTest(); addDomain(db, 'WWW.Chefkoch.DE'); - expect(isDomainAllowed(db, 'https://chefkoch.de/abc')).toBe(true); - expect(isDomainAllowed(db, 'https://www.chefkoch.de/abc')).toBe(true); - expect(isDomainAllowed(db, 'https://fake.de/abc')).toBe(false); - }); - - it('rejects invalid urls', () => { - const db = openInMemoryForTest(); - addDomain(db, 'chefkoch.de'); - expect(isDomainAllowed(db, 'not a url')).toBe(false); + expect(listDomains(db)[0].domain).toBe('chefkoch.de'); }); it('removes domains', () => {