From 99afc45c2961ea2d1f866d300355471b0029b8e6 Mon Sep 17 00:00:00 2001
From: Hendrik
Date: Fri, 17 Apr 2026 15:11:23 +0200
Subject: [PATCH] feat(recipes): add recipe importer (preview + persist)

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/lib/server/recipes/importer.ts |  90 +++++++++++++++++++++++
 tests/integration/importer.test.ts | 113 +++++++++++++++++++++++++++++
 2 files changed, 203 insertions(+)
 create mode 100644 src/lib/server/recipes/importer.ts
 create mode 100644 tests/integration/importer.test.ts

diff --git a/src/lib/server/recipes/importer.ts b/src/lib/server/recipes/importer.ts
new file mode 100644
index 0000000..711db94
--- /dev/null
+++ b/src/lib/server/recipes/importer.ts
@@ -0,0 +1,90 @@
+import type Database from 'better-sqlite3';
+import type { Recipe } from '$lib/types';
+import { fetchText } from '../http';
+import { extractRecipeFromHtml } from '../parsers/json-ld-recipe';
+import { isDomainAllowed } from '../domains/whitelist';
+import { downloadImage } from '../images/image-downloader';
+import {
+  getRecipeById,
+  getRecipeIdBySourceUrl,
+  insertRecipe
+} from './repository';
+
+/** Error thrown by the importer; `code` identifies which stage failed. */
+export class ImporterError extends Error {
+  constructor(
+    public readonly code:
+      | 'INVALID_URL'
+      | 'DOMAIN_BLOCKED'
+      | 'FETCH_FAILED'
+      | 'NO_RECIPE_FOUND',
+    message: string
+  ) {
+    super(message);
+    this.name = 'ImporterError';
+  }
+}
+
+/** Lower-cased hostname of `url`; throws `INVALID_URL` when it cannot be parsed. */
+function hostnameOrThrow(url: string): string {
+  try {
+    return new URL(url).hostname.toLowerCase();
+  } catch {
+    throw new ImporterError('INVALID_URL', `Not a valid URL: ${url}`);
+  }
+}
+
+/**
+ * Fetches `url` and extracts a recipe from it without writing to the database.
+ *
+ * @throws ImporterError whose `code` names the stage that failed.
+ */
+export async function previewRecipe(db: Database.Database, url: string): Promise<Recipe> {
+  const host = hostnameOrThrow(url);
+  if (!isDomainAllowed(db, url)) {
+    throw new ImporterError('DOMAIN_BLOCKED', `Domain not allowed: ${host}`);
+  }
+  let html: string;
+  try {
+    html = await fetchText(url);
+  } catch (e) {
+    // Anything can be thrown, so only read `.message` from real Error instances.
+    throw new ImporterError('FETCH_FAILED', e instanceof Error ? e.message : String(e));
+  }
+  const recipe = extractRecipeFromHtml(html);
+  if (!recipe) {
+    throw new ImporterError('NO_RECIPE_FOUND', 'No schema.org/Recipe JSON-LD on page');
+  }
+  recipe.source_url = url;
+  recipe.source_domain = host.replace(/^www\./, '');
+  return recipe;
+}
+
+/**
+ * Imports the recipe at `url`: downloads its image into `imageDir` and inserts it.
+ * If a recipe with the same source URL is already stored and can be loaded, that
+ * one is returned with `duplicate: true` and nothing is fetched.
+ *
+ * @throws ImporterError for the same reasons as {@link previewRecipe}.
+ */
+export async function importRecipe(
+  db: Database.Database,
+  imageDir: string,
+  url: string
+): Promise<{ id: number; duplicate: boolean; recipe: Recipe }> {
+  const existingId = getRecipeIdBySourceUrl(db, url);
+  if (existingId !== null) {
+    const recipe = getRecipeById(db, existingId);
+    if (recipe) return { id: existingId, duplicate: true, recipe };
+  }
+  const recipe = await previewRecipe(db, url);
+  let imageFilename: string | null = null;
+  if (recipe.image_path) {
+    // Swap the parsed image reference for the filename downloadImage returns.
+    imageFilename = await downloadImage(recipe.image_path, imageDir);
+  }
+  recipe.image_path = imageFilename;
+  const id = insertRecipe(db, recipe);
+  const persisted = getRecipeById(db, id);
+  return { id, duplicate: false, recipe: persisted ?? recipe };
+}
diff --git a/tests/integration/importer.test.ts b/tests/integration/importer.test.ts
new file mode 100644
index 0000000..aab6894
--- /dev/null
+++ b/tests/integration/importer.test.ts
@@ -0,0 +1,113 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { createServer, type Server } from 'node:http';
+import type { AddressInfo } from 'node:net';
+import { mkdtemp, rm, readFile } from 'node:fs/promises';
+import { readFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { openInMemoryForTest } from '../../src/lib/server/db';
+import { addDomain } from '../../src/lib/server/domains/repository';
+import { importRecipe, previewRecipe, ImporterError } from '../../src/lib/server/recipes/importer';
+
+const here = dirname(fileURLToPath(import.meta.url));
+const fixtureHtml = readFileSync(join(here, '../fixtures', 'chefkoch-schupfnudeln.html'), 'utf8');
+
+// 1×1 PNG
+const PNG = Buffer.from(
+  '89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c489' +
+    '0000000d49444154789c6300010000000500010d0a2db40000000049454e44ae426082',
+  'hex'
+);
+
+let server: Server;
+let baseUrl: string;
+let imgDir: string;
+
+beforeEach(async () => {
+  server = createServer((req, res) => {
+    if (req.url?.startsWith('/recipe')) {
+      // Return our fixture but swap the image URL to point to our local server
+      const patched = fixtureHtml.replace(
+        /https?:\/\/img\.chefkoch-cdn\.de\/[^"']+/g,
+        `${baseUrl}/image.png`
+      );
+      res.writeHead(200, { 'content-type': 'text/html' });
+      res.end(patched);
+      return;
+    }
+    if (req.url?.startsWith('/image')) {
+      res.writeHead(200, { 'content-type': 'image/png' });
+      res.end(PNG);
+      return;
+    }
+    if (req.url === '/bare') {
+      res.writeHead(200, { 'content-type': 'text/html' });
+      res.end('no recipe');
+      return;
+    }
+    res.writeHead(404);
+    res.end();
+  });
+  await new Promise<void>((r) => server.listen(0, '127.0.0.1', r));
+  const addr = server.address() as AddressInfo;
+  baseUrl = `http://127.0.0.1:${addr.port}`;
+  imgDir = await mkdtemp(join(tmpdir(), 'kochwas-imp-'));
+});
+
+afterEach(async () => {
+  await new Promise<void>((r) => server.close(() => r()));
+  await rm(imgDir, { recursive: true, force: true });
+});
+
+describe('previewRecipe', () => {
+  it('throws DOMAIN_BLOCKED if host not whitelisted', async () => {
+    const db = openInMemoryForTest();
+    // note: no domain added
+    await expect(previewRecipe(db, `${baseUrl}/recipe`)).rejects.toMatchObject({
+      code: 'DOMAIN_BLOCKED'
+    });
+  });
+
+  it('returns parsed recipe for whitelisted domain', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, '127.0.0.1');
+    const r = await previewRecipe(db, `${baseUrl}/recipe`);
+    expect(r.title.toLowerCase()).toContain('schupfnudel');
+    expect(r.source_url).toBe(`${baseUrl}/recipe`);
+    expect(r.ingredients.length).toBeGreaterThan(0);
+  });
+
+  it('throws NO_RECIPE_FOUND when HTML has no Recipe JSON-LD', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, '127.0.0.1');
+    await expect(previewRecipe(db, `${baseUrl}/bare`)).rejects.toMatchObject({
+      code: 'NO_RECIPE_FOUND'
+    });
+  });
+});
+
+describe('importRecipe', () => {
+  it('imports, persists, and is idempotent', async () => {
+    const db = openInMemoryForTest();
+    addDomain(db, '127.0.0.1');
+    const first = await importRecipe(db, imgDir, `${baseUrl}/recipe`);
+    expect(first.duplicate).toBe(false);
+    expect(first.id).toBeGreaterThan(0);
+    expect(first.recipe.ingredients.length).toBeGreaterThan(0);
+    expect(first.recipe.image_path).not.toBeNull();
+    const saved = await readFile(join(imgDir, first.recipe.image_path!));
+    expect(saved.equals(PNG)).toBe(true);
+
+    const second = await importRecipe(db, imgDir, `${baseUrl}/recipe`);
+    expect(second.duplicate).toBe(true);
+    expect(second.id).toBe(first.id);
+  });
+
+  it('surfaces ImporterError type', async () => {
+    const db = openInMemoryForTest();
+    await expect(importRecipe(db, imgDir, `${baseUrl}/recipe`)).rejects.toBeInstanceOf(
+      ImporterError
+    );
+  });
+});