feat(recipes): add recipe importer (preview + persist)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-17 15:11:23 +02:00
parent aea07c5eb2
commit 99afc45c29
2 changed files with 187 additions and 0 deletions

View File

@@ -0,0 +1,74 @@
import type Database from 'better-sqlite3';
import type { Recipe } from '$lib/types';
import { fetchText } from '../http';
import { extractRecipeFromHtml } from '../parsers/json-ld-recipe';
import { isDomainAllowed } from '../domains/whitelist';
import { downloadImage } from '../images/image-downloader';
import {
getRecipeById,
getRecipeIdBySourceUrl,
insertRecipe
} from './repository';
/** Machine-readable failure categories an {@link ImporterError} can carry. */
type ImporterErrorCode = 'INVALID_URL' | 'DOMAIN_BLOCKED' | 'FETCH_FAILED' | 'NO_RECIPE_FOUND';

/**
 * Error raised by the recipe importer.
 *
 * `code` identifies the failure category so callers can branch on it
 * without parsing the human-readable `message`.
 */
export class ImporterError extends Error {
  /** Failure category of this error. */
  public readonly code: ImporterErrorCode;

  /**
   * @param code - Failure category.
   * @param message - Human-readable description, forwarded to `Error`.
   */
  constructor(code: ImporterErrorCode, message: string) {
    super(message);
    this.code = code;
    this.name = 'ImporterError';
  }
}
/**
 * Extracts the lower-cased hostname from `url`.
 *
 * @param url - Candidate URL string.
 * @returns The hostname component of `url`, lower-cased.
 * @throws ImporterError with code `INVALID_URL` when the `URL` constructor
 *   rejects the input.
 */
function hostnameOrThrow(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    throw new ImporterError('INVALID_URL', `Not a valid URL: ${url}`);
  }
  return parsed.hostname.toLowerCase();
}
/**
 * Fetches the page at `url` and returns the recipe parsed from it, stamped
 * with its source URL and source domain.
 *
 * @param db - Database handle passed to the domain whitelist check.
 * @param url - Address of the page to import from.
 * @returns The extracted recipe with `source_url` set to `url` and
 *   `source_domain` set to the hostname minus a leading `www.`.
 * @throws ImporterError
 *   - `INVALID_URL` when `url` cannot be parsed,
 *   - `DOMAIN_BLOCKED` when `isDomainAllowed` rejects it,
 *   - `FETCH_FAILED` when `fetchText` rejects,
 *   - `NO_RECIPE_FOUND` when no recipe can be extracted from the HTML.
 */
export async function previewRecipe(db: Database.Database, url: string): Promise<Recipe> {
  // Parse first so malformed input is reported as INVALID_URL before any lookup.
  const host = hostnameOrThrow(url);
  if (!isDomainAllowed(db, url)) {
    throw new ImporterError('DOMAIN_BLOCKED', `Domain not allowed: ${host}`);
  }

  let html: string;
  try {
    html = await fetchText(url);
  } catch (e) {
    // A rejection may carry any value, not only an Error; narrow instead of
    // asserting so the ImporterError always gets a usable string message.
    const reason = e instanceof Error ? e.message : String(e);
    throw new ImporterError('FETCH_FAILED', reason);
  }

  const recipe = extractRecipeFromHtml(html);
  if (!recipe) {
    throw new ImporterError('NO_RECIPE_FOUND', 'No schema.org/Recipe JSON-LD on page');
  }

  recipe.source_url = url;
  // Store the domain without a leading "www." prefix.
  recipe.source_domain = host.replace(/^www\./, '');
  return recipe;
}
/**
 * Imports the recipe at `url` and stores it, unless a recipe with the same
 * source URL is already stored.
 *
 * @param db - Database handle used for lookup and insertion.
 * @param imageDir - Directory handed to `downloadImage` for the recipe image.
 * @param url - Address of the page to import from.
 * @returns The recipe id, whether it was already present (`duplicate`), and
 *   the recipe itself. For a fresh import the recipe is re-read from the
 *   database, falling back to the in-memory object if that read yields nothing.
 */
export async function importRecipe(
  db: Database.Database,
  imageDir: string,
  url: string
): Promise<{ id: number; duplicate: boolean; recipe: Recipe }> {
  const knownId = getRecipeIdBySourceUrl(db, url);
  if (knownId !== null) {
    const stored = getRecipeById(db, knownId);
    if (stored) {
      return { id: knownId, duplicate: true, recipe: stored };
    }
    // An id without a loadable row falls through to a fresh import.
  }

  const fresh = await previewRecipe(db, url);

  // Replace the parsed image reference with whatever downloadImage returns,
  // or null when the page declared no image.
  fresh.image_path = fresh.image_path ? await downloadImage(fresh.image_path, imageDir) : null;

  const newId = insertRecipe(db, fresh);
  const reloaded = getRecipeById(db, newId);
  return { id: newId, duplicate: false, recipe: reloaded ?? fresh };
}

View File

@@ -0,0 +1,113 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { createServer, type Server } from 'node:http';
import type { AddressInfo } from 'node:net';
import { mkdtemp, rm, readFile } from 'node:fs/promises';
import { readFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { openInMemoryForTest } from '../../src/lib/server/db';
import { addDomain } from '../../src/lib/server/domains/repository';
import { importRecipe, previewRecipe, ImporterError } from '../../src/lib/server/recipes/importer';
// Directory containing this test file (ES modules have no __dirname).
const here = dirname(fileURLToPath(import.meta.url));
// Recipe page fixture, read once as UTF-8 text when the module loads.
const fixtureHtml = readFileSync(join(here, '../fixtures', 'chefkoch-schupfnudeln.html'), 'utf8');
// Bytes of a 1×1 PNG, hex-encoded; served by the test server as the recipe image.
const PNG = Buffer.from(
'89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c489' +
'0000000d49444154789c6300010000000500010d0a2db40000000049454e44ae426082',
'hex'
);
// Per-test state: assigned in beforeEach, torn down in afterEach.
// Local HTTP server standing in for the recipe site.
let server: Server;
// Origin of `server` (http://127.0.0.1:<port>), without a trailing slash.
let baseUrl: string;
// Temporary directory handed to importRecipe as its image directory.
let imgDir: string;
// Before every test: start a throwaway HTTP server on an OS-assigned
// loopback port and create a fresh temporary image directory.
beforeEach(async () => {
  server = createServer((request, response) => {
    const path = request.url ?? '';

    // Sends a 200 response with the given content type and body.
    const send = (contentType: string, body: string | Buffer): void => {
      response.writeHead(200, { 'content-type': contentType });
      response.end(body);
    };

    if (path.startsWith('/recipe')) {
      // Serve the fixture, but point its CDN image URLs at this server.
      send(
        'text/html',
        fixtureHtml.replace(/https?:\/\/img\.chefkoch-cdn\.de\/[^"']+/g, `${baseUrl}/image.png`)
      );
    } else if (path.startsWith('/image')) {
      send('image/png', PNG);
    } else if (path === '/bare') {
      // A page without any recipe markup.
      send('text/html', '<html><body>no recipe</body></html>');
    } else {
      response.writeHead(404);
      response.end();
    }
  });

  // Port 0 lets the OS choose a free port.
  await new Promise<void>((listening) => server.listen(0, '127.0.0.1', listening));
  const { port } = server.address() as AddressInfo;
  baseUrl = `http://127.0.0.1:${port}`;

  imgDir = await mkdtemp(join(tmpdir(), 'kochwas-imp-'));
});
// After every test: stop the HTTP server, then remove the temporary image directory.
afterEach(async () => {
  const closed = new Promise<void>((resolve) => {
    server.close(() => resolve());
  });
  await closed;
  await rm(imgDir, { force: true, recursive: true });
});
// previewRecipe: whitelist enforcement, successful parse, and missing-recipe handling.
describe('previewRecipe', () => {
  it('throws DOMAIN_BLOCKED if host not whitelisted', async () => {
    // Fresh database: no domain has been added to the whitelist.
    const emptyDb = openInMemoryForTest();
    const attempt = previewRecipe(emptyDb, `${baseUrl}/recipe`);
    await expect(attempt).rejects.toMatchObject({ code: 'DOMAIN_BLOCKED' });
  });

  it('returns parsed recipe for whitelisted domain', async () => {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    const recipeUrl = `${baseUrl}/recipe`;

    const preview = await previewRecipe(db, recipeUrl);

    expect(preview.title.toLowerCase()).toContain('schupfnudel');
    expect(preview.source_url).toBe(recipeUrl);
    expect(preview.ingredients.length).toBeGreaterThan(0);
  });

  it('throws NO_RECIPE_FOUND when HTML has no Recipe JSON-LD', async () => {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    const attempt = previewRecipe(db, `${baseUrl}/bare`);
    await expect(attempt).rejects.toMatchObject({ code: 'NO_RECIPE_FOUND' });
  });
});
// importRecipe: end-to-end import, image persistence, and duplicate detection.
describe('importRecipe', () => {
  it('imports, persists, and is idempotent', async () => {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    const recipeUrl = `${baseUrl}/recipe`;

    // First import fetches, stores, and downloads the image.
    const initial = await importRecipe(db, imgDir, recipeUrl);
    expect(initial.duplicate).toBe(false);
    expect(initial.id).toBeGreaterThan(0);
    expect(initial.recipe.ingredients.length).toBeGreaterThan(0);
    expect(initial.recipe.image_path).not.toBeNull();

    // The stored image file must match the bytes the server sent.
    const onDisk = await readFile(join(imgDir, initial.recipe.image_path!));
    expect(onDisk.equals(PNG)).toBe(true);

    // Importing the same URL again reports the existing row.
    const repeat = await importRecipe(db, imgDir, recipeUrl);
    expect(repeat.duplicate).toBe(true);
    expect(repeat.id).toBe(initial.id);
  });

  it('surfaces ImporterError type', async () => {
    // No domain whitelisted, so the import is expected to reject.
    const db = openInMemoryForTest();
    const attempt = importRecipe(db, imgDir, `${baseUrl}/recipe`);
    await expect(attempt).rejects.toBeInstanceOf(ImporterError);
  });
});