feat(recipes): add recipe importer (preview + persist)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
74
src/lib/server/recipes/importer.ts
Normal file
74
src/lib/server/recipes/importer.ts
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import type Database from 'better-sqlite3';
|
||||||
|
import type { Recipe } from '$lib/types';
|
||||||
|
import { fetchText } from '../http';
|
||||||
|
import { extractRecipeFromHtml } from '../parsers/json-ld-recipe';
|
||||||
|
import { isDomainAllowed } from '../domains/whitelist';
|
||||||
|
import { downloadImage } from '../images/image-downloader';
|
||||||
|
import {
|
||||||
|
getRecipeById,
|
||||||
|
getRecipeIdBySourceUrl,
|
||||||
|
insertRecipe
|
||||||
|
} from './repository';
|
||||||
|
|
||||||
|
/**
 * Machine-readable failure categories reported by the recipe importer.
 *
 * - `INVALID_URL`: the input is not a usable URL.
 * - `DOMAIN_BLOCKED`: the URL was rejected by the domain whitelist check.
 * - `FETCH_FAILED`: downloading the page failed.
 * - `NO_RECIPE_FOUND`: the page was fetched but no recipe could be extracted.
 */
export type ImporterErrorCode =
  | 'INVALID_URL'
  | 'DOMAIN_BLOCKED'
  | 'FETCH_FAILED'
  | 'NO_RECIPE_FOUND';

/**
 * Error thrown by the importer. `code` identifies the failure category so
 * callers can branch on it instead of parsing `message`.
 */
export class ImporterError extends Error {
  /**
   * @param code - Failure category.
   * @param message - Human-readable detail, forwarded to `Error`.
   */
  constructor(
    public readonly code: ImporterErrorCode,
    message: string
  ) {
    super(message);
    // Replace the inherited default name ('Error') so logs identify this class.
    this.name = 'ImporterError';
  }
}
|
||||||
|
|
||||||
|
/**
 * Parses `url` and returns its hostname in lower case.
 *
 * Only `http:` and `https:` URLs with a non-empty host are accepted. Other
 * schemes (for example `file:`, `data:` or `mailto:`) parse successfully but
 * carry no usable host, so they are rejected here rather than being passed on
 * with a blank hostname.
 *
 * @param url - Raw URL string supplied by the caller.
 * @returns The lower-cased hostname (without port).
 * @throws ImporterError with code `INVALID_URL` when `url` cannot be parsed,
 *   is not http(s), or has an empty host.
 */
function hostnameOrThrow(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    throw new ImporterError('INVALID_URL', `Not a valid URL: ${url}`);
  }

  const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
  if (!isHttp || parsed.hostname === '') {
    throw new ImporterError('INVALID_URL', `Not a valid URL: ${url}`);
  }
  return parsed.hostname.toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Fetches the page at `url` and parses it into a recipe. This function itself
 * performs no insert; it only returns the parsed result.
 *
 * Steps: validate the URL, check it with `isDomainAllowed`, download the page
 * with `fetchText`, extract the recipe with `extractRecipeFromHtml`, then
 * stamp `source_url` and `source_domain` on the result.
 *
 * @param db - Database handle, passed to `isDomainAllowed`.
 * @param url - Page to preview.
 * @returns The extracted recipe with `source_url` set to `url` and
 *   `source_domain` set to the hostname minus a leading `www.`.
 * @throws ImporterError with code `INVALID_URL` (from `hostnameOrThrow`),
 *   `DOMAIN_BLOCKED`, `FETCH_FAILED`, or `NO_RECIPE_FOUND`.
 */
export async function previewRecipe(db: Database.Database, url: string): Promise<Recipe> {
  const host = hostnameOrThrow(url);
  if (!isDomainAllowed(db, url)) {
    throw new ImporterError('DOMAIN_BLOCKED', `Domain not allowed: ${host}`);
  }

  let html: string;
  try {
    html = await fetchText(url);
  } catch (e: unknown) {
    // A rejection is not guaranteed to be an Error instance; narrow before
    // reading `.message` so the ImporterError always carries a string.
    const reason = e instanceof Error ? e.message : String(e);
    throw new ImporterError('FETCH_FAILED', reason);
  }

  const recipe = extractRecipeFromHtml(html);
  if (!recipe) {
    throw new ImporterError('NO_RECIPE_FOUND', 'No schema.org/Recipe JSON-LD on page');
  }

  recipe.source_url = url;
  recipe.source_domain = host.replace(/^www\./, '');
  return recipe;
}
|
||||||
|
|
||||||
|
/**
 * Imports the recipe at `url` and stores it, unless a recipe with the same
 * source URL is already stored.
 *
 * @param db - Database handle used for the lookups and the insert.
 * @param imageDir - Directory handed to `downloadImage` for the recipe image.
 * @param url - Page to import.
 * @returns The recipe id, whether it was already present (`duplicate`), and
 *   the recipe as read back from the database (or the in-memory recipe when
 *   the read-back yields nothing).
 * @throws Whatever `previewRecipe` throws for an unusable or blocked URL,
 *   a failed fetch, or a page without a recipe.
 */
export async function importRecipe(
  db: Database.Database,
  imageDir: string,
  url: string
): Promise<{ id: number; duplicate: boolean; recipe: Recipe }> {
  // Short-circuit when this source URL is already stored and still loadable.
  const knownId = getRecipeIdBySourceUrl(db, url);
  if (knownId !== null) {
    const stored = getRecipeById(db, knownId);
    if (stored) {
      return { id: knownId, duplicate: true, recipe: stored };
    }
  }

  const preview = await previewRecipe(db, url);

  // Swap the extracted image reference for whatever downloadImage returns;
  // with no image reference, the field is stored as null.
  const remoteImage = preview.image_path;
  preview.image_path = remoteImage ? await downloadImage(remoteImage, imageDir) : null;

  const id = insertRecipe(db, preview);
  const persisted = getRecipeById(db, id);
  return { id, duplicate: false, recipe: persisted ?? preview };
}
|
||||||
113
tests/integration/importer.test.ts
Normal file
113
tests/integration/importer.test.ts
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||||
|
import { createServer, type Server } from 'node:http';
|
||||||
|
import type { AddressInfo } from 'node:net';
|
||||||
|
import { mkdtemp, rm, readFile } from 'node:fs/promises';
|
||||||
|
import { readFileSync } from 'node:fs';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { dirname, join } from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import { openInMemoryForTest } from '../../src/lib/server/db';
|
||||||
|
import { addDomain } from '../../src/lib/server/domains/repository';
|
||||||
|
import { importRecipe, previewRecipe, ImporterError } from '../../src/lib/server/recipes/importer';
|
||||||
|
|
||||||
|
// Directory containing this test file.
const here = dirname(fileURLToPath(import.meta.url));

// Recipe page fixture, loaded once and served by the local test server.
const fixtureHtml = readFileSync(
  join(here, '..', 'fixtures', 'chefkoch-schupfnudeln.html'),
  'utf8'
);

// 1×1 PNG, served as the recipe image and compared against the downloaded file.
const PNG = Buffer.from(
  [
    '89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c489',
    '0000000d49444154789c6300010000000500010d0a2db40000000049454e44ae426082'
  ].join(''),
  'hex'
);

// Per-test state, assigned in beforeEach and released in afterEach.
let server: Server;
let baseUrl: string;
let imgDir: string;
|
||||||
|
|
||||||
|
// Before each test: start a local HTTP server on 127.0.0.1 (listening on
// port 0 and reading the assigned port back) and create a temp image dir.
beforeEach(async () => {
  server = createServer((req, res) => {
    const path = req.url ?? '';

    if (path.startsWith('/recipe')) {
      // Serve the fixture with its CDN image URLs pointed at this server.
      const localImageUrl = `${baseUrl}/image.png`;
      const body = fixtureHtml.replace(
        /https?:\/\/img\.chefkoch-cdn\.de\/[^"']+/g,
        localImageUrl
      );
      res.writeHead(200, { 'content-type': 'text/html' });
      res.end(body);
    } else if (path.startsWith('/image')) {
      res.writeHead(200, { 'content-type': 'image/png' });
      res.end(PNG);
    } else if (path === '/bare') {
      // A page with no recipe markup at all.
      res.writeHead(200, { 'content-type': 'text/html' });
      res.end('<html><body>no recipe</body></html>');
    } else {
      res.writeHead(404);
      res.end();
    }
  });

  await new Promise<void>((resolve) => {
    server.listen(0, '127.0.0.1', () => resolve());
  });

  const { port } = server.address() as AddressInfo;
  baseUrl = `http://127.0.0.1:${port}`;
  imgDir = await mkdtemp(join(tmpdir(), 'kochwas-imp-'));
});
|
||||||
|
|
||||||
|
// After each test: stop the local server, then remove the temp image dir.
afterEach(async () => {
  await new Promise<void>((resolve) => {
    server.close(() => resolve());
  });
  await rm(imgDir, { force: true, recursive: true });
});
|
||||||
|
|
||||||
|
// previewRecipe: whitelist rejection, successful parse, and missing-recipe page.
describe('previewRecipe', () => {
  it('throws DOMAIN_BLOCKED if host not whitelisted', async () => {
    // Deliberately no addDomain() call for this database.
    const db = openInMemoryForTest();
    const attempt = previewRecipe(db, `${baseUrl}/recipe`);
    await expect(attempt).rejects.toMatchObject({ code: 'DOMAIN_BLOCKED' });
  });

  it('returns parsed recipe for whitelisted domain', async () => {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    const recipeUrl = `${baseUrl}/recipe`;

    const parsed = await previewRecipe(db, recipeUrl);

    expect(parsed.title.toLowerCase()).toContain('schupfnudel');
    expect(parsed.source_url).toBe(recipeUrl);
    expect(parsed.ingredients.length).toBeGreaterThan(0);
  });

  it('throws NO_RECIPE_FOUND when HTML has no Recipe JSON-LD', async () => {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    const attempt = previewRecipe(db, `${baseUrl}/bare`);
    await expect(attempt).rejects.toMatchObject({ code: 'NO_RECIPE_FOUND' });
  });
});
|
||||||
|
|
||||||
|
// importRecipe: full import with image download, duplicate detection, error type.
describe('importRecipe', () => {
  it('imports, persists, and is idempotent', async () => {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    const recipeUrl = `${baseUrl}/recipe`;

    const first = await importRecipe(db, imgDir, recipeUrl);
    expect(first.duplicate).toBe(false);
    expect(first.id).toBeGreaterThan(0);
    expect(first.recipe.ingredients.length).toBeGreaterThan(0);
    expect(first.recipe.image_path).not.toBeNull();

    // The file written into imgDir must match the bytes the server sent.
    const imageOnDisk = await readFile(join(imgDir, first.recipe.image_path!));
    expect(imageOnDisk.equals(PNG)).toBe(true);

    // Importing the same URL again must report the existing row.
    const second = await importRecipe(db, imgDir, recipeUrl);
    expect(second.duplicate).toBe(true);
    expect(second.id).toBe(first.id);
  });

  it('surfaces ImporterError type', async () => {
    // No domain is whitelisted for this database, so the import must reject.
    const db = openInMemoryForTest();
    const attempt = importRecipe(db, imgDir, `${baseUrl}/recipe`);
    await expect(attempt).rejects.toBeInstanceOf(ImporterError);
  });
});
|
||||||
Reference in New Issue
Block a user