/** Options accepted by {@link fetchText} and {@link fetchBuffer}. */
export type FetchOptions = {
  /** Upper bound on the number of body bytes read. Defaults to 10 MiB. */
  maxBytes?: number;
  /**
   * Time budget in milliseconds for the whole request: waiting for the
   * response headers AND downloading the body. Defaults to 10 000.
   */
  timeoutMs?: number;
  /** Value sent as the `user-agent` request header. Defaults to `Kochwas/0.1`. */
  userAgent?: string;
  /**
   * Additional request headers. A header given here replaces a default header
   * of the same name, regardless of letter case.
   */
  extraHeaders?: Record<string, string>;
  /**
   * When true, return the data read up to `maxBytes` instead of throwing.
   * Useful when we only care about the page head (og:image, JSON-LD) — most
   * recipe sites are >1 MB today because of inlined bundles, but the head is
   * usually well under 512 KB.
   */
  allowTruncate?: boolean;
};

/** Fallbacks for the options that always need a concrete value. */
const DEFAULTS: Required<Pick<FetchOptions, 'maxBytes' | 'timeoutMs' | 'userAgent'>> = {
  maxBytes: 10 * 1024 * 1024,
  timeoutMs: 10_000,
  userAgent: 'Kochwas/0.1'
};

/**
 * Rejects anything that is not a parseable `http:` or `https:` URL.
 *
 * @throws Error when `url` cannot be parsed or uses another scheme.
 */
function assertSafeUrl(url: string): void {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    throw new Error(`Invalid URL: ${url}`);
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    throw new Error(`Unsupported URL scheme: ${parsed.protocol}`);
  }
}

/**
 * Reads a response body while enforcing a byte limit.
 *
 * @param response - Response whose body has not been consumed yet.
 * @param maxBytes - Maximum number of bytes to return.
 * @param allowTruncate - When true, an oversized body is cut to exactly
 *   `maxBytes` instead of causing an error.
 * @returns The bytes read, their count, and whether the body was cut short.
 * @throws Error when the body exceeds `maxBytes` and `allowTruncate` is false.
 */
async function readBody(
  response: Response,
  maxBytes: number,
  allowTruncate: boolean
): Promise<{ data: Uint8Array; total: number; truncated: boolean }> {
  const tooLarge = (): Error => new Error(`Response exceeds ${maxBytes} bytes`);

  const reader = response.body?.getReader();
  if (!reader) {
    // No stream available: buffer everything, then apply the limit.
    const buf = new Uint8Array(await response.arrayBuffer());
    if (buf.byteLength <= maxBytes) {
      return { data: buf, total: buf.byteLength, truncated: false };
    }
    if (!allowTruncate) throw tooLarge();
    return { data: buf.slice(0, maxBytes), total: maxBytes, truncated: true };
  }

  const chunks: Uint8Array[] = [];
  let total = 0;
  let truncated = false;
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    if (!value) continue;

    const room = maxBytes - total;
    if (value.byteLength > room) {
      // Stop the download as soon as the limit is crossed.
      await reader.cancel();
      if (!allowTruncate) throw tooLarge();
      // Trim the final chunk so the result holds exactly `maxBytes` bytes.
      if (room > 0) chunks.push(value.slice(0, room));
      total = maxBytes;
      truncated = true;
      break;
    }
    chunks.push(value);
    total += value.byteLength;
  }

  const merged = new Uint8Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    merged.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return { data: merged, total, truncated };
}

/**
 * Performs a GET request and hands the successful response to `consume`.
 *
 * The timeout stays armed until `consume` settles, so a server that sends
 * headers quickly and then stalls mid-body is aborted as well.
 *
 * @param url - Absolute http(s) URL.
 * @param opts - Request options; `timeoutMs`, `userAgent` and `extraHeaders` are used here.
 * @param consume - Reads whatever it needs from the response while the timeout is active.
 * @returns Whatever `consume` resolves to.
 * @throws Error for an invalid/unsupported URL or a non-2xx status; also
 *   propagates rejections from `fetch` and from `consume`.
 */
async function doFetch<T>(
  url: string,
  opts: FetchOptions,
  consume: (res: Response) => Promise<T>
): Promise<T> {
  assertSafeUrl(url);
  const timeoutMs = opts.timeoutMs ?? DEFAULTS.timeoutMs;

  // Headers.set is case-insensitive, so `User-Agent` in extraHeaders replaces
  // the default instead of being sent alongside it.
  const headers = new Headers({ 'user-agent': opts.userAgent ?? DEFAULTS.userAgent });
  for (const [name, value] of Object.entries(opts.extraHeaders ?? {})) {
    headers.set(name, value);
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, { signal: controller.signal, redirect: 'follow', headers });
    if (!res.ok) {
      // Release the unread body; a failure to cancel must not mask the HTTP error.
      await res.body?.cancel().catch(() => undefined);
      throw new Error(`HTTP ${res.status} for ${url}`);
    }
    return await consume(res);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Shared implementation of {@link fetchText} and {@link fetchBuffer}:
 * validates the byte limit, fetches, and reads the size-limited body.
 *
 * @throws RangeError when `maxBytes` is NaN or negative (checked before any
 *   network I/O, since NaN would otherwise silently disable the limit).
 */
async function fetchBody(
  url: string,
  opts: FetchOptions
): Promise<{ data: Uint8Array; contentType: string | null }> {
  const maxBytes = opts.maxBytes ?? DEFAULTS.maxBytes;
  if (Number.isNaN(maxBytes) || maxBytes < 0) {
    throw new RangeError(`maxBytes must be a non-negative number, got ${maxBytes}`);
  }
  const allowTruncate = opts.allowTruncate ?? false;

  return doFetch(url, opts, async (res) => {
    const { data } = await readBody(res, maxBytes, allowTruncate);
    return { data, contentType: res.headers.get('content-type') };
  });
}

/**
 * Fetches `url` and returns its body decoded as UTF-8.
 *
 * The body is always decoded as UTF-8; the response's declared charset is not
 * consulted.
 *
 * @param url - Absolute http(s) URL.
 * @param opts - See {@link FetchOptions}.
 * @throws Error on invalid URL, non-2xx status, or an oversized body when
 *   `allowTruncate` is not set; RangeError on an invalid `maxBytes`.
 */
export async function fetchText(url: string, opts: FetchOptions = {}): Promise<string> {
  const { data } = await fetchBody(url, opts);
  return new TextDecoder('utf-8').decode(data);
}

/**
 * Fetches `url` and returns the raw body bytes together with the response's
 * `content-type` header (or `null` when the header is absent).
 *
 * @param url - Absolute http(s) URL.
 * @param opts - See {@link FetchOptions}.
 * @throws Error on invalid URL, non-2xx status, or an oversized body when
 *   `allowTruncate` is not set; RangeError on an invalid `maxBytes`.
 */
export async function fetchBuffer(
  url: string,
  opts: FetchOptions = {}
): Promise<{ data: Uint8Array; contentType: string | null }> {
  return fetchBody(url, opts);
}