export type FetchOptions = {
|
|
|
|
|
maxBytes?: number;
|
|
|
|
|
timeoutMs?: number;
|
|
|
|
|
userAgent?: string;
|
2026-04-17 16:56:13 +02:00
|
|
|
extraHeaders?: Record<string, string>;
|
2026-04-17 22:33:55 +02:00
|
|
|
/**
|
|
|
|
|
* When true, return the data read up to `maxBytes` instead of throwing.
|
|
|
|
|
* Useful when we only care about the page head (og:image, JSON-LD) — most
|
|
|
|
|
* recipe sites are >1 MB today because of inlined bundles, but the head is
|
|
|
|
|
* usually well under 512 KB.
|
|
|
|
|
*/
|
|
|
|
|
allowTruncate?: boolean;
|
2026-04-17 15:09:31 +02:00
|
|
|
};
const DEFAULTS: Required<Omit<FetchOptions, 'extraHeaders' | 'allowTruncate'>> = {
|
2026-04-17 15:09:31 +02:00
|
|
|
maxBytes: 10 * 1024 * 1024,
|
|
|
|
|
timeoutMs: 10_000,
|
|
|
|
|
userAgent: 'Kochwas/0.1'
|
|
|
|
|
};
function assertSafeUrl(url: string): void {
|
|
|
|
|
let u: URL;
|
|
|
|
|
try {
|
|
|
|
|
u = new URL(url);
|
|
|
|
|
} catch {
|
|
|
|
|
throw new Error(`Invalid URL: ${url}`);
|
|
|
|
|
}
|
|
|
|
|
if (u.protocol !== 'http:' && u.protocol !== 'https:') {
|
|
|
|
|
throw new Error(`Unsupported URL scheme: ${u.protocol}`);
|
|
|
|
|
}
|
|
|
|
|
}
async function readBody(
|
|
|
|
|
response: Response,
|
2026-04-17 22:33:55 +02:00
|
|
|
maxBytes: number,
|
|
|
|
|
allowTruncate: boolean
|
|
|
|
|
): Promise<{ data: Uint8Array; total: number; truncated: boolean }> {
|
2026-04-17 15:09:31 +02:00
|
|
|
const reader = response.body?.getReader();
|
|
|
|
|
if (!reader) {
|
|
|
|
|
const buf = new Uint8Array(await response.arrayBuffer());
|
2026-04-17 22:33:55 +02:00
|
|
|
if (buf.byteLength > maxBytes) {
|
|
|
|
|
if (allowTruncate) {
|
|
|
|
|
return { data: buf.slice(0, maxBytes), total: maxBytes, truncated: true };
|
|
|
|
|
}
|
|
|
|
|
throw new Error(`Response exceeds ${maxBytes} bytes`);
|
|
|
|
|
}
|
|
|
|
|
return { data: buf, total: buf.byteLength, truncated: false };
|
2026-04-17 15:09:31 +02:00
|
|
|
}
|
|
|
|
|
const chunks: Uint8Array[] = [];
|
|
|
|
|
let total = 0;
|
2026-04-17 22:33:55 +02:00
|
|
|
let truncated = false;
|
2026-04-17 15:09:31 +02:00
|
|
|
for (;;) {
|
|
|
|
|
const { value, done } = await reader.read();
|
|
|
|
|
if (done) break;
|
|
|
|
|
if (value) {
|
|
|
|
|
total += value.byteLength;
|
|
|
|
|
if (total > maxBytes) {
|
|
|
|
|
await reader.cancel();
|
2026-04-17 22:33:55 +02:00
|
|
|
if (allowTruncate) {
|
|
|
|
|
// keep what we have up to the chunk boundary; good enough for HTML head
|
|
|
|
|
const keep = value.byteLength - (total - maxBytes);
|
|
|
|
|
if (keep > 0) chunks.push(value.slice(0, keep));
|
|
|
|
|
total = maxBytes;
|
|
|
|
|
truncated = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2026-04-17 15:09:31 +02:00
|
|
|
throw new Error(`Response exceeds ${maxBytes} bytes`);
|
|
|
|
|
}
|
|
|
|
|
chunks.push(value);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
const merged = new Uint8Array(total);
|
|
|
|
|
let offset = 0;
|
|
|
|
|
for (const c of chunks) {
|
|
|
|
|
merged.set(c, offset);
|
|
|
|
|
offset += c.byteLength;
|
|
|
|
|
}
|
2026-04-17 22:33:55 +02:00
|
|
|
return { data: merged, total, truncated };
|
2026-04-17 15:09:31 +02:00
|
|
|
}
async function doFetch(url: string, opts: FetchOptions): Promise<Response> {
|
|
|
|
|
assertSafeUrl(url);
|
2026-04-17 16:56:13 +02:00
|
|
|
const timeoutMs = opts.timeoutMs ?? DEFAULTS.timeoutMs;
|
|
|
|
|
const userAgent = opts.userAgent ?? DEFAULTS.userAgent;
|
2026-04-17 15:09:31 +02:00
|
|
|
const controller = new AbortController();
|
2026-04-17 16:56:13 +02:00
|
|
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
|
|
|
const headers: Record<string, string> = {
|
|
|
|
|
'user-agent': userAgent,
|
|
|
|
|
...(opts.extraHeaders ?? {})
|
|
|
|
|
};
|
2026-04-17 15:09:31 +02:00
|
|
|
try {
|
|
|
|
|
const res = await fetch(url, {
|
|
|
|
|
signal: controller.signal,
|
|
|
|
|
redirect: 'follow',
|
2026-04-17 16:56:13 +02:00
|
|
|
headers
|
2026-04-17 15:09:31 +02:00
|
|
|
});
|
|
|
|
|
if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
|
|
|
|
|
return res;
|
|
|
|
|
} finally {
|
|
|
|
|
clearTimeout(timer);
|
|
|
|
|
}
|
|
|
|
|
}
export async function fetchText(url: string, opts: FetchOptions = {}): Promise<string> {
|
2026-04-17 16:56:13 +02:00
|
|
|
const maxBytes = opts.maxBytes ?? DEFAULTS.maxBytes;
|
|
|
|
|
const res = await doFetch(url, opts);
|
2026-04-17 22:33:55 +02:00
|
|
|
const { data } = await readBody(res, maxBytes, opts.allowTruncate ?? false);
|
2026-04-17 15:09:31 +02:00
|
|
|
return new TextDecoder('utf-8').decode(data);
|
|
|
|
|
}
export async function fetchBuffer(
|
|
|
|
|
url: string,
|
|
|
|
|
opts: FetchOptions = {}
|
|
|
|
|
): Promise<{ data: Uint8Array; contentType: string | null }> {
|
2026-04-17 16:56:13 +02:00
|
|
|
const maxBytes = opts.maxBytes ?? DEFAULTS.maxBytes;
|
|
|
|
|
const res = await doFetch(url, opts);
|
2026-04-17 22:33:55 +02:00
|
|
|
const { data } = await readBody(res, maxBytes, opts.allowTruncate ?? false);
|
2026-04-17 15:09:31 +02:00
|
|
|
return { data, contentType: res.headers.get('content-type') };
|
|
|
|
|
}