// kochwas/src/lib/server/http.ts

/** Per-call settings accepted by the fetch helpers in this module. */
export type FetchOptions = {
  /**
   * Maximum number of body bytes accepted. Larger responses are rejected
   * unless `allowTruncate` is set. Defaults to 10 MiB.
   */
  maxBytes?: number;
  /**
   * Milliseconds before the request is aborted. Defaults to 10 000. The timer
   * is stopped once `fetch` settles, so time spent reading the body afterwards
   * is not bounded by it.
   */
  timeoutMs?: number;
  /** Value sent in the `user-agent` request header. Defaults to `Kochwas/0.1`. */
  userAgent?: string;
  /** Additional request headers sent alongside the user agent. */
  extraHeaders?: Record<string, string>;
  /**
   * When true, return the data read up to `maxBytes` instead of throwing.
   * Useful when we only care about the page head (og:image, JSON-LD) — most
   * recipe sites are >1 MB today because of inlined bundles, but the head is
   * usually well under 512 KB.
   */
  allowTruncate?: boolean;
};

// Fallback values used when the matching FetchOptions field is not provided.
// `extraHeaders` and `allowTruncate` are excluded from the type because they
// have no entry here; their fallbacks are applied inline at the call sites.
const DEFAULTS: Required<Omit<FetchOptions, 'extraHeaders' | 'allowTruncate'>> = {
  maxBytes: 10 * 1024 * 1024, // 10 MiB
  timeoutMs: 10_000,
  userAgent: 'Kochwas/0.1'
};

/**
 * Rejects anything that is not a parseable `http:` or `https:` URL.
 *
 * Only the scheme is inspected; the host is not examined in any way.
 *
 * @param url - Candidate URL string.
 * @throws Error `Invalid URL: …` when the string cannot be parsed.
 * @throws Error `Unsupported URL scheme: …` for any other protocol.
 */
function assertSafeUrl(url: string): void {
  const parsed = ((): URL | null => {
    try {
      return new URL(url);
    } catch {
      return null;
    }
  })();

  if (parsed === null) {
    throw new Error(`Invalid URL: ${url}`);
  }

  switch (parsed.protocol) {
    case 'http:':
    case 'https:':
      return;
    default:
      throw new Error(`Unsupported URL scheme: ${parsed.protocol}`);
  }
}

/**
 * Reads a response body into one contiguous buffer while enforcing a byte cap.
 *
 * When the response exposes a stream, chunks are read one at a time and the
 * stream is cancelled as soon as the cap is crossed. Without a stream the
 * whole body is buffered via `arrayBuffer()` and checked afterwards.
 *
 * @param response - Response whose body has not been consumed yet.
 * @param maxBytes - Maximum number of bytes to accept.
 * @param allowTruncate - When true, an oversized body is cut to exactly
 *   `maxBytes` bytes instead of causing an error.
 * @returns The bytes read, their count (`maxBytes` when truncated) and
 *   whether truncation happened.
 * @throws Error `Response exceeds N bytes` when the body is larger than
 *   `maxBytes` and `allowTruncate` is false.
 */
async function readBody(
  response: Response,
  maxBytes: number,
  allowTruncate: boolean
): Promise<{ data: Uint8Array; total: number; truncated: boolean }> {
  const reader = response.body?.getReader();
  if (!reader) {
    // No stream to read incrementally: the cap can only be checked after the fact.
    const buf = new Uint8Array(await response.arrayBuffer());
    if (buf.byteLength > maxBytes) {
      if (allowTruncate) {
        return { data: buf.slice(0, maxBytes), total: maxBytes, truncated: true };
      }
      throw new Error(`Response exceeds ${maxBytes} bytes`);
    }
    return { data: buf, total: buf.byteLength, truncated: false };
  }

  const chunks: Uint8Array[] = [];
  let total = 0;
  let truncated = false;
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    if (!value) continue;

    const overshoot = total + value.byteLength - maxBytes;
    if (overshoot > 0) {
      // Cancelling is cleanup only. A failure here must neither replace the
      // size error below nor discard data the caller asked us to keep.
      try {
        await reader.cancel();
      } catch {
        // best-effort: nothing useful to do if the stream refuses to cancel
      }
      if (!allowTruncate) {
        throw new Error(`Response exceeds ${maxBytes} bytes`);
      }
      // Keep only the leading part of this chunk so the result is exactly
      // maxBytes long; the cut may fall in the middle of a chunk.
      const keep = value.byteLength - overshoot;
      if (keep > 0) chunks.push(value.slice(0, keep));
      total = maxBytes;
      truncated = true;
      break;
    }

    chunks.push(value);
    total += value.byteLength;
  }

  // Concatenate the collected chunks into a single buffer.
  const merged = new Uint8Array(total);
  let offset = 0;
  for (const c of chunks) {
    merged.set(c, offset);
    offset += c.byteLength;
  }
  return { data: merged, total, truncated };
}

/**
 * Fetches `url` with the module's user agent, timeout and redirect policy and
 * returns the response with its body still unread.
 *
 * The abort timer is cleared as soon as `fetch` settles, so it does not bound
 * the time the caller later spends reading the body.
 *
 * @param url - Absolute `http:` or `https:` URL.
 * @param opts - Per-call settings; `timeoutMs`, `userAgent` and `extraHeaders`
 *   are used here.
 * @returns The successful (2xx) response.
 * @throws Error when the URL is rejected by `assertSafeUrl`, or
 *   `HTTP <status> for <url>` when the status is not 2xx. Errors raised by
 *   `fetch` itself (including the abort on timeout) propagate unchanged.
 */
async function doFetch(url: string, opts: FetchOptions): Promise<Response> {
  assertSafeUrl(url);
  const timeoutMs = opts.timeoutMs ?? DEFAULTS.timeoutMs;
  const userAgent = opts.userAgent ?? DEFAULTS.userAgent;

  // Header names are case-insensitive. Using Headers.set() lets an extra
  // header such as `User-Agent` replace the default instead of being combined
  // with it into "default, custom", which is what a plain-object merge yields.
  // Built before the timer starts so an invalid header cannot leave it running.
  const headers = new Headers({ 'user-agent': userAgent });
  for (const [name, value] of Object.entries(opts.extraHeaders ?? {})) {
    headers.set(name, value);
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      signal: controller.signal,
      redirect: 'follow',
      headers
    });
    if (!res.ok) {
      // Nobody will read this body; release it so the connection is not held.
      try {
        await res.body?.cancel();
      } catch {
        // best-effort cleanup; the status error below is what matters
      }
      throw new Error(`HTTP ${res.status} for ${url}`);
    }
    return res;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Chooses a decoder for the charset declared in a Content-Type header value,
 * falling back to UTF-8 when none is declared or the label is not supported.
 */
function pickTextDecoder(contentType: string | null): TextDecoder {
  const label = contentType?.match(/charset\s*=\s*["']?([^"';\s]+)/i)?.[1];
  if (label) {
    try {
      return new TextDecoder(label);
    } catch {
      // Unsupported label: fall through to UTF-8 rather than failing the fetch.
    }
  }
  return new TextDecoder('utf-8');
}

/**
 * Fetches `url` and returns its body as text.
 *
 * The body is decoded with the charset declared in the response's
 * `Content-Type` header, so pages served as e.g. ISO-8859-1 are not garbled.
 * UTF-8 is used when no charset is declared or the declared one is unknown.
 *
 * @param url - Absolute `http:` or `https:` URL.
 * @param opts - Per-call settings; see `FetchOptions`.
 * @returns The decoded body (cut to `maxBytes` when `allowTruncate` is set).
 * @throws Error on an unsupported URL, a non-2xx status, or a body larger
 *   than `maxBytes` when truncation is not allowed.
 */
export async function fetchText(url: string, opts: FetchOptions = {}): Promise<string> {
  const maxBytes = opts.maxBytes ?? DEFAULTS.maxBytes;
  const res = await doFetch(url, opts);
  const { data } = await readBody(res, maxBytes, opts.allowTruncate ?? false);
  return pickTextDecoder(res.headers.get('content-type')).decode(data);
}

/**
 * Fetches `url` and returns the raw body bytes together with the value of the
 * response's `content-type` header (`null` when the header is absent).
 *
 * @param url - Absolute `http:` or `https:` URL.
 * @param opts - Per-call settings; see `FetchOptions`.
 * @returns The body bytes (cut to `maxBytes` when `allowTruncate` is set) and
 *   the content type as sent by the server.
 */
export async function fetchBuffer(
  url: string,
  opts: FetchOptions = {}
): Promise<{ data: Uint8Array; contentType: string | null }> {
  const byteCap = opts.maxBytes ?? DEFAULTS.maxBytes;
  const response = await doFetch(url, opts);
  const body = await readBody(response, byteCap, opts.allowTruncate ?? false);
  const contentType = response.headers.get('content-type');
  return { data: body.data, contentType };
}