export type FetchOptions = {
|
|
|
|
|
maxBytes?: number;
|
|
|
|
|
timeoutMs?: number;
|
|
|
|
|
userAgent?: string;
|
2026-04-17 16:56:13 +02:00
|
|
|
extraHeaders?: Record<string, string>;
|
2026-04-17 22:33:55 +02:00
|
|
|
/**
|
|
|
|
|
* When true, return the data read up to `maxBytes` instead of throwing.
|
|
|
|
|
* Useful when we only care about the page head (og:image, JSON-LD) — most
|
|
|
|
|
* recipe sites are >1 MB today because of inlined bundles, but the head is
|
|
|
|
|
* usually well under 512 KB.
|
|
|
|
|
*/
|
|
|
|
|
allowTruncate?: boolean;
|
2026-04-17 15:09:31 +02:00
|
|
|
};
const DEFAULTS: Required<Omit<FetchOptions, 'extraHeaders' | 'allowTruncate'>> = {
|
2026-04-17 15:09:31 +02:00
|
|
|
maxBytes: 10 * 1024 * 1024,
|
|
|
|
|
timeoutMs: 10_000,
|
|
|
|
|
userAgent: 'Kochwas/0.1'
|
|
|
|
|
};
function assertSafeUrl(url: string): void {
|
|
|
|
|
let u: URL;
|
|
|
|
|
try {
|
|
|
|
|
u = new URL(url);
|
|
|
|
|
} catch {
|
|
|
|
|
throw new Error(`Invalid URL: ${url}`);
|
|
|
|
|
}
|
|
|
|
|
if (u.protocol !== 'http:' && u.protocol !== 'https:') {
|
|
|
|
|
throw new Error(`Unsupported URL scheme: ${u.protocol}`);
|
|
|
|
|
}
|
|
|
|
|
}
async function readBody(
|
|
|
|
|
response: Response,
|
2026-04-17 22:33:55 +02:00
|
|
|
maxBytes: number,
|
|
|
|
|
allowTruncate: boolean
|
|
|
|
|
): Promise<{ data: Uint8Array; total: number; truncated: boolean }> {
|
2026-04-17 15:09:31 +02:00
|
|
|
const reader = response.body?.getReader();
|
|
|
|
|
if (!reader) {
|
|
|
|
|
const buf = new Uint8Array(await response.arrayBuffer());
|
2026-04-17 22:33:55 +02:00
|
|
|
if (buf.byteLength > maxBytes) {
|
|
|
|
|
if (allowTruncate) {
|
|
|
|
|
return { data: buf.slice(0, maxBytes), total: maxBytes, truncated: true };
|
|
|
|
|
}
|
|
|
|
|
throw new Error(`Response exceeds ${maxBytes} bytes`);
|
|
|
|
|
}
|
|
|
|
|
return { data: buf, total: buf.byteLength, truncated: false };
|
2026-04-17 15:09:31 +02:00
|
|
|
}
|
|
|
|
|
const chunks: Uint8Array[] = [];
|
|
|
|
|
let total = 0;
|
2026-04-17 22:33:55 +02:00
|
|
|
let truncated = false;
|
2026-04-17 15:09:31 +02:00
|
|
|
for (;;) {
|
|
|
|
|
const { value, done } = await reader.read();
|
|
|
|
|
if (done) break;
|
|
|
|
|
if (value) {
|
|
|
|
|
total += value.byteLength;
|
|
|
|
|
if (total > maxBytes) {
|
|
|
|
|
await reader.cancel();
|
2026-04-17 22:33:55 +02:00
|
|
|
if (allowTruncate) {
|
|
|
|
|
// keep what we have up to the chunk boundary; good enough for HTML head
|
|
|
|
|
const keep = value.byteLength - (total - maxBytes);
|
|
|
|
|
if (keep > 0) chunks.push(value.slice(0, keep));
|
|
|
|
|
total = maxBytes;
|
|
|
|
|
truncated = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2026-04-17 15:09:31 +02:00
|
|
|
throw new Error(`Response exceeds ${maxBytes} bytes`);
|
|
|
|
|
}
|
|
|
|
|
chunks.push(value);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
const merged = new Uint8Array(total);
|
|
|
|
|
let offset = 0;
|
|
|
|
|
for (const c of chunks) {
|
|
|
|
|
merged.set(c, offset);
|
|
|
|
|
offset += c.byteLength;
|
|
|
|
|
}
|
2026-04-17 22:33:55 +02:00
|
|
|
return { data: merged, total, truncated };
|
2026-04-17 15:09:31 +02:00
|
|
|
}
async function doFetch(url: string, opts: FetchOptions): Promise<Response> {
|
|
|
|
|
assertSafeUrl(url);
|
2026-04-17 16:56:13 +02:00
|
|
|
const timeoutMs = opts.timeoutMs ?? DEFAULTS.timeoutMs;
|
|
|
|
|
const userAgent = opts.userAgent ?? DEFAULTS.userAgent;
|
2026-04-17 15:09:31 +02:00
|
|
|
const controller = new AbortController();
|
2026-04-17 16:56:13 +02:00
|
|
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
|
|
|
const headers: Record<string, string> = {
|
|
|
|
|
'user-agent': userAgent,
|
|
|
|
|
...(opts.extraHeaders ?? {})
|
|
|
|
|
};
|
2026-04-17 15:09:31 +02:00
|
|
|
try {
|
|
|
|
|
const res = await fetch(url, {
|
|
|
|
|
signal: controller.signal,
|
|
|
|
|
redirect: 'follow',
|
2026-04-17 16:56:13 +02:00
|
|
|
headers
|
2026-04-17 15:09:31 +02:00
|
|
|
});
|
|
|
|
|
if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
|
|
|
|
|
return res;
|
|
|
|
|
} finally {
|
|
|
|
|
clearTimeout(timer);
|
|
|
|
|
}
|
|
|
|
|
}
export async function fetchText(url: string, opts: FetchOptions = {}): Promise<string> {
|
2026-04-17 16:56:13 +02:00
|
|
|
const maxBytes = opts.maxBytes ?? DEFAULTS.maxBytes;
|
|
|
|
|
const res = await doFetch(url, opts);
|
2026-04-17 22:33:55 +02:00
|
|
|
const { data } = await readBody(res, maxBytes, opts.allowTruncate ?? false);
|
2026-04-17 15:09:31 +02:00
|
|
|
return new TextDecoder('utf-8').decode(data);
|
|
|
|
|
}
export async function fetchBuffer(
|
|
|
|
|
url: string,
|
|
|
|
|
opts: FetchOptions = {}
|
|
|
|
|
): Promise<{ data: Uint8Array; contentType: string | null }> {
|
2026-04-17 16:56:13 +02:00
|
|
|
const maxBytes = opts.maxBytes ?? DEFAULTS.maxBytes;
|
|
|
|
|
const res = await doFetch(url, opts);
|
2026-04-17 22:33:55 +02:00
|
|
|
const { data } = await readBody(res, maxBytes, opts.allowTruncate ?? false);
|
2026-04-17 15:09:31 +02:00
|
|
|
return { data, contentType: res.headers.get('content-type') };
|
|
|
|
|
}