Files
kochwas/src/lib/server/http.ts
hsiegeln 0992e51a5d
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 1m16s
fix(search): Filter zuverlässiger durch allowTruncate
Vorher warf fetchText einen Fehler, sobald eine Seite >512 KB war —
bei modernen Rezeptseiten (eingebettete Bundles, base64-Bilder) läuft
das praktisch immer voll. Der Catch-Block hat dann hasRecipe auf NULL
gelassen, und der Treffer ging ungefiltert durch.

Neue FetchOptions.allowTruncate: true → wir bekommen die ersten 512 KB
(das reicht für <head> mit og:image und JSON-LD) statt eines Throws.
Timeout auf 8s erhöht, weil der Pi manchmal langsamer ist.

Migration 008 räumt alte NULL-has_recipe-Einträge aus dem Cache, damit
sie beim nächsten Search frisch klassifiziert werden statt weitere
30 Tage falsch gecached zu bleiben.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 22:33:55 +02:00

120 lines
3.6 KiB
TypeScript

export type FetchOptions = {
/** Hard cap on the number of body bytes read; defaults to 10 MiB (see DEFAULTS). */
maxBytes?: number;
/** Abort the request after this many milliseconds; defaults to 10 s (see DEFAULTS). */
timeoutMs?: number;
/** Value sent as the `user-agent` header; defaults to 'Kochwas/0.1' (see DEFAULTS). */
userAgent?: string;
/** Additional request headers; merged after (and therefore able to override) the UA header. */
extraHeaders?: Record<string, string>;
/**
 * When true, return the data read up to `maxBytes` instead of throwing.
 * Useful when we only care about the page head (og:image, JSON-LD) — most
 * recipe sites are >1 MB today because of inlined bundles, but the head is
 * usually well under 512 KB.
 */
allowTruncate?: boolean;
};
// Fallback values for FetchOptions. `extraHeaders` and `allowTruncate` are
// excluded on purpose: their absence is meaningful (no extra headers; throw
// on oversize) rather than a value to default.
const DEFAULTS: Required<Omit<FetchOptions, 'extraHeaders' | 'allowTruncate'>> = {
maxBytes: 10 * 1024 * 1024, // 10 MiB
timeoutMs: 10_000,
userAgent: 'Kochwas/0.1'
};
/**
 * Reject URLs we refuse to fetch: anything that does not parse, or that uses
 * a scheme other than http/https.
 *
 * NOTE(review): this only validates the scheme — it does not block loopback
 * or private-network hosts, so it is not full SSRF protection. Confirm
 * whether callers can ever receive attacker-controlled URLs.
 *
 * @throws Error when the URL is unparseable or has an unsupported scheme.
 */
function assertSafeUrl(url: string): void {
let parsed: URL;
try {
parsed = new URL(url);
} catch {
throw new Error(`Invalid URL: ${url}`);
}
const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
if (!isHttp) {
throw new Error(`Unsupported URL scheme: ${parsed.protocol}`);
}
}
/**
 * Read a response body while enforcing `maxBytes`.
 *
 * Streaming path: chunks are accumulated until the budget is exceeded, at
 * which point the reader is cancelled. Fallback path (no readable stream,
 * e.g. in some test environments): the whole body is buffered first.
 *
 * @param response   the fetch Response whose body is consumed
 * @param maxBytes   byte budget for the body
 * @param allowTruncate when true, an oversize body yields the first
 *                   `maxBytes` bytes with `truncated: true`; when false,
 *                   an oversize body throws
 * @returns the body bytes, the byte count (capped at `maxBytes` when
 *          truncated), and whether truncation happened
 * @throws Error when the body exceeds `maxBytes` and truncation is disabled
 */
async function readBody(
response: Response,
maxBytes: number,
allowTruncate: boolean
): Promise<{ data: Uint8Array; total: number; truncated: boolean }> {
const reader = response.body?.getReader();

// Fallback: no streaming reader available — buffer the entire body.
if (!reader) {
const whole = new Uint8Array(await response.arrayBuffer());
if (whole.byteLength <= maxBytes) {
return { data: whole, total: whole.byteLength, truncated: false };
}
if (!allowTruncate) {
throw new Error(`Response exceeds ${maxBytes} bytes`);
}
return { data: whole.slice(0, maxBytes), total: maxBytes, truncated: true };
}

const parts: Uint8Array[] = [];
let received = 0;
let truncated = false;

while (true) {
const { value, done } = await reader.read();
if (done) break;
if (!value) continue;

received += value.byteLength;
if (received <= maxBytes) {
parts.push(value);
continue;
}

// Budget exceeded mid-chunk: stop pulling from the network either way.
await reader.cancel();
if (!allowTruncate) {
throw new Error(`Response exceeds ${maxBytes} bytes`);
}
// Keep only the prefix of this chunk that still fits; good enough for
// the HTML head use case.
const overshoot = received - maxBytes;
const keep = value.byteLength - overshoot;
if (keep > 0) parts.push(value.slice(0, keep));
received = maxBytes;
truncated = true;
break;
}

// Stitch the retained chunks into one contiguous buffer.
const merged = new Uint8Array(received);
let cursor = 0;
for (const part of parts) {
merged.set(part, cursor);
cursor += part.byteLength;
}
return { data: merged, total: received, truncated };
}
/**
 * Guarded fetch used by fetchText/fetchBuffer: validates the URL scheme,
 * sends our user-agent (overridable via extraHeaders), follows redirects,
 * aborts after the configured timeout, and rejects non-ok status codes.
 *
 * NOTE(review): the timer is cleared as soon as headers arrive, so the
 * timeout does not cover reading the body in readBody — confirm that is
 * intended.
 *
 * @throws Error for invalid/unsupported URLs, timeouts (AbortError), and
 *         non-2xx responses.
 */
async function doFetch(url: string, opts: FetchOptions): Promise<Response> {
assertSafeUrl(url);

const controller = new AbortController();
const timer = setTimeout(
() => controller.abort(),
opts.timeoutMs ?? DEFAULTS.timeoutMs
);

try {
const res = await fetch(url, {
signal: controller.signal,
redirect: 'follow',
headers: {
// extraHeaders is spread last so callers may override the UA.
'user-agent': opts.userAgent ?? DEFAULTS.userAgent,
...(opts.extraHeaders ?? {})
}
});
if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
return res;
} finally {
clearTimeout(timer);
}
}
/**
 * Fetch a URL and decode the body as UTF-8 text, subject to the size/timeout
 * limits in `opts`. With `allowTruncate: true` an oversize page yields its
 * first `maxBytes` bytes instead of throwing.
 *
 * NOTE(review): the body is always decoded as UTF-8 regardless of the
 * response charset, and a truncation cut mid-codepoint yields replacement
 * characters at the tail — acceptable for head-scraping, confirm for other
 * callers.
 */
export async function fetchText(url: string, opts: FetchOptions = {}): Promise<string> {
const res = await doFetch(url, opts);
const limit = opts.maxBytes ?? DEFAULTS.maxBytes;
const { data } = await readBody(res, limit, opts.allowTruncate ?? false);
return new TextDecoder('utf-8').decode(data);
}
/**
 * Fetch a URL and return the raw body bytes plus the content-type header
 * (null when absent), subject to the size/timeout limits in `opts`. With
 * `allowTruncate: true` an oversize body yields its first `maxBytes` bytes
 * instead of throwing.
 */
export async function fetchBuffer(
url: string,
opts: FetchOptions = {}
): Promise<{ data: Uint8Array; contentType: string | null }> {
const res = await doFetch(url, opts);
const limit = opts.maxBytes ?? DEFAULTS.maxBytes;
const { data } = await readBody(res, limit, opts.allowTruncate ?? false);
return { data, contentType: res.headers.get('content-type') };
}