import type Database from 'better-sqlite3';
import { createHash } from 'node:crypto';
import { existsSync } from 'node:fs';
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { fetchBuffer, fetchText } from '../http';
import { listDomains, setDomainFavicon } from './repository';

/** Maps a lower-cased media type (without parameters) to the file extension used on disk. */
const EXT_BY_CONTENT_TYPE: Record<string, string> = {
  'image/png': '.png',
  'image/jpeg': '.jpg',
  'image/jpg': '.jpg',
  'image/webp': '.webp',
  'image/gif': '.gif',
  'image/svg+xml': '.svg',
  'image/x-icon': '.ico',
  'image/vnd.microsoft.icon': '.ico'
};

// Lookup goes through a Map so that header values such as "constructor" or
// "__proto__" cannot resolve to properties inherited from Object.prototype.
const EXT_LOOKUP: ReadonlyMap<string, string> = new Map(Object.entries(EXT_BY_CONTENT_TYPE));

// Media types that are certainly not an image. Servers with a catch-all route
// answer /favicon.ico with an HTML page; that must not be stored as an icon.
const NON_IMAGE_MEDIA_TYPES: ReadonlySet<string> = new Set(['text/html', 'application/xhtml+xml']);

/**
 * Reduces a Content-Type header value to its bare media type.
 *
 * @param contentType Raw header value, e.g. `"image/png; charset=binary"`, or null.
 * @returns The trimmed, lower-cased part before the first `;`, or `''` when the header is absent.
 */
function baseMediaType(contentType: string | null): string {
  if (!contentType) return '';
  return (contentType.split(';', 1)[0] ?? '').trim().toLowerCase();
}

/**
 * Picks the file extension for a downloaded icon.
 *
 * @param contentType Content-Type header of the response, or null if none was sent.
 * @returns The mapped extension, or `'.ico'` for missing or unknown media types.
 */
function extensionFor(contentType: string | null): string {
  return EXT_LOOKUP.get(baseMediaType(contentType)) ?? '.ico';
}

/**
 * Best-effort download of a single icon candidate.
 *
 * @param url Absolute URL to fetch.
 * @returns The response from `fetchBuffer`, or null when the request throws,
 *   the body is empty, or the response is declared as an HTML document.
 */
async function tryFetch(url: string): Promise<{ data: Uint8Array; contentType: string | null } | null> {
  try {
    const res = await fetchBuffer(url, { timeoutMs: 3_000, maxBytes: 256 * 1024 });
    if (res.data.byteLength === 0) return null;
    // An HTML body is an error/landing page, not an icon: let the caller try the next candidate.
    if (NON_IMAGE_MEDIA_TYPES.has(baseMediaType(res.contentType))) return null;
    return res;
  } catch {
    // Deliberate best-effort: any failure just means "this candidate is unusable".
    return null;
  }
}

// Parses <link> icon tags from the <head>. WordPress sites often serve a
// generic /favicon.ico (the gear default from the hoster or a plugin), while
// the actual site icon is included via a <link> tag. So look through the head
// first instead of blindly taking /favicon.ico.
/** A favicon candidate discovered in a page's `<link>` tags. */
type IconLink = {
  /** Absolute URL of the icon resource. */
  href: string;
  /** Largest declared pixel width, or 0 when no size is known. */
  size: number;
  /** True for `apple-touch-icon` style links. */
  isApple: boolean;
};

const LINK_TAG_RE = /<link\b[^>]*>/gi;
const SIZE_TOKEN_RE = /(\d+)\s*x\s*\d+/gi;

/** Builds a matcher for one attribute that accepts double-quoted, single-quoted and unquoted values. */
function buildAttributeRe(name: string): RegExp {
  // The lookbehind keeps `href` from matching inside e.g. `data-href`.
  return new RegExp(`(?<![\\w-])${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`, 'i');
}

const REL_ATTR_RE = buildAttributeRe('rel');
const HREF_ATTR_RE = buildAttributeRe('href');
const SIZES_ATTR_RE = buildAttributeRe('sizes');

/** Returns the trimmed value of the attribute matched by `re`, or null when absent or empty. */
function readAttribute(tag: string, re: RegExp): string | null {
  const m = re.exec(tag);
  if (!m) return null;
  const value = (m[1] ?? m[2] ?? m[3] ?? '').trim();
  return value === '' ? null : value;
}

/** Returns the largest width listed in a `sizes` attribute ("16x16 48x48" -> 48), or 0 if none parses. */
function largestDeclaredSize(sizes: string | null): number {
  if (sizes === null) return 0;
  let largest = 0;
  for (const m of sizes.matchAll(SIZE_TOKEN_RE)) {
    largest = Math.max(largest, Number(m[1]));
  }
  return largest;
}

/**
 * Extracts icon candidates from the `<link>` tags of an HTML document.
 *
 * Only links whose `rel` contains the token `icon` (this includes
 * `shortcut icon`) or an `apple-touch-icon*` token are considered; other
 * rel values such as `mask-icon` are skipped. `data:` URIs, unparsable hrefs
 * and non-http(s) URLs are dropped.
 *
 * @param html Page source; only the first 300,000 characters are scanned.
 * @param baseUrl URL used to resolve relative hrefs.
 * @returns Candidates in document order. Apple touch icons without a
 *   `sizes` attribute get a size of 180.
 */
function extractIconLinks(html: string, baseUrl: string): IconLink[] {
  const head = html.slice(0, 300_000);
  const icons: IconLink[] = [];

  for (const match of head.matchAll(LINK_TAG_RE)) {
    const tag = match[0];

    const rel = readAttribute(tag, REL_ATTR_RE);
    if (rel === null) continue;
    // Compare whole tokens: a substring test would also accept "mask-icon".
    const relTokens = rel.toLowerCase().split(/\s+/);
    const isApple = relTokens.some((token) => token.startsWith('apple-touch-icon'));
    if (!isApple && !relTokens.includes('icon')) continue;

    const raw = readAttribute(tag, HREF_ATTR_RE);
    if (raw === null || raw.toLowerCase().startsWith('data:')) continue;

    let url: URL;
    try {
      url = new URL(raw, baseUrl);
    } catch {
      continue;
    }
    // The href comes from an untrusted page; only fetch plain web URLs.
    if (url.protocol !== 'http:' && url.protocol !== 'https:') continue;

    let size = largestDeclaredSize(readAttribute(tag, SIZES_ATTR_RE));
    if (size === 0 && isApple) size = 180;

    icons.push({ href: url.toString(), size, isApple });
  }

  return icons;
}

/**
 * Orders candidates by preference and returns their URLs without duplicates.
 *
 * Sizes from 32 to 192 px come first, largest first; everything else follows,
 * also largest first. The input array is not modified.
 */
function rankIconHrefs(icons: readonly IconLink[]): string[] {
  const sweet = (s: number): boolean => s >= 32 && s <= 192;
  const ranked = [...icons].sort((a, b) => {
    if (sweet(a.size) && !sweet(b.size)) return -1;
    if (!sweet(a.size) && sweet(b.size)) return 1;
    return b.size - a.size;
  });
  return [...new Set(ranked.map((icon) => icon.href))];
}

// Fetches icon candidates by parsing the HTML. 32–192 px is preferred (sharp
// enough for 24×24 rendering without SVG madness); everything outside that
// range ends up last.
/**
 * @param domain Host name; the page at `https://<domain>/` is fetched.
 * @returns Ranked icon URLs, or an empty array when the page cannot be
 *   fetched or declares no icons.
 */
async function resolveIconsFromHtml(domain: string): Promise<string[]> {
  try {
    const baseUrl = `https://${domain}/`;
    const html = await fetchText(baseUrl, {
      timeoutMs: 3_500,
      maxBytes: 256 * 1024,
      allowTruncate: true
    });
    return rankIconHrefs(extractIconLinks(html, baseUrl));
  } catch {
    // Best-effort: the caller falls back to /favicon.ico.
    return [];
  }
}

/**
 * Downloads the best available favicon for a domain.
 *
 * @param domain Host name to look up.
 * @returns Icon bytes and their Content-Type, or null when every source fails.
 */
async function fetchFaviconBytes(
  domain: string
): Promise<{ data: Uint8Array; contentType: string | null } | null> {
  // 1. Pull the <link> icon candidates from the homepage — that is usually
  //    the "real" site icon, not the hoster default.
  for (const url of await resolveIconsFromHtml(domain)) {
    const got = await tryFetch(url);
    if (got) return got;
  }

  // 2. The classic: /favicon.ico. Many older sites only have that one.
  const direct = await tryFetch(`https://${domain}/favicon.ico`);
  if (direct) return direct;

  // 3. Fallback: Google favicon service. Practically always returns something.
  return tryFetch(`https://www.google.com/s2/favicons?sz=64&domain=${encodeURIComponent(domain)}`);
}

/**
 * Writes icon bytes into `imageDir` under a content-addressed name.
 *
 * The name is `favicon-<sha256 hex><ext>`, so identical icons share one file
 * and an existing file is not rewritten.
 *
 * @param data Icon bytes.
 * @param contentType Content-Type of the download, used to choose the extension.
 * @param imageDir Target directory; created if missing.
 * @returns The file name relative to `imageDir`.
 */
async function persist(
  data: Uint8Array,
  contentType: string | null,
  imageDir: string
): Promise<string> {
  const hash = createHash('sha256').update(data).digest('hex');
  const filename = `favicon-${hash}${extensionFor(contentType)}`;
  const target = join(imageDir, filename);
  if (!existsSync(target)) {
    await mkdir(imageDir, { recursive: true });
    await writeFile(target, data);
  }
  return filename;
}

/**
 * Fetches the favicon for `domain` and stores it in `imageDir`.
 *
 * @param domain Host name to look up.
 * @param imageDir Directory the icon file is written to.
 * @returns The stored file name, or null when no icon could be downloaded or
 *   writing it failed.
 */
export async function fetchAndStoreFavicon(
  domain: string,
  imageDir: string
): Promise<string | null> {
  const result = await fetchFaviconBytes(domain);
  if (!result) return null;
  try {
    return await persist(result.data, result.contentType, imageDir);
  } catch {
    // Best-effort: a failed write is treated like a missing icon.
    return null;
  }
}

// Loads favicons for all whitelist domains that do not have one stored yet.
// Parallel with a limit of 8. Deliberately stays synchronous from the caller's
// point of view, so that the first GET /api/domains returns a complete list.
// On the second request there is nothing left to do.
/**
 * Downloads and records favicons for every domain that has none stored yet.
 *
 * Domains returned by `listDomains` without a `favicon_path` are processed by
 * at most 8 concurrent workers. Each successful download is recorded with
 * `setDomainFavicon`; domains for which no icon could be stored stay unchanged.
 * The returned promise settles only after all workers have finished, so the
 * caller can await it before reading the domain list. An error thrown by
 * `setDomainFavicon` is not caught here and rejects the returned promise.
 *
 * @param db Database handle passed through to the repository functions.
 * @param imageDir Directory the icon files are written to.
 */
export async function ensureFavicons(
  db: Database.Database,
  imageDir: string
): Promise<void> {
  const pending = listDomains(db).filter((d) => !d.favicon_path);
  if (pending.length === 0) return;

  const LIMIT = 8;
  // Shared cursor: each worker claims the next unprocessed domain. The claim
  // has no await in between, so no domain is taken twice.
  let cursor = 0;

  const runWorker = async (): Promise<void> => {
    while (cursor < pending.length) {
      const entry = pending[cursor++];
      if (!entry) break;
      const path = await fetchAndStoreFavicon(entry.domain, imageDir);
      if (path) setDomainFavicon(db, entry.id, path);
    }
  };

  const workerCount = Math.min(LIMIT, pending.length);
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
}