2026-04-18 08:17:44 +02:00
|
|
|
|
import type Database from 'better-sqlite3';
|
|
|
|
|
|
import { createHash } from 'node:crypto';
|
|
|
|
|
|
import { existsSync } from 'node:fs';
|
|
|
|
|
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
|
|
|
|
import { join } from 'node:path';
|
2026-04-18 14:34:17 +02:00
|
|
|
|
import { fetchBuffer, fetchText } from '../http';
|
2026-04-18 08:17:44 +02:00
|
|
|
|
import { listDomains, setDomainFavicon } from './repository';
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * File extension to use for each supported image MIME type. Keys are
 * lower-case and carry no parameters (no `; charset=…`).
 */
const EXT_BY_CONTENT_TYPE: Record<string, string> = {
  'image/png': '.png',
  'image/jpeg': '.jpg',
  'image/jpg': '.jpg',
  'image/webp': '.webp',
  'image/gif': '.gif',
  'image/svg+xml': '.svg',
  'image/x-icon': '.ico',
  'image/vnd.microsoft.icon': '.ico'
};

/**
 * Chooses the file extension for a downloaded favicon from its Content-Type.
 *
 * @param contentType Raw header value (may include parameters), or `null`.
 * @returns The mapped extension, or `'.ico'` when the header is missing,
 *          empty or not in the table.
 */
function extensionFor(contentType: string | null): string {
  if (!contentType) return '.ico';
  const base = (contentType.split(';')[0] ?? '').trim().toLowerCase();
  // The header is remote-controlled. Only own keys of the table count, so
  // values such as "constructor" or "__proto__" cannot resolve to members
  // inherited from Object.prototype and end up inside a file name.
  if (!Object.prototype.hasOwnProperty.call(EXT_BY_CONTENT_TYPE, base)) return '.ico';
  return EXT_BY_CONTENT_TYPE[base] ?? '.ico';
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Downloads one icon candidate and treats every failure as "no icon".
 *
 * The request is issued through `fetchBuffer` with a 3000 ms timeout and a
 * 256 KiB size limit passed as options.
 *
 * @param url Absolute URL of the candidate.
 * @returns The response as returned by `fetchBuffer`, or `null` when the
 *          request throws, the body is empty, or the body is an HTML document.
 */
async function tryFetch(url: string): Promise<{ data: Uint8Array; contentType: string | null } | null> {
  try {
    const res = await fetchBuffer(url, { timeoutMs: 3_000, maxBytes: 256 * 1024 });
    if (res.data.byteLength === 0) return null;
    // Many sites answer a missing icon with a regular HTML page ("soft 404").
    // Accepting that would store markup as an image and keep the caller from
    // moving on to its next candidate.
    const mime = (res.contentType?.split(';')[0] ?? '').trim().toLowerCase();
    if (mime === 'text/html' || mime === 'application/xhtml+xml') return null;
    return res;
  } catch {
    // Best effort: network errors, timeouts and oversized bodies all mean
    // "this candidate is unusable".
    return null;
  }
}
|
|
|
|
|
|
|
2026-04-18 14:34:17 +02:00
|
|
|
|
// Parses <link rel="…icon"> tags from the document <head>. WordPress sites
// often serve a generic /favicon.ico (the cog-wheel default of the hoster or
// of a plugin) while the real site icon is referenced via <link rel="icon">.
// So look through the head first instead of blindly taking /favicon.ico.
type IconLink = { href: string; size: number; isApple: boolean };

/** Matches every `<link …>` tag. */
const ICON_LINK_TAG_RE = /<link\b[^>]*>/gi;
/** Matches one `WxH` entry of a `sizes` attribute; group 1 is the width. */
const ICON_SIZE_ENTRY_RE = /(\d+)\s*x\s*\d+/gi;
/** Matches the HTML-escaped forms of `&` that occur in attribute URLs. */
const ICON_AMP_ENTITY_RE = /&(?:amp|#0*38|#x0*26);/gi;
const ICON_REL_ATTR_RE = iconAttrPattern('rel');
const ICON_HREF_ATTR_RE = iconAttrPattern('href');
const ICON_SIZES_ATTR_RE = iconAttrPattern('sizes');

/**
 * Builds a matcher for one attribute inside a tag. The value may be
 * double-quoted (group 1), single-quoted (group 2) or unquoted (group 3).
 * The lookbehind keeps `href` from matching inside `data-href` and the like.
 */
function iconAttrPattern(name: string): RegExp {
  return new RegExp(`(?<![\\w-])${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`, 'i');
}

/** Returns the attribute value matched by `pattern` in `tag`, or `null`. */
function readIconAttr(tag: string, pattern: RegExp): string | null {
  const m = pattern.exec(tag);
  if (!m) return null;
  return m[1] ?? m[2] ?? m[3] ?? null;
}

/**
 * Collects favicon candidates declared via `<link>` tags.
 *
 * Only the first 300,000 characters of `html` are scanned. A tag qualifies
 * when its `rel` token list contains `icon`, `apple-touch-icon` or
 * `apple-touch-icon-precomposed`; look-alikes such as `mask-icon` do not.
 *
 * @param html    Markup to scan.
 * @param baseUrl URL against which relative `href` values are resolved.
 * @returns One entry per qualifying tag, in document order. `href` is an
 *          absolute http(s) URL. `size` is the largest width listed in
 *          `sizes`, 180 for Apple touch icons without `sizes`, otherwise 0.
 */
function extractIconLinks(html: string, baseUrl: string): IconLink[] {
  const head = html.slice(0, 300_000);
  const icons: IconLink[] = [];

  for (const match of head.matchAll(ICON_LINK_TAG_RE)) {
    const tag = match[0];

    // `rel` is a space-separated token list; compare whole tokens.
    const relTokens = (readIconAttr(tag, ICON_REL_ATTR_RE) ?? '')
      .toLowerCase()
      .split(/\s+/)
      .filter(Boolean);
    const isApple = relTokens.some(
      (t) => t === 'apple-touch-icon' || t === 'apple-touch-icon-precomposed'
    );
    if (!isApple && !relTokens.includes('icon')) continue;

    // Attribute values are HTML-escaped; WordPress emits `&#038;` in URLs.
    const raw = (readIconAttr(tag, ICON_HREF_ATTR_RE) ?? '').trim().replace(ICON_AMP_ENTITY_RE, '&');
    if (!raw) continue;

    let url: URL;
    try {
      url = new URL(raw, baseUrl);
    } catch {
      continue;
    }
    // Drops data:, javascript:, file: … — only fetchable web URLs are useful.
    if (url.protocol !== 'http:' && url.protocol !== 'https:') continue;

    let size = 0;
    const sizes = readIconAttr(tag, ICON_SIZES_ATTR_RE);
    if (sizes) {
      // A resource may bundle several sizes ("16x16 32x32 48x48"); the
      // largest one decides how sharp it can be rendered.
      for (const entry of sizes.matchAll(ICON_SIZE_ENTRY_RE)) {
        size = Math.max(size, Number(entry[1]));
      }
    }
    // Apple touch icons without an explicit size are assumed to be 180 px.
    if (!size && isApple) size = 180;

    icons.push({ href: url.toString(), size, isApple });
  }

  return icons;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Fetches icon candidates by parsing the homepage HTML. 32–192 px is preferred
// (sharp enough for 24×24 rendering without SVG madness); everything outside
// that range goes to the end.
/**
 * @param domain Host name; the page requested is `https://<domain>/`.
 * @returns Absolute icon URLs, best candidate first and without duplicates.
 *          Empty when the page cannot be fetched or declares no icons.
 */
async function resolveIconsFromHtml(domain: string): Promise<string[]> {
  try {
    const baseUrl = `https://${domain}/`;
    const html = await fetchText(baseUrl, {
      timeoutMs: 3_500,
      maxBytes: 256 * 1024,
      allowTruncate: true
    });
    const icons = extractIconLinks(html, baseUrl);
    if (icons.length === 0) return [];

    const sweet = (s: number) => s >= 32 && s <= 192;
    // Sort a copy: sizes in the preferred range first, larger before smaller
    // within each group.
    const ranked = [...icons].sort((a, b) => {
      const aSweet = sweet(a.size);
      const bSweet = sweet(b.size);
      if (aSweet !== bSweet) return aSweet ? -1 : 1;
      return b.size - a.size;
    });
    // Pages often declare the same file several times; the Set keeps the
    // best-ranked occurrence so the caller never fetches a URL twice.
    return [...new Set(ranked.map((i) => i.href))];
  } catch {
    // Best effort: an unreachable homepage simply yields no candidates.
    return [];
  }
}
|
|
|
|
|
|
|
2026-04-18 08:17:44 +02:00
|
|
|
|
/**
 * Obtains favicon bytes for a domain, trying sources from most to least
 * specific and returning the first one that yields data.
 *
 * @param domain Host name without scheme.
 * @returns The first successful download, or `null` when every source fails.
 */
async function fetchFaviconBytes(
  domain: string
): Promise<{ data: Uint8Array; contentType: string | null } | null> {
  // Candidates are tried one after another, each with its own timeout.
  // Without an upper bound a page listing dozens of dead icons would stall
  // the caller for minutes.
  const maxHtmlCandidates = 5;
  const tried = new Set<string>();

  // 1. Pull the <link rel="icon"> candidates from the homepage — that is
  //    normally the "real" site icon, not the hoster's default.
  const htmlIcons = await resolveIconsFromHtml(domain);
  for (const url of htmlIcons.slice(0, maxHtmlCandidates)) {
    if (tried.has(url)) continue;
    tried.add(url);
    const got = await tryFetch(url);
    if (got) return got;
  }

  // 2. The classic: /favicon.ico. Many older sites only have that one.
  //    Skipped when the page itself already pointed there and it failed.
  const classic = `https://${domain}/favicon.ico`;
  if (!tried.has(classic)) {
    const direct = await tryFetch(classic);
    if (direct) return direct;
  }

  // 3. Fallback: Google's favicon service. Returns something practically always.
  return tryFetch(`https://www.google.com/s2/favicons?sz=64&domain=${encodeURIComponent(domain)}`);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Stores icon bytes under a content-addressed file name.
 *
 * The name is `favicon-<sha256 of data><extension>`, so identical icons
 * shared by several domains end up in a single file.
 *
 * @param data        Raw icon bytes.
 * @param contentType Content-Type of the download; selects the extension.
 * @param imageDir    Target directory; created (recursively) when missing.
 * @returns The file name relative to `imageDir`.
 * @throws Any file-system error other than "file already exists".
 */
async function persist(
  data: Uint8Array,
  contentType: string | null,
  imageDir: string
): Promise<string> {
  const hash = createHash('sha256').update(data).digest('hex');
  const filename = `favicon-${hash}${extensionFor(contentType)}`;
  const target = join(imageDir, filename);

  await mkdir(imageDir, { recursive: true });
  try {
    // 'wx' creates the file exclusively. Compared with "check, then write"
    // this needs no blocking existsSync call, and a concurrent writer of the
    // same icon can no longer truncate a file that is just being written.
    await writeFile(target, data, { flag: 'wx' });
  } catch (err: unknown) {
    const code = typeof err === 'object' && err !== null ? (err as { code?: unknown }).code : undefined;
    // Same name means same content, so an existing file is a success.
    if (code !== 'EEXIST') throw err;
  }
  return filename;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Downloads the favicon of a domain and stores it in `imageDir`.
 *
 * @param domain   Host name without scheme.
 * @param imageDir Directory the icon file is written to.
 * @returns The stored file name (relative to `imageDir`), or `null` when no
 *          icon could be downloaded or writing it failed.
 */
export async function fetchAndStoreFavicon(
  domain: string,
  imageDir: string
): Promise<string | null> {
  const result = await fetchFaviconBytes(domain);
  if (!result) return null;
  try {
    return await persist(result.data, result.contentType, imageDir);
  } catch (err: unknown) {
    // Still best effort for the caller, but a full disk or missing write
    // permission should leave a trace instead of vanishing silently.
    console.warn(`favicon: could not store icon for ${domain}`, err);
    return null;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Loads favicons for all whitelist domains that do not have one stored yet.
// Runs in parallel with a limit of 8. Deliberately completes before returning
// to the caller, so that the first GET /api/domains returns a complete list.
// Domains that received an icon are skipped on later calls; domains whose
// download failed keep an empty favicon_path and are attempted again.
/**
 * @param db       Database handle passed through to the repository functions.
 * @param imageDir Directory icon files are written to.
 * @returns Resolves once every pending domain has been processed.
 * @throws The first error raised by a worker (e.g. from `setDomainFavicon`),
 *         after all workers have stopped.
 */
export async function ensureFavicons(
  db: Database.Database,
  imageDir: string
): Promise<void> {
  const pending = listDomains(db).filter((d) => !d.favicon_path);
  if (pending.length === 0) return;

  const queue = [...pending];
  const LIMIT = 8;

  // Each worker pulls the next domain off the shared queue until it is empty.
  const worker = async (): Promise<void> => {
    for (let d = queue.shift(); d; d = queue.shift()) {
      const path = await fetchAndStoreFavicon(d.domain, imageDir);
      if (path) setDomainFavicon(db, d.id, path);
    }
  };
  const workers = Array.from({ length: Math.min(LIMIT, queue.length) }, () => worker());

  // Wait for every worker before reporting a failure. With Promise.all the
  // function would reject on the first error while the remaining workers
  // kept writing to the database in the background.
  const outcomes = await Promise.allSettled(workers);
  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') throw outcome.reason;
  }
}
|