diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts
index 9ec6a93..dbea01e 100644
--- a/src/lib/server/search/searxng.ts
+++ b/src/lib/server/search/searxng.ts
@@ -1,4 +1,5 @@
import type Database from 'better-sqlite3';
+import { parseHTML } from 'linkedom';
import { listDomains, normalizeDomain } from '../domains/repository';
import { fetchText } from '../http';
@@ -77,18 +78,102 @@ function looksLikeRecipePage(url: string): boolean {
}
}
-const OG_IMAGE_RE =
- /]+(?:property|name)=["']og:image(?::url)?["'][^>]+content=["']([^"']+)["']/i;
-const OG_IMAGE_RE_REV =
- /]+content=["']([^"']+)["'][^>]+(?:property|name)=["']og:image(?::url)?["']/i;
-const TWITTER_IMAGE_RE =
- /]+(?:property|name)=["']twitter:image["'][^>]+content=["']([^"']+)["']/i;
-
-function extractOgImage(html: string): string | null {
- const m = OG_IMAGE_RE.exec(html) ?? OG_IMAGE_RE_REV.exec(html) ?? TWITTER_IMAGE_RE.exec(html);
- if (!m) return null;
+function resolveUrl(href: string, baseUrl: string): string | null {
try {
- return new URL(m[1]).toString();
+ return new URL(href, baseUrl).toString();
+ } catch {
+ return null;
+ }
+}
+
+/**
+ * Finds the first image URL in a parsed JSON-LD value.
+ * Arrays and `@graph` containers are walked depth-first. A node's `image` may
+ * be a string, an object with a string `url`, or an array mixing both.
+ * Empty strings are treated as missing.
+ * @param data - Untrusted `JSON.parse` output taken from a JSON-LD script tag.
+ * @returns The raw, unresolved URL string, or null when no usable image exists.
+ */
+function imageFromJsonLd(data: unknown): string | null {
+  if (!data || typeof data !== 'object') return null;
+  if (Array.isArray(data)) {
+    for (const entry of data) {
+      const found = imageFromJsonLd(entry);
+      if (found) return found;
+    }
+    return null;
+  }
+  const node = data as Record<string, unknown>;
+  // `@graph` members are searched before this node's own `image`.
+  const nested = Array.isArray(node['@graph']) ? imageFromJsonLd(node['@graph']) : null;
+  if (nested) return nested;
+  // Try every entry, not only the first, so a bad leading item cannot hide a good one.
+  const candidates: unknown[] = Array.isArray(node.image) ? node.image : [node.image];
+  for (const candidate of candidates) {
+    // Empty strings are skipped; null is the only "no image" result.
+    if (typeof candidate === 'string' && candidate) return candidate;
+    if (candidate && typeof candidate === 'object' && 'url' in candidate) {
+      const url = (candidate as Record<string, unknown>).url;
+      if (typeof url === 'string' && url) return url;
+    }
+  }
+  return null;
+}
+
+// OpenGraph and Twitter-card keys are listed separately so each family is easy to scan.
+const OG_IMAGE_KEYS = ['og:image', 'og:image:url', 'og:image:secure_url'];
+const TWITTER_IMAGE_KEYS = ['twitter:image', 'twitter:image:src'];
+/**
+ * Lower-case `<meta>` `property`/`name` values whose `content` is read as an image URL.
+ */
+const META_IMAGE_KEYS = new Set([...OG_IMAGE_KEYS, ...TWITTER_IMAGE_KEYS]);
+
+function extractPageImage(html: string, baseUrl: string): string | null {
+ try {
+ const { document } = parseHTML(html);
+ // 1. OpenGraph / Twitter meta tags
+ for (const m of Array.from(document.querySelectorAll('meta'))) {
+ const key = (m.getAttribute('property') ?? m.getAttribute('name') ?? '').toLowerCase();
+ if (!META_IMAGE_KEYS.has(key)) continue;
+ const content = m.getAttribute('content');
+ if (!content) continue;
+ const resolved = resolveUrl(content, baseUrl);
+ if (resolved) return resolved;
+ }
+ // 2. <link rel="image_src"> fallback
+ const link = document.querySelector('link[rel="image_src"]');
+ if (link) {
+ const href = link.getAttribute('href');
+ if (href) {
+ const resolved = resolveUrl(href, baseUrl);
+ if (resolved) return resolved;
+ }
+ }
+ // 3. JSON-LD image (Recipe schema etc.)
+ for (const s of Array.from(document.querySelectorAll('script[type="application/ld+json"]'))) {
+ try {
+ const data = JSON.parse(s.textContent ?? '');
+ const img = imageFromJsonLd(data);
+ if (img) {
+ const resolved = resolveUrl(img, baseUrl);
+ if (resolved) return resolved;
+ }
+ } catch {
+ // malformed JSON-LD — skip
+ }
+ }
+ // 4. First content image in article/main
+ const contentImg = document.querySelector(
+ 'article img[src], main img[src], .entry-content img[src], .post-content img[src], figure img[src]'
+ );
+ if (contentImg) {
+ const src = contentImg.getAttribute('src') ?? contentImg.getAttribute('data-src');
+ if (src) {
+ const resolved = resolveUrl(src, baseUrl);
+ if (resolved) return resolved;
+ }
+ }
+ return null;
} catch {
return null;
}
@@ -104,8 +189,8 @@ async function enrichThumbnail(url: string): Promise {
if (cached && cached.expires > now) return cached.image;
let image: string | null = null;
try {
- const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 256 * 1024 });
- image = extractOgImage(html);
+ const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 512 * 1024 });
+ image = extractPageImage(html, url);
} catch {
image = null;
}
diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte
index 107c9e5..8f6503c 100644
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -1,6 +1,5 @@
+