feat(search): Enter bleibt auf Seite + robustere Thumbnail-Erkennung
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 55s
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 55s
Startseite:
- Enter/Return löst die Suche jetzt sofort aus (cancelt den Debounce),
navigiert aber NICHT mehr auf /search. Der Anwender bleibt auf der
gleichen Seite mit Inline-Ergebnissen.
Thumbnail-Enrichment (searxng.ts):
- Regex-basierte og:image-Extraktion durch linkedom-parseHTML ersetzt.
- Neue Fallback-Kette (in dieser Reihenfolge):
1. <meta property/name = og:image | og:image:url | og:image:secure_url
| twitter:image | twitter:image:src>
2. <link rel="image_src" href="...">
3. JSON-LD image (auch tief in @graph; "image" als String, Array,
Objekt-mit-url)
4. Erstes <img> in article/main/.entry-content/.post-content/figure
- Relative URLs werden gegen die Seiten-URL zu absoluten aufgelöst
(z.B. /uploads/foo.jpg → http://host/uploads/foo.jpg).
- maxBytes von 256 KB auf 512 KB angehoben, damit JSON-LD-lastige
Recipe-Seiten nicht mitten im Script abgeschnitten werden.
Tests (97/97):
- Neu: JSON-LD-Image-Fallback-Test.
- Neu: Content-<img>-Fallback-Test mit relativer URL, die zur
absoluten aufgelöst wird.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import { parseHTML } from 'linkedom';
|
||||
import { listDomains, normalizeDomain } from '../domains/repository';
|
||||
import { fetchText } from '../http';
|
||||
|
||||
@@ -77,18 +78,102 @@ function looksLikeRecipePage(url: string): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
const OG_IMAGE_RE =
|
||||
/<meta[^>]+(?:property|name)=["']og:image(?::url)?["'][^>]+content=["']([^"']+)["']/i;
|
||||
const OG_IMAGE_RE_REV =
|
||||
/<meta[^>]+content=["']([^"']+)["'][^>]+(?:property|name)=["']og:image(?::url)?["']/i;
|
||||
const TWITTER_IMAGE_RE =
|
||||
/<meta[^>]+(?:property|name)=["']twitter:image["'][^>]+content=["']([^"']+)["']/i;
|
||||
|
||||
function extractOgImage(html: string): string | null {
|
||||
const m = OG_IMAGE_RE.exec(html) ?? OG_IMAGE_RE_REV.exec(html) ?? TWITTER_IMAGE_RE.exec(html);
|
||||
if (!m) return null;
|
||||
/**
 * Resolves a possibly relative `href` against `baseUrl` and returns the
 * absolute URL as a string.
 *
 * The href comes from third-party HTML, so only `http:` and `https:` results
 * are accepted. Inline `data:` placeholders (common with lazy-loading) and
 * `javascript:` / other schemes are not usable as remote thumbnails and
 * yield `null`.
 *
 * @param href    URL or path taken from the page (absolute, protocol-relative or relative).
 * @param baseUrl Absolute URL of the page the href was found on.
 * @returns The absolute http(s) URL, or `null` if it cannot be parsed or uses another scheme.
 */
function resolveUrl(href: string, baseUrl: string): string | null {
  try {
    const resolved = new URL(href, baseUrl);
    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
      return null;
    }
    return resolved.toString();
  } catch {
    // `new URL` throws on unparsable input (including an invalid base).
    return null;
  }
}
|
||||
|
||||
/**
 * Returns the URL carried by a single JSON-LD `image` entry, or `null`.
 *
 * An entry is either a plain URL string or an object (e.g. an ImageObject)
 * exposing the address as `url` or `contentUrl`. Blank strings are ignored.
 */
function jsonLdImageEntryUrl(entry: unknown): string | null {
  if (typeof entry === 'string') {
    return entry.trim() !== '' ? entry : null;
  }
  if (entry && typeof entry === 'object' && !Array.isArray(entry)) {
    const { url, contentUrl } = entry as Record<string, unknown>;
    if (typeof url === 'string' && url.trim() !== '') return url;
    if (typeof contentUrl === 'string' && contentUrl.trim() !== '') return contentUrl;
  }
  return null;
}

/**
 * Finds the first usable image reference in a parsed JSON-LD value.
 *
 * Search order:
 *  1. If `data` is an array, each element in order (recursively).
 *  2. If the node has an `@graph` array, each graph member (recursively).
 *  3. The node's own `image` property, which may be a string, an object with
 *     `url` / `contentUrl`, or an array of those. All array entries are
 *     tried, so an unusable first entry does not hide a later valid one.
 *
 * The returned string is passed through as found (it may be relative);
 * resolving it against the page URL is the caller's job.
 *
 * @param data Result of `JSON.parse` on a JSON-LD script body (untrusted shape).
 * @returns The first non-blank image URL string, or `null` if none is found.
 */
function imageFromJsonLd(data: unknown): string | null {
  if (!data) return null;

  if (Array.isArray(data)) {
    for (const entry of data) {
      const found = imageFromJsonLd(entry);
      if (found) return found;
    }
    return null;
  }

  if (typeof data !== 'object') return null;
  const node = data as Record<string, unknown>;

  const graph = node['@graph'];
  if (Array.isArray(graph)) {
    for (const member of graph) {
      const found = imageFromJsonLd(member);
      if (found) return found;
    }
  }

  const image = node.image;
  const candidates: unknown[] = Array.isArray(image) ? image : [image];
  for (const candidate of candidates) {
    const url = jsonLdImageEntryUrl(candidate);
    if (url) return url;
  }
  return null;
}
|
||||
|
||||
/**
 * Lower-cased `property` / `name` values of `<meta>` tags whose `content`
 * is treated as a candidate preview image (OpenGraph and Twitter Card keys).
 *
 * Typed as `ReadonlySet` because this is shared module-level lookup data
 * that must not be mutated at runtime.
 */
const META_IMAGE_KEYS: ReadonlySet<string> = new Set([
  'og:image',
  'og:image:url',
  'og:image:secure_url',
  'twitter:image',
  'twitter:image:src'
]);
|
||||
|
||||
function extractPageImage(html: string, baseUrl: string): string | null {
|
||||
try {
|
||||
const { document } = parseHTML(html);
|
||||
// 1. OpenGraph / Twitter meta tags
|
||||
for (const m of Array.from(document.querySelectorAll('meta'))) {
|
||||
const key = (m.getAttribute('property') ?? m.getAttribute('name') ?? '').toLowerCase();
|
||||
if (!META_IMAGE_KEYS.has(key)) continue;
|
||||
const content = m.getAttribute('content');
|
||||
if (!content) continue;
|
||||
const resolved = resolveUrl(content, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
// 2. <link rel="image_src">
|
||||
const link = document.querySelector('link[rel="image_src"]');
|
||||
if (link) {
|
||||
const href = link.getAttribute('href');
|
||||
if (href) {
|
||||
const resolved = resolveUrl(href, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
}
|
||||
// 3. JSON-LD image (Recipe schema etc.)
|
||||
for (const s of Array.from(document.querySelectorAll('script[type="application/ld+json"]'))) {
|
||||
try {
|
||||
const data = JSON.parse(s.textContent ?? '');
|
||||
const img = imageFromJsonLd(data);
|
||||
if (img) {
|
||||
const resolved = resolveUrl(img, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
} catch {
|
||||
// malformed JSON-LD — skip
|
||||
}
|
||||
}
|
||||
// 4. First content image in article/main
|
||||
const contentImg = document.querySelector(
|
||||
'article img[src], main img[src], .entry-content img[src], .post-content img[src], figure img[src]'
|
||||
);
|
||||
if (contentImg) {
|
||||
const src = contentImg.getAttribute('src') ?? contentImg.getAttribute('data-src');
|
||||
if (src) {
|
||||
const resolved = resolveUrl(src, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
@@ -104,8 +189,8 @@ async function enrichThumbnail(url: string): Promise<string | null> {
|
||||
if (cached && cached.expires > now) return cached.image;
|
||||
let image: string | null = null;
|
||||
try {
|
||||
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 256 * 1024 });
|
||||
image = extractOgImage(html);
|
||||
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 512 * 1024 });
|
||||
image = extractPageImage(html, url);
|
||||
} catch {
|
||||
image = null;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user