feat(search): Enter bleibt auf Seite + robustere Thumbnail-Erkennung
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 55s

Startseite:
- Enter/Return löst die Suche jetzt sofort aus (bricht den laufenden Debounce
  ab; wie bei der Inline-Suche erst ab mehr als 3 Zeichen), navigiert aber
  NICHT mehr auf /search. Der Anwender bleibt auf derselben Seite mit
  Inline-Ergebnissen.

Thumbnail-Enrichment (searxng.ts):
- Regex-basierte og:image-Extraktion durch linkedom-parseHTML ersetzt.
- Neue Fallback-Kette (in dieser Reihenfolge):
    1. <meta property/name = og:image | og:image:url | og:image:secure_url
                           | twitter:image | twitter:image:src>
    2. <link rel="image_src" href="...">
    3. JSON-LD image (auch tief in @graph; "image" als String, Array,
       Objekt-mit-url)
    4. Erstes <img> in article/main/.entry-content/.post-content/figure
- Relative URLs werden gegen die Seiten-URL zu absoluten aufgelöst
  (z.B. /uploads/foo.jpg → http://host/uploads/foo.jpg).
- maxBytes von 256 KB auf 512 KB angehoben, damit JSON-LD-lastige
  Recipe-Seiten nicht mitten im Script abgeschnitten werden.

Tests (97/97):
- Neu: JSON-LD-Image-Fallback-Test.
- Neu: Content-<img>-Fallback-Test mit relativer URL, die zur
  absoluten aufgelöst wird.
This commit is contained in:
hsiegeln
2026-04-17 18:04:59 +02:00
parent 9bc4465061
commit 211d58ebec
3 changed files with 178 additions and 42 deletions

View File

@@ -1,4 +1,5 @@
import type Database from 'better-sqlite3';
import { parseHTML } from 'linkedom';
import { listDomains, normalizeDomain } from '../domains/repository';
import { fetchText } from '../http';
@@ -77,18 +78,102 @@ function looksLikeRecipePage(url: string): boolean {
}
}
const OG_IMAGE_RE =
/<meta[^>]+(?:property|name)=["']og:image(?::url)?["'][^>]+content=["']([^"']+)["']/i;
const OG_IMAGE_RE_REV =
/<meta[^>]+content=["']([^"']+)["'][^>]+(?:property|name)=["']og:image(?::url)?["']/i;
const TWITTER_IMAGE_RE =
/<meta[^>]+(?:property|name)=["']twitter:image["'][^>]+content=["']([^"']+)["']/i;
function extractOgImage(html: string): string | null {
const m = OG_IMAGE_RE.exec(html) ?? OG_IMAGE_RE_REV.exec(html) ?? TWITTER_IMAGE_RE.exec(html);
if (!m) return null;
/**
 * Resolves a possibly relative reference against the URL of the page it was
 * found on.
 *
 * @param href - Absolute or relative URL reference taken from the page.
 * @param baseUrl - Absolute URL of the page; used as the resolution base.
 * @returns The absolute URL as a string, or `null` when `href`/`baseUrl`
 *   cannot be parsed into a valid URL.
 */
function resolveUrl(href: string, baseUrl: string): string | null {
  try {
    // The two-argument form resolves relative references; an absolute `href`
    // simply ignores the base.
    return new URL(href, baseUrl).toString();
  } catch {
    // `new URL` throws on unparsable input — report "no usable URL".
    return null;
  }
}
/**
 * Finds the first usable image reference in a parsed JSON-LD value.
 *
 * Handles the shapes a JSON-LD document can take:
 * - a top-level array of nodes (searched in order),
 * - a node with an `@graph` array (searched before the node's own `image`),
 * - an `image` property that is a string, an object carrying `url` (or
 *   `contentUrl`), or an array mixing both forms.
 *
 * Every entry of an `image` array is considered, so a leading entry without a
 * usable URL does not hide a valid one further down the list.
 *
 * @param data - Arbitrary value, typically the result of `JSON.parse`.
 * @returns The image reference exactly as written in the document (it may be
 *   relative), or `null` when none is found. Empty strings count as "none".
 */
function imageFromJsonLd(data: unknown): string | null {
  if (!data || typeof data !== 'object') return null;

  if (Array.isArray(data)) {
    for (const entry of data) {
      const found = imageFromJsonLd(entry);
      if (found) return found;
    }
    return null;
  }

  const node = data as Record<string, unknown>;

  const graph = node['@graph'];
  if (Array.isArray(graph)) {
    const found = imageFromJsonLd(graph);
    if (found) return found;
  }

  // Normalise "single value" and "list of values" into one candidate list.
  const image = node.image;
  const candidates: unknown[] = Array.isArray(image) ? image : [image];
  for (const candidate of candidates) {
    if (typeof candidate === 'string') {
      if (candidate) return candidate;
      continue;
    }
    if (!candidate || typeof candidate !== 'object') continue;
    // Object form: prefer `url`, fall back to `contentUrl`.
    const imageObject = candidate as Record<string, unknown>;
    for (const key of ['url', 'contentUrl']) {
      const value = imageObject[key];
      if (typeof value === 'string' && value) return value;
    }
  }
  return null;
}
/**
 * `<meta>` keys whose `content` is treated as a page image. Entries are
 * lowercase because the lookup lowercases the tag's `property`/`name` value
 * before testing membership.
 */
const META_IMAGE_KEYS = new Set<string>([
  // Open Graph variants
  'og:image',
  'og:image:url',
  'og:image:secure_url',
  // Twitter card variants
  'twitter:image',
  'twitter:image:src'
]);
function extractPageImage(html: string, baseUrl: string): string | null {
try {
const { document } = parseHTML(html);
// 1. OpenGraph / Twitter meta tags
for (const m of Array.from(document.querySelectorAll('meta'))) {
const key = (m.getAttribute('property') ?? m.getAttribute('name') ?? '').toLowerCase();
if (!META_IMAGE_KEYS.has(key)) continue;
const content = m.getAttribute('content');
if (!content) continue;
const resolved = resolveUrl(content, baseUrl);
if (resolved) return resolved;
}
// 2. <link rel="image_src">
const link = document.querySelector('link[rel="image_src"]');
if (link) {
const href = link.getAttribute('href');
if (href) {
const resolved = resolveUrl(href, baseUrl);
if (resolved) return resolved;
}
}
// 3. JSON-LD image (Recipe schema etc.)
for (const s of Array.from(document.querySelectorAll('script[type="application/ld+json"]'))) {
try {
const data = JSON.parse(s.textContent ?? '');
const img = imageFromJsonLd(data);
if (img) {
const resolved = resolveUrl(img, baseUrl);
if (resolved) return resolved;
}
} catch {
// malformed JSON-LD — skip
}
}
// 4. First content image in article/main
const contentImg = document.querySelector(
'article img[src], main img[src], .entry-content img[src], .post-content img[src], figure img[src]'
);
if (contentImg) {
const src = contentImg.getAttribute('src') ?? contentImg.getAttribute('data-src');
if (src) {
const resolved = resolveUrl(src, baseUrl);
if (resolved) return resolved;
}
}
return null;
} catch {
return null;
}
@@ -104,8 +189,8 @@ async function enrichThumbnail(url: string): Promise<string | null> {
if (cached && cached.expires > now) return cached.image;
let image: string | null = null;
try {
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 256 * 1024 });
image = extractOgImage(html);
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 512 * 1024 });
image = extractPageImage(html, url);
} catch {
image = null;
}

View File

@@ -1,6 +1,5 @@
<script lang="ts">
import { onMount } from 'svelte';
import { goto } from '$app/navigation';
import type { SearchHit } from '$lib/server/recipes/search-local';
import type { WebHit } from '$lib/server/search/searxng';
import { randomQuote } from '$lib/quotes';
@@ -24,6 +23,34 @@
// Handle of the timeout that delays the search while the user is typing;
// starts out as null and is cleared before a new timeout is scheduled.
let debounceTimer: ReturnType<typeof setTimeout> | null = null;
/**
 * Runs the inline search for `q` and publishes the outcome into the
 * component state (`hits`, `searchedFor`, `webHits`, `webError`,
 * `webSearching`, `searching`).
 *
 * The local recipe search is queried first; only when it yields no hits is
 * the web search queried as a fallback.
 *
 * Because the input can change while a request is in flight, every state
 * write that follows an `await` is guarded: if the trimmed input no longer
 * equals `q`, the result is dropped and the busy flags are left to whichever
 * search owns the current input.
 *
 * @param q - Trimmed query string to search for.
 */
async function runSearch(q: string) {
  const isStale = () => query.trim() !== q;
  try {
    const res = await fetch(`/api/recipes/search?q=${encodeURIComponent(q)}`);
    const body = await res.json();
    if (isStale()) return;
    hits = body.hits;
    searchedFor = q;
    if (body.hits.length === 0) {
      webSearching = true;
      // Drop the error of an earlier attempt so a successful retry does not
      // keep showing it.
      webError = null;
      try {
        const wres = await fetch(`/api/recipes/search/web?q=${encodeURIComponent(q)}`);
        if (isStale()) return;
        if (!wres.ok) {
          // The error body is optional; fall back to the bare status code.
          const err = await wres.json().catch(() => ({}));
          if (isStale()) return;
          webError = err.message ?? `HTTP ${wres.status}`;
        } else {
          const wbody = await wres.json();
          // Reading the body is another await — re-check before publishing.
          if (isStale()) return;
          webHits = wbody.hits;
        }
      } catch (e: unknown) {
        // Network or parse failure: surface it like an HTTP error rather than
        // leaving an unhandled rejection behind the fire-and-forget call.
        if (!isStale()) webError = e instanceof Error ? e.message : String(e);
      } finally {
        if (!isStale()) webSearching = false;
      }
    }
  } finally {
    if (!isStale()) searching = false;
  }
}
$effect(() => {
const q = query.trim();
if (debounceTimer) clearTimeout(debounceTimer);
@@ -40,40 +67,18 @@
webHits = [];
webSearching = false;
webError = null;
debounceTimer = setTimeout(async () => {
try {
const res = await fetch(`/api/recipes/search?q=${encodeURIComponent(q)}`);
const body = await res.json();
if (query.trim() !== q) return;
hits = body.hits;
searchedFor = q;
if (body.hits.length === 0) {
webSearching = true;
try {
const wres = await fetch(`/api/recipes/search/web?q=${encodeURIComponent(q)}`);
if (query.trim() !== q) return;
if (!wres.ok) {
const err = await wres.json().catch(() => ({}));
webError = err.message ?? `HTTP ${wres.status}`;
} else {
const wbody = await wres.json();
webHits = wbody.hits;
}
} finally {
if (query.trim() === q) webSearching = false;
}
}
} finally {
if (query.trim() === q) searching = false;
}
debounceTimer = setTimeout(() => {
void runSearch(q);
}, 300);
});
/**
 * Form submit handler (Enter/Return in the search field).
 *
 * Starts the inline search immediately instead of waiting for the debounce
 * and keeps the user on the current page — it does not navigate to /search.
 *
 * @param e - Submit event of the search form; its default action (a page
 *   navigation) is suppressed.
 */
function submit(e: SubmitEvent) {
  e.preventDefault();
  const q = query.trim();
  // Same threshold as `activeSearch`: queries of three characters or fewer
  // (including the empty string) never trigger a search.
  if (q.length <= 3) return;
  // A debounced search for this input may still be pending; cancel it so the
  // query is not sent twice.
  if (debounceTimer) {
    clearTimeout(debounceTimer);
    debounceTimer = null;
  }
  searching = true;
  void runSearch(q);
}
// True while the trimmed query is longer than three characters.
const activeSearch = $derived(query.trim().length > 3);

View File

@@ -94,6 +94,52 @@ describe('searchWeb', () => {
}
});
// Thumbnail fallback: the served page has no og:image meta tag, only a JSON-LD
// Recipe object whose `image` is a plain string. That string is expected to
// end up as the hit's thumbnail.
it('falls back to JSON-LD image when no og:image', async () => {
// Throwaway local HTTP server that plays the role of the recipe page.
const pageServer = createServer((_req, res) => {
res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
res.end(`<html><head>
<script type="application/ld+json">${JSON.stringify({
'@type': 'Recipe',
name: 'Pie',
image: 'https://cdn.example/pie.jpg'
})}</script>
</head><body></body></html>`);
});
// Port 0 asks the OS for any free port; the real port is read back below.
await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
const addr = pageServer.address() as AddressInfo;
const pageUrl = `http://127.0.0.1:${addr.port}/pie`;
try {
const db = openInMemoryForTest();
// NOTE(review): presumably registers the host so its results are accepted — confirm against searchWeb.
addDomain(db, '127.0.0.1');
// The stubbed search result carries no thumbnail of its own.
// NOTE(review): respondWith presumably primes the fake SearXNG at `baseUrl` — confirm.
respondWith([{ url: pageUrl, title: 'Pie', content: '' }]);
const hits = await searchWeb(db, 'pie', { searxngUrl: baseUrl });
expect(hits[0].thumbnail).toBe('https://cdn.example/pie.jpg');
} finally {
// Shut the page server down even when the assertion above fails.
await new Promise<void>((r) => pageServer.close(() => r()));
}
});
// Last fallback: the page offers neither meta tags nor JSON-LD, only an <img>
// inside <article> with a root-relative src. The thumbnail is expected to be
// that src resolved against the page's own origin.
it('falls back to first content image when no meta/JSON-LD image', async () => {
  const pageHtml =
    '<html><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>';
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(pageHtml);
  });
  // Port 0 asks the OS for any free port; the real port is read back below.
  await new Promise<void>((resolve) => {
    pageServer.listen(0, '127.0.0.1', resolve);
  });
  const { port } = pageServer.address() as AddressInfo;
  const origin = `http://127.0.0.1:${port}`;

  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: `${origin}/article`, title: 'Dish', content: '' }]);

    const results = await searchWeb(db, 'dish', { searxngUrl: baseUrl });

    expect(results[0].thumbnail).toBe(`${origin}/uploads/dish.jpg`);
  } finally {
    // Shut the page server down even when the assertion above fails.
    await new Promise<void>((resolve) => {
      pageServer.close(() => resolve());
    });
  }
});
it('leaves existing thumbnails untouched (no enrichment fetch)', async () => {
const db = openInMemoryForTest();
addDomain(db, 'chefkoch.de');