feat(search): Enter bleibt auf Seite + robustere Thumbnail-Erkennung
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 55s
All checks were successful
Build & Publish Docker Image / build-and-push (push) Successful in 55s
Startseite:
- Enter/Return löst die Suche jetzt sofort aus (cancelt den Debounce),
navigiert aber NICHT mehr auf /search. Der Anwender bleibt auf der
gleichen Seite mit Inline-Ergebnissen.
Thumbnail-Enrichment (searxng.ts):
- Regex-basierte og:image-Extraktion durch linkedom-parseHTML ersetzt.
- Neue Fallback-Kette (in dieser Reihenfolge):
1. <meta property/name = og:image | og:image:url | og:image:secure_url
| twitter:image | twitter:image:src>
2. <link rel="image_src" href="...">
3. JSON-LD image (auch tief in @graph; "image" als String, Array,
Objekt-mit-url)
4. Erstes <img> in article/main/.entry-content/.post-content/figure
- Relative URLs werden gegen die Seiten-URL zu absoluten aufgelöst
(z.B. /uploads/foo.jpg → http://host/uploads/foo.jpg).
- maxBytes von 256 KB auf 512 KB angehoben, damit JSON-LD-lastige
Recipe-Seiten nicht mitten im Script abgeschnitten werden.
Tests (97/97):
- Neu: JSON-LD-Image-Fallback-Test.
- Neu: Content-<img>-Fallback-Test mit relativer URL, die zur
absoluten aufgelöst wird.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import { parseHTML } from 'linkedom';
|
||||
import { listDomains, normalizeDomain } from '../domains/repository';
|
||||
import { fetchText } from '../http';
|
||||
|
||||
@@ -77,18 +78,102 @@ function looksLikeRecipePage(url: string): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
const OG_IMAGE_RE =
|
||||
/<meta[^>]+(?:property|name)=["']og:image(?::url)?["'][^>]+content=["']([^"']+)["']/i;
|
||||
const OG_IMAGE_RE_REV =
|
||||
/<meta[^>]+content=["']([^"']+)["'][^>]+(?:property|name)=["']og:image(?::url)?["']/i;
|
||||
const TWITTER_IMAGE_RE =
|
||||
/<meta[^>]+(?:property|name)=["']twitter:image["'][^>]+content=["']([^"']+)["']/i;
|
||||
|
||||
function extractOgImage(html: string): string | null {
|
||||
const m = OG_IMAGE_RE.exec(html) ?? OG_IMAGE_RE_REV.exec(html) ?? TWITTER_IMAGE_RE.exec(html);
|
||||
if (!m) return null;
|
||||
/**
 * Resolves a possibly relative URL reference against the URL of the page it was found on.
 *
 * @param href - Absolute or relative reference taken from the page markup.
 * @param baseUrl - URL of the page; used as the base for relative references.
 * @returns The absolute URL as a string, or `null` when `href` cannot be parsed against `baseUrl`.
 */
function resolveUrl(href: string, baseUrl: string): string | null {
  try {
    return new URL(href, baseUrl).toString();
  } catch {
    // `new URL` throws for unparsable input; report that as "no usable URL".
    return null;
  }
}
|
||||
|
||||
/**
 * Reads the URL out of a single JSON-LD `image` entry.
 *
 * An entry is either a plain URL string or an object that carries the URL in
 * `url` (preferred) or `contentUrl`.
 *
 * @returns The non-empty URL string, or `null` when the entry holds none.
 */
function jsonLdImageEntryUrl(entry: unknown): string | null {
  if (typeof entry === 'string') return entry !== '' ? entry : null;
  if (entry === null || typeof entry !== 'object') return null;
  const obj = entry as Record<string, unknown>;
  for (const key of ['url', 'contentUrl']) {
    const value = obj[key];
    if (typeof value === 'string' && value !== '') return value;
  }
  return null;
}

/**
 * Finds the first image URL in parsed JSON-LD data.
 *
 * Search order: for an array, each element in turn; for an object, every node
 * of its `@graph` array first, then the object's own `image` property. The
 * `image` value may be a string, an object with `url`/`contentUrl`, or an
 * array of those. Every array entry is checked, so an unusable first entry
 * does not hide a usable later one.
 *
 * @param data - Result of `JSON.parse` on a JSON-LD script; any shape is accepted.
 * @returns The image URL exactly as written in the data (not resolved or
 *   validated), or `null` when none is found.
 */
function imageFromJsonLd(data: unknown): string | null {
  if (!data) return null;

  if (Array.isArray(data)) {
    for (const entry of data) {
      const found = imageFromJsonLd(entry);
      if (found) return found;
    }
    return null;
  }

  if (typeof data !== 'object') return null;
  const node = data as Record<string, unknown>;

  const graph = node['@graph'];
  if (Array.isArray(graph)) {
    for (const entry of graph) {
      const found = imageFromJsonLd(entry);
      if (found) return found;
    }
    // Nothing in the graph: fall through to this node's own `image`.
  }

  const image = node.image;
  const candidates: unknown[] = Array.isArray(image) ? image : [image];
  for (const candidate of candidates) {
    const url = jsonLdImageEntryUrl(candidate);
    if (url) return url;
  }
  return null;
}
|
||||
|
||||
/**
 * `<meta>` keys (OpenGraph and Twitter card names, lower-case) whose `content`
 * is treated as a page image URL.
 */
const META_IMAGE_KEYS = new Set<string>(
  ['og:image', 'og:image:url', 'og:image:secure_url', 'twitter:image', 'twitter:image:src']
);
|
||||
|
||||
function extractPageImage(html: string, baseUrl: string): string | null {
|
||||
try {
|
||||
const { document } = parseHTML(html);
|
||||
// 1. OpenGraph / Twitter meta tags
|
||||
for (const m of Array.from(document.querySelectorAll('meta'))) {
|
||||
const key = (m.getAttribute('property') ?? m.getAttribute('name') ?? '').toLowerCase();
|
||||
if (!META_IMAGE_KEYS.has(key)) continue;
|
||||
const content = m.getAttribute('content');
|
||||
if (!content) continue;
|
||||
const resolved = resolveUrl(content, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
// 2. <link rel="image_src">
|
||||
const link = document.querySelector('link[rel="image_src"]');
|
||||
if (link) {
|
||||
const href = link.getAttribute('href');
|
||||
if (href) {
|
||||
const resolved = resolveUrl(href, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
}
|
||||
// 3. JSON-LD image (Recipe schema etc.)
|
||||
for (const s of Array.from(document.querySelectorAll('script[type="application/ld+json"]'))) {
|
||||
try {
|
||||
const data = JSON.parse(s.textContent ?? '');
|
||||
const img = imageFromJsonLd(data);
|
||||
if (img) {
|
||||
const resolved = resolveUrl(img, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
} catch {
|
||||
// malformed JSON-LD — skip
|
||||
}
|
||||
}
|
||||
// 4. First content image in article/main
|
||||
const contentImg = document.querySelector(
|
||||
'article img[src], main img[src], .entry-content img[src], .post-content img[src], figure img[src]'
|
||||
);
|
||||
if (contentImg) {
|
||||
const src = contentImg.getAttribute('src') ?? contentImg.getAttribute('data-src');
|
||||
if (src) {
|
||||
const resolved = resolveUrl(src, baseUrl);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
@@ -104,8 +189,8 @@ async function enrichThumbnail(url: string): Promise<string | null> {
|
||||
if (cached && cached.expires > now) return cached.image;
|
||||
let image: string | null = null;
|
||||
try {
|
||||
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 256 * 1024 });
|
||||
image = extractOgImage(html);
|
||||
const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 512 * 1024 });
|
||||
image = extractPageImage(html, url);
|
||||
} catch {
|
||||
image = null;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { onMount } from 'svelte';
|
||||
import { goto } from '$app/navigation';
|
||||
import type { SearchHit } from '$lib/server/recipes/search-local';
|
||||
import type { WebHit } from '$lib/server/search/searxng';
|
||||
import { randomQuote } from '$lib/quotes';
|
||||
@@ -24,6 +23,34 @@
|
||||
|
||||
// Handle of the most recently scheduled debounce timeout; null until one has been scheduled.
let debounceTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
/**
 * Runs the local recipe search for `q` and, when it yields no hits, the web
 * search as a fallback, writing the results into the component state.
 *
 * Results and flag resets are applied only while the trimmed `query` still
 * equals `q`, so a response for an outdated query never overwrites newer state.
 *
 * @param q - Trimmed query string to search for.
 */
async function runSearch(q: string) {
  // True as long as the input has not changed since this search was started.
  const isCurrent = () => query.trim() === q;

  try {
    const encoded = encodeURIComponent(q);

    const localRes = await fetch(`/api/recipes/search?q=${encoded}`);
    const localBody = await localRes.json();
    if (!isCurrent()) return;

    hits = localBody.hits;
    searchedFor = q;

    // Local hits found: no web fallback needed.
    if (localBody.hits.length !== 0) return;

    webSearching = true;
    try {
      const webRes = await fetch(`/api/recipes/search/web?q=${encoded}`);
      if (!isCurrent()) return;

      if (webRes.ok) {
        const webBody = await webRes.json();
        webHits = webBody.hits;
      } else {
        // Error body may not be JSON; fall back to the HTTP status.
        const failure = await webRes.json().catch(() => ({}));
        webError = failure.message ?? `HTTP ${webRes.status}`;
      }
    } finally {
      if (isCurrent()) webSearching = false;
    }
  } finally {
    if (isCurrent()) searching = false;
  }
}
|
||||
|
||||
$effect(() => {
|
||||
const q = query.trim();
|
||||
if (debounceTimer) clearTimeout(debounceTimer);
|
||||
@@ -40,40 +67,18 @@
|
||||
webHits = [];
|
||||
webSearching = false;
|
||||
webError = null;
|
||||
debounceTimer = setTimeout(async () => {
|
||||
try {
|
||||
const res = await fetch(`/api/recipes/search?q=${encodeURIComponent(q)}`);
|
||||
const body = await res.json();
|
||||
if (query.trim() !== q) return;
|
||||
hits = body.hits;
|
||||
searchedFor = q;
|
||||
if (body.hits.length === 0) {
|
||||
webSearching = true;
|
||||
try {
|
||||
const wres = await fetch(`/api/recipes/search/web?q=${encodeURIComponent(q)}`);
|
||||
if (query.trim() !== q) return;
|
||||
if (!wres.ok) {
|
||||
const err = await wres.json().catch(() => ({}));
|
||||
webError = err.message ?? `HTTP ${wres.status}`;
|
||||
} else {
|
||||
const wbody = await wres.json();
|
||||
webHits = wbody.hits;
|
||||
}
|
||||
} finally {
|
||||
if (query.trim() === q) webSearching = false;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
if (query.trim() === q) searching = false;
|
||||
}
|
||||
debounceTimer = setTimeout(() => {
|
||||
void runSearch(q);
|
||||
}, 300);
|
||||
});
|
||||
|
||||
/**
 * Form submit handler (Enter/Return): starts the search immediately instead of
 * waiting for the debounce, and keeps the user on the current page with inline
 * results rather than navigating to /search.
 *
 * @param e - Submit event of the search form; its default action is suppressed.
 */
function submit(e: SubmitEvent) {
  e.preventDefault();
  const q = query.trim();
  // Queries of three characters or fewer (including empty input) are not searched.
  if (q.length <= 3) return;
  // Cancel the pending debounced search so the same query is not fetched twice.
  if (debounceTimer) clearTimeout(debounceTimer);
  searching = true;
  void runSearch(q);
}
|
||||
|
||||
// True while the trimmed query is longer than three characters.
const activeSearch = $derived(query.trim().length > 3);
|
||||
|
||||
@@ -94,6 +94,52 @@ describe('searchWeb', () => {
|
||||
}
|
||||
});
|
||||
|
||||
// The served page has no meta image tags; its only image is inside a JSON-LD Recipe node.
it('falls back to JSON-LD image when no og:image', async () => {
  const recipeJsonLd = JSON.stringify({
    '@type': 'Recipe',
    name: 'Pie',
    image: 'https://cdn.example/pie.jpg'
  });
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(`<html><head>
<script type="application/ld+json">${recipeJsonLd}</script>
</head><body></body></html>`);
  });
  // Port 0 lets the OS pick a free port for the throwaway page server.
  await new Promise<void>((resolve) => pageServer.listen(0, '127.0.0.1', resolve));
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/pie`;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'Pie', content: '' }]);
    const [firstHit] = await searchWeb(db, 'pie', { searxngUrl: baseUrl });
    expect(firstHit.thumbnail).toBe('https://cdn.example/pie.jpg');
  } finally {
    await new Promise<void>((resolve) => pageServer.close(() => resolve()));
  }
});
|
||||
|
||||
// The served page has neither meta image tags nor JSON-LD; the only image is a
// relative <img> inside <article>, which must come back as an absolute URL.
it('falls back to first content image when no meta/JSON-LD image', async () => {
  const pageHtml =
    '<html><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>';
  const pageServer = createServer((_req, res) => {
    res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
    res.end(pageHtml);
  });
  // Port 0 lets the OS pick a free port for the throwaway page server.
  await new Promise<void>((resolve) => pageServer.listen(0, '127.0.0.1', resolve));
  const { port } = pageServer.address() as AddressInfo;
  const pageUrl = `http://127.0.0.1:${port}/article`;
  try {
    const db = openInMemoryForTest();
    addDomain(db, '127.0.0.1');
    respondWith([{ url: pageUrl, title: 'Dish', content: '' }]);
    const [firstHit] = await searchWeb(db, 'dish', { searxngUrl: baseUrl });
    expect(firstHit.thumbnail).toBe(`http://127.0.0.1:${port}/uploads/dish.jpg`);
  } finally {
    await new Promise<void>((resolve) => pageServer.close(() => resolve()));
  }
});
|
||||
|
||||
it('leaves existing thumbnails untouched (no enrichment fetch)', async () => {
|
||||
const db = openInMemoryForTest();
|
||||
addDomain(db, 'chefkoch.de');
|
||||
|
||||
Reference in New Issue
Block a user