From 211d58ebecd56032c4057346d389725576869cb6 Mon Sep 17 00:00:00 2001
From: hsiegeln <37154749+hsiegeln@users.noreply.github.com>
Date: Fri, 17 Apr 2026 18:04:59 +0200
Subject: [PATCH] feat(search): Enter bleibt auf Seite + robustere
 Thumbnail-Erkennung
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Startseite:
- Enter/Return löst die Suche jetzt sofort aus (cancelt den Debounce),
  navigiert aber NICHT mehr auf /search. Der Anwender bleibt auf der
  gleichen Seite mit Inline-Ergebnissen.

Thumbnail-Enrichment (searxng.ts):
- Regex-basierte og:image-Extraktion durch linkedom-parseHTML ersetzt.
- Neue Fallback-Kette (in dieser Reihenfolge):
    1. <meta property/name = og:image | og:image:url | og:image:secure_url
                           | twitter:image | twitter:image:src>
    2. <link rel="image_src" href="...">
    3. JSON-LD image (auch tief in @graph; "image" als String, Array,
       Objekt-mit-url)
    4. Erstes <img> in article/main/.entry-content/.post-content/figure
- Relative URLs werden gegen die Seiten-URL zu absoluten aufgelöst
  (z.B. /uploads/foo.jpg → http://host/uploads/foo.jpg).
- maxBytes von 256 KB auf 512 KB angehoben, damit JSON-LD-lastige
  Recipe-Seiten nicht mitten im Script abgeschnitten werden.

Tests (97/97):
- Neu: JSON-LD-Image-Fallback-Test.
- Neu: Content-<img>-Fallback-Test mit relativer URL, die zur
  absoluten aufgelöst wird.
---
 src/lib/server/search/searxng.ts  | 111 ++++++++++++++++++++++++++----
 src/routes/+page.svelte           |  63 +++++++++--------
 tests/integration/searxng.test.ts |  46 +++++++++++++
 3 files changed, 178 insertions(+), 42 deletions(-)
diff --git a/src/lib/server/search/searxng.ts b/src/lib/server/search/searxng.ts
index 9ec6a93..dbea01e 100644
--- a/src/lib/server/search/searxng.ts
+++ b/src/lib/server/search/searxng.ts
@@ -1,4 +1,5 @@
 import type Database from 'better-sqlite3';
+import { parseHTML } from 'linkedom';
 import { listDomains, normalizeDomain } from '../domains/repository';
 import { fetchText } from '../http';
 
@@ -77,18 +78,102 @@ function looksLikeRecipePage(url: string): boolean {
   }
 }
 
-const OG_IMAGE_RE =
-  /<meta[^>]+(?:property|name)=["']og:image(?::url)?["'][^>]+content=["']([^"']+)["']/i;
-const OG_IMAGE_RE_REV =
-  /<meta[^>]+content=["']([^"']+)["'][^>]+(?:property|name)=["']og:image(?::url)?["']/i;
-const TWITTER_IMAGE_RE =
-  /<meta[^>]+(?:property|name)=["']twitter:image["'][^>]+content=["']([^"']+)["']/i;
-
-function extractOgImage(html: string): string | null {
-  const m = OG_IMAGE_RE.exec(html) ?? OG_IMAGE_RE_REV.exec(html) ?? TWITTER_IMAGE_RE.exec(html);
-  if (!m) return null;
+function resolveUrl(href: string, baseUrl: string): string | null {
   try {
-    return new URL(m[1]).toString();
+    return new URL(href, baseUrl).toString(); // relative href is joined onto baseUrl
+  } catch {
+    return null; // the href/baseUrl pair could not be parsed as a URL
+  }
+}
+
+/**
+ * Depth-first search for the first usable `image` in a parsed JSON-LD value.
+ *
+ * Handles a single node, an array of nodes, and nodes wrapping an `@graph` array
+ * (graph entries are searched before the node's own `image`). `image` may be a
+ * string, an object with a string `url`, or an array mixing both.
+ * Empty strings are treated as missing.
+ *
+ * @param data Result of `JSON.parse` on a JSON-LD script; any shape is tolerated.
+ * @returns The raw image reference (possibly relative), or null if none is found.
+ */
+function imageFromJsonLd(data: unknown): string | null {
+  if (!data || typeof data !== 'object') return null;
+  if (Array.isArray(data)) {
+    for (const entry of data) {
+      const found = imageFromJsonLd(entry);
+      if (found) return found;
+    }
+    return null;
+  }
+  const node = data as Record<string, unknown>;
+  const fromGraph = Array.isArray(node['@graph']) ? imageFromJsonLd(node['@graph']) : null;
+  if (fromGraph) return fromGraph;
+  // Scan every entry: an unusable first element must not hide a valid later one.
+  for (const candidate of Array.isArray(node.image) ? node.image : [node.image]) {
+    if (typeof candidate === 'string' && candidate) return candidate;
+    if (candidate && typeof candidate === 'object') {
+      const url = (candidate as Record<string, unknown>).url;
+      if (typeof url === 'string' && url) return url;
+    }
+  }
+  return null;
+}
+
+/** Lower-cased `property`/`name` values of <meta> tags that can carry a preview image. */
+const META_IMAGE_KEYS = new Set([
+  'og:image',
+  'og:image:url',
+  'og:image:secure_url',
+  'twitter:image',
+  'twitter:image:src'
+]);
+
+/**
+ * Picks a preview image for a fetched page and returns it as an absolute URL.
+ * Sources are tried in order: OpenGraph/Twitter <meta>, <link rel="image_src">,
+ * JSON-LD `image`, then <img> elements inside typical content containers.
+ *
+ * @param html    Raw HTML of the page (may be truncated by the fetch byte limit).
+ * @param baseUrl Page URL; relative image references are resolved against it.
+ * @returns The first candidate that resolves to a URL, or null if none does.
+ */
+function extractPageImage(html: string, baseUrl: string): string | null {
+  try {
+    const { document } = parseHTML(html);
+    // 1. OpenGraph / Twitter meta tags
+    for (const m of Array.from(document.querySelectorAll('meta'))) {
+      const key = (m.getAttribute('property') ?? m.getAttribute('name') ?? '').toLowerCase();
+      if (!META_IMAGE_KEYS.has(key)) continue;
+      const content = m.getAttribute('content');
+      const resolved = content ? resolveUrl(content, baseUrl) : null;
+      if (resolved) return resolved;
+    }
+    // 2. <link rel="image_src">
+    const href = document.querySelector('link[rel="image_src"]')?.getAttribute('href');
+    const fromLink = href ? resolveUrl(href, baseUrl) : null;
+    if (fromLink) return fromLink;
+    // 3. JSON-LD image (Recipe schema etc.)
+    for (const s of Array.from(document.querySelectorAll('script[type="application/ld+json"]'))) {
+      try {
+        const img = imageFromJsonLd(JSON.parse(s.textContent ?? ''));
+        const resolved = img ? resolveUrl(img, baseUrl) : null;
+        if (resolved) return resolved;
+      } catch {
+        // malformed JSON-LD — skip
+      }
+    }
+    // 4. First content image in article/main. Lazy-loaded images keep the real URL in
+    //    data-src (src is a placeholder or missing), so data-src wins when present.
+    const contentImgs = document.querySelectorAll(
+      'article img, main img, .entry-content img, .post-content img, figure img'
+    );
+    for (const img of Array.from(contentImgs)) {
+      const src = img.getAttribute('data-src') || img.getAttribute('src');
+      const resolved = src ? resolveUrl(src, baseUrl) : null;
+      if (resolved) return resolved;
+    }
+    return null;
   } catch {
     return null;
   }
@@ -104,8 +189,8 @@ async function enrichThumbnail(url: string): Promise<string | null> {
   if (cached && cached.expires > now) return cached.image;
   let image: string | null = null;
   try {
-    const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 256 * 1024 });
-    image = extractOgImage(html);
+    const html = await fetchText(url, { timeoutMs: 4_000, maxBytes: 512 * 1024 });
+    image = extractPageImage(html, url);
   } catch {
     image = null;
   }
diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte
index 107c9e5..8f6503c 100644
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -1,6 +1,5 @@
 <script lang="ts">
   import { onMount } from 'svelte';
-  import { goto } from '$app/navigation';
   import type { SearchHit } from '$lib/server/recipes/search-local';
   import type { WebHit } from '$lib/server/search/searxng';
   import { randomQuote } from '$lib/quotes';
@@ -24,6 +23,34 @@
 
   let debounceTimer: ReturnType<typeof setTimeout> | null = null;
 
+  /** Runs the local search for `q`; falls back to the web search when it yields no hits. */
+  async function runSearch(q: string) {
+    const isCurrent = () => query.trim() === q; // results are dropped once the input moved on
+    try {
+      const res = await fetch(`/api/recipes/search?q=${encodeURIComponent(q)}`);
+      const body = await res.json();
+      if (!isCurrent()) return;
+      hits = body.hits;
+      searchedFor = q;
+      // Enter can re-run a finished query without the $effect reset, so clear web state here.
+      webHits = [];
+      webError = null;
+      if (body.hits.length > 0) return;
+      webSearching = true;
+      try {
+        const wres = await fetch(`/api/recipes/search/web?q=${encodeURIComponent(q)}`);
+        if (!isCurrent()) return;
+        const wbody = await (wres.ok ? wres.json() : wres.json().catch(() => ({})));
+        if (wres.ok) webHits = wbody.hits;
+        else webError = wbody.message ?? `HTTP ${wres.status}`;
+      } finally {
+        if (isCurrent()) webSearching = false;
+      }
+    } finally {
+      if (isCurrent()) searching = false;
+    }
+  }
+
   $effect(() => {
     const q = query.trim();
     if (debounceTimer) clearTimeout(debounceTimer);
@@ -40,40 +67,18 @@
     webHits = [];
     webSearching = false;
     webError = null;
-    debounceTimer = setTimeout(async () => {
-      try {
-        const res = await fetch(`/api/recipes/search?q=${encodeURIComponent(q)}`);
-        const body = await res.json();
-        if (query.trim() !== q) return;
-        hits = body.hits;
-        searchedFor = q;
-        if (body.hits.length === 0) {
-          webSearching = true;
-          try {
-            const wres = await fetch(`/api/recipes/search/web?q=${encodeURIComponent(q)}`);
-            if (query.trim() !== q) return;
-            if (!wres.ok) {
-              const err = await wres.json().catch(() => ({}));
-              webError = err.message ?? `HTTP ${wres.status}`;
-            } else {
-              const wbody = await wres.json();
-              webHits = wbody.hits;
-            }
-          } finally {
-            if (query.trim() === q) webSearching = false;
-          }
-        }
-      } finally {
-        if (query.trim() === q) searching = false;
-      }
+    debounceTimer = setTimeout(() => {
+      void runSearch(q);
     }, 300);
   });
 
   function submit(e: SubmitEvent) {
     e.preventDefault();
     const q = query.trim();
-    if (!q) return;
-    void goto(`/search?q=${encodeURIComponent(q)}`);
+    if (q.length <= 3) return; // same minimum length as `activeSearch`
+    if (debounceTimer) clearTimeout(debounceTimer); // drop the pending debounced run
+    searching = true;
+    void runSearch(q); // search immediately and stay on this page
   }
 
   const activeSearch = $derived(query.trim().length > 3);
diff --git a/tests/integration/searxng.test.ts b/tests/integration/searxng.test.ts
index 2e164fc..0b778fc 100644
--- a/tests/integration/searxng.test.ts
+++ b/tests/integration/searxng.test.ts
@@ -94,6 +94,52 @@ describe('searchWeb', () => {
     }
   });
 
+  it('falls back to JSON-LD image when no og:image', async () => {
+    const pageServer = createServer((_req, res) => { // page whose only image hint is JSON-LD
+      res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
+      res.end(`<html><head>
+        <script type="application/ld+json">${JSON.stringify({
+          '@type': 'Recipe',
+          name: 'Pie',
+          image: 'https://cdn.example/pie.jpg'
+        })}</script>
+      </head><body></body></html>`);
+    });
+    await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
+    const addr = pageServer.address() as AddressInfo; // listen(0) bound an OS-assigned port
+    const pageUrl = `http://127.0.0.1:${addr.port}/pie`;
+    try {
+      const db = openInMemoryForTest();
+      addDomain(db, '127.0.0.1');
+      respondWith([{ url: pageUrl, title: 'Pie', content: '' }]);
+      const hits = await searchWeb(db, 'pie', { searxngUrl: baseUrl });
+      expect(hits[0].thumbnail).toBe('https://cdn.example/pie.jpg'); // absolute URL kept as-is
+    } finally {
+      await new Promise<void>((r) => pageServer.close(() => r())); // runs even if expect throws
+    }
+  });
+
+  it('falls back to first content image when no meta/JSON-LD image', async () => {
+    const pageServer = createServer((_req, res) => { // page with only an <article> <img>
+      res.writeHead(200, { 'content-type': 'text/html; charset=utf-8' });
+      res.end(
+        '<html><body><article><img src="/uploads/dish.jpg" alt=""></article></body></html>'
+      );
+    });
+    await new Promise<void>((r) => pageServer.listen(0, '127.0.0.1', r));
+    const addr = pageServer.address() as AddressInfo; // listen(0) bound an OS-assigned port
+    const pageUrl = `http://127.0.0.1:${addr.port}/article`;
+    try { // the relative src must come back resolved against pageUrl's origin
+      const db = openInMemoryForTest();
+      addDomain(db, '127.0.0.1');
+      respondWith([{ url: pageUrl, title: 'Dish', content: '' }]);
+      const hits = await searchWeb(db, 'dish', { searxngUrl: baseUrl });
+      expect(hits[0].thumbnail).toBe(`http://127.0.0.1:${addr.port}/uploads/dish.jpg`);
+    } finally {
+      await new Promise<void>((r) => pageServer.close(() => r())); // runs even if expect throws
+    }
+  });
+
   it('leaves existing thumbnails untouched (no enrichment fetch)', async () => {
     const db = openInMemoryForTest();
     addDomain(db, 'chefkoch.de');