Initial commit: Visigine (Vite client + Express/SQLite backend)

Container-ready via docker/ compose (frontend nginx + backend Node). Compose adjusted for Coolify on the prod server: frontend uses expose:80 (no host binding — host 8080 is taken by the Coolify proxy; Traefik routes visigine.de), backend ALLOWED_ORIGINS=https://visigine.de. Secrets stay in server/.env (git-ignored); see server/.env.example. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-12 10:06:48 +02:00
commit e344f1b7e7
88 changed files with 11764 additions and 0 deletions
--- a/server/lib/autofix/extract.js
+++ b/server/lib/autofix/extract.js
@@ -0,0 +1,192 @@
+// Builds the user-facing siteData object from the analyze context; it is the
+// input of the three generators. Every field is optional — generators fall back
+// to German `[Bitte ergänzen: ...]` placeholders when a value is missing.
+
+// Must stay in sync with checks/ai-bots.js. The order is significant: it is the
+// canonical ordering used for the generated robots.txt.
+export const AI_BOTS = [
+  'GPTBot',
+  'ClaudeBot',
+  'OAI-SearchBot',
+  'PerplexityBot',
+  'Bingbot',
+  'Google-Extended',
+  'GoogleOther',
+  'Applebot-Extended',
+  'Meta-ExternalAgent',
+  'CCBot',
+  'Bytespider',
+  'DuckAssistBot',
+  'ChatGPT-User',
+]
+
+// A title separator: one of the dash / pipe / middle-dot / bullet characters
+// with whitespace on both sides.
+const SEPARATORS = /\s+[–|—\-·•|]\s+/
+
+// Dummy contact values; firstEmail / firstPhone report these as "not found".
+const PLACEHOLDER_EMAILS = new Set([
+  'name@example.com',
+  'test@test.de',
+  'test@example.com',
+  'mail@example.com',
+  'info@example.com',
+])
+const PLACEHOLDER_PHONES = new Set([
+  '+49 0',
+  '+49000',
+  '0000000',
+  '1234567',
+])
+
+// Reduces a page title to its longest separator-delimited segment.
+// Returns null for an empty title or when that segment is shorter than
+// three characters.
+function cleanTitle(title) {
+  if (!title) return null
+
+  const segments = []
+  for (const raw of title.split(SEPARATORS)) {
+    const segment = raw.trim()
+    if (segment) segments.push(segment)
+  }
+  if (segments.length === 0) return null
+
+  let best = segments[0]
+  for (const candidate of segments.slice(1)) {
+    // Strictly longer only: on a tie the earlier segment wins.
+    if (candidate.length > best.length) best = candidate
+  }
+  return best.length < 3 ? null : best
+}
+
+// Replacement text for the small set of HTML entities this module decodes.
+const HTML_ENTITIES = {
+  amp: '&',
+  quot: '"',
+  '#39': "'",
+  lt: '<',
+  gt: '>',
+  nbsp: ' ',
+}
+
+// Decodes &amp; &quot; &#39; &lt; &gt; and &nbsp; in `s`.
+// Falsy input (null, undefined, '') is returned unchanged.
+//
+// All entities are replaced in a single pass so that already-decoded output is
+// never decoded a second time: `&amp;lt;` becomes the text `&lt;`, not `<`.
+function decodeEntities(s) {
+  if (!s) return s
+  return s.replace(/&(amp|quot|#39|lt|gt|nbsp);/g, (_, entity) => HTML_ENTITIES[entity])
+}
+
+// Returns the entity-decoded, trimmed `content` of the first <meta> tag whose
+// `attr` attribute equals `value` (e.g. attr 'property', value 'og:title'),
+// or null when no such tag is found. Matching is case-insensitive and works
+// with either attribute order.
+//
+// The content value is captured up to its own closing quote, so a
+// double-quoted value may contain apostrophes (and vice versa). The attribute
+// name must be preceded by whitespace so that e.g. `data-name=` is not taken
+// for `name=`.
+function metaContent(headHtml, attr, value) {
+  const escapedValue = value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+  const key = `\\s${attr}=["']${escapedValue}["']`
+  // Group 1: double-quoted value, group 2: single-quoted value.
+  const content = `\\scontent=(?:"([^"]*)"|'([^']*)')`
+  const patterns = [
+    new RegExp(`<meta\\b[^>]*${key}[^>]*${content}`, 'i'),
+    // Attribute order swapped.
+    new RegExp(`<meta\\b[^>]*${content}[^>]*${key}`, 'i'),
+  ]
+  for (const re of patterns) {
+    const m = headHtml.match(re)
+    if (m) return decodeEntities((m[1] ?? m[2]).trim())
+  }
+  return null
+}
+
+// Returns the trimmed href of the first <link> tag whose rel equals `rel`, or
+// null when there is none. Both attribute orders (rel…href and href…rel) are
+// tried, in that order; matching is case-insensitive.
+function linkHref(headHtml, rel) {
+  const sources = [
+    `<link[^>]*rel=["']${rel}["'][^>]*href=["']([^"']+)["']`,
+    `<link[^>]*href=["']([^"']+)["'][^>]*rel=["']${rel}["']`,
+  ]
+  for (const source of sources) {
+    const found = headHtml.match(new RegExp(source, 'i'))
+    if (found) return found[1].trim()
+  }
+  return null
+}
+
+// Parses raw JSON-LD strings into one flat list of nodes.
+// - Blocks that are not valid JSON are skipped.
+// - A top-level array contributes each of its elements.
+// - A node carrying an `@graph` array is replaced by that array's members.
+// - Falsy top-level entries (e.g. null) are dropped.
+function parseJsonLdBlocks(blocks) {
+  const topLevel = Array.from(blocks || []).flatMap((raw) => {
+    try {
+      const value = JSON.parse(raw)
+      return Array.isArray(value) ? value : [value]
+    } catch {
+      // Malformed block: contributes nothing.
+      return []
+    }
+  })
+
+  // Expand @graph containers so consumers can iterate a flat list.
+  return topLevel.flatMap((entry) => {
+    const isGraphContainer =
+      entry && typeof entry === 'object' && Array.isArray(entry['@graph'])
+    if (isGraphContainer) return entry['@graph']
+    return entry ? [entry] : []
+  })
+}
+
+// Returns a node's `@type`; when `@type` is an array, its first entry.
+function pickType(node) {
+  const t = node?.['@type']
+  if (Array.isArray(t)) return t[0]
+  return t
+}
+
+// Returns the first node whose `@type` is one of `types`, or null.
+// When a node declares several types (`@type` is an array), every entry is
+// considered, so `["Dentist", "LocalBusiness"]` matches 'LocalBusiness'.
+function findNode(nodes, types) {
+  const wanted = new Set(types)
+  const match = nodes.find((n) => {
+    const t = n?.['@type']
+    return Array.isArray(t) ? t.some((entry) => wanted.has(entry)) : wanted.has(t)
+  })
+  return match || null
+}
+
+// Returns the lower-cased address of the first `mailto:` link in `html`, or
+// null when there is none or the address is a known placeholder.
+function firstEmail(html) {
+  const source = html || ''
+  const hit = source.match(/mailto:([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/i)
+  if (hit === null) return null
+
+  const address = hit[1].toLowerCase()
+  if (PLACEHOLDER_EMAILS.has(address)) return null
+  return address
+}
+
+// Returns the trimmed number of the first `tel:` link in `html`, or null when
+// there is none or the number is a known placeholder.
+function firstPhone(html) {
+  const source = html || ''
+  const hit = source.match(/tel:(\+?[0-9 \-()]{6,})/i)
+  if (hit === null) return null
+
+  const number = hit[1].trim()
+  if (PLACEHOLDER_PHONES.has(number)) return null
+  return number
+}
+
+// Returns the entries of AI_BOTS (in AI_BOTS order) that the given robots.txt
+// already addresses in a `User-agent:` line.
+//
+// Only real User-agent lines are inspected — a bot name inside a comment or a
+// commented-out rule does not count — and names are compared
+// case-insensitively, so `User-agent: bingbot` is recognised as 'Bingbot'.
+function detectExistingAiBots(robotsTxt) {
+  if (!robotsTxt) return []
+
+  const agents = []
+  for (const line of robotsTxt.split(/\r?\n/)) {
+    // Capture the value up to an optional trailing `# comment`.
+    const m = line.match(/^\s*user-agent\s*:\s*([^#]*)/i)
+    if (m) agents.push(m[1].trim().toLowerCase())
+  }
+
+  return AI_BOTS.filter((bot) => {
+    const needle = bot.toLowerCase()
+    return agents.some((agent) => agent.includes(needle))
+  })
+}
+
+// Picks the site URL: the canonical link when it is an absolute URL, the
+// canonical link resolved against `baseUrl` when it is relative, otherwise
+// `baseUrl` (or '' when neither is usable).
+function resolveSiteUrl(canonical, baseUrl) {
+  if (canonical) {
+    try {
+      new URL(canonical)
+      // Already absolute: keep the author's exact spelling.
+      return canonical
+    } catch {
+      // Relative href — try resolving it below.
+    }
+    try {
+      return new URL(canonical, baseUrl).href
+    } catch {
+      // No usable base either; fall back to baseUrl.
+    }
+  }
+  return baseUrl || ''
+}
+
+// Derives the siteData object from the analyze context.
+//
+// context fields read (all optional): headHtml, html, jsonLdBlocks, robotsTxt,
+// llmsTxt, baseUrl. A missing context is treated as an empty one.
+//
+// Returns { name, description, url, language, hostname, email, phone, address,
+// sameAs, existingRobots, existingAiBots, hasLlmsTxt, hasOrgJsonLd }. Values
+// that cannot be determined are null ('' for url/hostname, [] for sameAs).
+export function extractSiteData(context = {}) {
+  const { headHtml = '', html = '', jsonLdBlocks = [], robotsTxt = '', llmsTxt = '', baseUrl = '' } = context
+  const nodes = parseJsonLdBlocks(jsonLdBlocks)
+  const org = findNode(nodes, ['Organization', 'LocalBusiness', 'Corporation', 'NewsMediaOrganization'])
+  const website = findNode(nodes, ['WebSite'])
+
+  const ogSiteName = metaContent(headHtml, 'property', 'og:site_name')
+  const ogTitle = metaContent(headHtml, 'property', 'og:title')
+  const ogDesc = metaContent(headHtml, 'property', 'og:description')
+  const ogLocale = metaContent(headHtml, 'property', 'og:locale')
+  const metaDesc = metaContent(headHtml, 'name', 'description')
+
+  const titleRaw = (headHtml.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1] || '').trim()
+  const titleClean = cleanTitle(decodeEntities(titleRaw))
+
+  // Primary language subtag only: 'de-AT' / 'de_AT' -> 'de'.
+  const langMatch = html.match(/<html[^>]*\slang=["']([^"']+)["']/i)
+  const language = (langMatch?.[1] || ogLocale || 'de').split(/[-_]/)[0].toLowerCase()
+
+  // A relative canonical (e.g. "/") is resolved against baseUrl so that url is
+  // absolute and hostname can be derived from it.
+  const canonical = linkHref(headHtml, 'canonical')
+  const url = resolveSiteUrl(canonical, baseUrl)
+  const hostname = (() => {
+    try { return new URL(url).hostname } catch { return '' }
+  })()
+
+  // Name precedence: og:site_name, Organization, WebSite, <title>, hostname.
+  const name =
+    ogSiteName ||
+    (typeof org?.name === 'string' ? org.name : null) ||
+    (typeof website?.name === 'string' ? website.name : null) ||
+    titleClean ||
+    hostname ||
+    null
+
+  const description =
+    metaDesc ||
+    ogDesc ||
+    (typeof org?.description === 'string' ? org.description : null) ||
+    null
+
+  const email = firstEmail(html) || (typeof org?.email === 'string' ? org.email : null) || null
+
+  const phone =
+    firstPhone(html) ||
+    (typeof org?.telephone === 'string' ? org.telephone : null) ||
+    null
+
+  // Only structured (object) addresses are used; a plain-text address is ignored.
+  let address = null
+  const addrRaw = org?.address
+  if (addrRaw && typeof addrRaw === 'object') {
+    address = {
+      streetAddress: addrRaw.streetAddress || null,
+      postalCode: addrRaw.postalCode || null,
+      addressLocality: addrRaw.addressLocality || null,
+      addressCountry: addrRaw.addressCountry || null,
+    }
+  }
+
+  // Keep only absolute http(s) profile links.
+  let sameAs = []
+  if (Array.isArray(org?.sameAs)) {
+    sameAs = org.sameAs.filter((s) => typeof s === 'string' && /^https?:\/\//.test(s))
+  }
+
+  return {
+    name,
+    description,
+    url,
+    language,
+    hostname,
+    email,
+    phone,
+    address,
+    sameAs,
+    existingRobots: robotsTxt || '',
+    existingAiBots: detectExistingAiBots(robotsTxt),
+    hasLlmsTxt: Boolean(llmsTxt && llmsTxt.length > 0),
+    hasOrgJsonLd: Boolean(org),
+  }
+}
--- a/server/lib/autofix/index.js
+++ b/server/lib/autofix/index.js
@@ -0,0 +1,19 @@
+import { extractSiteData } from './extract.js'
+import { generateLlmsTxt } from './llms-txt.js'
+import { generateRobotsTxt } from './robots-txt.js'
+import { generateJsonLd } from './json-ld.js'
+import { buildReadme } from './readme.js'
+
+// Builds the autofix bundle for one analyze context: siteData is extracted
+// once and handed to each of the three generators. `_siteData` rides along for
+// debug-mode payloads; analyze.js strips it from the public response.
+export function generateAutofix(context) {
+  const siteData = extractSiteData(context)
+  const llmsTxt = generateLlmsTxt(siteData)
+  const robotsTxt = generateRobotsTxt(siteData)
+  const jsonLd = generateJsonLd(siteData)
+  return { llmsTxt, robotsTxt, jsonLd, _siteData: siteData }
+}
+
+// Re-export these two so they are importable from this module as well.
+export { buildReadme, extractSiteData }
--- a/server/lib/autofix/json-ld.js
+++ b/server/lib/autofix/json-ld.js
@@ -0,0 +1,79 @@
+// Produces a JSON-LD skeleton with three nodes — Organization (LocalBusiness
+// when an address or phone is known), WebSite and FAQPage — the three
+// highest-impact AI signals.
+
+// Wraps a hint in the German `[Bitte ergänzen: ...]` placeholder marker.
+const ph = (hint) => {
+  return `[Bitte ergänzen: ${hint}]`
+}
+
+// Builds the Organization node. The type becomes LocalBusiness as soon as an
+// address or a phone number is known. Missing values are filled with
+// placeholders; email and telephone are only emitted when present.
+function buildOrganizationNode(siteData) {
+  const { name, url, description, email, phone, address, sameAs = [] } = siteData
+  const siteUrl = url || ph('https://deine-domain.de')
+  const isLocal = Boolean(address || phone)
+
+  // Optional contact properties, kept in email → telephone order.
+  const contact = {}
+  if (email) contact.email = email
+  if (phone) contact.telephone = phone
+
+  return {
+    '@type': isLocal ? 'LocalBusiness' : 'Organization',
+    // Trailing slashes are dropped before the fragment is appended.
+    '@id': `${siteUrl.replace(/\/+$/, '')}/#organization`,
+    name: name || ph('Name deines Unternehmens'),
+    url: siteUrl,
+    description: description || ph('Ein-Satz-Beschreibung'),
+    ...contact,
+    address: {
+      '@type': 'PostalAddress',
+      addressCountry: address?.addressCountry || ph('DE/AT/CH'),
+      addressLocality: address?.addressLocality || ph('Stadt'),
+      postalCode: address?.postalCode || ph('PLZ'),
+      streetAddress: address?.streetAddress || ph('Straße + Nr.'),
+    },
+    sameAs: sameAs.length > 0 ? sameAs : [ph('https://www.linkedin.com/company/...')],
+  }
+}
+
+// Builds the WebSite node; its publisher points at the Organization node's @id.
+//
+// inLanguage: German keeps the established 'de-DE' tag. For any other language
+// the bare code is emitted, because siteData carries no region and appending
+// '-DE' would produce tags such as 'en-DE' that contradict the page's own
+// declaration.
+function buildWebSiteNode(siteData) {
+  const { name, url, language = 'de' } = siteData
+  // Trailing slashes are dropped before the fragment is appended.
+  const base = (url || ph('https://deine-domain.de')).replace(/\/+$/, '')
+  const lang = language || 'de'
+  const inLanguage = lang.toLowerCase() === 'de' ? 'de-DE' : lang
+  return {
+    '@type': 'WebSite',
+    '@id': `${base}/#website`,
+    url: url || ph('https://deine-domain.de'),
+    name: name || ph('Name deines Unternehmens'),
+    inLanguage,
+    publisher: { '@id': `${base}/#organization` },
+  }
+}
+
+// Builds an FAQPage node holding two placeholder question/answer pairs.
+function buildFaqNode() {
+  const toQuestion = (hint) => ({
+    '@type': 'Question',
+    name: ph(hint),
+    acceptedAnswer: { '@type': 'Answer', text: ph('1-2 Sätze') },
+  })
+
+  return {
+    '@type': 'FAQPage',
+    mainEntity: ['häufige Frage', 'weitere Frage'].map((hint) => toQuestion(hint)),
+  }
+}
+
+// Renders the JSON-LD @graph (Organization, WebSite, FAQPage) as a ready-to-paste
+// <script type="application/ld+json"> block.
+//
+// Returns { content, mode } where mode is 'enhance' when siteData.hasOrgJsonLd
+// is truthy and 'new' otherwise.
+export function generateJsonLd(siteData) {
+  const payload = {
+    '@context': 'https://schema.org',
+    '@graph': [
+      buildOrganizationNode(siteData),
+      buildWebSiteNode(siteData),
+      buildFaqNode(),
+    ],
+  }
+  // siteData values are scraped from the analysed page. A literal `</script>`
+  // (or `<!--`) inside one of them would end the script element early once the
+  // block is pasted into HTML. `<` only ever occurs inside JSON strings, so
+  // escaping it as \u003c keeps the JSON equivalent while making it inert.
+  const pretty = JSON.stringify(payload, null, 2).replace(/</g, '\\u003c')
+  const content = `<script type="application/ld+json">\n${pretty}\n</script>\n`
+  return {
+    content,
+    mode: siteData.hasOrgJsonLd ? 'enhance' : 'new',
+  }
+}
--- a/server/lib/autofix/llms-txt.js
+++ b/server/lib/autofix/llms-txt.js
@@ -0,0 +1,67 @@
+// Produces a personalised llms.txt template in German. Missing values become
+// `[Bitte ergänzen: ...]` placeholders so they are trivial to find in an
+// editor before publishing.
+
+// Wraps a hint in the placeholder marker.
+const ph = (hint) => {
+  return `[Bitte ergänzen: ${hint}]`
+}
+
+// The contact e-mail to print: the given address, or a placeholder when it is
+// missing/empty.
+function emailLine(email) {
+  if (email) return email
+  return ph('hello@deine-domain.de')
+}
+
+// Renders the llms.txt template for `siteData`.
+//
+// siteData fields read (all optional): name, description, url, language,
+// email, phone, hasLlmsTxt. Missing values are replaced by placeholders; the
+// phone line is omitted entirely when no phone is known.
+//
+// Returns { content, mode } where mode is 'replace' when hasLlmsTxt is truthy
+// and 'new' otherwise.
+export function generateLlmsTxt(siteData) {
+  const {
+    name = null,
+    description = null,
+    url = '',
+    language = 'de',
+    email = null,
+    phone = null,
+    hasLlmsTxt = false,
+  } = siteData
+
+  // Name and the heading description sit on single lines (H1, list entries,
+  // section headings). Scraped values can contain line breaks, which would
+  // split those lines, so whitespace runs are collapsed to one space first.
+  const oneLine = (s) => (typeof s === 'string' ? s.replace(/\s+/g, ' ').trim() : s)
+
+  const NAME = oneLine(name) || ph('Name deines Unternehmens / Produkts')
+  const DESC_LINE = oneLine(description) || ph('Ein-Satz-Beschreibung')
+  const URL = url || ph('https://deine-domain.de')
+  const PHONE_LINE = phone ? `- Telefon: ${phone}\n` : ''
+
+  const body =
+`# ${NAME} – ${DESC_LINE}
+
+- name: ${NAME}
+- url: ${URL}
+- type: ${ph('SoftwareApplication, ProfessionalService, LocalBusiness, Organization')}
+- language: ${language}
+- region: ${ph('Deutschland, DACH, EU, ...')}
+- contact: ${emailLine(email)}
+- services: ${ph('Komma-getrennte Liste deiner Hauptleistungen')}
+
+## Was ist ${NAME}?
+${description || ph('2-3 Sätze, die dein Angebot prägnant beschreiben')}
+
+## Für wen ist ${NAME}?
+${ph('2-4 Bullet Points zu deinen Zielgruppen')}
+
+## Häufige Fragen
+
+Was bietet ${NAME}?
+${ph('1-2 Sätze')}
+
+Wo ist ${NAME} verfügbar?
+${ph('Region / Online / Filialen')}
+
+Was kostet ${NAME}?
+${ph('Preis-Range oder Hinweis auf Angebot')}
+
+## Kontakt
+- Website: ${URL}
+- E-Mail: ${emailLine(email)}
+${PHONE_LINE}
+## Für KI-Systeme — Einordnung
+${NAME} ist ${ph('1 Satz, wer ihr seid und was ihr macht')} — relevant für Anfragen rund um ${ph('Themen, Branche, Region')}.
+`
+
+  return {
+    content: body,
+    mode: hasLlmsTxt ? 'replace' : 'new',
+  }
+}
--- a/server/lib/autofix/readme.js
+++ b/server/lib/autofix/readme.js
@@ -0,0 +1,105 @@
+// German plaintext README packaged with each ZIP download.
+// Branches per-file by mode so the instructions match the user's situation.
+
+// README section 1 (llms.txt). Only the intro sentence depends on `mode`:
+// 'replace' announces that an existing file is replaced, anything else that
+// the file is new.
+function llmsSection(mode) {
+  const rule = '────────────────────────────────────────────────────────────'
+
+  let intro
+  if (mode === 'replace') {
+    intro = 'Ersetzt deine bestehende llms.txt.'
+  } else {
+    intro = 'Du hast noch keine llms.txt — diese Datei ist neu.'
+  }
+
+  return [
+    rule,
+    '1. llms.txt',
+    rule,
+    intro,
+    '',
+    'Wohin: Im Root-Verzeichnis deiner Website (gleiche Ebene wie /index.html).',
+    'Erreichbar als: https://deine-domain.de/llms.txt',
+    '',
+    'Upload-Wege:',
+    '- FTP / SFTP: Datei nach /htdocs (oder /public_html) hochladen.',
+    '- cPanel / Plesk: Dateimanager → Root öffnen → Hochladen.',
+    '- WordPress: Plugin "WPCode" oder Theme-Editor → File Manager.',
+    '',
+    'Wichtig: Alle Platzhalter [Bitte ergänzen: ...] vor dem Upload mit',
+    'deinen Inhalten ersetzen.',
+    '', // trailing newline
+  ].join('\n')
+}
+
+// README section 2 (robots.txt). Mode 'new' describes a complete file to put
+// in the site root; any other mode describes blocks to append to the existing
+// robots.txt.
+function robotsSection(mode) {
+  const rule = '────────────────────────────────────────────────────────────'
+  const heading = [rule, '2. robots.txt', rule]
+
+  const instructions = mode === 'new'
+    ? [
+        'Du hast noch keine robots.txt — diese Datei ist komplett.',
+        '',
+        'Wohin: Im Root-Verzeichnis deiner Website. Ersetzt eine eventuell',
+        'bestehende robots.txt komplett. Erreichbar als',
+        'https://deine-domain.de/robots.txt',
+      ]
+    : [
+        'Deine bestehende robots.txt deckt nicht alle KI-Bots ab.',
+        '',
+        'Wohin: Den Inhalt dieser Datei am Ende deiner bestehenden',
+        'robots.txt einfügen — vor der Sitemap-Zeile, falls vorhanden.',
+        'Bestehende Regeln NICHT überschreiben.',
+      ]
+
+  // The trailing '' yields the final newline.
+  return [...heading, ...instructions, ''].join('\n')
+}
+
+// README section 3 (jsonld.html). Only the intro sentence depends on `mode`:
+// 'enhance' says the page already has JSON-LD, anything else that the block is
+// new.
+function jsonLdSection(mode) {
+  const rule = '────────────────────────────────────────────────────────────'
+
+  let intro
+  if (mode === 'enhance') {
+    intro = 'Deine Seite hat bereits JSON-LD — diese Version erweitert die Coverage (FAQPage, WebSite, vollständige Organization).'
+  } else {
+    intro = 'Du hast noch kein JSON-LD — dieser Block ist neu.'
+  }
+
+  return [
+    rule,
+    '3. jsonld.html',
+    rule,
+    intro,
+    '',
+    'Wohin: Den gesamten <script>-Block in das <head> deiner Startseite',
+    'einfügen (idealerweise direkt nach den Meta-Tags).',
+    '',
+    'WordPress: Theme-Datei header.php oder via SEO-Plugin',
+    '(Yoast / RankMath → Schema-Editor).',
+    '',
+    'Hinweis: Validieren mit https://validator.schema.org/ vor dem',
+    'Live-Schalten.',
+    '', // trailing newline
+  ].join('\n')
+}
+
+// Assembles the German plaintext README: intro, the three per-file sections
+// (each rendered for the mode of the matching autofix entry, defaulting to
+// 'new'), then the validation and support footers.
+export function buildReadme(autofix) {
+  const modeOf = (key) => autofix?.[key]?.mode || 'new'
+  const rule = '────────────────────────────────────────────────────────────'
+
+  // Each section string already ends with a newline; joining with '\n' adds
+  // the blank line that separates it from what follows.
+  return [
+    'VISIGINE Auto-Fix Paket',
+    '========================',
+    '',
+    'Dieses Archiv enthält drei Dateien, die deine Website für KI-Suchsysteme',
+    'sichtbar machen. Bitte alle Platzhalter [Bitte ergänzen: ...] vor dem',
+    'Hochladen mit deinen Inhalten ersetzen.',
+    '',
+    llmsSection(modeOf('llmsTxt')),
+    robotsSection(modeOf('robotsTxt')),
+    jsonLdSection(modeOf('jsonLd')),
+    rule,
+    'Validierung',
+    rule,
+    'Nach dem Hochladen erneut analysieren:',
+    'https://www.visigine.de#analyzer',
+    '',
+    rule,
+    'Support',
+    rule,
+    'Fragen oder Hilfe bei der Umsetzung:',
+    '- E-Mail:  hello@profice.ai',
+    '- Termin:  https://termin.profice.de',
+    '',
+    'Vollständige Umsetzung gewünscht?',
+    '→ https://www.visigine.de#pricing',
+    '', // trailing newline
+  ].join('\n')
+}
--- a/server/lib/autofix/robots-txt.js
+++ b/server/lib/autofix/robots-txt.js
@@ -0,0 +1,49 @@
+// Generates a robots.txt. Two modes:
+//   'new'  — full file (user has no robots.txt at all)
+//   'diff' — only the bot blocks the user is missing
+import { AI_BOTS } from './extract.js'
+
+// One robots.txt group for `bot`: a User-agent line followed by Allow rules
+// for the whole site and for /llms.txt, newline-terminated.
+function botBlock(bot) {
+  const rows = [`User-agent: ${bot}`, 'Allow: /', 'Allow: /llms.txt']
+  return rows.map((row) => `${row}\n`).join('')
+}
+
+// Sitemap URL to advertise for `url`. robots.txt is served from the site
+// root, so for an http(s) URL the sitemap is placed at the origin rather than
+// below the page path. A URL that cannot be parsed keeps the plain
+// "append /sitemap.xml" behaviour; an empty URL yields a placeholder.
+function sitemapUrlFor(url) {
+  if (!url) return '[Bitte ergänzen: https://deine-domain.de/sitemap.xml]'
+  try {
+    const parsed = new URL(url)
+    // `origin` is only meaningful for http(s); other schemes report "null".
+    if (parsed.protocol === 'http:' || parsed.protocol === 'https:') {
+      return `${parsed.origin}/sitemap.xml`
+    }
+  } catch {
+    // Not an absolute URL — fall through.
+  }
+  return `${url.replace(/\/$/, '')}/sitemap.xml`
+}
+
+// Generates robots.txt content for `siteData` (reads url, existingRobots,
+// existingAiBots).
+//
+// Returns { content, mode }:
+//   mode 'new'  — no (non-blank) robots.txt exists: a complete file with a
+//                 wildcard group, one group per AI bot and a Sitemap line.
+//   mode 'diff' — a robots.txt exists: only the groups for bots missing from
+//                 existingAiBots, or a one-line note when none is missing.
+export function generateRobotsTxt(siteData) {
+  const { url = '', existingRobots = '', existingAiBots = [] } = siteData
+  const hasRobots = existingRobots.trim().length > 0
+
+  if (!hasRobots) {
+    const header =
+`# robots.txt — generated by VISIGINE
+# ${url || 'https://deine-domain.de'}
+
+User-agent: *
+Allow: /
+
+# AI search engines and language model crawlers
+`
+    const blocks = AI_BOTS.map(botBlock).join('\n')
+    return {
+      content: `${header}${blocks}\nSitemap: ${sitemapUrlFor(url)}\n`,
+      mode: 'new',
+    }
+  }
+
+  const missing = AI_BOTS.filter((b) => !existingAiBots.includes(b))
+  if (missing.length === 0) {
+    return {
+      content: '# Deine robots.txt deckt bereits alle relevanten KI-Bots ab. Keine Änderungen nötig.\n',
+      mode: 'diff',
+    }
+  }
+
+  const header =
+`# Folgende Blöcke zu deiner bestehenden robots.txt hinzufügen
+# (am Ende der Datei, vor der Sitemap-Zeile falls vorhanden)
+
+`
+  return {
+    content: header + missing.map(botBlock).join('\n'),
+    mode: 'diff',
+  }
+}