<?php
/**
 * /sitemap.xml — auto-generated XML sitemap for search engines.
 *
 * Lists all published, non-deleted pages plus the home page, recent changes,
 * all-pages index, and category pages. Excludes admin/edit/private routes.
 *
 * Caching strategy:
 *   - Generated content is cached on disk at uploads/cache/sitemap.xml
 *   - Cache is regenerated when:
 *     a) Cache file doesn't exist
 *     b) Cache file is older than the most recently updated/deleted page
 *     c) Cache file is older than SITEMAP_MAX_AGE (24h failsafe)
 *   - This means the sitemap is "live" — saving any page invalidates the cache
 *     on the very next request to /sitemap.xml.
 */

require_once __DIR__ . '/../includes/bootstrap.php';

const SITEMAP_MAX_AGE = 86400;  // 24h — failsafe to refresh even if no edits

$cacheFile = dirname(__DIR__) . '/uploads/cache/sitemap.xml';
$cacheDir  = dirname($cacheFile);

// Best effort: if the directory cannot be created the sitemap is still served,
// just uncached. '@' keeps PHP warnings out of the XML response body.
if (!is_dir($cacheDir)) {
    @mkdir($cacheDir, 0775, true);
}

// Decide whether the on-disk copy can be served as-is.
$isFresh   = false;
$cacheTime = is_file($cacheFile) ? @filemtime($cacheFile) : false;
if ($cacheTime !== false) {
    // Deleted pages are deliberately NOT filtered out here: removing a page
    // must invalidate the cache too, otherwise its URL lingers in the sitemap
    // until the 24h failsafe kicks in.
    // NOTE(review): assumes soft-deleting a page bumps its updated_at — confirm
    // against the delete handler.
    $latestUpdate = (int) DB::scalar(
        "SELECT GREATEST(
            COALESCE((SELECT UNIX_TIMESTAMP(MAX(updated_at)) FROM pages), 0),
            COALESCE((SELECT UNIX_TIMESTAMP(MAX(updated_at)) FROM categories), 0)
        )"
    );
    // Strictly newer: both timestamps have one-second resolution, so an edit
    // landing in the same second as the cache write must count as "not cached".
    $isFresh = $cacheTime > $latestUpdate
        && (time() - $cacheTime) < SITEMAP_MAX_AGE;
}

// Freshly generated XML, kept in memory so it can be sent even if the cache
// write fails; stays null when the cached file is served instead.
$xml = null;
if (!$isFresh) {
    $xml = generate_sitemap_xml();

    // Write to a temp file in the same directory and rename it into place so a
    // concurrent request never reads a half-written sitemap.
    $tmpFile = $cacheFile . '.' . uniqid('tmp', true);
    if (@file_put_contents($tmpFile, $xml, LOCK_EX) !== false) {
        @chmod($tmpFile, 0644);
        if (!@rename($tmpFile, $cacheFile)) {
            @unlink($tmpFile);
            error_log('sitemap: could not move cache file into place: ' . $cacheFile);
        }
    } else {
        @unlink($tmpFile);
        error_log('sitemap: could not write cache file in ' . $cacheDir);
    }
}

header('Content-Type: application/xml; charset=utf-8');
header('X-Robots-Tag: noindex');  // Don't index the sitemap itself
header('Cache-Control: public, max-age=3600');  // CDN caches for 1h

if ($xml !== null) {
    // Just regenerated: send the in-memory copy, which is correct even when
    // the cache write failed and an older file is still on disk.
    echo $xml;
} elseif (@readfile($cacheFile) === false) {
    echo generate_sitemap_xml();  // Fallback if the cache file vanished meanwhile
}
exit;

/* ------------------------------------------------------------------ */

/**
 * Build the complete sitemap document as a string.
 *
 * Emits, in order: the home page, the /pages and /recent landing pages, every
 * category, then every published non-deleted page (most viewed first).
 *
 * The sitemap protocol allows at most 50,000 <url> entries per file, so output
 * is capped at that number. Because pages are ordered by view_count DESC, the
 * entries dropped on an oversized site are the least-viewed pages.
 *
 * @return string UTF-8 XML document, terminated by a newline
 */
function generate_sitemap_xml(): string {
    $maxUrls = 50000;  // Hard limit from the sitemap protocol

    $base = rtrim(canonical_base_url(), '/');
    $now  = gmdate('Y-m-d');

    $entries = [];

    // Home — high priority
    $entries[] = sitemap_url("$base/", $now, '1.0', 'daily');

    // Listing pages — medium priority; their content follows page edits
    $entries[] = sitemap_url("$base/pages",  $now, '0.6', 'daily');
    $entries[] = sitemap_url("$base/recent", $now, '0.6', 'hourly');

    // Categories
    $cats = DB::all(
        "SELECT slug, updated_at FROM categories ORDER BY sort_order ASC, name ASC"
    );
    foreach ($cats as $c) {
        if (count($entries) >= $maxUrls) {
            break;
        }
        $entries[] = sitemap_url(
            "$base/category/" . rawurlencode($c['slug']),
            sitemap_format_date($c['updated_at'] ?? null),
            '0.7',
            'daily'
        );
    }

    // All published pages
    $pages = DB::all(
        "SELECT slug, updated_at, view_count
         FROM pages
         WHERE status = 'published' AND is_deleted = 0
         ORDER BY view_count DESC"
    );
    foreach ($pages as $p) {
        if (count($entries) >= $maxUrls) {
            break;
        }
        // Heavily-viewed pages get higher priority — they're the ones users
        // care about most. Mapping: views > 1000 -> 0.9, > 100 -> 0.8, else 0.6
        $views = (int) ($p['view_count'] ?? 0);
        $priority = $views > 1000 ? '0.9' : ($views > 100 ? '0.8' : '0.6');
        $entries[] = sitemap_url(
            "$base/wiki/" . rawurlencode($p['slug']),
            sitemap_format_date($p['updated_at'] ?? null),
            $priority,
            'weekly'
        );
    }

    return '<?xml version="1.0" encoding="UTF-8"?>' . "\n"
         . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n"
         . implode('', $entries)
         . '</urlset>' . "\n";
}

/**
 * Render one <url> element of the sitemap.
 *
 * Only $loc is XML-escaped; the other three values are inserted verbatim, so
 * callers must pass values that are already safe to embed in XML.
 *
 * @param string $loc        Absolute URL of the page
 * @param string $lastmod    Text for the <lastmod> element
 * @param string $priority   Text for the <priority> element
 * @param string $changefreq Text for the <changefreq> element
 * @return string The element, two-space indented, ending with a newline
 */
function sitemap_url(string $loc, string $lastmod, string $priority, string $changefreq): string {
    $escapedLoc = htmlspecialchars($loc, ENT_XML1 | ENT_QUOTES, 'UTF-8');

    $lines = [
        '  <url>',
        '    <loc>' . $escapedLoc . '</loc>',
        '    <lastmod>' . $lastmod . '</lastmod>',
        '    <changefreq>' . $changefreq . '</changefreq>',
        '    <priority>' . $priority . '</priority>',
        '  </url>',
    ];

    return implode("\n", $lines) . "\n";
}

/**
 * Convert a SQL datetime string into the Y-m-d form used for <lastmod>.
 *
 * Falls back to today's UTC date when the input is null, empty, unparseable,
 * or resolves to a timestamp at or before the Unix epoch. The last case covers
 * MySQL's zero date ("0000-00-00 00:00:00"), which strtotime() turns into a
 * large negative timestamp that would otherwise be printed as "-0001-11-30".
 *
 * @param string|null $sqlDate Datetime string as read from the database
 * @return string Date formatted as Y-m-d in UTC
 */
function sitemap_format_date(?string $sqlDate): string {
    if ($sqlDate !== null && $sqlDate !== '') {
        $ts = strtotime($sqlDate);
        if ($ts !== false && $ts > 0) {
            return gmdate('Y-m-d', $ts);
        }
    }
    return gmdate('Y-m-d');
}

/**
 * Build the canonical scheme://host base URL using the SITE_URL config
 * if defined, otherwise reconstruct from the request.
 *
 * When reconstructing, the Host header is client-controlled, so it is only
 * accepted if it looks like a plain hostname or bracketed IPv6 literal with an
 * optional port; anything else falls back to "localhost". This only checks
 * syntax — a well-formed but spoofed host still passes — so deployments whose
 * output is cached or indexed should define SITE_URL.
 *
 * @return string Base URL; trailing slashes are stripped from SITE_URL
 */
function canonical_base_url(): string {
    if (defined('SITE_URL') && SITE_URL !== '') {
        return rtrim(SITE_URL, '/');
    }

    $scheme = (!empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off') ? 'https' : 'http';

    $host = $_SERVER['HTTP_HOST'] ?? 'localhost';
    // Hostname (letters, digits, dot, dash, underscore) or [IPv6], plus optional :port.
    $hostPattern = '/^(?:[a-z0-9](?:[a-z0-9._-]*[a-z0-9])?|\[[0-9a-f:.]+\])(?::\d{1,5})?$/i';
    if (!is_string($host) || preg_match($hostPattern, $host) !== 1) {
        $host = 'localhost';
    }

    return $scheme . '://' . $host;
}
