<?php

declare(strict_types=1);

namespace App;

/**
 * URL Validator - destination safety layer.
 *
 * Guards redirect targets against open-redirect abuse and known-bad
 * destinations. All checks are purely syntactic: no DNS lookup or network
 * request is performed.
 */
final class UrlValidator
{
    /** Maximum accepted URL length, in bytes. */
    private const MAX_URL_LENGTH = 2048;

    /**
     * Validate a URL for use as a redirect target.
     *
     * Checks run in this order and the first failure wins: empty, length,
     * CR/LF, other control characters or backslash, parseability, scheme,
     * host, userinfo, IP literal, host denylist, TLD denylist, host allowlist.
     *
     * The host is lower-cased and stripped of the trailing root dot
     * ("example.com." -> "example.com") before any list is consulted, so the
     * fully-qualified spelling of a host cannot be used to dodge a rule.
     *
     * @param string       $url            Candidate URL; surrounding whitespace is ignored.
     * @param list<string> $allowedHosts   Exact hosts or "*.example.com" wildcards. Empty = no allowlist.
     * @param list<string> $deniedHosts    Exact hosts, "*.example.com" wildcards, or substrings.
     * @param list<string> $deniedTlds     TLDs to reject (case-insensitive, leading dot optional).
     * @param bool         $httpsOnly      Reject every scheme other than https.
     * @param bool         $blockIpLiteral Reject IPv4/IPv6 literals and numeric IPv4 spellings.
     * @param bool         $blockUserinfo  Reject URLs carrying "user:pass@".
     * @return array{valid:bool,error:string,url:string} On success "url" is the trimmed input
     *                                                   and "error" is empty; on failure "url" is empty.
     */
    public static function validate(
        string $url,
        array $allowedHosts = [],
        array $deniedHosts = [],
        array $deniedTlds = [],
        bool $httpsOnly = true,
        bool $blockIpLiteral = true,
        bool $blockUserinfo = true
    ): array {
        $cleanUrl = trim($url);
        if ($cleanUrl === '') {
            return self::reject('Empty URL');
        }

        if (strlen($cleanUrl) > self::MAX_URL_LENGTH) {
            return self::reject('URL too long');
        }

        // CR/LF would allow header injection when the URL lands in a Location header.
        if (preg_match('/[\r\n]/', $cleanUrl) === 1) {
            return self::reject('CRLF detected');
        }

        // Browsers strip tabs and treat "\" as "/", while parse_url() does neither.
        // Accepting such input would let the validated host differ from the host
        // the browser actually navigates to (e.g. "https://evil.com\.good.com/").
        if (preg_match('/[\x00-\x1F\x7F\\\\]/', $cleanUrl) === 1) {
            return self::reject('Invalid URL format');
        }

        $parsed = parse_url($cleanUrl);
        if (!is_array($parsed)) {
            return self::reject('Invalid URL format');
        }

        $scheme = strtolower($parsed['scheme'] ?? '');
        if ($scheme === '') {
            return self::reject('Missing scheme');
        }

        if ($httpsOnly && $scheme !== 'https') {
            return self::reject('HTTPS required');
        }

        if (!in_array($scheme, ['http', 'https'], true)) {
            return self::reject('Invalid scheme');
        }

        // Normalise away the root dot so "evil.tk." is treated exactly like "evil.tk".
        $host = rtrim(strtolower($parsed['host'] ?? ''), '.');
        if ($host === '') {
            return self::reject('Missing host');
        }

        if ($blockUserinfo && (isset($parsed['user']) || isset($parsed['pass']))) {
            return self::reject('Userinfo not allowed');
        }

        if ($blockIpLiteral && self::isIpLiteral($host)) {
            return self::reject('IP literal not allowed');
        }

        // Denylists take priority over the allowlist.
        if (self::isHostDenied($host, $deniedHosts)) {
            return self::reject('Host denied');
        }

        if (self::isTldDenied(self::getTld($host), $deniedTlds)) {
            return self::reject('TLD denied');
        }

        if ($allowedHosts !== [] && !self::isHostAllowed($host, $allowedHosts)) {
            return self::reject('Host not in allowlist');
        }

        return ['valid' => true, 'error' => '', 'url' => $cleanUrl];
    }

    /**
     * Build the failure result for validate().
     *
     * @return array{valid:bool,error:string,url:string}
     */
    private static function reject(string $error): array
    {
        return ['valid' => false, 'error' => $error, 'url' => ''];
    }

    /**
     * Tell whether a normalised host is an IP address rather than a DNS name.
     *
     * Covers bracketed IPv6 literals (parse_url() keeps the brackets, which
     * FILTER_VALIDATE_IP does not accept), plain IPv4/IPv6 addresses, and hosts
     * whose last label is numeric or "0x"-prefixed hex ("2130706433", "0x7f.1"),
     * which browsers interpret as IPv4 addresses.
     */
    private static function isIpLiteral(string $host): bool
    {
        if (str_starts_with($host, '[')) {
            return true;
        }

        if (filter_var($host, FILTER_VALIDATE_IP) !== false) {
            return true;
        }

        return preg_match('/^(?:[0-9]+|0x[0-9a-f]*)$/D', self::getTld($host)) === 1;
    }

    /**
     * Match a normalised host against one list entry.
     *
     * An entry matches when it equals the host, or when it has the form
     * "*.example.com" and the host is "example.com" or any subdomain of it.
     * A trailing root dot on the entry is ignored.
     *
     * @param string $host    Lower-cased host without trailing dot.
     * @param string $pattern Lower-cased, trimmed list entry.
     */
    private static function matchesHostPattern(string $host, string $pattern): bool
    {
        $pattern = rtrim($pattern, '.');
        if ($pattern === '') {
            return false;
        }

        if ($host === $pattern) {
            return true;
        }

        if (!str_starts_with($pattern, '*.')) {
            return false;
        }

        $domain = substr($pattern, 2);

        return $domain !== ''
            && ($host === $domain || str_ends_with($host, '.' . $domain));
    }

    /**
     * Check if host is in allowlist (exact entries and "*.domain" wildcards).
     *
     * @param list<string> $allowedHosts
     */
    private static function isHostAllowed(string $host, array $allowedHosts): bool
    {
        foreach ($allowedHosts as $allowed) {
            if (self::matchesHostPattern($host, strtolower(trim($allowed)))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check if host is in denylist.
     *
     * Besides exact and "*.domain" wildcard entries, every entry is also
     * applied as a plain substring of the host. That is deliberately broad:
     * an entry such as "t.co" also denies "microsoft.com", so prefer full
     * host names or wildcards. Blank entries are skipped, because an empty
     * substring would otherwise match (and deny) every host.
     *
     * @param list<string> $deniedHosts
     */
    private static function isHostDenied(string $host, array $deniedHosts): bool
    {
        foreach ($deniedHosts as $denied) {
            $entry = strtolower(trim($denied));
            if ($entry === '') {
                continue;
            }

            if (self::matchesHostPattern($host, $entry) || str_contains($host, $entry)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check if TLD is denied.
     *
     * Entries are compared case-insensitively; surrounding whitespace and
     * dots are ignored, so "tk", " TK " and ".tk" are equivalent.
     *
     * @param string       $tld        Lower-cased TLD as returned by getTld().
     * @param list<string> $deniedTlds
     */
    private static function isTldDenied(string $tld, array $deniedTlds): bool
    {
        if ($tld === '') {
            return false;
        }

        foreach ($deniedTlds as $denied) {
            if ($tld === strtolower(trim(trim($denied), '.'))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Extract the TLD (last dot-separated label) from a host, lower-cased.
     *
     * A trailing root dot is ignored, so "example.com." yields "com".
     * A host without any dot is returned whole.
     */
    private static function getTld(string $host): string
    {
        $host = rtrim($host, '.');
        $lastDot = strrpos($host, '.');

        return strtolower($lastDot === false ? $host : substr($host, $lastDot + 1));
    }

    /**
     * Common malware/spam TLDs (example list)
     *
     * @return list<string>
     */
    public static function getDefaultDeniedTlds(): array
    {
        return [
            'tk', 'ml', 'ga', 'cf', 'gq', // Free TLDs often abused
            'xxx', 'adult', 'porn', // Adult content
            'zip', 'mov', // Confusable file extensions
        ];
    }

    /**
     * Validate path against allowlist (optional per-campaign).
     *
     * Returns true when no allowlist is given. Otherwise the URL path (or "/"
     * when the URL has none) must start with one of the allowed prefixes.
     * Unparseable URLs and paths containing ".." segments (literal or
     * percent-encoded) are rejected, since such a path can resolve outside
     * the prefix it appears to start with. Blank allowlist entries are
     * ignored because an empty prefix would match every path.
     *
     * @param list<string> $allowedPaths Path prefixes such as "/promo/".
     */
    public static function validatePath(string $url, array $allowedPaths): bool
    {
        if ($allowedPaths === []) {
            return true; // No path restriction
        }

        $parsed = parse_url(trim($url));
        if (!is_array($parsed)) {
            return false;
        }

        $path = $parsed['path'] ?? '/';

        // Decode and unify separators before looking for parent-directory segments.
        $decoded = str_replace('\\', '/', rawurldecode($path));
        if (in_array('..', explode('/', $decoded), true)) {
            return false;
        }

        foreach ($allowedPaths as $allowed) {
            if ($allowed !== '' && str_starts_with($path, $allowed)) {
                return true;
            }
        }

        return false;
    }
}
