-- Migration: Add optimization indexes and daily aggregation
-- Date: 2024-12-24
-- Description: Adds composite indexes for list queries and daily summary aggregation event

-- =============================================================================
-- PART 1: Additional Indexes for Performance
-- =============================================================================

-- shortlinks: supports the admin-panel listing, which sorts by
-- ORDER BY updated_at DESC, code DESC.
CREATE INDEX IF NOT EXISTS idx_shortlinks_updated_code
    ON shortlinks (
        updated_at DESC,
        code DESC
    );

-- traffic_analytics: three indexes sharing the (shortlink_code, visited_at)
-- prefix, each ending in the column one analytics breakdown groups by, so the
-- breakdown can be answered from the index alone.

-- Breakdown by country for a given shortlink.
CREATE INDEX IF NOT EXISTS idx_traffic_code_country
    ON traffic_analytics (
        shortlink_code,
        visited_at,
        country_code
    );

-- Breakdown by device brand for a given shortlink.
CREATE INDEX IF NOT EXISTS idx_traffic_code_device
    ON traffic_analytics (
        shortlink_code,
        visited_at,
        device_brand
    );

-- Breakdown by browser for a given shortlink.
CREATE INDEX IF NOT EXISTS idx_traffic_code_browser
    ON traffic_analytics (
        shortlink_code,
        visited_at,
        browser_name
    );

-- =============================================================================
-- PART 2: Daily Summary Aggregation Procedure
-- =============================================================================

DELIMITER //

-- Drop any previous definition so this migration can be re-applied.
DROP PROCEDURE IF EXISTS aggregate_daily_traffic//

-- aggregate_daily_traffic(target_date)
--
-- Rebuilds the traffic_summary_daily rows for one calendar day from
-- traffic_analytics.
--
-- Parameters:
--   target_date  Calendar day to aggregate. Rows whose visited_at falls in
--                [target_date 00:00:00, target_date + 1 day 00:00:00) are
--                included.
--
-- Behavior:
--   * Deletes every traffic_summary_daily row with summary_date = target_date,
--     then inserts one row per shortlink_code seen on that day, so the
--     procedure can safely be re-run for the same date.
--   * Runs in its own transaction. Note that START TRANSACTION implicitly
--     commits any transaction already open in the calling session.
--   * On any SQLEXCEPTION the transaction is rolled back and the original
--     error is re-raised to the caller.
--
-- NOTE(review): the top-N subqueries reference the outer row from inside a
-- derived table; MySQL only accepts that from 8.0.14 onwards. Confirm the
-- target server version before deploying.
CREATE PROCEDURE aggregate_daily_traffic(IN target_date DATE)
BEGIN
    -- Half-open day window. Comparing visited_at against constants (instead of
    -- wrapping it in DATE()) keeps the predicate index-friendly, so the
    -- (shortlink_code, visited_at, ...) indexes can be used for range access.
    -- Assumes visited_at is a DATETIME/TIMESTAMP column - TODO confirm.
    DECLARE v_day_start DATETIME DEFAULT CAST(target_date AS DATETIME);
    DECLARE v_day_end   DATETIME DEFAULT CAST(target_date AS DATETIME) + INTERVAL 1 DAY;

    -- Undo the partial DELETE/INSERT and propagate the original error.
    DECLARE EXIT HANDLER FOR SQLEXCEPTION
    BEGIN
        ROLLBACK;
        RESIGNAL;
    END;

    START TRANSACTION;

    -- Remove any earlier summary for this day (allows re-run).
    DELETE FROM traffic_summary_daily
    WHERE summary_date = target_date;

    -- Insert one aggregated row per shortlink.
    INSERT INTO traffic_summary_daily (
        shortlink_code,
        summary_date,
        total_visits,
        unique_ips,
        bot_visits,
        mobile_visits,
        desktop_visits,
        tablet_visits,
        risky_visits,
        vpn_visits,
        proxy_visits,
        tor_visits,
        top_countries,
        top_devices,
        top_browsers,
        top_referrers,
        avg_response_time_ms
    )
    SELECT
        v.shortlink_code,
        target_date,
        COUNT(*) AS total_visits,
        COUNT(DISTINCT v.ip_bin) AS unique_ips,
        -- SUM() yields NULL when every flag in the group is NULL; report 0.
        COALESCE(SUM(v.is_bot), 0) AS bot_visits,
        SUM(CASE WHEN v.device_type = 'mobile' THEN 1 ELSE 0 END) AS mobile_visits,
        SUM(CASE WHEN v.device_type = 'desktop' THEN 1 ELSE 0 END) AS desktop_visits,
        SUM(CASE WHEN v.device_type = 'tablet' THEN 1 ELSE 0 END) AS tablet_visits,
        -- A visit is counted as risky when its risk_score is 75 or higher.
        SUM(CASE WHEN v.risk_score >= 75 THEN 1 ELSE 0 END) AS risky_visits,
        COALESCE(SUM(v.is_vpn), 0) AS vpn_visits,
        COALESCE(SUM(v.is_proxy), 0) AS proxy_visits,
        COALESCE(SUM(v.is_tor), 0) AS tor_visits,
        -- Top 10 countries as a JSON object {country_code: visits}.
        -- The secondary sort key makes the cut-off deterministic on ties.
        (
            SELECT JSON_OBJECTAGG(top_c.country_code, top_c.cnt)
            FROM (
                SELECT c.country_code, COUNT(*) AS cnt
                FROM traffic_analytics c
                WHERE c.shortlink_code = v.shortlink_code
                  AND c.visited_at >= v_day_start
                  AND c.visited_at < v_day_end
                  AND c.country_code IS NOT NULL
                GROUP BY c.country_code
                ORDER BY cnt DESC, c.country_code
                LIMIT 10
            ) top_c
        ) AS top_countries,
        -- Top 10 device brands as a JSON object {device_brand: visits}.
        (
            SELECT JSON_OBJECTAGG(top_d.device_brand, top_d.cnt)
            FROM (
                SELECT d.device_brand, COUNT(*) AS cnt
                FROM traffic_analytics d
                WHERE d.shortlink_code = v.shortlink_code
                  AND d.visited_at >= v_day_start
                  AND d.visited_at < v_day_end
                  AND d.device_brand IS NOT NULL
                GROUP BY d.device_brand
                ORDER BY cnt DESC, d.device_brand
                LIMIT 10
            ) top_d
        ) AS top_devices,
        -- Top 10 browsers as a JSON object {browser_name: visits}.
        (
            SELECT JSON_OBJECTAGG(top_b.browser_name, top_b.cnt)
            FROM (
                SELECT b.browser_name, COUNT(*) AS cnt
                FROM traffic_analytics b
                WHERE b.shortlink_code = v.shortlink_code
                  AND b.visited_at >= v_day_start
                  AND b.visited_at < v_day_end
                  AND b.browser_name IS NOT NULL
                GROUP BY b.browser_name
                ORDER BY cnt DESC, b.browser_name
                LIMIT 10
            ) top_b
        ) AS top_browsers,
        -- Top 10 referrer hosts as a JSON object {host: visits}. The host is
        -- taken from referer by stripping the http(s):// scheme and cutting at
        -- the first '/' and then at the first '?'.
        (
            SELECT JSON_OBJECTAGG(top_r.ref_host, top_r.cnt)
            FROM (
                SELECT
                    SUBSTRING_INDEX(SUBSTRING_INDEX(REPLACE(REPLACE(r.referer, 'https://', ''), 'http://', ''), '/', 1), '?', 1) AS ref_host,
                    COUNT(*) AS cnt
                FROM traffic_analytics r
                WHERE r.shortlink_code = v.shortlink_code
                  AND r.visited_at >= v_day_start
                  AND r.visited_at < v_day_end
                  AND r.referer IS NOT NULL
                  AND r.referer != ''
                GROUP BY ref_host
                ORDER BY cnt DESC, ref_host
                LIMIT 10
            ) top_r
        ) AS top_referrers,
        AVG(v.response_time_ms) AS avg_response_time_ms
    FROM traffic_analytics v
    WHERE v.visited_at >= v_day_start
      AND v.visited_at < v_day_end
    GROUP BY v.shortlink_code;

    COMMIT;
END//

DELIMITER ;

-- =============================================================================
-- PART 3: Scheduled Event for Daily Aggregation
-- =============================================================================

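-- The event below only fires while the event scheduler is running. This migration
-- does not enable it; if needed, run once manually: SET GLOBAL event_scheduler = ON
-- Note: Changing it at runtime requires SUPER (or SYSTEM_VARIABLES_ADMIN on MySQL 8.0+);
-- alternatively set event_scheduler=ON in my.cnf.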

-- Remove any earlier definition of the event before re-creating it.
DROP EVENT IF EXISTS evt_aggregate_daily_traffic;

-- Daily job: first fires tomorrow at 00:15, then every 24 hours, and
-- aggregates the traffic of the day that has just ended.
-- The body is a single statement, so no custom DELIMITER is needed.
CREATE EVENT IF NOT EXISTS evt_aggregate_daily_traffic
    ON SCHEDULE
        EVERY 1 DAY
        STARTS TIMESTAMP(CURRENT_DATE + INTERVAL 1 DAY, '00:15:00')
    DO
        CALL aggregate_daily_traffic(CURRENT_DATE - INTERVAL 1 DAY);

-- =============================================================================
-- PART 4: Initial Backfill (run manually to populate historical data)
-- =============================================================================

-- Backfill last 30 days (uncomment to run)
-- Note: This can be slow for large datasets, run during off-peak hours
/*
CALL aggregate_daily_traffic(DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY));
CALL aggregate_daily_traffic(DATE_SUB(CURRENT_DATE, INTERVAL 29 DAY));
CALL aggregate_daily_traffic(DATE_SUB(CURRENT_DATE, INTERVAL 28 DAY));
-- ... continue for all days ...
CALL aggregate_daily_traffic(DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY));
*/
