"""Fix pharmacy_storage_stats materialized view calculation

Revision ID: 20251227_02
Revises: 20251227_01_add_ean_codes_array
Create Date: 2025-12-27

Problem:
    The original view used JOINs that caused row multiplication:
    - Each sales_data row was duplicated for each file_upload
    - Each sales_enrichment row was duplicated similarly
    - Result: storage_mb values were hugely inflated (5TB+ instead of MB)

Solution:
    Use CTEs to calculate each component separately before joining,
    preventing row multiplication and ensuring accurate calculations.
"""
import logging

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "20251227_02"
# NOTE(review): the module docstring says "Revises: 20251227_01_add_ean_codes_array"
# while down_revision is "20251227_01" — confirm which value matches the
# parent migration's actual revision ID.
down_revision = "20251227_01"
branch_labels = None
depends_on = None

# Module-level logger used for the progress messages emitted by
# upgrade() and downgrade().
logger = logging.getLogger(__name__)


def upgrade():
    """Replace pharmacy_storage_stats with a CTE-based definition.

    Steps, in order:

    1. Drop the existing materialized view (``IF EXISTS ... CASCADE``, so
       objects depending on the view are dropped with it; only the unique
       index is recreated here).
    2. Recreate the view. Sales rows, enrichment rows and file uploads are
       each aggregated per pharmacy in their own CTE and only then
       LEFT JOINed to ``pharmacies``, so every pharmacy yields one row and
       no source row is counted more than once.
    3. Recreate the unique index on ``pharmacy_id``.

    Statements are issued through ``op.execute`` rather than
    ``op.get_bind().execute``: the bind is ``None`` when Alembic runs in
    SQL-script (``--sql``) mode, whereas ``op.execute`` works both online
    and offline.
    """
    logger.info("Dropping old pharmacy_storage_stats materialized view...")
    op.execute(sa.text("DROP MATERIALIZED VIEW IF EXISTS pharmacy_storage_stats CASCADE"))

    logger.info("Creating corrected pharmacy_storage_stats materialized view with CTEs...")
    op.execute(sa.text("""
        CREATE MATERIALIZED VIEW pharmacy_storage_stats AS
        WITH sales_stats AS (
            -- Calculate sales_data stats per pharmacy (no duplication)
            SELECT
                pharmacy_id,
                COUNT(*) as total_sales,
                COALESCE(SUM(pg_column_size(sales_data.*)), 0) as sales_bytes,
                MIN(created_at) as data_from,
                MAX(created_at) as data_to
            FROM sales_data
            GROUP BY pharmacy_id
        ),
        enrichment_stats AS (
            -- Calculate sales_enrichment stats per pharmacy (no duplication)
            SELECT
                sd.pharmacy_id,
                COALESCE(SUM(pg_column_size(se.*)), 0) as enrichment_bytes
            FROM sales_enrichment se
            JOIN sales_data sd ON se.sales_data_id = sd.id
            GROUP BY sd.pharmacy_id
        ),
        file_stats AS (
            -- Calculate file_uploads stats per pharmacy (no duplication)
            SELECT
                pharmacy_id,
                COALESCE(SUM(file_size), 0) as file_bytes
            FROM file_uploads
            GROUP BY pharmacy_id
        )
        SELECT
            p.id as pharmacy_id,
            COALESCE(ss.total_sales, 0) as total_sales,
            COALESCE(
                (
                    COALESCE(ss.sales_bytes, 0) +
                    COALESCE(es.enrichment_bytes, 0) +
                    COALESCE(fs.file_bytes, 0)
                ) / 1024.0 / 1024.0,
                0
            ) as storage_mb,
            ss.data_from,
            ss.data_to
        FROM pharmacies p
        LEFT JOIN sales_stats ss ON ss.pharmacy_id = p.id
        LEFT JOIN enrichment_stats es ON es.pharmacy_id = p.id
        LEFT JOIN file_stats fs ON fs.pharmacy_id = p.id
    """))

    # REFRESH MATERIALIZED VIEW CONCURRENTLY requires a unique index on the
    # view; the one on the old view was removed by the DROP above.
    logger.info("Creating unique index on pharmacy_storage_stats...")
    op.execute(sa.text(
        "CREATE UNIQUE INDEX idx_pharmacy_storage_stats_pharmacy_id "
        "ON pharmacy_storage_stats(pharmacy_id)"
    ))

    logger.info("pharmacy_storage_stats materialized view recreated with correct calculation")


def downgrade():
    """Restore the original (incorrect) pharmacy_storage_stats view.

    Drops the CTE-based view (``IF EXISTS ... CASCADE``) and recreates the
    previous definition, which LEFT JOINs ``sales_data``,
    ``sales_enrichment`` and ``file_uploads`` directly onto ``pharmacies``
    before aggregating. That join shape multiplies rows, so the summed
    sizes are inflated; a warning is logged to make this explicit. The
    unique index on ``pharmacy_id`` is recreated afterwards.

    Statements are issued through ``op.execute`` rather than
    ``op.get_bind().execute``: the bind is ``None`` when Alembic runs in
    SQL-script (``--sql``) mode, whereas ``op.execute`` works both online
    and offline.
    """
    logger.warning(
        "DOWNGRADE WARNING: Restoring pharmacy_storage_stats with known JOIN "
        "multiplication bug. Storage values will be significantly inflated (5TB+ instead of MB)."
    )

    logger.info("Dropping corrected pharmacy_storage_stats materialized view...")
    op.execute(sa.text("DROP MATERIALIZED VIEW IF EXISTS pharmacy_storage_stats CASCADE"))

    logger.info("Restoring original pharmacy_storage_stats materialized view...")
    # Intentionally reproduces the pre-fix definition, bug included, so the
    # schema matches what the previous revision expects.
    op.execute(sa.text("""
        CREATE MATERIALIZED VIEW pharmacy_storage_stats AS
        SELECT
            p.id as pharmacy_id,
            COUNT(DISTINCT sd.id) as total_sales,
            COALESCE(
                (
                    COALESCE(SUM(pg_column_size(sd.*)), 0) +
                    COALESCE(SUM(pg_column_size(se.*)), 0) +
                    COALESCE(SUM(fu.file_size), 0)
                ) / 1024 / 1024,
                0
            ) as storage_mb,
            MIN(sd.created_at) as data_from,
            MAX(sd.created_at) as data_to
        FROM pharmacies p
        LEFT JOIN sales_data sd ON sd.pharmacy_id = p.id
        LEFT JOIN sales_enrichment se ON se.sales_data_id = sd.id
        LEFT JOIN file_uploads fu ON fu.pharmacy_id = p.id
        GROUP BY p.id
    """))

    # The unique index went away with the dropped view; recreate it on the
    # restored one.
    op.execute(sa.text(
        "CREATE UNIQUE INDEX idx_pharmacy_storage_stats_pharmacy_id "
        "ON pharmacy_storage_stats(pharmacy_id)"
    ))

    logger.info("Original pharmacy_storage_stats materialized view restored")
