"""add_unique_constraint_sales_data

Add UNIQUE constraint to prevent duplicate sales data (fixes Issue #251)

This migration adds a composite UNIQUE constraint on sales_data table
to prevent duplicate entries based on the business logic criteria:
- pharmacy_id + sale_date + codigo_nacional + product_name + quantity + total_amount

CONTEXT:
- 16,580 duplicate groups exist in production database
- Duplicate detection logic already implemented in upload.py (lines 395-413)
- This migration prevents future duplicates at the database level

SAFETY: This migration will fail if there are existing duplicates.
Clean duplicates manually first using:
  POST /api/v1/admin/cleanup-duplicates

IMPORTANT: Run cleanup endpoint BEFORE running this migration!

Revision ID: 20250112_unique_sales
Revises: 20250112_01_index
Create Date: 2025-10-12 15:30:00.000000

"""
from typing import Sequence, Union
import logging

from alembic import op
from sqlalchemy import text

# Module-level logger named after this migration module; upgrade() and
# downgrade() report their progress and failures through it.
logger = logging.getLogger(__name__)

# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration that precedes this one in the revision chain.
revision: str = '20250112_unique_sales'
down_revision: Union[str, None] = '20250112_01_index'
# No branch label and no dependency on another revision are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


# Columns that together identify one sale. Shared by the pre-flight duplicate
# check, the error message and the constraint so they cannot drift apart.
_UNIQUE_SALES_COLUMNS = (
    'pharmacy_id',
    'sale_date',
    'codigo_nacional',
    'product_name',
    'quantity',
    'total_amount',
)

# Maximum number of duplicate groups listed in the log when the check fails.
_DUPLICATE_SAMPLE_LIMIT = 10

# Product names longer than this are truncated in the log output.
_PRODUCT_NAME_LOG_WIDTH = 30


def _format_duplicate_group(idx, row) -> str:
    """Render one row of the duplicate-groups query as a single log line.

    ``row`` holds the six key columns followed by the group's row count.
    A NULL product name is shown as ``<NULL>`` instead of being sliced, and
    the ellipsis is only added when the name was actually truncated.
    """
    (pharmacy_id, sale_date, codigo_nacional,
     product_name, quantity, total_amount, count) = row

    if product_name is None:
        product_label = "<NULL>"
    else:
        product_label = str(product_name)
        if len(product_label) > _PRODUCT_NAME_LOG_WIDTH:
            product_label = f"{product_label[:_PRODUCT_NAME_LOG_WIDTH]}..."

    return (
        f"  Group {idx}: pharmacy={pharmacy_id}, date={sale_date}, "
        f"CN={codigo_nacional}, product={product_label}, "
        f"qty={quantity}, amount={total_amount} - {count} records"
    )


def upgrade() -> None:
    """
    Add UNIQUE constraint to sales_data table to prevent duplicates.

    Runs a pre-flight query that groups sales_data by the composite key and
    aborts before touching the schema if any group has more than one row.

    SAFETY: This migration will fail if there are existing duplicates
    in the database. Clean duplicates manually first using:
      POST /api/v1/admin/cleanup-duplicates

    Composite unique key:
      (pharmacy_id, sale_date, codigo_nacional, product_name, quantity, total_amount)

    Raises:
        Exception: if duplicate groups exist. The message carries the total
            number of groups, the remediation steps and a verification query.

    NOTE(review): GROUP BY puts NULL key values into the same group, whereas
    a UNIQUE constraint on most databases (e.g. PostgreSQL by default) treats
    NULLs as distinct. If any key column is nullable, the pre-flight check is
    stricter than the constraint it guards - confirm against the table schema.
    """
    conn = op.get_bind()

    # Both queries and the error message are built from the same column
    # list. The names are module constants, not user input.
    column_list = ", ".join(_UNIQUE_SALES_COLUMNS)
    duplicate_groups_sql = (
        f"SELECT {column_list}, COUNT(*) AS duplicate_count "
        f"FROM sales_data "
        f"GROUP BY {column_list} "
        f"HAVING COUNT(*) > 1"
    )

    logger.info("Running pre-flight check for duplicate sales data...")

    # Fetch only a small sample: it proves duplicates exist and gives the
    # operator concrete examples without loading every group.
    sample = list(conn.execute(
        text(f"{duplicate_groups_sql} LIMIT {_DUPLICATE_SAMPLE_LIMIT}")
    ))

    if sample:
        # The sample is capped, so the real total needs its own query.
        total_groups = conn.execute(text(
            f"SELECT COUNT(*) AS total_duplicate_groups "
            f"FROM ({duplicate_groups_sql}) AS dup_groups"
        )).fetchone()[0]

        logger.error("Cannot upgrade: Found duplicate sales data records")
        logger.error(
            "Total duplicate groups found: %s (showing first %s)",
            total_groups,
            len(sample),
        )
        for idx, row in enumerate(sample, 1):
            logger.error("%s", _format_duplicate_group(idx, row))

        raise Exception(
            f"Cannot upgrade: Found {total_groups} groups of duplicate sales data. "
            f"This violates the UNIQUE constraint that this migration will add. "
            f"Please clean duplicates manually before running this migration. "
            f"\n\nSteps to fix:"
            f"\n1. Run cleanup endpoint: POST /api/v1/admin/cleanup-duplicates"
            f"\n2. Verify no duplicates remain with the query below"
            f"\n3. Re-run this migration: alembic upgrade head"
            f"\n\nQuery to verify duplicates:"
            f"\n  SELECT {column_list}, COUNT(*)"
            f"\n  FROM sales_data"
            f"\n  GROUP BY {column_list}"
            f"\n  HAVING COUNT(*) > 1"
        )

    logger.info("Pre-flight check passed: No duplicate sales data found")

    # Create composite UNIQUE constraint
    logger.info("Creating UNIQUE constraint on sales_data...")

    # Explicit constraint name so downgrade() can drop it by name.
    constraint_name = 'uq_sales_data_composite'

    op.create_unique_constraint(
        constraint_name,
        'sales_data',
        list(_UNIQUE_SALES_COLUMNS),
    )

    logger.info(
        "Successfully created UNIQUE constraint '%s' on sales_data. "
        "Future duplicate entries will be prevented at the database level.",
        constraint_name,
    )


def downgrade() -> None:
    """
    Drop the composite UNIQUE constraint from the sales_data table.

    WARNING: once the constraint is gone, duplicate sales data can be
    inserted again. Intended for emergency rollback only; a warning is
    logged both before and after the constraint is dropped.
    """
    target_constraint = 'uq_sales_data_composite'

    pre_drop_warning = (
        "⚠️  DOWNGRADE: Removing UNIQUE constraint from sales_data table. "
        "This will allow duplicate entries to be inserted again! "
        "Only use for emergency rollback."
    )
    post_drop_warning = (
        f"UNIQUE constraint '{target_constraint}' removed from sales_data. "
        "The system is now vulnerable to duplicate data entries. "
        "Please fix and upgrade as soon as possible."
    )

    logger.warning(pre_drop_warning)

    # Remove the constraint by the same name upgrade() created it under.
    op.drop_constraint(target_constraint, 'sales_data', type_='unique')

    logger.warning(post_drop_warning)
