"""Add category_aliases table (Issue #459)

Revision ID: 20251228_02
Revises: 20251228_01
Create Date: 2025-12-28

Issue #459: Migrate category normalization from static file to database.
Allows admins to manage category aliases without code deployment.

REGLA #14 (project rule): check for existence before creating (idempotent).

Seeds 23 existing aliases from backend/app/core/category_normalization.py
"""

import logging

import sqlalchemy as sa
from alembic import op

# Revision identifiers, used by Alembic to place this migration in the
# revision graph.
revision = "20251228_02"
# Parent revision: this migration applies on top of 20251228_01.
down_revision = "20251228_01"
# No branch labels or cross-branch dependencies are declared.
branch_labels = None
depends_on = None

# Module-level logger used to report migration progress.
logger = logging.getLogger(__name__)

# ==============================================================================
# SEED DATA: 23 aliases from category_normalization.py
# ==============================================================================
# Each entry is a (source_category, target_category, reason) tuple. The
# seeding helpers in this module insert one category_aliases row per entry,
# binding the three values to the :source, :target and :reason parameters.
# The reason strings are runtime data written to the database verbatim, so
# they must not be edited or translated here.
INITIAL_ALIASES = [
    # (source_category, target_category, reason)
    ("aftas_llagas", "aftas", "Mismo concepto, diferente nombre"),
    ("heridas_cicatrizacion", "cicatrizacion", "DB tiene 598 cicatrizacion vs 33 heridas_cicatrizacion"),
    ("mareo", "mareo_viaje", "Mismo concepto"),
    ("vitaminas_minerales", "vitaminas_general", "Normalización general"),
    ("calcio_vitamina_d", "calcio_huesos", "Normalización general"),
    ("desinfeccion", "heridas_apositos", "Desinfección = Heridas/Apósitos"),
    ("dolor_fiebre", "dolor", "Simplificación"),
    ("gases", "gases_flatulencia", "Normalización"),
    ("gases_digestion", "gases_flatulencia", "Normalización"),
    ("memoria", "memoria_concentracion", "Ampliación concepto"),
    ("tos_seca", "mucosidad_respiratoria", "Agrupación respiratoria"),
    ("tos_garganta", "mucosidad_respiratoria", "DB: 2598 mucosidad_respiratoria vs 225 tos_garganta"),
    ("varices", "circulacion_piernas", "Normalización circulación"),
    ("xerostomia", "boca_seca", "Término más común"),
    ("tinnitus", "acufenos", "Término más común en España"),
    ("candidiasis", "hongos_vaginales", "Categoría más específica"),
    ("hongos_piel", "hongos_pies", "Normalización"),
    ("colageno", "articulaciones", "DB clasifica colágeno como articulaciones (109 productos)"),
    ("compresion_vendajes", "vendaje_compresion", "Orden de palabras"),
    ("infeccion_urinaria", "cistitis", "Término más común"),
    ("irritacion_panal", "dermatitis_panal", "Mismo concepto"),
    ("quemaduras", "quemaduras_aftersun", "DB: 75 quemaduras_aftersun vs 2 quemaduras"),
    ("ortopedia", "material_ortopedico", "DB: 235 material_ortopedico vs 124 ortopedia"),
]


def upgrade() -> None:
    """Create the category_aliases table, its indexes, and the seed rows.

    The migration is idempotent with respect to the table: when
    ``category_aliases`` is already present in the ``public`` schema, no DDL
    is issued and only the seed aliases that are missing get inserted. Note
    that index/constraint creation is skipped on that path.
    """
    conn = op.get_bind()

    # ====================================================================
    # STEP 1: Check if table already exists (idempotent)
    # ====================================================================
    table_exists = conn.execute(
        sa.text(
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema='public' AND table_name='category_aliases'"
        )
    ).fetchone()

    if table_exists:
        logger.info("category_aliases table already exists, skipping creation")
        # Still try to seed missing aliases
        _seed_missing_aliases(conn)
        return

    # ====================================================================
    # STEP 2: Create table
    # ====================================================================
    op.create_table(
        "category_aliases",
        sa.Column("id", sa.Integer(), primary_key=True),
        # Uniqueness of source_category is enforced by the named constraint
        # created in STEP 3. Declaring unique=True here as well would build a
        # second, redundant unique index on the same column.
        sa.Column("source_category", sa.String(100), nullable=False),
        sa.Column("target_category", sa.String(100), nullable=False),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default="true"),
        sa.Column("reason", sa.String(500), nullable=True),
        sa.Column("usage_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("last_used_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("NOW()"),
            nullable=False,
        ),
    )
    logger.info("Created category_aliases table")

    # ====================================================================
    # STEP 3: Create constraints and indexes
    # ====================================================================
    # Single, explicitly named unique constraint on the alias source.
    op.create_unique_constraint(
        "uq_category_alias_source", "category_aliases", ["source_category"]
    )

    # Index for active aliases (most common query)
    op.create_index(
        "ix_category_alias_active",
        "category_aliases",
        ["is_active"],
    )

    # Index for target lookup (reverse lookups)
    op.create_index(
        "ix_category_alias_target",
        "category_aliases",
        ["target_category"],
    )

    # Case-insensitive index for source search. A functional index on
    # lower(...) is emitted as raw SQL rather than through op.create_index.
    op.execute(
        sa.text(
            """
            CREATE INDEX ix_category_alias_source_lower
            ON category_aliases (lower(source_category))
            """
        )
    )

    logger.info("Created indexes for category_aliases table")

    # ====================================================================
    # STEP 4: Seed initial aliases
    # ====================================================================
    _seed_all_aliases(conn)


def _seed_all_aliases(conn) -> None:
    """Insert every entry of INITIAL_ALIASES into category_aliases.

    No existence check is performed, so this is meant for a table that does
    not yet contain any of the seed sources (it is called right after the
    table is created).

    Args:
        conn: Database connection used to execute the inserts (the value
            returned by ``op.get_bind()`` in the caller).
    """
    # Build the statement once; only the bound parameters vary per row.
    insert_stmt = sa.text(
        """
        INSERT INTO category_aliases (source_category, target_category, is_active, reason, usage_count, created_at)
        VALUES (:source, :target, true, :reason, 0, NOW())
        """
    )
    rows = [
        {"source": source, "target": target, "reason": reason}
        for source, target, reason in INITIAL_ALIASES
    ]

    # Passing a list of parameter dicts runs the statement once per dict
    # (executemany). Guard the empty case: an empty list would be treated as
    # "no parameters" and the statement would fail on its unbound names.
    if rows:
        conn.execute(insert_stmt, rows)
    logger.info("Seeded %d category aliases", len(rows))


def _seed_missing_aliases(conn) -> None:
    """Insert only the INITIAL_ALIASES entries not yet in category_aliases.

    An alias counts as present when a row with the same ``source_category``
    exists, whatever its target or reason; such rows are left untouched.
    A log line is emitted only when at least one alias was inserted.

    Args:
        conn: Database connection used to run the lookup and the inserts
            (the value returned by ``op.get_bind()`` in the caller).
    """
    # One lookup for all candidate sources instead of one SELECT per alias.
    lookup_stmt = sa.text(
        "SELECT source_category FROM category_aliases "
        "WHERE source_category IN :sources"
    ).bindparams(sa.bindparam("sources", expanding=True))
    candidate_sources = [alias[0] for alias in INITIAL_ALIASES]
    present = {
        row[0]
        for row in conn.execute(lookup_stmt, {"sources": candidate_sources})
    }

    missing_rows = []
    for source, target, reason in INITIAL_ALIASES:
        if source in present:
            continue
        # Track the source so a duplicate entry later in the list is skipped,
        # matching a per-row existence check.
        present.add(source)
        missing_rows.append({"source": source, "target": target, "reason": reason})

    if not missing_rows:
        return

    insert_stmt = sa.text(
        """
        INSERT INTO category_aliases (source_category, target_category, is_active, reason, usage_count, created_at)
        VALUES (:source, :target, true, :reason, 0, NOW())
        """
    )
    # A list of parameter dicts executes the insert once per missing alias.
    conn.execute(insert_stmt, missing_rows)
    logger.info("Seeded %d missing category aliases", len(missing_rows))


def downgrade() -> None:
    """Drop the category_aliases table and its indexes, if present.

    Does nothing (beyond logging) when the table is not found in the
    ``public`` schema. All seeded and admin-created alias rows are removed
    together with the table.
    """
    conn = op.get_bind()

    # Check if table exists before dropping
    table_exists = conn.execute(
        sa.text(
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema='public' AND table_name='category_aliases'"
        )
    ).fetchone()

    if not table_exists:
        logger.info("category_aliases table does not exist, skipping drop")
        return

    # Drop indexes first (in reverse order of creation). Every drop uses
    # IF EXISTS: upgrade() skips index creation when the table already
    # existed, so any of these indexes may legitimately be absent and an
    # unconditional drop would abort the downgrade.
    op.execute(sa.text("DROP INDEX IF EXISTS ix_category_alias_source_lower"))
    op.execute(sa.text("DROP INDEX IF EXISTS ix_category_alias_target"))
    op.execute(sa.text("DROP INDEX IF EXISTS ix_category_alias_active"))
    # Note: uq_category_alias_source is auto-dropped with table

    # Drop table
    op.drop_table("category_aliases")
    logger.info("Dropped category_aliases table")
