# backend/app/core/category_normalization.py
"""
Category Normalization Layer.

Maps classifier output categories to DB-expected category names.
This avoids modifying the database while improving match metrics.

Philosophy:
- Classifier uses specific, technically correct categories
- DB may have legacy or different naming conventions
- This layer bridges the gap without data migration risk

Issue #459: Now supports database-backed aliases with fallback to static dict.
- DB aliases are preferred (allow changes without deploys)
- Static dict is fallback for robustness
"""

import logging
from typing import Dict, Optional

from sqlalchemy.orm import Session

logger = logging.getLogger(__name__)

# =============================================================================
# CATEGORY ALIASES
# Maps: Classifier Output -> DB Expected Category
# Generated from comparing classifier outputs vs DB categories
# =============================================================================
# Static fallback table: keys are classifier output names, values are the
# category names used in the DB. All keys are lowercase.
# Several keys intentionally share one target (e.g. "gases" and
# "gases_digestion"), so the mapping is many-to-one; entry order decides which
# key survives when the table is inverted by get_reverse_aliases().
CATEGORY_ALIASES: Dict[str, str] = {
    # ==========================================================================
    # VALIDATED ALIASES
    # Based on comparing classifier categories vs 200 DB categories
    # ==========================================================================

    # === Mouth ulcers (same concept, different name) ===
    "aftas_llagas": "aftas",

    # === Wound healing ===
    # DB has 598 cicatrizacion vs 33 heridas_cicatrizacion
    # Normalize classifier output to match DB majority
    "heridas_cicatrizacion": "cicatrizacion",

    # === Motion sickness (same concept) ===
    "mareo": "mareo_viaje",

    # === Vitamins ===
    "vitaminas_minerales": "vitaminas_general",
    "calcio_vitamina_d": "calcio_huesos",

    # === Disinfection = wounds/dressings ===
    "desinfeccion": "heridas_apositos",

    # === Pain/fever = pain ===
    "dolor_fiebre": "dolor",

    # === Gas ===
    "gases": "gases_flatulencia",
    "gases_digestion": "gases_flatulencia",

    # === Memory ===
    "memoria": "memoria_concentracion",

    # === Cough ===
    "tos_seca": "mucosidad_respiratoria",
    "tos_garganta": "mucosidad_respiratoria",  # DB: 2598 mucosidad_respiratoria vs 225 tos_garganta

    # === Circulation ===
    "varices": "circulacion_piernas",

    # === Dry mouth ===
    "xerostomia": "boca_seca",

    # === Tinnitus ===
    "tinnitus": "acufenos",

    # === Fungal infections ===
    "candidiasis": "hongos_vaginales",
    "hongos_piel": "hongos_pies",

    # === Collagen = joints ===
    # DB classifies collagen products as articulaciones (109 products)
    "colageno": "articulaciones",

    # === Bandages ===
    "compresion_vendajes": "vendaje_compresion",

    # === Urinary infection = cystitis ===
    "infeccion_urinaria": "cistitis",

    # === Diaper irritation = diaper dermatitis (same concept) ===
    "irritacion_panal": "dermatitis_panal",

    # === Burns ===
    # DB: 75 quemaduras_aftersun vs 2 quemaduras
    "quemaduras": "quemaduras_aftersun",

    # === Orthopedics ===
    # DB: 235 material_ortopedico vs 124 ortopedia
    "ortopedia": "material_ortopedico",

    # === Services (Issue #488: consolidate variants) ===
    # DB: 1151 servicios_farmacia vs 282 servicio
    "servicio": "servicios_farmacia",

    # === Exfoliation (DB has few rows as exfoliacion; candidate to normalize) ===
    # But limpieza_facial -> exfoliacion makes no sense
    # Keep exfoliacion when the classifier says so

    # === Vaginal dryness = intimate hygiene (in some cases) ===
    # Do not normalize: there are products specific to dryness

    # ==========================================================================
    # CATEGORIES THAT ARE NOT NORMALIZED (they exist in the DB under their name)
    # ==========================================================================
    # - colageno: DB has 66 products
    #   NOTE(review): "colageno" IS aliased to "articulaciones" above, so this
    #   entry and that alias contradict each other - confirm which is current.
    # - probioticos: DB has 346 products
    # - interdental: DB has 153 products
    # - confort_bebe: DB has 188 products
    # - contorno_ojos: DB has 118 products
    # - cepillo_electrico: DB has 36 products
    # - higiene_bebe: DB has 76 products
    # - dermatitis_panal: DB has 182 products
    # - colicos_bebe: DB has 20 products
    # - omega3: DB has 31 products (do not normalize to omega_3)
    # ==========================================================================
}


def _rollback_quietly(db: Session) -> None:
    """Roll back *db* after a failure without letting the cleanup itself raise."""
    try:
        db.rollback()
    except Exception as rollback_error:
        logger.warning(
            "Rollback after category alias failure also failed: %s",
            rollback_error,
        )


def _bump_alias_usage(db: Session, alias) -> None:
    """Best-effort increment of ``alias.usage_count``; failures are only logged."""
    try:
        # Treat a NULL counter as 0 instead of raising TypeError on None + 1.
        alias.usage_count = (alias.usage_count or 0) + 1
        db.commit()
    except Exception as usage_error:
        # The counter is bookkeeping only: undo the failed write so the
        # caller's session is not left in a failed transaction.
        _rollback_quietly(db)
        logger.warning("Could not update alias usage counter: %s", usage_error)


def normalize_category(category: str, db: Optional[Session] = None) -> str:
    """
    Normalize classifier output to match DB expected category.

    Uses database aliases if db session provided (Issue #459),
    falls back to static dict for robustness. Both lookups are
    case-insensitive on the classifier side: the category is lowercased
    before matching, so "Mareo" and "mareo" resolve to the same alias.

    Side effects when ``db`` is given: a matching alias has its
    ``usage_count`` incremented and the session is committed; on any
    database error the session is rolled back and the static dict is used.

    Args:
        category: The category from classifier
        db: Optional SQLAlchemy session for DB lookup

    Returns:
        Normalized category matching DB naming convention. When no alias
        matches, ``category`` is returned unchanged (original casing kept).
        Falsy or non-string input is returned as-is.
    """
    # Nothing to normalize for None / "" / non-string input.
    if not category or not isinstance(category, str):
        return category

    # Single lookup key shared by the DB path and the static fallback so the
    # two sources cannot disagree on casing.
    key = category.lower()

    if db is not None:
        try:
            # Import here to avoid circular dependency
            from app.models.category_alias import CategoryAlias

            alias = (
                db.query(CategoryAlias)
                .filter(
                    CategoryAlias.source_category == key,
                    CategoryAlias.is_active == True,  # noqa: E712
                )
                .first()
            )

            if alias is not None:
                # Read the target before committing: a commit or rollback may
                # expire the instance and would force an extra reload.
                target = alias.target_category
                _bump_alias_usage(db, alias)
                logger.debug(
                    "Normalized '%s' → '%s' (from DB)", category, target
                )
                return target
        except Exception as e:
            # Log error but don't fail - use fallback. Roll back first so the
            # caller can keep using the session.
            _rollback_quietly(db)
            logger.warning(
                "DB lookup failed for category '%s', using fallback: %s",
                category,
                e,
            )

    # Fallback to static dict (its keys are all lowercase).
    normalized = CATEGORY_ALIASES.get(key, category)
    if normalized != category:
        logger.debug("Normalized '%s' → '%s' (from static)", category, normalized)
    return normalized


def get_reverse_aliases() -> Dict[str, str]:
    """
    Build the inverse of the static alias table (DB name -> classifier name).

    Handy for seeing which classifier category may sit behind a DB category.
    The static table is many-to-one, so when several classifier names share
    a DB name only the one defined last in CATEGORY_ALIASES is kept.
    """
    inverse: Dict[str, str] = {}
    for classifier_name, db_name in CATEGORY_ALIASES.items():
        # Later entries overwrite earlier ones that share the same DB name.
        inverse[db_name] = classifier_name
    return inverse


# Debug helper: running this module directly dumps the static alias table,
# sorted by classifier category name.
if __name__ == "__main__":
    print("Category Normalization Mappings:")
    print("=" * 50)
    # Keys are unique, so sorting keys gives the same order as sorting items.
    for src in sorted(CATEGORY_ALIASES):
        dst = CATEGORY_ALIASES[src]
        print(f"  {src:30s} -> {dst}")
