# backend/app/services/feedback_service.py
"""
Service para Human-in-the-Loop feedback del clasificador.

Prioridades de revisión (P1 > P2 > P3):
- P1: "otros" (fallback) - El clasificador no supo qué hacer
- P2: confidence < 0.6 - Clasificación dudosa
- P3: Marcas desconocidas - Productos sin marca detectada

Issue #457: M4 Feedback Loop
"""

from datetime import datetime, timezone
from typing import Optional
from uuid import UUID

from sqlalchemy import and_, case, cast, exists, func, or_, select, String
from sqlalchemy.orm import Session, joinedload, lazyload, load_only

from app.models.product_correction import (
    CorrectionType,
    OutlierReason,
    OutlierStatus,
    PredictionSource,
    ProductCorrection,
)
from app.models.sales_enrichment import SalesEnrichment


class FeedbackService:
    """Servicio para gestión de feedback humano del clasificador."""

    # Umbrales de confianza
    # 2025-12-20: Bajado de 0.85 a 0.75, luego a 0.60 basado en análisis:
    # - 90% de productos P2 estaban bien clasificados
    # - Añadidas 60+ reglas de marcas para cubrir casos específicos
    # - Los restantes tienen clasificación correcta, solo baja confianza
    LOW_CONFIDENCE_THRESHOLD = 0.60
    VERY_LOW_CONFIDENCE_THRESHOLD = 0.4

    # Categorías que indican fallback del clasificador
    # AMPLIADO: Incluir categorías demasiado genéricas que necesitan revisión
    FALLBACK_CATEGORIES = {
        # Fallbacks explícitos
        "otros", "parafarmacia_otros", "unknown", "sin_clasificar",
        # Categorías demasiado genéricas (deberían ser más específicas)
        "dolor",  # Debería ser dolor_muscular, dolor_cabeza, etc.
        "vitaminas_general",  # Debería ser vitaminas_minerales, hierro, etc.
        "interno_no_venta",  # Debería ser servicios_farmacia
        "higiene_general",  # Debería ser higiene_bucal, higiene_intima, etc.
    }

    def __init__(self, db: Session) -> None:
        """Store the SQLAlchemy session used by every query in this service.

        Args:
            db: Session object; the methods of this class call ``query``,
                ``get``, ``add``, ``delete``, ``commit``, ``rollback`` and
                ``refresh`` on it.
        """
        self.db = db

    def get_review_queue(
        self,
        pharmacy_id: Optional[UUID] = None,
        limit: int = 50,
        offset: int = 0,
        priority: Optional[str] = None,  # "P1", "P2", "P3" o None para todos
        days_back: Optional[int] = None,  # Filtrar productos recientes (ej: 7 = última semana)
        unique_products: bool = True,  # Issue #457: Deduplicar por product_name
    ) -> dict:
        """
        Obtiene cola de productos pendientes de revisión.

        Prioridades:
        - P1: Fallback ("otros") - Máxima prioridad
        - P2: Baja confianza (<0.6) - Media prioridad
        - P3: Sin marca detectada - Baja prioridad

        Args:
            days_back: Si se especifica, solo devuelve productos enriquecidos
                       en los últimos N días (para revisión semanal automatizada)

        Returns:
            dict con items, total, y breakdown por prioridad
        """
        # Query base: productos con clasificación venta_libre
        # que no tengan corrección registrada (excluyendo SKIP)
        # FIXED: Use correct column names (product_type, ml_category, ml_confidence)
        # PERF: Use NOT EXISTS instead of NOT IN (P0 fix - single execution vs 4x)
        # PERF: Optimized eager loading - only fetch product_name (P2 fix)
        # Issue #457: SKIP corrections don't count as "processed" - they go to end of queue
        exists_real_correction = select(ProductCorrection.id).where(
            and_(
                ProductCorrection.sales_enrichment_id == SalesEnrichment.id,
                cast(ProductCorrection.correction_type, String) != "skip",
            )
        )
        base_query = (
            self.db.query(SalesEnrichment)
            .options(
                # Only load product_name from sales_data (avoid 4 automatic JOINs)
                joinedload(SalesEnrichment.sales_data),
                lazyload(SalesEnrichment.product_catalog),
                lazyload(SalesEnrichment.intercambiable_group),
                lazyload(SalesEnrichment.product_cluster),
            )
            .filter(SalesEnrichment.product_type == "venta_libre")
            .filter(~exists(exists_real_correction))
        )

        if pharmacy_id:
            from app.models.sales_data import SalesData
            base_query = base_query.join(SalesData).filter(
                SalesData.pharmacy_id == pharmacy_id
            )

        # Filtrar por productos recientes (para revisión semanal automatizada)
        if days_back is not None and days_back > 0:
            from datetime import datetime, timedelta, timezone
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
            base_query = base_query.filter(
                SalesEnrichment.enriched_at >= cutoff_date
            )

        # Calcular prioridad con CASE
        # FIXED: Use ml_category and ml_confidence instead of non-existent columns
        priority_case = case(
            # P1: Fallback categories
            (
                or_(
                    SalesEnrichment.ml_category.in_(self.FALLBACK_CATEGORIES),
                    SalesEnrichment.ml_category.is_(None),
                ),
                1,
            ),
            # P2: Low confidence
            (
                and_(
                    SalesEnrichment.ml_confidence.isnot(None),
                    SalesEnrichment.ml_confidence < self.LOW_CONFIDENCE_THRESHOLD,
                ),
                2,
            ),
            # P3: Resto
            else_=3,
        )

        # Issue #457: Detectar si producto tiene SKIP para ordenar al final
        exists_skip_correction = select(ProductCorrection.id).where(
            and_(
                ProductCorrection.sales_enrichment_id == SalesEnrichment.id,
                cast(ProductCorrection.correction_type, String) == "skip",
            )
        )
        # 0 = sin skip (primero), 1 = con skip (al final)
        skip_order_case = case(
            (exists(exists_skip_correction), 1),
            else_=0,
        )

        # Filtrar por prioridad específica si se solicita
        if priority == "P1":
            base_query = base_query.filter(
                or_(
                    SalesEnrichment.ml_category.in_(self.FALLBACK_CATEGORIES),
                    SalesEnrichment.ml_category.is_(None),
                )
            )
        elif priority == "P2":
            base_query = base_query.filter(
                and_(
                    ~SalesEnrichment.ml_category.in_(self.FALLBACK_CATEGORIES),
                    SalesEnrichment.ml_category.isnot(None),
                    SalesEnrichment.ml_confidence.isnot(None),
                    SalesEnrichment.ml_confidence < self.LOW_CONFIDENCE_THRESHOLD,
                )
            )
        elif priority == "P3":
            base_query = base_query.filter(
                and_(
                    ~SalesEnrichment.ml_category.in_(self.FALLBACK_CATEGORIES),
                    SalesEnrichment.ml_category.isnot(None),
                    or_(
                        SalesEnrichment.ml_confidence.is_(None),
                        SalesEnrichment.ml_confidence >= self.LOW_CONFIDENCE_THRESHOLD,
                    ),
                )
            )

        # Total y breakdown
        total = base_query.count()

        # Issue #457: Contar productos únicos si se pide deduplicación
        # PERF: Solo calcular unique_total cuando priority=None (para "Todas")
        # Para filtros específicos, no necesitamos el conteo exacto (muy lento con COUNT DISTINCT)
        unique_total = None
        if unique_products and priority is None:
            from app.models.sales_data import SalesData
            unique_total = (
                self.db.query(func.count(func.distinct(SalesData.product_name)))
                .select_from(SalesEnrichment)
                .join(SalesData, SalesData.id == SalesEnrichment.sales_data_id)
                .filter(SalesEnrichment.product_type == "venta_libre")
                .filter(~exists(exists_real_correction))
            )
            # Aplicar mismos filtros que base_query (pharmacy_id, days_back)
            if pharmacy_id:
                unique_total = unique_total.filter(SalesData.pharmacy_id == pharmacy_id)
            if days_back is not None and days_back > 0:
                from datetime import timedelta
                cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
                unique_total = unique_total.filter(SalesEnrichment.enriched_at >= cutoff_date)
            unique_total = unique_total.scalar() or 0

        # Breakdown por prioridad (solo si no hay filtro)
        # Issue #457: Mostrar productos ÚNICOS por prioridad (no raw records)
        breakdown = {}
        if priority is None:
            from app.models.sales_data import SalesData

            # P1 condition: fallback categories or None
            p1_condition = or_(
                SalesEnrichment.ml_category.in_(self.FALLBACK_CATEGORIES),
                SalesEnrichment.ml_category.is_(None),
            )
            # P2 condition: low confidence (not P1)
            p2_condition = and_(
                ~SalesEnrichment.ml_category.in_(self.FALLBACK_CATEGORIES),
                SalesEnrichment.ml_category.isnot(None),
                SalesEnrichment.ml_confidence.isnot(None),
                SalesEnrichment.ml_confidence < self.LOW_CONFIDENCE_THRESHOLD,
            )
            # P3 condition: rest (high confidence, specific category)
            p3_condition = and_(
                ~SalesEnrichment.ml_category.in_(self.FALLBACK_CATEGORIES),
                SalesEnrichment.ml_category.isnot(None),
                or_(
                    SalesEnrichment.ml_confidence.is_(None),
                    SalesEnrichment.ml_confidence >= self.LOW_CONFIDENCE_THRESHOLD,
                ),
            )

            # Count UNIQUE products per priority (consistent with unique_total)
            base_breakdown = (
                self.db.query(func.count(func.distinct(SalesData.product_name)))
                .select_from(SalesEnrichment)
                .join(SalesData, SalesData.id == SalesEnrichment.sales_data_id)
                .filter(SalesEnrichment.product_type == "venta_libre")
                .filter(~exists(exists_real_correction))
            )

            p1_count = base_breakdown.filter(p1_condition).scalar() or 0
            p2_count = base_breakdown.filter(p2_condition).scalar() or 0
            p3_count = base_breakdown.filter(p3_condition).scalar() or 0

            breakdown = {
                "P1_fallback": p1_count,
                "P2_low_confidence": p2_count,
                "P3_other": p3_count,
            }

        # Ordenar por prioridad y obtener items
        # DIAGNÓSTICO #3: Si P1 y P2 están vacíos, muestrear P3 aleatoriamente
        # para auditoría (evitar "alucinación masiva" del clasificador)
        p1_empty = breakdown.get("P1_fallback", 0) == 0 if breakdown else True
        p2_empty = breakdown.get("P2_low_confidence", 0) == 0 if breakdown else True

        # Issue #457: Deduplicar por product_name (evitar ver mismo producto 4841 veces)
        # OPTIMIZADO: Usar DISTINCT en query final en lugar de subquery pesada
        if unique_products:
            from app.models.sales_data import SalesData
            from sqlalchemy.orm import aliased

            # En lugar de subquery con GROUP BY (lento), usamos DISTINCT en la query final
            # y confiamos en que el ORDER BY + LIMIT nos dará items únicos suficientes
            # La deduplicación real se hace en memoria para el pequeño batch solicitado
            pass  # La deduplicación se aplicará después del fetch

        if p1_empty and p2_empty and priority is None:
            # Muestreo aleatorio de P3 para auditoría
            # Issue #457: Pero SKIP siempre al final
            from sqlalchemy.sql.expression import func as sql_func
            raw_items = (
                base_query.order_by(skip_order_case, sql_func.random())  # SKIP al final, luego aleatorio
                .limit(limit * 10 if unique_products else limit)  # Fetch more for dedup
                .all()
            )
            # Marcar que es muestreo de auditoría
            breakdown["audit_sampling"] = True
        else:
            # Orden normal por prioridad
            # Issue #457: SKIP siempre al final (skip_order_case=1), luego prioridad normal
            raw_items = (
                base_query.order_by(skip_order_case, priority_case, SalesEnrichment.created_at.desc())
                .offset(offset)
                .limit(limit * 10 if unique_products else limit)  # Fetch more for dedup
                .all()
            )

        # Issue #457: Deduplicación en memoria (mucho más rápido que SQL GROUP BY)
        if unique_products:
            seen_names = set()
            items = []
            for item in raw_items:
                product_name = item.sales_data.product_name if item.sales_data else None
                if product_name and product_name not in seen_names:
                    seen_names.add(product_name)
                    items.append(item)
                    if len(items) >= limit:
                        break
        else:
            items = raw_items[:limit]

        result = {
            "items": [self._serialize_for_review(item) for item in items],
            "total": total,
            "breakdown": breakdown,
            "offset": offset,
            "limit": limit,
        }
        # Issue #457: Añadir conteo de productos únicos
        if unique_total is not None:
            result["unique_total"] = unique_total
        return result

    def _serialize_for_review(self, enrichment: SalesEnrichment) -> dict:
        """Serializa un SalesEnrichment para la cola de revisión."""
        # FIXED: Access product_name through sales_data relationship
        product_name = None
        if enrichment.sales_data:
            product_name = enrichment.sales_data.product_name

        # FIXED: Use correct column names
        return {
            "id": str(enrichment.id),
            "product_name": product_name,
            "current_category": enrichment.ml_category,
            "confidence_score": float(enrichment.ml_confidence) if enrichment.ml_confidence else None,
            "prediction_source": self._infer_prediction_source(enrichment),
            "brand_detected": enrichment.detected_brand,
            "sales_data_id": str(enrichment.sales_data_id) if enrichment.sales_data_id else None,
            "created_at": enrichment.created_at.isoformat() if enrichment.created_at else None,
        }

    def _infer_prediction_source(self, enrichment: SalesEnrichment) -> str:
        """Infiere la fuente de predicción basándose en el estado del enrichment."""
        # FIXED: Use correct column names (ml_category, detected_brand)
        if enrichment.ml_category in self.FALLBACK_CATEGORIES or enrichment.ml_category is None:
            return PredictionSource.FALLBACK.value
        if enrichment.detected_brand:
            return PredictionSource.BRAND.value
        # Por defecto, asumimos tier1_generic
        return PredictionSource.TIER1_GENERIC.value

    def _get_product_name(self, enrichment: SalesEnrichment) -> str:
        """Helper to get product name from sales_data relationship."""
        if enrichment.sales_data:
            return enrichment.sales_data.product_name or ""
        return ""

    def approve_classification(
        self,
        sales_enrichment_id: UUID,
        pharmacy_id: Optional[UUID] = None,
        reviewer_notes: Optional[str] = None,
    ) -> ProductCorrection:
        """
        Aprueba la clasificación actual del sistema.

        El farmacéutico confirma que el clasificador acertó.
        """
        # FIXED: Use db.get() instead of deprecated query().get()
        enrichment = self.db.get(SalesEnrichment, sales_enrichment_id)
        if not enrichment:
            raise ValueError(f"SalesEnrichment {sales_enrichment_id} not found")

        # Check for existing correction
        existing = (
            self.db.query(ProductCorrection)
            .filter(ProductCorrection.sales_enrichment_id == sales_enrichment_id)
            .first()
        )
        if existing:
            raise ValueError(f"Correction already exists for SalesEnrichment {sales_enrichment_id}")

        # Use .value for string columns (avoid SQLAlchemy enum name/value mismatch)
        correction = ProductCorrection(
            sales_enrichment_id=sales_enrichment_id,
            product_name=self._get_product_name(enrichment),
            predicted_category=enrichment.ml_category or "unknown",
            prediction_source=self._infer_prediction_source(enrichment),  # Already returns string
            confidence_score=float(enrichment.ml_confidence) if enrichment.ml_confidence else 0.0,
            correction_type=CorrectionType.APPROVE.value,  # Use .value for string column
            corrected_category=None,  # No hay corrección, sistema acertó
            reviewer_notes=reviewer_notes,
            pharmacy_id=pharmacy_id,
            created_at=datetime.now(timezone.utc),
            processed_for_rules=False,
        )

        try:
            self.db.add(correction)
            self.db.commit()
            self.db.refresh(correction)
        except Exception:
            self.db.rollback()
            raise

        return correction

    def correct_classification(
        self,
        sales_enrichment_id: UUID,
        corrected_category: str,
        pharmacy_id: Optional[UUID] = None,
        reviewer_notes: Optional[str] = None,
    ) -> ProductCorrection:
        """
        Corrige la clasificación del sistema.

        El farmacéutico indica la categoría correcta.
        """
        # FIXED: Use db.get() instead of deprecated query().get()
        enrichment = self.db.get(SalesEnrichment, sales_enrichment_id)
        if not enrichment:
            raise ValueError(f"SalesEnrichment {sales_enrichment_id} not found")

        # Check for existing correction
        existing = (
            self.db.query(ProductCorrection)
            .filter(ProductCorrection.sales_enrichment_id == sales_enrichment_id)
            .first()
        )
        if existing:
            raise ValueError(f"Correction already exists for SalesEnrichment {sales_enrichment_id}")

        # Use .value for string columns (avoid SQLAlchemy enum name/value mismatch)
        correction = ProductCorrection(
            sales_enrichment_id=sales_enrichment_id,
            product_name=self._get_product_name(enrichment),
            predicted_category=enrichment.ml_category or "unknown",
            prediction_source=self._infer_prediction_source(enrichment),  # Already returns string
            confidence_score=float(enrichment.ml_confidence) if enrichment.ml_confidence else 0.0,
            correction_type=CorrectionType.CORRECT.value,  # Use .value for string column
            corrected_category=corrected_category,
            reviewer_notes=reviewer_notes,
            pharmacy_id=pharmacy_id,
            created_at=datetime.now(timezone.utc),
            processed_for_rules=False,
        )

        try:
            self.db.add(correction)

            # FIXED: Update with correct column names
            enrichment.ml_category = corrected_category
            enrichment.ml_confidence = 1.0  # Confianza máxima (humano)

            self.db.commit()
            self.db.refresh(correction)
        except Exception:
            self.db.rollback()
            raise

        return correction

    def mark_as_outlier(
        self,
        sales_enrichment_id: UUID,
        pharmacy_id: Optional[UUID] = None,
        reviewer_notes: Optional[str] = None,
        outlier_reason: Optional[OutlierReason] = None,
    ) -> ProductCorrection:
        """
        Marca un producto como outlier (no encaja en ninguna categoría).

        Issue #457: Now includes outlier_reason and outlier_status for
        later investigation by dev team.

        Args:
            sales_enrichment_id: Product to mark
            pharmacy_id: Optional pharmacy context
            reviewer_notes: Optional notes from reviewer
            outlier_reason: Why this is an outlier (pack, ambiguous, etc.)

        Returns:
            Created ProductCorrection
        """
        enrichment = self.db.get(SalesEnrichment, sales_enrichment_id)
        if not enrichment:
            raise ValueError(f"SalesEnrichment {sales_enrichment_id} not found")

        # Check for existing correction
        existing = (
            self.db.query(ProductCorrection)
            .filter(ProductCorrection.sales_enrichment_id == sales_enrichment_id)
            .first()
        )
        if existing:
            raise ValueError(f"Correction already exists for SalesEnrichment {sales_enrichment_id}")

        # Use .value for string columns (avoid SQLAlchemy enum name/value mismatch)
        correction = ProductCorrection(
            sales_enrichment_id=sales_enrichment_id,
            product_name=self._get_product_name(enrichment),
            predicted_category=enrichment.ml_category or "unknown",
            prediction_source=self._infer_prediction_source(enrichment),  # Already returns string
            confidence_score=float(enrichment.ml_confidence) if enrichment.ml_confidence else 0.0,
            correction_type=CorrectionType.OUTLIER.value,  # Use .value for string column
            corrected_category=None,  # Outlier no tiene categoría correcta
            reviewer_notes=reviewer_notes,
            pharmacy_id=pharmacy_id,
            created_at=datetime.now(timezone.utc),
            processed_for_rules=False,
            # Issue #457: Outlier investigation fields - use .value for string columns
            outlier_reason=(outlier_reason.value if outlier_reason else OutlierReason.OTRO.value),
            outlier_status=OutlierStatus.PENDING_REVIEW.value,
        )

        try:
            self.db.add(correction)
            self.db.commit()
            self.db.refresh(correction)
        except Exception:
            self.db.rollback()
            raise

        return correction

    def skip_product(
        self,
        sales_enrichment_id: UUID,
        pharmacy_id: Optional[UUID] = None,
        reviewer_notes: Optional[str] = None,
    ) -> ProductCorrection:
        """
        Salta un producto temporalmente, enviándolo al final de la cola.

        Issue #457: El producto no se pierde, simplemente se marca como
        "skip" y aparecerá al final de la cola de revisión.

        Args:
            sales_enrichment_id: Product to skip
            pharmacy_id: Optional pharmacy context
            reviewer_notes: Optional reason for skipping

        Returns:
            Created ProductCorrection with type SKIP
        """
        enrichment = self.db.get(SalesEnrichment, sales_enrichment_id)
        if not enrichment:
            raise ValueError(f"SalesEnrichment {sales_enrichment_id} not found")

        # Check for existing correction - if SKIP, update timestamp; else error
        existing = (
            self.db.query(ProductCorrection)
            .filter(ProductCorrection.sales_enrichment_id == sales_enrichment_id)
            .first()
        )
        if existing:
            if existing.correction_type == CorrectionType.SKIP.value:
                # Already skipped - update timestamp to push to end of queue again
                existing.created_at = datetime.now(timezone.utc)
                if reviewer_notes:
                    existing.reviewer_notes = reviewer_notes
                self.db.commit()
                self.db.refresh(existing)
                return existing
            else:
                raise ValueError(f"Correction already exists for SalesEnrichment {sales_enrichment_id}")

        correction = ProductCorrection(
            sales_enrichment_id=sales_enrichment_id,
            product_name=self._get_product_name(enrichment),
            predicted_category=enrichment.ml_category or "unknown",
            prediction_source=self._infer_prediction_source(enrichment),
            confidence_score=float(enrichment.ml_confidence) if enrichment.ml_confidence else 0.0,
            correction_type=CorrectionType.SKIP.value,
            corrected_category=None,  # Skip no tiene categoría
            reviewer_notes=reviewer_notes,
            pharmacy_id=pharmacy_id,
            created_at=datetime.now(timezone.utc),
            processed_for_rules=False,
        )

        try:
            self.db.add(correction)
            self.db.commit()
            self.db.refresh(correction)
        except Exception:
            self.db.rollback()
            raise

        return correction

    def unskip_product(self, sales_enrichment_id: UUID) -> bool:
        """
        Elimina el skip de un producto para que vuelva a la cola normal.

        Returns:
            True if unskipped, False if not found or not skipped
        """
        existing = (
            self.db.query(ProductCorrection)
            .filter(ProductCorrection.sales_enrichment_id == sales_enrichment_id)
            .filter(cast(ProductCorrection.correction_type, String) == "skip")
            .first()
        )
        if existing:
            self.db.delete(existing)
            self.db.commit()
            return True
        return False

    def get_corrections_stats(self, pharmacy_id: Optional[UUID] = None) -> dict:
        """Obtiene estadísticas de correcciones."""
        query = self.db.query(ProductCorrection)

        if pharmacy_id:
            query = query.filter(ProductCorrection.pharmacy_id == pharmacy_id)

        total = query.count()
        # Use cast() to convert enum to string for comparison
        # PostgreSQL stores lowercase values but SQLAlchemy sends enum NAMES (uppercase)
        approved = query.filter(
            cast(ProductCorrection.correction_type, String) == "approve"
        ).count()
        corrected = query.filter(
            cast(ProductCorrection.correction_type, String) == "correct"
        ).count()
        outliers = query.filter(
            cast(ProductCorrection.correction_type, String) == "outlier"
        ).count()

        # Accuracy del clasificador basada en correcciones
        accuracy = approved / total if total > 0 else None

        return {
            "total_corrections": total,
            "approved": approved,
            "corrected": corrected,
            "outliers": outliers,
            "classifier_accuracy": round(accuracy * 100, 1) if accuracy else None,
        }

    def get_actionable_corrections(self, limit: int = 100) -> list:
        """
        Obtiene correcciones que pueden generar nuevas reglas.

        Solo correcciones de tipo CORRECT que no se han procesado aún.
        """
        return (
            self.db.query(ProductCorrection)
            .filter(cast(ProductCorrection.correction_type, String) == "correct")
            .filter(ProductCorrection.processed_for_rules.is_(False))
            .filter(ProductCorrection.corrected_category.isnot(None))
            .order_by(ProductCorrection.created_at.desc())
            .limit(limit)
            .all()
        )

    def mark_as_processed(
        self, correction_id: int, suggested_rule: Optional[str] = None
    ) -> None:
        """Marca una corrección como procesada para generación de reglas."""
        correction = self.db.get(ProductCorrection, correction_id)
        if correction:
            correction.processed_for_rules = True
            correction.processed_at = datetime.now(timezone.utc)
            if suggested_rule:
                correction.suggested_rule = suggested_rule
            try:
                self.db.commit()
            except Exception:
                self.db.rollback()
                raise

    # ==========================================================================
    # Issue #457: OUTLIER INVESTIGATION SYSTEM
    # ==========================================================================

    def get_outliers_pending(
        self,
        status: Optional[OutlierStatus] = None,
        reason: Optional[OutlierReason] = None,
        limit: int = 50,
        offset: int = 0,
    ) -> dict:
        """
        Obtiene outliers pendientes de investigación.

        Issue #457: Los outliers ya no son "basura en la BD". Ahora tienen
        un flujo de investigación que permite detectar categorías faltantes.

        Args:
            status: Filtrar por estado (None = pending + investigating)
            reason: Filtrar por razón de outlier
            limit: Máximo de resultados
            offset: Paginación

        Returns:
            dict con items, total, y breakdown por status/reason
        """
        # Use cast() to convert enum to string for comparison
        # PostgreSQL stores lowercase values but SQLAlchemy sends enum NAMES (uppercase)
        query = (
            self.db.query(ProductCorrection)
            .filter(cast(ProductCorrection.correction_type, String) == "outlier")
        )

        # Filter by status - use cast() for enum comparison
        if status:
            query = query.filter(
                cast(ProductCorrection.outlier_status, String) == status.value
            )
        else:
            # Default: show pending and investigating (not resolved)
            query = query.filter(
                or_(
                    cast(ProductCorrection.outlier_status, String) == "pending_review",
                    cast(ProductCorrection.outlier_status, String) == "investigating",
                    ProductCorrection.outlier_status.is_(None),  # Legacy
                )
            )

        # Filter by reason - use cast() for enum comparison
        if reason:
            query = query.filter(
                cast(ProductCorrection.outlier_reason, String) == reason.value
            )

        total = query.count()

        # Get items
        items = (
            query.order_by(ProductCorrection.created_at.desc())
            .offset(offset)
            .limit(limit)
            .all()
        )

        # Breakdown by status and reason - use cast() for enum comparison
        status_breakdown = {}
        for s in OutlierStatus:
            count = (
                self.db.query(func.count(ProductCorrection.id))
                .filter(cast(ProductCorrection.correction_type, String) == "outlier")
                .filter(cast(ProductCorrection.outlier_status, String) == s.value)
                .scalar()
            )
            status_breakdown[s.value] = count or 0

        reason_breakdown = {}
        for r in OutlierReason:
            count = (
                self.db.query(func.count(ProductCorrection.id))
                .filter(cast(ProductCorrection.correction_type, String) == "outlier")
                .filter(cast(ProductCorrection.outlier_reason, String) == r.value)
                .scalar()
            )
            reason_breakdown[r.value] = count or 0

        return {
            "items": [self._serialize_outlier(item) for item in items],
            "total": total,
            "status_breakdown": status_breakdown,
            "reason_breakdown": reason_breakdown,
            "offset": offset,
            "limit": limit,
        }

    def _serialize_outlier(self, correction: ProductCorrection) -> dict:
        """Serializa un outlier para la API."""
        # prediction_source is stored as string in DB, handle both enum and string
        prediction_source = correction.prediction_source
        if prediction_source and hasattr(prediction_source, 'value'):
            prediction_source = prediction_source.value

        # outlier_reason/status may be enum or string depending on how created
        outlier_reason = correction.outlier_reason
        if outlier_reason and hasattr(outlier_reason, 'value'):
            outlier_reason = outlier_reason.value

        outlier_status = correction.outlier_status
        if outlier_status and hasattr(outlier_status, 'value'):
            outlier_status = outlier_status.value

        return {
            "id": correction.id,
            "product_name": correction.product_name,
            "predicted_category": correction.predicted_category,
            "prediction_source": prediction_source,
            "confidence_score": correction.confidence_score,
            "outlier_reason": outlier_reason,
            "outlier_status": outlier_status,
            "reviewer_notes": correction.reviewer_notes,
            "outlier_resolution_notes": correction.outlier_resolution_notes,
            "created_at": correction.created_at.isoformat() if correction.created_at else None,
            "outlier_resolved_at": correction.outlier_resolved_at.isoformat() if correction.outlier_resolved_at else None,
        }

    def resolve_outlier(
        self,
        correction_id: int,
        new_status: OutlierStatus,
        resolution_notes: Optional[str] = None,
        new_category: Optional[str] = None,
    ) -> ProductCorrection:
        """
        Resolve an outlier by recording its new status.

        Issue #457: resolution flow:
        - RESOLVED_NEW_CATEGORY: a new category was created for this kind of product
        - RESOLVED_CORRECTED: it was classifiable; the classifier was corrected
        - TRUE_OUTLIER: confirmed as not classifiable (pack, discontinued)

        The resolution notes and the resolution timestamp (current UTC time)
        are always overwritten, whatever status is passed.

        Args:
            correction_id: ID of the outlier
            new_status: Status to store
            resolution_notes: Resolution notes (stored as-is, including None)
            new_category: Category that was assigned; only stored when the
                status is RESOLVED_CORRECTED and the value is non-empty

        Returns:
            The updated ProductCorrection, refreshed after commit

        Raises:
            ValueError: If the record does not exist or is not an outlier
        """
        outlier = self.db.get(ProductCorrection, correction_id)
        if not outlier:
            raise ValueError(f"ProductCorrection {correction_id} not found")
        if not outlier.is_outlier:
            raise ValueError(f"ProductCorrection {correction_id} is not an outlier")

        # The status column stores strings, so persist the enum's value.
        outlier.outlier_status = new_status.value
        outlier.outlier_resolved_at = datetime.now(timezone.utc)
        outlier.outlier_resolution_notes = resolution_notes

        # Only a "corrected" resolution carries a replacement category.
        marked_as_corrected = new_status == OutlierStatus.RESOLVED_CORRECTED
        if marked_as_corrected and new_category:
            outlier.corrected_category = new_category

        try:
            self.db.commit()
            self.db.refresh(outlier)
        except Exception:
            self.db.rollback()
            raise

        return outlier

    def update_outlier_status(
        self,
        correction_id: int,
        new_status: OutlierStatus,
    ) -> ProductCorrection:
        """
        Change only the status of an outlier, without resolving it.

        Useful for marking an outlier as "investigating" without closing it:
        resolution notes, resolution timestamp and corrected category are
        left untouched.

        Args:
            correction_id: ID of the outlier
            new_status: Status to store

        Returns:
            The updated ProductCorrection, refreshed after commit

        Raises:
            ValueError: If the record does not exist or is not an outlier
        """
        outlier = self.db.get(ProductCorrection, correction_id)
        if not outlier:
            raise ValueError(f"ProductCorrection {correction_id} not found")
        if not outlier.is_outlier:
            raise ValueError(f"ProductCorrection {correction_id} is not an outlier")

        # The status column stores strings, so persist the enum's value.
        status_value = new_status.value
        outlier.outlier_status = status_value

        try:
            self.db.commit()
            self.db.refresh(outlier)
        except Exception:
            self.db.rollback()
            raise

        return outlier

    # ==========================================================================
    # Issue #457: BULK OPERATIONS BY PRODUCT NAME
    # When a product has multiple SalesEnrichment records (e.g., same product
    # sold multiple times), applying correction to ONE leaves others in queue.
    # These methods apply corrections to ALL records with the same product_name.
    # ==========================================================================

    def _get_all_enrichments_by_product_name(
        self, sales_enrichment_id: UUID
    ) -> list[SalesEnrichment]:
        """
        Collect the SalesEnrichment rows that share a product_name with the given one.

        Backs the bulk operations: classifying one occurrence of a product
        should apply to every occurrence of that product.

        Args:
            sales_enrichment_id: ID of the enrichment whose product_name is
                used as the search key.

        Returns:
            - ``[]`` when the enrichment does not exist or has no sales_data.
            - ``[enrichment]`` when its product_name is empty, or when the
              search finds no rows.
            - Otherwise every "venta_libre" enrichment with the same
              product_name that has no ProductCorrection yet.
        """
        from app.models.sales_data import SalesData

        # Resolve the product_name of the enrichment we were handed.
        seed = self.db.get(SalesEnrichment, sales_enrichment_id)
        if not seed or not seed.sales_data:
            return []

        target_name = seed.sales_data.product_name
        if not target_name:
            # Nothing to match on: fall back to just this one record.
            return [seed]

        # Correlated subquery: is there a correction pointing at this enrichment row?
        correction_for_row = select(ProductCorrection.id).where(
            ProductCorrection.sales_enrichment_id == SalesEnrichment.id
        )

        uncorrected_same_name = (
            self.db.query(SalesEnrichment)
            .join(SalesData, SalesData.id == SalesEnrichment.sales_data_id)
            .filter(
                SalesData.product_name == target_name,
                SalesEnrichment.product_type == "venta_libre",
                ~exists(correction_for_row),
            )
        )

        return uncorrected_same_name.all() or [seed]

    def bulk_approve_by_product_name(
        self,
        sales_enrichment_id: UUID,
        pharmacy_id: Optional[UUID] = None,
        reviewer_notes: Optional[str] = None,
    ) -> int:
        """
        Approve the classification of ALL SalesEnrichment records that share
        the product_name of the given enrichment.

        One APPROVE ProductCorrection is added for every matching enrichment
        that does not already have a correction, and the whole batch is
        committed together. If anything fails while the batch is being built
        or committed, the session is rolled back so no partial batch stays
        pending.

        Args:
            sales_enrichment_id: Enrichment whose product_name selects the batch.
            pharmacy_id: Stored on every created correction.
            reviewer_notes: Stored on every created correction.

        Returns:
            Number of corrections created (0 when there was nothing to approve).

        Raises:
            Exception: Any error raised while building or committing the
                batch is re-raised after the rollback.
        """
        enrichments = self._get_all_enrichments_by_product_name(sales_enrichment_id)
        approved = 0

        # The per-row existence query below runs while earlier add()s are
        # still pending, so (with autoflush enabled) a flush error can surface
        # inside the loop, not only at commit(). Keep the whole write phase
        # under the rollback handler.
        try:
            for enrichment in enrichments:
                # Skip if already has a correction (the lookup helper can fall
                # back to returning the seed enrichment unfiltered).
                existing = (
                    self.db.query(ProductCorrection)
                    .filter(ProductCorrection.sales_enrichment_id == enrichment.id)
                    .first()
                )
                if existing:
                    continue

                self.db.add(
                    ProductCorrection(
                        sales_enrichment_id=enrichment.id,
                        product_name=self._get_product_name(enrichment),
                        predicted_category=enrichment.ml_category or "unknown",
                        prediction_source=self._infer_prediction_source(enrichment),
                        confidence_score=float(enrichment.ml_confidence) if enrichment.ml_confidence else 0.0,
                        correction_type=CorrectionType.APPROVE.value,
                        corrected_category=None,
                        reviewer_notes=reviewer_notes,
                        pharmacy_id=pharmacy_id,
                        created_at=datetime.now(timezone.utc),
                        processed_for_rules=False,
                    )
                )
                approved += 1

            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

        return approved

    def bulk_correct_by_product_name(
        self,
        sales_enrichment_id: UUID,
        corrected_category: str,
        pharmacy_id: Optional[UUID] = None,
        reviewer_notes: Optional[str] = None,
    ) -> int:
        """
        Correct the classification of ALL SalesEnrichment records that share
        the product_name of the given enrichment.

        For every matching enrichment without a correction, a CORRECT
        ProductCorrection is added and the enrichment itself is updated to
        ``corrected_category`` with confidence 1.0. The whole batch is
        committed together. If anything fails while the batch is being built
        or committed, the session is rolled back so neither the new
        corrections nor the enrichment changes stay pending.

        Args:
            sales_enrichment_id: Enrichment whose product_name selects the batch.
            corrected_category: Category to assign to every record in the batch.
            pharmacy_id: Stored on every created correction.
            reviewer_notes: Stored on every created correction.

        Returns:
            Number of records corrected (0 when there was nothing to correct).

        Raises:
            Exception: Any error raised while building or committing the
                batch is re-raised after the rollback.
        """
        enrichments = self._get_all_enrichments_by_product_name(sales_enrichment_id)
        corrected = 0

        # The per-row existence query below runs while earlier add()s and
        # enrichment edits are still pending, so (with autoflush enabled) a
        # flush error can surface inside the loop, not only at commit(). Keep
        # the whole write phase under the rollback handler.
        try:
            for enrichment in enrichments:
                # Skip if already has a correction (the lookup helper can fall
                # back to returning the seed enrichment unfiltered).
                existing = (
                    self.db.query(ProductCorrection)
                    .filter(ProductCorrection.sales_enrichment_id == enrichment.id)
                    .first()
                )
                if existing:
                    continue

                # Build the correction BEFORE touching the enrichment: it must
                # capture the classifier's original category and confidence,
                # which are overwritten just below.
                self.db.add(
                    ProductCorrection(
                        sales_enrichment_id=enrichment.id,
                        product_name=self._get_product_name(enrichment),
                        predicted_category=enrichment.ml_category or "unknown",
                        prediction_source=self._infer_prediction_source(enrichment),
                        confidence_score=float(enrichment.ml_confidence) if enrichment.ml_confidence else 0.0,
                        correction_type=CorrectionType.CORRECT.value,
                        corrected_category=corrected_category,
                        reviewer_notes=reviewer_notes,
                        pharmacy_id=pharmacy_id,
                        created_at=datetime.now(timezone.utc),
                        processed_for_rules=False,
                    )
                )

                # Update the enrichment with the corrected category
                enrichment.ml_category = corrected_category
                enrichment.ml_confidence = 1.0  # Human confidence

                corrected += 1

            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

        return corrected

    def bulk_mark_outlier_by_product_name(
        self,
        sales_enrichment_id: UUID,
        pharmacy_id: Optional[UUID] = None,
        reviewer_notes: Optional[str] = None,
        outlier_reason: Optional[OutlierReason] = None,
    ) -> int:
        """
        Mark ALL SalesEnrichment records that share the product_name of the
        given enrichment as outliers.

        One OUTLIER ProductCorrection (status PENDING_REVIEW) is added for
        every matching enrichment that does not already have a correction,
        and the whole batch is committed together. If anything fails while
        the batch is being built or committed, the session is rolled back so
        no partial batch stays pending.

        Args:
            sales_enrichment_id: Enrichment whose product_name selects the batch.
            pharmacy_id: Stored on every created correction.
            reviewer_notes: Stored on every created correction.
            outlier_reason: Reason recorded on every created correction;
                defaults to ``OutlierReason.OTRO`` when omitted.

        Returns:
            Number of records marked (0 when there was nothing to mark).

        Raises:
            Exception: Any error raised while building or committing the
                batch is re-raised after the rollback.
        """
        enrichments = self._get_all_enrichments_by_product_name(sales_enrichment_id)
        marked = 0

        # The per-row existence query below runs while earlier add()s are
        # still pending, so (with autoflush enabled) a flush error can surface
        # inside the loop, not only at commit(). Keep the whole write phase
        # under the rollback handler.
        try:
            for enrichment in enrichments:
                # Skip if already has a correction (the lookup helper can fall
                # back to returning the seed enrichment unfiltered).
                existing = (
                    self.db.query(ProductCorrection)
                    .filter(ProductCorrection.sales_enrichment_id == enrichment.id)
                    .first()
                )
                if existing:
                    continue

                self.db.add(
                    ProductCorrection(
                        sales_enrichment_id=enrichment.id,
                        product_name=self._get_product_name(enrichment),
                        predicted_category=enrichment.ml_category or "unknown",
                        prediction_source=self._infer_prediction_source(enrichment),
                        confidence_score=float(enrichment.ml_confidence) if enrichment.ml_confidence else 0.0,
                        correction_type=CorrectionType.OUTLIER.value,
                        corrected_category=None,
                        reviewer_notes=reviewer_notes,
                        pharmacy_id=pharmacy_id,
                        created_at=datetime.now(timezone.utc),
                        processed_for_rules=False,
                        # String columns: store enum values, not members.
                        outlier_reason=(outlier_reason.value if outlier_reason else OutlierReason.OTRO.value),
                        outlier_status=OutlierStatus.PENDING_REVIEW.value,
                    )
                )
                marked += 1

            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

        return marked
