﻿# backend/app/services/reenrichment_service.py
"""
Servicio de Re-Enriquecimiento Incremental
Detecta cambios en el catálogo y actualiza datos de ventas afectados
"""

from datetime import datetime
from typing import Any, Dict, List, Optional

import structlog
from sqlalchemy import func, text
from sqlalchemy.orm import Session

from app.database import SessionLocal
from app.models import ProductCatalog, SalesData
from app.utils.datetime_utils import utc_now

# Module-level structlog logger used by the service methods below.
logger = structlog.get_logger()


class ReEnrichmentService:
    """
    Servicio para re-enriquecer datos de ventas cuando el catálogo se actualiza.

    Estrategia:
    1. Trackear última fecha de enriquecimiento por farmacia
    2. Detectar productos actualizados en catálogo después de esa fecha
    3. Re-procesar solo ventas de productos modificados
    4. Actualizar estadísticas de enriquecimiento
    """

    def __init__(self, db_session: Optional[Session] = None):
        self.db = db_session or SessionLocal()
        self.batch_size = 1000

    def _derive_product_type(self, product_catalog: ProductCatalog) -> str:
        """
        Determina el tipo de venta basado en clasificación de prescripción.
        Replica la lógica de EnrichmentService._derive_product_type para consistencia.

        Regla:
        - Tiene xfarma_prescription_category (excepto VETERINARIA) → "prescription"
        - VETERINARIA → depende de cima_requiere_receta
        - NO tiene xfarma_prescription_category (NULL) → "venta_libre"
        """
        # EXCEPCIÓN: Categoría VETERINARIA
        if product_catalog.xfarma_prescription_category == "VETERINARIA":
            if product_catalog.cima_requiere_receta is True:
                return "prescription"
            elif product_catalog.cima_requiere_receta is False:
                return "venta_libre"
            else:
                return "prescription"  # NULL: asumir prescription por seguridad

        # REGLA GENERAL: Tener categoría → prescription
        elif product_catalog.xfarma_prescription_category is not None:
            return "prescription"

        # Sin categoría (NULL) → Venta Libre
        else:
            return "venta_libre"

    def detect_catalog_changes(self, since: datetime) -> List[str]:
        """
        Find the catalog products modified after a given moment.

        Args:
            since: Exclusive lower bound compared against
                ``ProductCatalog.updated_at``.

        Returns:
            National codes of the catalog rows updated after ``since``.
        """
        logger.info(f"Detectando cambios en catálogo desde {since}")

        code_rows = (
            self.db.query(ProductCatalog.national_code)
            .filter(ProductCatalog.updated_at > since)
            .all()
        )
        # Each row carries a single column, so unpack it positionally.
        changed_codes = [national_code for (national_code,) in code_rows]

        logger.info(f"Encontrados {len(changed_codes)} productos actualizados")
        return changed_codes

    def get_last_enrichment_date(self, pharmacy_id: str) -> Optional[datetime]:
        """
        Return the date used as "last enrichment" reference for a pharmacy.

        There is no system metadata or tracking table for this yet; for now
        the earliest ``SalesData.created_at`` of the pharmacy stands in.

        Args:
            pharmacy_id: Pharmacy whose sales are inspected.

        Returns:
            The minimum ``created_at`` among the pharmacy's sales, or ``None``
            when the aggregate yields no value.
        """
        earliest_created_at = func.min(SalesData.created_at)
        pharmacy_sales = self.db.query(earliest_created_at).filter(SalesData.pharmacy_id == pharmacy_id)
        return pharmacy_sales.scalar()

    def identify_stale_enrichments(self, pharmacy_id: str) -> Dict[str, Any]:
        """
        Work out which sales rows of a pharmacy need to be re-enriched.

        Two situations are counted:

        1. Sales whose product was updated in the catalog after the reference
           date returned by ``get_last_enrichment_date``.
        2. Sales whose ``codigo_nacional`` has no matching ``ProductCatalog``
           row at all.

        Args:
            pharmacy_id: Pharmacy whose sales are inspected.

        Returns:
            Dict with the keys ``pharmacy_id``, ``total_sales``,
            ``needs_update`` (number of affected sales rows),
            ``products_to_update`` (set of national codes) and ``reasons``
            (list of human-readable strings). When the pharmacy has no
            reference date the zero-initialised dict is returned unchanged.
        """
        stats = {
            "pharmacy_id": pharmacy_id,
            "total_sales": 0,
            "needs_update": 0,
            "products_to_update": set(),
            "reasons": [],
        }

        # Without a reference date there is nothing to compare against.
        last_enrichment = self.get_last_enrichment_date(pharmacy_id)
        if not last_enrichment:
            return stats

        # Products updated in the catalog after the reference date.
        updated_products = self.detect_catalog_changes(last_enrichment)

        if updated_products:
            # Count the sales rows that reference those products.
            affected_sales = (
                self.db.query(func.count(SalesData.id))
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesData.codigo_nacional.in_(updated_products),
                )
                .scalar()
            )

            stats["needs_update"] = affected_sales
            stats["products_to_update"] = set(updated_products)
            stats["reasons"].append(f"Catálogo actualizado para {len(updated_products)} productos")

        # Sales whose code has no catalog row (LEFT JOIN with a NULL right side).
        # The aggregate is deliberately NOT labelled "count": on SQLAlchemy
        # 1.4+/2.0 ``row.count`` resolves to the tuple method ``Row.count``
        # rather than the column, so summing it raises TypeError.
        missing_enrichment = (
            self.db.query(SalesData.codigo_nacional, func.count(SalesData.id).label("sales_count"))
            .outerjoin(
                ProductCatalog,
                SalesData.codigo_nacional == ProductCatalog.national_code,
            )
            .filter(
                SalesData.pharmacy_id == pharmacy_id,
                SalesData.codigo_nacional.isnot(None),
                ProductCatalog.id.is_(None),
            )
            .group_by(SalesData.codigo_nacional)
            .all()
        )

        if missing_enrichment:
            # Positional unpacking works on every SQLAlchemy row type.
            missing_codes = [code for code, _ in missing_enrichment]
            missing_count = sum(sales_count for _, sales_count in missing_enrichment)

            stats["needs_update"] += missing_count
            stats["products_to_update"].update(missing_codes)
            stats["reasons"].append(f"{len(missing_codes)} productos sin enriquecer")

        stats["total_sales"] = (
            self.db.query(func.count(SalesData.id)).filter(SalesData.pharmacy_id == pharmacy_id).scalar()
        )

        return stats

    def re_enrich_sales(self, pharmacy_id: str, product_codes: List[str]) -> Dict[str, Any]:
        """
        Re-enrich the sales of one pharmacy with the current catalog data.

        ``product_codes`` is processed in chunks of ``self.batch_size`` codes.
        Every chunk is committed on its own; a chunk that fails is logged,
        counted in ``errors`` and rolled back, and processing continues with
        the next chunk.

        Args:
            pharmacy_id: Pharmacy whose sales are updated.
            product_codes: National codes whose sales must be re-enriched.

        Returns:
            Dict with:
            - ``processed``: sales rows examined in committed chunks.
            - ``updated``: enrichment rows updated or created in committed
              chunks.
            - ``errors``: number of product codes belonging to failed chunks.
            - ``details``: list that this method leaves empty.
        """
        results = {"processed": 0, "updated": 0, "errors": 0, "details": []}

        logger.info(f"Re-enriqueciendo {len(product_codes)} productos para farmacia {pharmacy_id}")

        for batch_start in range(0, len(product_codes), self.batch_size):
            batch = product_codes[batch_start : batch_start + self.batch_size]
            batch_number = batch_start // self.batch_size + 1

            try:
                processed, updated = self._re_enrich_batch(pharmacy_id, batch)
                self.db.commit()
            except Exception as e:
                logger.error(f"Error en batch {batch_number}: {str(e)}")
                results["errors"] += len(batch)
                self.db.rollback()
                continue

            # Counters are only added once the chunk is durably committed, so
            # a rolled-back chunk never inflates "processed"/"updated".
            results["processed"] += processed
            results["updated"] += updated
            logger.info(f"Batch {batch_number} procesado: {len(batch)} productos")

        return results

    def _re_enrich_batch(self, pharmacy_id: str, batch: List[str]) -> tuple:
        """
        Apply current catalog data to the sales of one chunk of product codes.

        Nothing is committed here; the caller owns the transaction.

        Returns:
            ``(processed, updated)``: sales rows examined and enrichment rows
            updated or created.
        """
        # Imported locally, as the rest of this module does for this model.
        from app.models.sales_enrichment import SalesEnrichment

        # Current catalog rows for the chunk, keyed by national code.
        catalog_rows = self.db.query(ProductCatalog).filter(ProductCatalog.national_code.in_(batch)).all()
        catalog_map = {c.national_code: c for c in catalog_rows}

        sales_to_update = (
            self.db.query(SalesData)
            .filter(
                SalesData.pharmacy_id == pharmacy_id,
                SalesData.codigo_nacional.in_(batch),
            )
            .all()
        )

        # Load the existing enrichments of those sales in ONE query instead of
        # issuing a SELECT per sale.
        existing_enrichments = (
            self.db.query(SalesEnrichment)
            .join(SalesData, SalesData.id == SalesEnrichment.sales_data_id)
            .filter(
                SalesData.pharmacy_id == pharmacy_id,
                SalesData.codigo_nacional.in_(batch),
            )
            .all()
        )
        enrichment_by_sale = {}
        for existing in existing_enrichments:
            # Keep a single enrichment per sale, like the former ``.first()``.
            enrichment_by_sale.setdefault(existing.sales_data_id, existing)

        processed = 0
        updated = 0

        for sale in sales_to_update:
            processed += 1

            catalog = catalog_map.get(sale.codigo_nacional)
            if catalog is None:
                # Code is not in the catalog: nothing to enrich with.
                continue

            enrichment = enrichment_by_sale.get(sale.id)

            if enrichment is not None:
                # Refresh the existing record with the updated catalog data.
                enrichment.enrichment_source = catalog.data_sources or "nomenclator"
                enrichment.enriched_at = utc_now()
                enrichment.last_updated = utc_now()
                enrichment.product_catalog_id = catalog.id
                # CRITICAL: always recompute product_type when re-enriching.
                enrichment.product_type = self._derive_product_type(catalog)
                # CRITICAL: a catalog match promotes manual_review to enriched.
                if enrichment.enrichment_status == "manual_review":
                    enrichment.enrichment_status = "enriched"
                    enrichment.match_method = "codigo_nacional"
                    enrichment.match_confidence = 95
                    enrichment.manual_review_reason = None

                # Refresh derived flags when the catalog exposes the field.
                if hasattr(catalog, "nomen_codigo_homogeneo"):
                    enrichment.has_generic_alternative = bool(catalog.nomen_codigo_homogeneo)
            else:
                # No enrichment existed yet: create one.
                # CRITICAL: always set product_type on creation.
                self.db.add(
                    SalesEnrichment(
                        sales_data_id=sale.id,
                        product_catalog_id=catalog.id,
                        enrichment_source=catalog.data_sources or "nomenclator",
                        enrichment_status="enriched",
                        match_method="codigo_nacional",
                        match_confidence=95,
                        enriched_at=utc_now(),
                        product_type=self._derive_product_type(catalog),
                    )
                )

            updated += 1

        return processed, updated

    def schedule_re_enrichment(self, pharmacy_id: str = None) -> Dict[str, Any]:
        """
        Analyse pharmacies and re-enrich the ones holding stale sales data.

        Args:
            pharmacy_id: When given, only that pharmacy is analysed; otherwise
                every distinct ``SalesData.pharmacy_id`` is.

        Returns:
            Report dict with ``timestamp`` (ISO string), ``pharmacies_analyzed``,
            ``updates_needed`` (pharmacies with stale rows),
            ``updates_performed`` (pharmacies where at least one record was
            updated) and ``details`` (one entry per re-enriched pharmacy).
        """
        report = {
            "timestamp": utc_now().isoformat(),
            "pharmacies_analyzed": 0,
            "updates_needed": 0,
            "updates_performed": 0,
            "details": [],
        }

        # A single requested pharmacy, or every pharmacy that has sales data.
        if pharmacy_id:
            targets = [pharmacy_id]
        else:
            distinct_rows = self.db.query(SalesData.pharmacy_id.distinct()).all()
            targets = [row[0] for row in distinct_rows]

        report["pharmacies_analyzed"] = len(targets)

        for pid in targets:
            logger.info(f"Analizando farmacia {pid}")

            stale_data = self.identify_stale_enrichments(pid)
            pending = stale_data["needs_update"]
            if not pending > 0:
                continue

            report["updates_needed"] += 1
            logger.info(f"Farmacia {pid} necesita actualizar {pending} registros")

            codes = stale_data["products_to_update"]
            if not codes:
                continue

            # Run the re-enrichment and record its outcome.
            outcome = self.re_enrich_sales(pid, list(codes))
            if outcome["updated"] > 0:
                report["updates_performed"] += 1

            detail = {
                "pharmacy_id": pid,
                "stale_records": stale_data["needs_update"],
                "updated_records": outcome["updated"],
                "reasons": stale_data["reasons"],
            }
            report["details"].append(detail)

        logger.info(f"Re-enriquecimiento completado: {report['updates_performed']} farmacias actualizadas")

        return report

    def process_pending_changes(self) -> Dict[str, Any]:
        """
        Process the unprocessed rows of ``catalog_change_log``.

        The distinct national codes of the pending changes are re-enriched for
        every pharmacy that has sales of them, and the change rows are then
        flagged ``processed = true``.

        Returns:
            Dict with:
            - ``processed_changes``: change rows flagged as processed.
            - ``affected_pharmacies``: set of pharmacy ids with matching sales.
            - ``updated_records``: enrichment rows updated or created.
            - ``errors``: failures seen along the way. This adds up the
              ``errors`` reported by ``re_enrich_sales`` (product codes in
              failed batches), one per pharmacy whose re-enrichment raised,
              and one if flagging the changes failed.
        """
        results = {
            "processed_changes": 0,
            "affected_pharmacies": set(),
            "updated_records": 0,
            "errors": 0,
        }

        logger.info("Procesando cambios pendientes del catálogo")

        # Fetch the changes that have not been processed yet.
        pending_changes = self.db.execute(
            text(
                """
            SELECT national_code, change_type, changed_at, id
            FROM catalog_change_log
            WHERE processed = false
            ORDER BY changed_at DESC
        """
            )
        ).fetchall()

        if not pending_changes:
            logger.info("No hay cambios pendientes para procesar")
            return results

        logger.info(f"Encontrados {len(pending_changes)} cambios pendientes")

        # Collapse the changes into unique national codes.
        affected_codes = list({change.national_code for change in pending_changes})

        # Every pharmacy that has sales of any of those products.
        pharmacies_with_sales = (
            self.db.query(SalesData.pharmacy_id.distinct()).filter(SalesData.codigo_nacional.in_(affected_codes)).all()
        )

        pharmacy_ids = [p[0] for p in pharmacies_with_sales]
        results["affected_pharmacies"] = set(pharmacy_ids)

        logger.info(f"Afectadas {len(pharmacy_ids)} farmacias con productos cambiados")

        # Re-enrich pharmacy by pharmacy.
        for pharmacy_id in pharmacy_ids:
            try:
                reenrich_results = self.re_enrich_sales(pharmacy_id, affected_codes)
            except Exception as e:
                logger.error(f"Error re-enriqueciendo farmacia {pharmacy_id}: {str(e)}")
                results["errors"] += 1
                continue

            results["updated_records"] += reenrich_results["updated"]
            # re_enrich_sales handles batch failures internally and only
            # reports them through its own counter; surface them here so a
            # partially failed run is not reported as error-free.
            results["errors"] += reenrich_results["errors"]

        # NOTE(review): the changes are flagged as processed even when some
        # batches failed above - confirm whether failed codes should instead
        # stay pending so the next run retries them.
        try:
            change_ids = [change.id for change in pending_changes]
            self.db.execute(
                text(
                    """
                UPDATE catalog_change_log
                SET processed = true
                WHERE id = ANY(:change_ids)
            """
                ),
                {"change_ids": change_ids},
            )

            results["processed_changes"] = len(pending_changes)
            self.db.commit()

            logger.info(
                f"Procesamiento completo: {results['updated_records']} registros actualizados en {len(pharmacy_ids)} farmacias"
            )

        except Exception as e:
            logger.error(f"Error marcando cambios como procesados: {str(e)}")
            self.db.rollback()
            results["errors"] += 1

        return results

    def redetect_brands(self, pharmacy_id: Optional[str] = None, limit: Optional[int] = None) -> Dict[str, Any]:
        """
        Re-run brand detection on "venta_libre" enrichments without a brand.

        Issue #446: lets ``detected_brand`` be refreshed after new brands are
        added to the detection service, without re-enriching everything.

        Changes are committed every ``self.batch_size`` records and once more
        at the end. A failed periodic commit is logged, counted as an error
        and rolled back so the session stays usable; note that the counters
        of the records in that lost batch are not reverted.

        Args:
            pharmacy_id: When given, only that pharmacy's sales are processed.
            limit: Maximum number of records to process (intended for
                testing). A falsy value (``None`` or ``0``) means no limit.

        Returns:
            Dict of counters: ``processed``, ``updated``, ``already_detected``
            (initialised to 0 and not modified by this method),
            ``no_brand_found``, ``marca_blanca_assigned``,
            ``services_skipped``, ``errors``, plus ``brands_found`` mapping
            each detected brand to its number of records.

        Raises:
            Exception: Whatever the final commit raises; the session is
                rolled back before the exception propagates.
        """
        from app.models.sales_enrichment import SalesEnrichment
        from app.services.brand_detection_service import (
            brand_detection_service,
            detect_brand_with_fallback,
            is_service_product,
            MARCA_BLANCA,
        )

        results = {
            "processed": 0,
            "updated": 0,
            "already_detected": 0,
            "no_brand_found": 0,
            "marca_blanca_assigned": 0,  # Issue #486: Track MARCA_BLANCA fallbacks
            "services_skipped": 0,  # Issue #486: Track service products skipped
            "errors": 0,
            "brands_found": {},
        }

        logger.info(
            "redetect_brands.start",
            pharmacy_id=pharmacy_id,
            limit=limit,
            brands_configured=len(brand_detection_service.brand_priority),
        )

        # Base query: venta_libre enrichments that have no detected_brand.
        query = (
            self.db.query(SalesEnrichment, SalesData.product_name)
            .join(SalesData, SalesData.id == SalesEnrichment.sales_data_id)
            .filter(
                SalesEnrichment.product_type == "venta_libre",
                SalesEnrichment.detected_brand.is_(None),
            )
        )

        if pharmacy_id:
            query = query.filter(SalesData.pharmacy_id == pharmacy_id)

        if limit:
            query = query.limit(limit)

        records = query.all()
        total = len(records)

        logger.info("redetect_brands.found_records", count=total)

        if total == 0:
            logger.info("redetect_brands.nothing_to_process")
            return results

        for position, (enrichment, product_name) in enumerate(records, start=1):
            try:
                if not product_name:
                    # Guard against NULL product names coming from the DB.
                    results["errors"] += 1
                    results["processed"] += 1
                elif is_service_product(product_name):
                    # Issue #486: skip service products (SPD, SERVICIO NUTRICION, etc.).
                    results["services_skipped"] += 1
                    results["processed"] += 1
                else:
                    # Issue #486: detect_brand_with_fallback applies
                    # MARCA_BLANCA when no real brand is found but the product
                    # has a valid necesidad.
                    current_necesidad = enrichment.ml_category
                    result = detect_brand_with_fallback(product_name, current_necesidad)

                    if result.detected_brand:
                        enrichment.detected_brand = result.detected_brand
                        enrichment.brand_line = result.brand_line
                        enrichment.ml_subcategory = result.ml_subcategory

                        if result.detected_brand == MARCA_BLANCA:
                            results["marca_blanca_assigned"] += 1
                        elif result.necesidad:
                            # Real brands only: adopt the detector's category
                            # when the current one is missing, generic, or
                            # held with lower confidence.
                            current_category = enrichment.ml_category
                            if (
                                not current_category
                                or current_category in ["Otros", "Dermofarmacia"]
                                or (enrichment.ml_confidence and result.confidence > enrichment.ml_confidence)
                            ):
                                enrichment.ml_category = result.necesidad
                                enrichment.ml_confidence = result.confidence
                                enrichment.ml_model_version = "brand_detection_v1.0"

                        results["updated"] += 1

                        # Per-brand tally.
                        brand = result.detected_brand
                        results["brands_found"][brand] = results["brands_found"].get(brand, 0) + 1
                    else:
                        results["no_brand_found"] += 1

                    results["processed"] += 1

            except Exception as e:
                logger.error(
                    "redetect_brands.error",
                    product_name=product_name,
                    error=str(e),
                )
                results["errors"] += 1

            # Periodic commit. It lives outside the per-record handling so it
            # also runs when the boundary record was skipped or failed.
            if position % self.batch_size == 0:
                try:
                    self.db.commit()
                except Exception as e:
                    logger.error("redetect_brands.commit_error", error=str(e))
                    results["errors"] += 1
                    # Without a rollback the session rejects every later operation.
                    self.db.rollback()
                else:
                    logger.info(
                        "redetect_brands.batch_progress",
                        processed=results["processed"],
                        total=total,
                        updated=results["updated"],
                    )

        # Final commit for the trailing partial batch.
        try:
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

        logger.info(
            "redetect_brands.complete",
            processed=results["processed"],
            updated=results["updated"],
            marca_blanca=results["marca_blanca_assigned"],  # Issue #486
            no_brand=results["no_brand_found"],
            errors=results["errors"],
        )

        return results

    def create_change_triggers(self) -> bool:
        """
        Create the PostgreSQL objects that record catalog changes.

        In a single transaction this creates, if missing, the
        ``catalog_change_log`` table and its index, then (re)creates the
        ``notify_catalog_change`` function and the ``catalog_change_trigger``
        trigger on ``product_catalog``. The trigger sends a ``catalog_updated``
        notification and inserts a log row for every inserted or updated
        catalog row.

        Returns:
            ``True`` when everything was created and committed, ``False`` when
            any statement failed (the error is logged and the transaction
            rolled back).
        """
        # The log table is created first because the trigger function writes
        # to it. The index is a separate CREATE INDEX statement: PostgreSQL
        # does not accept MySQL's inline "INDEX name (cols)" clause inside
        # CREATE TABLE.
        trigger_sql = """
        -- Change-log table
        CREATE TABLE IF NOT EXISTS catalog_change_log (
            id SERIAL PRIMARY KEY,
            national_code VARCHAR(20),
            change_type VARCHAR(10),
            changed_at TIMESTAMP DEFAULT NOW(),
            old_data_sources VARCHAR(100),
            new_data_sources VARCHAR(100),
            processed BOOLEAN DEFAULT FALSE
        );

        CREATE INDEX IF NOT EXISTS idx_change_log_unprocessed
            ON catalog_change_log (processed, changed_at);

        -- Trigger function that detects changes in product_catalog
        CREATE OR REPLACE FUNCTION notify_catalog_change()
        RETURNS trigger AS $$
        BEGIN
            -- Notify listeners that the catalog changed
            PERFORM pg_notify('catalog_updated', json_build_object(
                'action', TG_OP,
                'national_code', NEW.national_code,
                'timestamp', NOW()
            )::text);

            -- Record the change in the tracking table
            INSERT INTO catalog_change_log (
                national_code,
                change_type,
                changed_at,
                old_data_sources,
                new_data_sources
            ) VALUES (
                NEW.national_code,
                TG_OP,
                NOW(),
                OLD.data_sources,
                NEW.data_sources
            ) ON CONFLICT DO NOTHING;

            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;

        -- Attach the trigger to the table
        DROP TRIGGER IF EXISTS catalog_change_trigger ON product_catalog;
        CREATE TRIGGER catalog_change_trigger
        AFTER INSERT OR UPDATE ON product_catalog
        FOR EACH ROW
        EXECUTE FUNCTION notify_catalog_change();
        """

        try:
            self.db.execute(text(trigger_sql))
            self.db.commit()
            logger.info("Triggers de detección de cambios creados exitosamente")
            return True
        except Exception as e:
            logger.error(f"Error creando triggers: {str(e)}")
            self.db.rollback()
            return False
