﻿# backend/app/services/catalog_maintenance_service.py
"""
Servicio maestro para mantenimiento del catálogo de productos
Orquesta la sincronización desde múltiples fuentes de datos
"""

import json
import time
from collections import Counter
from typing import Any, Dict, Optional

import structlog
from sqlalchemy import and_, case, func, or_
from sqlalchemy.orm import Session

from app.exceptions import CatalogSyncError, ProcessingError
from app.models.nomenclator_local import NomenclatorLocal  # Moved to module level (Issue #268)
from app.services.atc_backfill_service import atc_backfill_service  # Issue #517: ATC backfill post-sync
from app.utils.datetime_utils import utc_now

# Usar structlog para mejor tracking
logger = structlog.get_logger()


class RealProgressTracker:
    """
    Progress tracker for catalog operations, fed with real measurements.

    Callers report the actual number of processed items; the tracker derives
    percentage, rolling throughput and ETA from those reports and periodically
    writes them to the ``SystemStatus`` row of its component.
    """

    def __init__(self, db: Session, component: str):
        """
        Args:
            db: Session used to persist progress.
            component: Component name. Only "nomenclator", "cima" and "catalog"
                (case-insensitive) are persisted; any other name is tracked in
                memory only.
        """
        self.db = db
        self.component = component
        self.start_time = time.time()
        self.last_update_time = self.start_time

        # Real metrics
        self.total_items = 0
        self.processed_items = 0
        self.current_phase = ""
        self.current_chunk = 0
        self.total_chunks = 0
        self.errors_count = 0
        self.last_error_message = None  # Issue #211: keep the most recent specific error
        self.items_per_second = 0.0

        # Throughput calculation state
        self.speed_samples = []  # Last 10 throughput measurements
        self.last_speed_calc_time = self.start_time
        self.last_speed_calc_items = 0

    def _resolve_status_models(self):
        """
        Lazily import the status models and resolve this tracker's component.

        Returns:
            Tuple ``(component_enum, SystemStatus, SystemStatusEnum)`` where
            ``component_enum`` is ``None`` when the component name is not mapped.
        """
        from app.models.system_status import SystemComponent, SystemStatus, SystemStatusEnum

        component_map = {
            "nomenclator": SystemComponent.NOMENCLATOR,
            "cima": SystemComponent.CIMA,
            "catalog": SystemComponent.CATALOG,
        }
        return component_map.get(self.component.lower()), SystemStatus, SystemStatusEnum

    def set_total(self, total: int, chunks: int = 1):
        """Set the total number of items (and chunks) to process."""
        self.total_items = total
        self.total_chunks = chunks
        logger.info(
            "progress.tracker.initialized",
            component=self.component,
            total_items=total,
            total_chunks=chunks,
        )

    def update_progress(
        self,
        processed: int,
        phase: str,
        chunk: Optional[int] = None,
        force_update: bool = False,
    ) -> Dict[str, Any]:
        """
        Record the current progress and return the derived metrics.

        Args:
            processed: Absolute number of items processed so far.
            phase: Human-readable description of the current phase.
            chunk: Current chunk number, if the work is chunked.
            force_update: Persist to the DB even if the 3-second throttle
                has not elapsed.

        Returns:
            Dict with processed/total items, percentage (clamped to 0-100),
            phase, chunk info, items per second, elapsed seconds, ETA in
            seconds (``None`` when unknown) and error count.
        """
        self.processed_items = processed
        self.current_phase = phase
        if chunk is not None:
            self.current_chunk = chunk

        # Re-sample throughput every 5 seconds or every 100 items
        current_time = time.time()
        time_since_speed_calc = current_time - self.last_speed_calc_time
        items_since_speed_calc = processed - self.last_speed_calc_items

        if time_since_speed_calc >= 5 or items_since_speed_calc >= 100:
            # Guard against a zero interval (two reports with the same timestamp)
            if time_since_speed_calc > 0:
                current_speed = items_since_speed_calc / time_since_speed_calc
                self.speed_samples.append(current_speed)

                # Keep only the 10 most recent samples
                if len(self.speed_samples) > 10:
                    self.speed_samples.pop(0)

                # Rolling average of the retained samples
                self.items_per_second = sum(self.speed_samples) / len(self.speed_samples)

                self.last_speed_calc_time = current_time
                self.last_speed_calc_items = processed

        # Percentage from real counts, bounded so that a report outside
        # [0, total_items] can never surface as <0% or >100%.
        percentage = 0
        if self.total_items > 0:
            percentage = max(0, min(100, int((processed / self.total_items) * 100)))

        # ETA from measured throughput; unknown until a speed sample exists
        eta_seconds = None
        if self.items_per_second > 0 and self.total_items > processed:
            remaining_items = self.total_items - processed
            eta_seconds = int(remaining_items / self.items_per_second)

        # Throttle DB writes to one every 3 seconds unless forced
        time_since_last_update = current_time - self.last_update_time
        should_update_db = force_update or time_since_last_update >= 3

        if should_update_db:
            self._persist_progress(percentage, eta_seconds)
            self.last_update_time = current_time

        elapsed = int(current_time - self.start_time)

        return {
            "processed_items": self.processed_items,
            "total_items": self.total_items,
            "percentage": percentage,
            "current_phase": self.current_phase,
            "current_chunk": self.current_chunk,
            "total_chunks": self.total_chunks,
            "items_per_second": round(self.items_per_second, 2),
            "elapsed_seconds": elapsed,
            "eta_seconds": eta_seconds,
            "errors_count": self.errors_count,
        }

    def _persist_progress(self, percentage: int, eta_seconds: Optional[int]):
        """
        Write the current progress to the component's ``SystemStatus`` row.

        Creates the row if it does not exist. Any failure is logged and the
        session is rolled back; nothing is raised to the caller.
        """
        try:
            component_enum, SystemStatus, SystemStatusEnum = self._resolve_status_models()
            if not component_enum:
                return

            # Find or create the status row
            status = self.db.query(SystemStatus).filter_by(component=component_enum).first()
            if not status:
                status = SystemStatus(component=component_enum)
                self.db.add(status)

            status.status = SystemStatusEnum.UPDATING
            status.progress = percentage
            status.processed_items = self.processed_items
            status.total_items = self.total_items

            # Descriptive message built from the real counters
            if self.total_chunks > 1:
                status.message = f"{self.current_phase} - Chunk {self.current_chunk}/{self.total_chunks} - {self.processed_items:,}/{self.total_items:,} items"
            else:
                status.message = (
                    f"{self.current_phase} - {self.processed_items:,}/{self.total_items:,} items procesados"
                )

            # Full metrics snapshot, stored as JSON
            details = {
                "phase": self.current_phase,
                "processed": self.processed_items,
                "total": self.total_items,
                "chunk": self.current_chunk,
                "total_chunks": self.total_chunks,
                "speed": round(self.items_per_second, 2),
                "eta_seconds": eta_seconds,
                "errors": self.errors_count,
                "timestamp": utc_now().isoformat(),
            }
            status.details = json.dumps(details)

            status.updated_at = utc_now()
            self.db.commit()

        except Exception as e:
            logger.error("progress.persist.error", error=str(e))
            self.db.rollback()

    def add_error(self, error_msg: str):
        """Count an error and remember its message for the final status."""
        self.errors_count += 1
        self.last_error_message = error_msg  # Issue #211: used for the descriptive failure message
        logger.error(
            "progress.error",
            component=self.component,
            error=error_msg,
            total_errors=self.errors_count,
        )

    def complete(self, success: bool = True):
        """
        Mark the operation as finished in the component's ``SystemStatus`` row.

        Only an existing row is updated; if none exists nothing is written.
        Any failure is logged and the session is rolled back.

        Args:
            success: ``True`` sets READY / 100%; ``False`` sets ERROR with a
                descriptive message and increments the row's error counter.
        """
        try:
            component_enum, SystemStatus, SystemStatusEnum = self._resolve_status_models()
            if not component_enum:
                return

            status = self.db.query(SystemStatus).filter_by(component=component_enum).first()
            if status:
                if success:
                    status.status = SystemStatusEnum.READY
                    status.progress = 100
                    status.message = f"Completado: {self.processed_items:,} items procesados"
                    status.last_success_at = utc_now()
                else:
                    status.status = SystemStatusEnum.ERROR
                    # Issue #211: descriptive message with correct pluralization
                    error_word = "error" if self.errors_count == 1 else "errores"
                    if self.last_error_message:
                        status.message = f"Error en sincronización: {self.last_error_message}"
                    else:
                        status.message = (
                            f"{self.errors_count} {error_word} durante sincronización (ver logs para detalles)"
                        )
                    status.last_error_at = utc_now()
                    status.error_count = (status.error_count or 0) + 1

                elapsed = int(time.time() - self.start_time)
                status.details = json.dumps(
                    {
                        "completed": True,
                        "success": success,
                        "total_processed": self.processed_items,
                        "total_errors": self.errors_count,
                        "elapsed_seconds": elapsed,
                        # Whole-second elapsed time; 0 avoids dividing by zero on sub-second runs
                        "average_speed": round(self.processed_items / elapsed if elapsed > 0 else 0, 2),
                    }
                )

                self.db.commit()

                logger.info(
                    "progress.completed",
                    component=self.component,
                    success=success,
                    processed=self.processed_items,
                    errors=self.errors_count,
                    elapsed=elapsed,
                )
        except Exception as e:
            logger.error("progress.complete.error", error=str(e))
            self.db.rollback()


class CatalogMaintenanceService:
    """
    Servicio central para mantener product_catalog actualizado

    Coordina:
    - Sincronización desde nomenclator_local
    - Enriquecimiento desde CIMA
    - Actualización de grupos homogéneos
    - Futuro: CIMAVet y ML Classifier
    """

    def __init__(self, db_session: Optional[Session] = None):
        """
        Wire up the integration services and the tuning limits.

        Args:
            db_session: Optional session kept on ``self.db`` for system-status
                updates.
        """
        # Integration services are imported here rather than at module level
        from app.external_data.cima_integration import CIMAIntegrationService
        from app.external_data.nomenclator_integration import nomenclator_integration

        # Session used to update the system status
        self.db = db_session

        # Tuned for Render (limited memory)
        self.memory_limit_mb = 400  # Memory limit before pausing
        self.batch_size = 500  # Kept small to avoid excessive memory use
        self.max_days_old = 10  # Data is considered stale after 10 days

        self.cima_service = CIMAIntegrationService()
        self.nomenclator_service = nomenclator_integration

    async def _invalidate_catalog_caches(self):
        """
        Drop every catalog-related cache after the catalog has been updated.

        Issue #194: single, centralized place for catalog cache invalidation.
        Clears the enrichment cache, deletes the global catalog-stats key and
        invalidates the laboratory mappings cache, logging after each step.
        """
        from app.services.enrichment_cache import enrichment_cache
        from app.services.laboratory_cache_service import laboratory_cache_service

        stats_cache_key = "catalog:stats:global"

        # Enrichment cache: wipe everything
        await enrichment_cache.invalidate_all()
        logger.info("[CATALOG] Cache de enriquecimiento invalidado tras actualización")

        # Issue #194: catalog stats entry
        await enrichment_cache.delete(stats_cache_key)
        logger.info("[CATALOG] Cache de catalog stats invalidado tras actualización")

        # Laboratory mappings cache (synchronous call)
        lab_keys_dropped = laboratory_cache_service.invalidate_all_laboratory_cache()
        logger.info(f"[CATALOG] Cache de laboratory mappings invalidado: {lab_keys_dropped} keys")

    def _get_memory_info(self) -> Dict[str, Any]:
        """Obtiene información de memoria para heartbeat"""
        try:
            import psutil

            process = psutil.Process()
            memory_info = process.memory_info()
            return {
                "rss_mb": round(memory_info.rss / 1024 / 1024, 2),
                "vms_mb": round(memory_info.vms / 1024 / 1024, 2),
                "memory_percent": round(process.memory_percent(), 2),
            }
        except ImportError:
            return {"error": "psutil_not_available"}
        except Exception as e:
            return {"error": f"memory_check_failed: {str(e)[:100]}"}

    def _detect_environment(self) -> str:
        """Detecta el entorno de ejecución para heartbeat"""
        import os

        if os.getenv("RENDER") == "true":
            return "render_production"
        elif "onrender.com" in os.getenv("RENDER_EXTERNAL_URL", ""):
            return "render_production"
        elif os.getenv("ENVIRONMENT") == "development":
            return "development"
        else:
            return "unknown"

    def _update_system_status(
        self,
        component: str,
        status: str,
        progress: int = 0,
        message: str = None,
        details: str = None,
        total_items: int = None,
        processed_items: int = None,
    ) -> None:
        """Actualizar el estado del sistema en tiempo real"""
        if not self.db:
            return

        try:
            from app.models.system_status import SystemComponent, SystemStatus, SystemStatusEnum

            # Buscar o crear el registro de estado
            component_enum = SystemComponent(component.lower())

            # Mapear estados correctamente
            status_map = {
                "processing": "updating",
                "ready": "ready",
                "error": "error",
                "initializing": "initializing",
            }
            mapped_status = status_map.get(status.lower(), "updating")
            status_enum = SystemStatusEnum(mapped_status)

            system_status = self.db.query(SystemStatus).filter_by(component=component_enum).first()
            if not system_status:
                system_status = SystemStatus(component=component_enum)
                self.db.add(system_status)

            # Actualizar campos
            system_status.status = status_enum
            system_status.progress = progress
            system_status.message = message

            # Serializar details si es un dict
            if isinstance(details, dict):
                import json

                system_status.details = json.dumps(details)
            else:
                system_status.details = details
            system_status.total_items = total_items
            system_status.processed_items = processed_items
            system_status.updated_at = utc_now()

            if status == "INITIALIZING" and not system_status.started_at:
                system_status.started_at = utc_now()
            elif status == "READY":
                system_status.last_success_at = utc_now()
            elif status == "ERROR":
                system_status.last_error_at = utc_now()
                system_status.error_count = (system_status.error_count or 0) + 1

            self.db.commit()
            logger.info(
                "system.status.updated",
                component=component,
                status=status,
                progress=progress,
            )

        except Exception as e:
            logger.error("system.status.update.error", error=str(e))
            self.db.rollback()

    async def full_sync(self, db: Session, force_update: bool = False) -> Dict[str, Any]:
        """
        Run the complete catalog synchronization across all data sources.

        Steps, in order: (1) refresh nomenclator_local if needed, (2) sync
        product_catalog from the nomenclator, (3) enrich from CIMA, (4) update
        the homogeneous-groups master, (5) mark unsynchronized products. After
        a successful run the catalog caches are invalidated and sales-data
        re-enrichment is triggered; failures in those two follow-up steps are
        logged as warnings and do not fail the sync.

        Any other exception is caught here: it is recorded in the returned
        stats and in the progress tracker, the session is rolled back, and the
        exception is not re-raised.

        Args:
            db: Database session.
            force_update: Force the update even if the data is recent.

        Returns:
            Stats dict with ``started_at``, per-source results
            (``nomenclator``, ``cima``, ``catalog``, ``homogeneous_groups``),
            ``status`` ("success" or "error") and, depending on the outcome,
            ``completed_at``, ``duration_seconds``, ``reenrichment`` or ``error``.
        """
        logger.info("[CATALOG] Iniciando sincronización completa del catálogo")

        # Top-level tracker: one unit of progress per main phase
        catalog_tracker = RealProgressTracker(db, "catalog")
        catalog_tracker.set_total(5, 5)

        stats = {
            "started_at": utc_now(),
            "nomenclator": {},
            "cima": {},
            "catalog": {},
            "homogeneous_groups": {},
            "status": "in_progress",
        }

        try:
            # 1. Refresh nomenclator_local if needed
            logger.info("[CATALOG] Paso 1: Actualizando nomenclator_local")
            catalog_tracker.update_progress(0, "Paso 1: Verificando nomenclátor local", 1)

            # Issue #267: validate the nomenclator result BEFORE continuing
            try:
                nomen_result = self._update_nomenclator_if_needed(db, force_update)
                stats["nomenclator"] = nomen_result

                if nomen_result.get("status") == "error":
                    raise ProcessingError(
                        "Nomenclator update failed, cannot proceed with catalog sync", details=nomen_result
                    )

                logger.info(
                    f"[CATALOG] Paso 1 completado: Nomenclátor con "
                    f"{nomen_result.get('product_count', 'N/A')} productos"
                )
                catalog_tracker.update_progress(1, "Paso 1: Nomenclátor verificado", 1, force_update=True)
            except ProcessingError as e:
                logger.error(f"[CATALOG] CRÍTICO: Falló paso 1 (nomenclátor) - {str(e)}")
                stats["nomenclator"] = {"status": "error", "message": str(e), "details": getattr(e, "details", {})}
                # Re-raise so the outer handler marks the whole sync as failed
                raise

            # 2. Sync product_catalog from the nomenclator
            logger.info("[CATALOG] Paso 2: Sincronizando product_catalog desde nomenclator")
            catalog_tracker.update_progress(1, "Paso 2: Sincronizando catálogo desde nomenclátor", 2)

            # Dedicated tracker so the nomenclator component reports its own progress
            nomen_tracker = RealProgressTracker(db, "nomenclator")
            catalog_result = self._sync_catalog_from_nomenclator(db, progress_tracker=nomen_tracker)
            stats["catalog"]["from_nomenclator"] = catalog_result
            catalog_tracker.update_progress(2, "Paso 2: Catálogo sincronizado", 2, force_update=True)

            # 3. Enrich with CIMA data
            logger.info("[CATALOG] Paso 3: Enriqueciendo con datos CIMA")
            catalog_tracker.update_progress(2, "Paso 3: Enriqueciendo con datos CIMA", 3)
            cima_result = await self._enrich_from_cima_with_real_progress(db, force_update)
            stats["cima"] = cima_result
            catalog_tracker.update_progress(3, "Paso 3: Enriquecimiento CIMA completado", 3, force_update=True)

            # 4. Update homogeneous groups (master catalog)
            logger.info("[CATALOG] Paso 4: Actualizando catálogo maestro de grupos homogéneos")
            catalog_tracker.update_progress(3, "Paso 4: Actualizando grupos homogéneos", 4)
            groups_result = self._update_homogeneous_groups_master_with_progress(db)
            stats["homogeneous_groups"] = groups_result
            catalog_tracker.update_progress(4, "Paso 4: Grupos homogéneos actualizados", 4, force_update=True)

            # 5. Mark unsynchronized products
            logger.info("[CATALOG] Paso 5: Marcando productos no sincronizados")
            catalog_tracker.update_progress(4, "Paso 5: Marcando productos no sincronizados", 5)
            unsync_result = self._mark_unsynchronized_products(db)
            stats["catalog"]["unsynchronized"] = unsync_result
            catalog_tracker.update_progress(5, "Paso 5: Productos marcados", 5, force_update=True)

            # Final statistics
            stats["completed_at"] = utc_now()
            stats["duration_seconds"] = (stats["completed_at"] - stats["started_at"]).total_seconds()
            stats["status"] = "success"

            catalog_tracker.complete(success=True)

            logger.info(f"[CATALOG] Sincronización completada en {stats['duration_seconds']:.2f} segundos")

            # Invalidate caches after the catalog update (Issue #194).
            # This coroutine already runs inside an event loop, so the helper is
            # awaited directly: starting a second loop with run_until_complete()
            # from here raises RuntimeError and would leave the caches stale.
            try:
                await self._invalidate_catalog_caches()
            except Exception as cache_error:
                logger.warning(f"[CATALOG] No se pudo invalidar cache: {cache_error}")

            # 6. Re-enrich sales data if there were changes
            logger.info("[CATALOG] Paso 6: Verificando si es necesario re-enriquecer datos de ventas")
            try:
                from app.services.reenrichment_service import ReEnrichmentService

                reenrich_service = ReEnrichmentService(db)

                # Run re-enrichment for all pharmacies
                reenrich_result = reenrich_service.schedule_re_enrichment()
                stats["reenrichment"] = reenrich_result

                if reenrich_result["updates_performed"] > 0:
                    logger.info(
                        f"[CATALOG] Re-enriquecimiento ejecutado: {reenrich_result['updates_performed']} farmacias actualizadas"
                    )
                else:
                    logger.info("[CATALOG] No fue necesario re-enriquecer datos de ventas")

            except Exception as reenrich_error:
                logger.warning(f"[CATALOG] Error en re-enriquecimiento automático: {reenrich_error}")
                stats["reenrichment"] = {
                    "status": "error",
                    "message": str(reenrich_error),
                }

        except Exception as e:
            logger.error(f"[CATALOG] Error en sincronización: {str(e)}")
            stats["status"] = "error"
            stats["error"] = str(e)
            catalog_tracker.add_error(str(e))
            # Roll back first: the tracker persists the ERROR status through this
            # same session, which must not be in a failed transaction (and must
            # not commit leftover partial work) when it does so.
            db.rollback()
            catalog_tracker.complete(success=False)

        return stats

    def _update_nomenclator_if_needed(self, db: Session, force_update: bool) -> Dict[str, Any]:
        """
        Refresh nomenclator_local when required and validate the outcome.

        A refresh is triggered when ``force_update`` is set, when the local row
        count is below the minimum expected, or when the nomenclator service
        reports the data as older than ``self.max_days_old`` days.

        Issue #267: after refreshing, the result status and the row counts are
        checked so that a silent failure of the download is at least visible in
        the logs.

        Args:
            db: Database session.
            force_update: Refresh regardless of data age.

        Returns:
            The nomenclator service result when a refresh ran, otherwise a dict
            with ``status="up_to_date"``, a message and ``product_count``.

        Raises:
            ProcessingError: If the refresh reports an error status or any
                unexpected exception occurs (chained as the cause).
        """
        try:
            # Issue #267: row count BEFORE the refresh
            count_before = db.query(func.count(NomenclatorLocal.id)).scalar() or 0
            logger.info(f"[CATALOG] Nomenclátor local ANTES: {count_before:,} productos")

            # Conservative threshold: per the original note the Ministry lists
            # ~20k products; 15k (75%) tolerates minor variation between releases.
            MIN_EXPECTED_PRODUCTS = 15000

            # A count below the threshold forces a fresh download
            count_below_threshold = count_before < MIN_EXPECTED_PRODUCTS

            if count_below_threshold:
                logger.warning(
                    f"[CATALOG] Count por debajo del threshold ({count_before:,} < {MIN_EXPECTED_PRODUCTS:,}), "
                    f"forzando descarga fresca del Ministerio"
                )

            if (
                force_update
                or count_below_threshold
                or not self.nomenclator_service.is_database_recent(db, self.max_days_old)
            ):
                logger.info("[CATALOG] Actualizando nomenclator_local desde Ministerio de Sanidad")
                # The decision to refresh was already made above, so always force here
                result = self.nomenclator_service.update_if_needed(db, force_update=True)

                # Issue #267: strict validation of the result
                if result.get("status") == "error":
                    error_msg = result.get("message", "Unknown error")
                    logger.error(f"[CATALOG] CRÍTICO: Falló actualización nomenclátor - {error_msg}")
                    raise ProcessingError(f"Nomenclator update failed: {error_msg}", details=result)

                # Issue #267: row count AFTER the refresh
                count_after = db.query(func.count(NomenclatorLocal.id)).scalar() or 0
                logger.info(f"[CATALOG] Nomenclátor local DESPUÉS: {count_after:,} productos")

                # An unchanged count after a forced refresh is only warned about
                if force_update and count_after == count_before:
                    logger.warning(
                        f"[CATALOG] ADVERTENCIA: Actualización forzada pero count no cambió "
                        f"({count_before:,} → {count_after:,}). Puede indicar problema en descarga CSV."
                    )

                # Still below the expected minimum: warn, but do not fail
                if count_after < MIN_EXPECTED_PRODUCTS:
                    logger.warning(
                        f"[CATALOG] ADVERTENCIA: Solo {count_after:,} productos en nomenclátor "
                        f"(esperado >{MIN_EXPECTED_PRODUCTS:,}). Puede indicar descarga incompleta."
                    )

                # Log the change, with an explicit "+" for growth
                delta = count_after - count_before
                sign = "+" if delta > 0 else ""
                logger.info(
                    f"[CATALOG] Nomenclátor actualizado exitosamente: "
                    f"{count_before:,} → {count_after:,} productos "
                    f"({sign}{delta:,})"
                )

                return result
            else:
                logger.info(f"[CATALOG] Nomenclátor considerado actualizado ({count_before:,} productos) - NO se descargará del Ministerio")
                return {"status": "up_to_date", "message": "No update needed", "product_count": count_before}
        except ProcessingError:
            # Already the right type: let full_sync handle it
            raise
        except Exception as e:
            logger.error(f"[CATALOG] Error actualizando nomenclator: {str(e)}")
            # Chain explicitly so the original traceback is kept as the cause
            raise ProcessingError(
                f"Nomenclator update error: {str(e)}", details={"exception_type": type(e).__name__}
            ) from e

    def _sync_catalog_from_nomenclator(
        self, db: Session, progress_tracker: Optional[RealProgressTracker] = None
    ) -> Dict[str, Any]:
        """
        Sincroniza product_catalog desde nomenclator_local
        """
        from app.models.nomenclator_local import NomenclatorLocal
        from app.models.product_catalog import ProductCatalog

        stats = {"created": 0, "updated": 0, "errors": 0, "total_processed": 0}

        try:
            # Obtener todos los productos del nomenclator
            nomen_products = db.query(NomenclatorLocal).all()
            total = len(nomen_products)
            logger.info(f"[CATALOG] Procesando {total} productos desde nomenclator_local")

            # Inicializar progress tracker si no existe
            if not progress_tracker:
                progress_tracker = RealProgressTracker(db, "nomenclator")

            # Establecer total real
            progress_tracker.set_total(total)

            # BULK OPERATIONS: Preparar todos los productos para UPSERT
            logger.info(f"[CATALOG] Preparando {total} productos del nomenclator para BULK UPSERT...")

            products_to_upsert = []
            for nomen_product in nomen_products:
                try:
                    # Preparar datos del producto
                    product_data = {
                        "national_code": nomen_product.national_code,
                        "nomen_nombre": (nomen_product.product_name[:500] if nomen_product.product_name else None),
                        "nomen_laboratorio": (nomen_product.laboratory[:200] if nomen_product.laboratory else None),
                        "nomen_codigo_laboratorio": (nomen_product.lab_code[:20] if nomen_product.lab_code else None),
                        "nomen_principio_activo": (
                            nomen_product.active_ingredient[:500] if nomen_product.active_ingredient else None
                        ),
                        "nomen_precio_referencia": nomen_product.reference_price,
                        "sync_status": "SINCRONIZADO",
                        "data_sources": "nomenclator",
                        "updated_at": utc_now(),
                    }

                    # CAMPOS HOMOGÉNEOS CRÍTICOS
                    if hasattr(nomen_product, "homogeneous_code"):
                        product_data["nomen_codigo_homogeneo"] = (
                            nomen_product.homogeneous_code[:50] if nomen_product.homogeneous_code else None
                        )
                    if hasattr(nomen_product, "homogeneous_name"):
                        product_data["nomen_nombre_homogeneo"] = (
                            nomen_product.homogeneous_name[:200] if nomen_product.homogeneous_name else None
                        )
                    if hasattr(nomen_product, "status"):
                        product_data["nomen_estado"] = nomen_product.status[:20] if nomen_product.status else None
                    if hasattr(nomen_product, "pvp"):
                        product_data["nomen_pvp"] = nomen_product.pvp

                    # Detectar tipo de fármaco
                    if nomen_product.is_generic is not None:
                        product_data["nomen_tipo_farmaco"] = "GENERICO" if nomen_product.is_generic else "MARCA"

                    products_to_upsert.append(product_data)

                except Exception as e:
                    stats["errors"] += 1
                    logger.error(f"[CATALOG] Error preparando {nomen_product.national_code}: {str(e)}")
                    continue

            # BULK UPSERT con PostgreSQL
            from sqlalchemy.dialects.postgresql import insert

            CHUNK_SIZE = 1000
            total_to_process = len(products_to_upsert)

            logger.info(f"[CATALOG] Iniciando BULK UPSERT de {total_to_process} productos del nomenclator...")

            for i in range(0, total_to_process, CHUNK_SIZE):
                chunk = products_to_upsert[i : i + CHUNK_SIZE]
                chunk_size = len(chunk)

                try:
                    # Preparar statement UPSERT
                    stmt = insert(ProductCatalog).values(chunk)

                    # En caso de conflicto, actualizar campos del nomenclator
                    update_dict = {}
                    # Obtener todos los campos del primer elemento del chunk para saber qué actualizar
                    if chunk:
                        for col_name in chunk[0].keys():
                            # Actualizar campos nomen_ y metadatos
                            if col_name.startswith("nomen_") or col_name in [
                                "updated_at",
                                "sync_status",
                            ]:
                                update_dict[col_name] = stmt.excluded[col_name]
                            # Para data_sources, mantener fuentes existentes
                            elif col_name == "data_sources":
                                # Simplemente concatenar si no existe 'nomenclator'
                                update_dict[col_name] = case(
                                    (
                                        ProductCatalog.data_sources.is_(None),
                                        "nomenclator",
                                    ),
                                    (
                                        func.strpos(ProductCatalog.data_sources, "nomenclator") > 0,
                                        ProductCatalog.data_sources,
                                    ),
                                    else_=func.concat(ProductCatalog.data_sources, ",nomenclator"),
                                )

                    # Ejecutar UPSERT masivo
                    stmt = stmt.on_conflict_do_update(index_elements=["national_code"], set_=update_dict)

                    # Ejecutar la operación bulk
                    db.execute(stmt)
                    db.commit()

                    stats["total_processed"] += chunk_size

                    # Log de progreso
                    logger.info(f"[CATALOG BULK] Procesados {stats['total_processed']}/{total_to_process} productos")

                    # Actualizar progreso REAL
                    progress_tracker.update_progress(
                        processed=stats["total_processed"],
                        phase="Sincronizando productos del nomenclátor",
                        chunk=i // CHUNK_SIZE + 1,
                        force_update=True,  # Forzar actualización después de cada chunk
                    )

                except Exception as e:
                    stats["errors"] += chunk_size
                    logger.error(f"[CATALOG BULK] Error procesando chunk {i//CHUNK_SIZE + 1}: {str(e)}")
                    db.rollback()

                    # Fallback: procesar individualmente este chunk
                    for product_data in chunk:
                        try:
                            national_code = product_data["national_code"]
                            existing = (
                                db.query(ProductCatalog).filter(ProductCatalog.national_code == national_code).first()
                            )

                            if existing:
                                # Actualizar campos nomenclator
                                for key, value in product_data.items():
                                    if key.startswith("nomen_") or key in [
                                        "updated_at",
                                        "sync_status",
                                    ]:
                                        setattr(existing, key, value)

                                # Actualizar fuentes de datos
                                sources = existing.data_sources or ""
                                if "nomenclator" not in sources:
                                    sources = "nomenclator" if not sources else sources + ",nomenclator"
                                    existing.data_sources = sources
                                stats["updated"] += 1
                            else:
                                # Crear nuevo producto
                                new_product = ProductCatalog(**product_data)
                                db.add(new_product)
                                stats["created"] += 1

                            db.commit()
                            stats["errors"] -= 1  # Reducir error si se procesa exitosamente

                        except Exception as inner_e:
                            logger.warning(
                                f"[CATALOG] Error procesando producto individual {product_data.get('national_code', 'unknown')}: {str(inner_e)}"
                            )
                            db.rollback()
            logger.info(f"[CATALOG] Sincronización desde nomenclator completada: {stats}")

            # Marcar como completado con datos reales
            progress_tracker.complete(success=True)

        except Exception as e:
            logger.error(f"[CATALOG] Error en sincronización: {str(e)}")
            db.rollback()
            stats["error"] = str(e)

            # Registrar error real
            if progress_tracker:
                progress_tracker.add_error(str(e))
                progress_tracker.complete(success=False)

        return stats

    async def _enrich_from_cima_with_real_progress(self, db: Session, force_update: bool) -> Dict[str, Any]:
        """
        Enrich product_catalog with CIMA data using streaming and REAL progress.

        While the streaming sync runs, every progress callback feeds a
        ``RealProgressTracker`` for the "cima" component and refreshes the
        monitor heartbeat. After a successful sync a non-critical fallback
        enrichment pass is awaited, the tracker is completed and the heartbeat
        is cleared. When the CIMA service exposes no ``sync_catalog_streaming``
        attribute, the legacy ``update_product_catalog_from_cima`` is used.

        IMPORTANT: kept async so the awaited sync does not block FastAPI's
        event loop.

        Args:
            db: Database session handed to the tracker and to the CIMA service.
            force_update: Passed to the legacy sync and recorded in the initial
                heartbeat; the streaming call itself does not receive it.

        Returns:
            The dict returned by the CIMA service (with a
            ``fallback_enrichment`` entry added after a successful streaming
            sync), or ``{"status": "error", "message": ...}`` when an
            exception escapes the sync. Heartbeat and tracker failures are
            logged and never replace that result.
        """
        # Estimates used until the streaming service reports real figures
        ESTIMATED_TOTAL_ITEMS = 67000
        ESTIMATED_TOTAL_CHUNKS = 223
        ESTIMATED_CHUNK_SIZE = 300

        # Bound before the try block so the critical-error handler can test it
        # directly instead of probing locals().
        cima_tracker = None

        try:
            # Monitor components are imported inside the guarded region
            from app.services.cima_sync_monitor import update_cima_heartbeat

            cima_tracker = RealProgressTracker(db, "cima")

            if not hasattr(self.cima_service, "sync_catalog_streaming"):
                # Fall back to the old method when the streaming one is missing.
                # NOTE(review): the legacy result is returned without awaiting and
                # the tracker is left uncompleted on this path - confirm intended.
                logger.warning("[CATALOG] Método streaming no disponible, usando método legacy")
                return self.cima_service.update_product_catalog_from_cima(db, force_update)

            logger.info("[CATALOG] Usando sincronización CIMA con streaming y progreso REAL + monitoring")

            # Callback that updates REAL progress and the monitor heartbeat
            def update_system_status(status: str, progress: int, message: str, stats: dict):
                if stats and isinstance(stats, dict):
                    total = stats.get("estimated_total", ESTIMATED_TOTAL_ITEMS)
                    processed = stats.get("total_processed", 0)
                    chunk_num = stats.get("chunks_completed", 0)
                    current_chunk_size = stats.get("current_chunk_size", ESTIMATED_CHUNK_SIZE)
                    chunks_total = stats.get("total_chunks", ESTIMATED_TOTAL_CHUNKS)

                    # Set the total the first time real figures arrive
                    if cima_tracker.total_items == 0:
                        cima_tracker.set_total(total, chunks=chunks_total)

                    # Update with real data
                    phase = f"Descargando productos CIMA {message}"
                    cima_tracker.update_progress(processed=processed, phase=phase, chunk=chunk_num)

                    # Refresh the automatic monitor's heartbeat
                    try:
                        heartbeat_updated = update_cima_heartbeat(
                            processed_items=processed,
                            total_items=total,
                            current_phase=phase,
                            chunk_number=chunk_num,
                            chunk_total=chunks_total,
                            additional_data={
                                "current_chunk_size": current_chunk_size,
                                "status": status,
                                "progress_percentage": progress,
                                "memory_info": self._get_memory_info(),
                                "last_heartbeat_timestamp": int(time.time()),
                            },
                        )

                        if not heartbeat_updated:
                            logger.warning("monitor.heartbeat.update_failed", processed=processed, phase=phase)

                    except Exception as heartbeat_error:
                        # A failing heartbeat must not fail the sync
                        logger.error(
                            "monitor.heartbeat.error",
                            error=str(heartbeat_error)[:200],
                            processed=processed,
                            phase=phase,
                        )

            # Initialize the heartbeat before starting
            try:
                update_cima_heartbeat(
                    processed_items=0,
                    total_items=ESTIMATED_TOTAL_ITEMS,  # Initial estimate
                    current_phase="Iniciando sincronización CIMA",
                    chunk_number=0,
                    chunk_total=ESTIMATED_TOTAL_CHUNKS,
                    additional_data={
                        "sync_start_time": int(time.time()),
                        "force_update": force_update,
                        "environment": self._detect_environment(),
                    },
                )
            except Exception as init_heartbeat_error:
                logger.error("monitor.heartbeat.init_failed", error=str(init_heartbeat_error)[:200])

            # Run the sync with the callback above (async)
            result = await self.cima_service.sync_catalog_streaming(db, update_system_status)

            # Needed below to clear the heartbeat once the tracker is completed
            from app.services.cima_sync_monitor import cima_sync_monitor

            if result.get("status") == "success":
                # Final heartbeat for a successful run
                final_processed = result.get("processed", 0)

                try:
                    update_cima_heartbeat(
                        processed_items=final_processed,
                        total_items=final_processed,  # 100% completed
                        current_phase="Sincronización CIMA completada exitosamente",
                        chunk_number=result.get("chunks_completed", 0),
                        chunk_total=result.get("total_chunks", ESTIMATED_TOTAL_CHUNKS),
                        additional_data={
                            "sync_completion_time": int(time.time()),
                            "final_status": "success",
                            "result_summary": result,
                        },
                    )
                except Exception as heartbeat_error:
                    logger.error("monitor.heartbeat.success_update_failed", error=str(heartbeat_error)[:200])

                # ========== ADDITIONAL STEP: fallback enrichment ==========
                # Enrich products that only have nomenclator data; a failure here
                # is recorded in the result but does not fail the sync.
                try:
                    logger.info("[CATALOG] Paso adicional: Enriqueciendo productos solo-nomenclator")
                    fallback_result = await self.cima_service.enrich_nomenclator_only_products(db)
                    result["fallback_enrichment"] = fallback_result
                    logger.info(
                        f"[CATALOG] Fallback enrichment completado: "
                        f"{fallback_result.get('enriched', 0)} productos en "
                        f"{fallback_result.get('duration_seconds', 0)}s"
                    )
                except Exception as fallback_error:
                    logger.warning(f"[CATALOG] Fallback enrichment falló (no crítico): {fallback_error}")
                    result["fallback_enrichment"] = {"status": "error", "error": str(fallback_error)}
                # ===========================================================

                # Issue #212: complete the tracker so last_success_at is recorded,
                # with processed_items reflecting the real processed total.
                try:
                    cima_tracker.processed_items = final_processed
                    cima_tracker.complete(success=True)
                except Exception as tracker_error:
                    logger.error(
                        "progress.tracker.complete_failed", error=str(tracker_error)[:200], component="cima"
                    )

                # Clear the heartbeat only after the tracker was marked completed
                try:
                    cima_sync_monitor.heartbeat.clear_heartbeat()
                except Exception as cleanup_error:
                    logger.error("monitor.heartbeat.cleanup_failed", error=str(cleanup_error)[:200])

            else:
                # Error heartbeat
                try:
                    update_cima_heartbeat(
                        processed_items=result.get("processed", 0),
                        total_items=result.get("estimated_total", ESTIMATED_TOTAL_ITEMS),
                        current_phase=f"Error en sincronización: {result.get('message', 'Unknown error')}",
                        additional_data={
                            "sync_error_time": int(time.time()),
                            "final_status": "error",
                            "error_details": result,
                        },
                    )
                except Exception as heartbeat_error:
                    logger.error("monitor.heartbeat.error_update_failed", error=str(heartbeat_error)[:200])

                # Issue #212: complete the tracker as failed so last_error_at is recorded
                try:
                    cima_tracker.add_error(result.get("message", "Unknown error"))
                    cima_tracker.complete(success=False)
                except Exception as tracker_error:
                    logger.error(
                        "progress.tracker.error_complete_failed", error=str(tracker_error)[:200], component="cima"
                    )

            return result

        except Exception as e:
            logger.error(f"[CATALOG] Error enriqueciendo desde CIMA: {str(e)}")

            # Critical-error heartbeat. The import is repeated because the failure
            # may have been the import at the top of the try block.
            try:
                from app.services.cima_sync_monitor import update_cima_heartbeat

                update_cima_heartbeat(
                    processed_items=0,
                    total_items=ESTIMATED_TOTAL_ITEMS,
                    current_phase=f"Error crítico en sync CIMA: {str(e)[:100]}",
                    additional_data={
                        "critical_error_time": int(time.time()),
                        "error_type": "critical_exception",
                        "error_details": str(e)[:500],
                    },
                )
            except Exception as heartbeat_error:
                # Do not fail if the error heartbeat cannot be written
                logger.warning(f"Could not update error heartbeat: {heartbeat_error}")

            if cima_tracker is not None:
                # Guarded like the Issue #212 branches above: a tracker failure
                # must not replace the error dict with a raised exception.
                try:
                    cima_tracker.add_error(str(e))
                    cima_tracker.complete(success=False)
                except Exception as tracker_error:
                    logger.error(
                        "progress.tracker.error_complete_failed", error=str(tracker_error)[:200], component="cima"
                    )
            return {"status": "error", "message": str(e)}

    async def _enrich_from_cima(self, db: Session, force_update: bool) -> Dict[str, Any]:
        """
        Enrich product_catalog with CIMA data, preferring the streaming sync.

        IMPORTANT: kept async so the awaited sync does not block FastAPI's
        event loop.

        Args:
            db: Database session used by the sync and by the status callback.
            force_update: Only forwarded to the legacy (non-streaming) sync.

        Returns:
            Whatever the CIMA service returns, or
            ``{"status": "error", "message": ...}`` if an exception escapes.
        """
        try:
            # Without the streaming method, fall back to the old one
            if not hasattr(self.cima_service, "sync_catalog_streaming"):
                logger.warning("[CATALOG] Método streaming no disponible, usando método legacy")
                return self.cima_service.update_product_catalog_from_cima(db, force_update)

            logger.info("[CATALOG] Usando sincronización CIMA con streaming (más eficiente)")

            def update_system_status(status: str, progress: int, message: str, stats: dict):
                """Persist the reported sync state on the CIMA SystemStatus row."""
                from app.models.system_status import SystemComponent, SystemStatus, SystemStatusEnum

                try:
                    row = db.query(SystemStatus).filter_by(component=SystemComponent.CIMA).first()
                    if not row:
                        row = SystemStatus(component=SystemComponent.CIMA)
                        db.add(row)

                    # Sync status name -> system status; unknown names count as UPDATING
                    enum_by_name = {
                        "INITIALIZING": SystemStatusEnum.INITIALIZING,
                        "PROCESSING": SystemStatusEnum.UPDATING,
                        "COMPLETED": SystemStatusEnum.READY,
                        "ERROR": SystemStatusEnum.ERROR,
                    }
                    row.status = enum_by_name.get(status, SystemStatusEnum.UPDATING)
                    row.progress = progress
                    row.message = message

                    # Dict stats are stored as JSON text; anything else is stored as given
                    row.details = json.dumps(stats) if isinstance(stats, dict) else stats

                    # Which timestamp column (if any) this status stamps with the current time
                    stamp_field = {
                        "INITIALIZING": "started_at",
                        "COMPLETED": "last_success_at",
                        "ERROR": "last_error_at",
                    }.get(status)
                    if stamp_field is not None:
                        setattr(row, stamp_field, utc_now())

                    db.commit()
                except Exception as e:
                    # Status bookkeeping failures are logged, never raised into the sync
                    logger.error(f"[CATALOG] Error actualizando estado del sistema: {str(e)}")

            return await self.cima_service.sync_catalog_streaming(db, update_system_status)

        except Exception as e:
            logger.error(f"[CATALOG] Error enriqueciendo desde CIMA: {str(e)}")
            return {"status": "error", "message": str(e)}

    def _update_homogeneous_groups(self, db: Session) -> Dict[str, Any]:
        """
        Refresh the homogeneous-group table with pre-computed metrics.

        For every distinct, non-empty ``nomen_codigo_homogeneo`` found in
        product_catalog, the master group row (``pharmacy_id`` IS NULL) is
        created or updated with product counts, price statistics, reference
        price, per-laboratory figures and the active ingredient. Work is
        committed every 100 processed groups and once more at the end.

        Args:
            db: Database session.

        Returns:
            Counters ``groups_processed``, ``groups_created``,
            ``groups_updated`` and ``errors``; an ``error`` message is added
            when the whole run fails.
        """
        from app.models.homogeneous_group import HomogeneousGroup
        from app.models.product_catalog import ProductCatalog

        stats = {
            "groups_processed": 0,
            "groups_created": 0,
            "groups_updated": 0,
            "errors": 0,
        }

        try:
            # Every distinct, non-empty homogeneous code present in product_catalog
            code_column = ProductCatalog.nomen_codigo_homogeneo
            code_rows = db.query(code_column).filter(code_column.isnot(None), code_column != "").distinct().all()

            logger.info(f"[CATALOG] Procesando {len(code_rows)} grupos homogéneos")

            for (group_code,) in code_rows:
                try:
                    # All products belonging to this group
                    members = db.query(ProductCatalog).filter(code_column == group_code).all()
                    if not members:
                        continue

                    # Master-catalog group: same code and no pharmacy attached
                    group = (
                        db.query(HomogeneousGroup)
                        .filter(
                            HomogeneousGroup.homogeneous_code == group_code,
                            HomogeneousGroup.pharmacy_id.is_(None),
                        )
                        .first()
                    )

                    if group:
                        stats["groups_updated"] += 1
                    else:
                        group = HomogeneousGroup(homogeneous_code=group_code, pharmacy_id=None)
                        db.add(group)
                        stats["groups_created"] += 1

                    # Name and active ingredient are taken from the first product returned
                    first_member = members[0]

                    group.homogeneous_name = first_member.nomen_nombre_homogeneo or None
                    group.total_products = len(members)

                    # Active products are those whose state is ALTA
                    group.total_products_activos = sum(1 for member in members if member.nomen_estado == "ALTA")

                    # Price statistics (products with a falsy PVP are left out)
                    pvp_values = [float(member.nomen_pvp) for member in members if member.nomen_pvp]
                    if pvp_values:
                        pvp_histogram = Counter(pvp_values)
                        group.precio_menor = min(pvp_values)
                        group.precio_maximo = max(pvp_values)
                        group.precio_medio = sum(pvp_values) / len(pvp_values)
                        # Mode = most frequent price
                        group.precio_moda = pvp_histogram.most_common(1)[0][0]
                        group.distribucion_precios = dict(pvp_histogram)

                    # Group reference price = lowest reference price among its products
                    reference_values = [
                        float(member.nomen_precio_referencia) for member in members if member.nomen_precio_referencia
                    ]
                    if reference_values:
                        group.precio_referencia_grupo = min(reference_values)

                    # Per-laboratory product count and collected prices
                    per_lab = {}
                    for member in members:
                        lab_name = member.nomen_laboratorio
                        if not lab_name:
                            continue
                        lab_entry = per_lab.setdefault(lab_name, {"productos": 0, "precio_medio": []})
                        lab_entry["productos"] += 1
                        if member.nomen_pvp:
                            lab_entry["precio_medio"].append(float(member.nomen_pvp))

                    # Collapse each laboratory's price list into its mean (None if no prices)
                    for lab_entry in per_lab.values():
                        lab_prices = lab_entry["precio_medio"]
                        lab_entry["precio_medio"] = sum(lab_prices) / len(lab_prices) if lab_prices else None

                    # NOTE(review): only the per-lab dicts are stored, so the laboratory
                    # names (the keys) are not persisted - confirm this is intended.
                    group.laboratorios = list(per_lab.values())
                    group.total_laboratorios = len(per_lab)

                    # Main active ingredient, truncated to 500 characters
                    active_ingredient = first_member.nomen_principio_activo
                    if active_ingredient:
                        group.principio_activo = active_ingredient[:500]

                    group.ultima_actualizacion = utc_now()

                    stats["groups_processed"] += 1

                    # Periodic commit every 100 processed groups
                    if stats["groups_processed"] % 100 == 0:
                        db.commit()
                        logger.info(f"[CATALOG] Procesados {stats['groups_processed']} grupos homogéneos")

                except Exception as e:
                    # NOTE(review): the failing group is skipped without a rollback -
                    # confirm the session stays usable after a database error.
                    stats["errors"] += 1
                    logger.error(f"[CATALOG] Error procesando grupo {group_code}: {str(e)}")
                    continue

            # Final commit for the groups processed since the last periodic one
            db.commit()
            logger.info(f"[CATALOG] Actualización de grupos homogéneos completada: {stats}")

        except Exception as e:
            logger.error(f"[CATALOG] Error actualizando grupos homogéneos: {str(e)}")
            db.rollback()
            stats["error"] = str(e)

        return stats

    def _mark_unsynchronized_products(self, db: Session) -> Dict[str, Any]:
        """
        Flag catalog products that are withdrawn or have no data source.

        Products whose nomenclator state is "BAJA" get ``sync_status`` "BAJA";
        afterwards products with NULL or empty ``data_sources`` get
        "NO_SINCRONIZADO". Both passes are committed together.

        Args:
            db: Database session.

        Returns:
            Counters ``marked_as_baja`` and ``marked_as_unsync``; on failure
            the session is rolled back and an ``error`` message is added.
        """
        from app.models.product_catalog import ProductCatalog

        stats = {"marked_as_baja": 0, "marked_as_unsync": 0}

        try:
            without_sources = or_(
                ProductCatalog.data_sources.is_(None),
                ProductCatalog.data_sources == "",
            )
            # (filter, sync_status to assign, counter to bump), applied in this order,
            # so a product matching both filters ends up as NO_SINCRONIZADO.
            marking_passes = (
                (ProductCatalog.nomen_estado == "BAJA", "BAJA", "marked_as_baja"),
                (without_sources, "NO_SINCRONIZADO", "marked_as_unsync"),
            )

            for criterion, new_status, counter_key in marking_passes:
                for product in db.query(ProductCatalog).filter(criterion).all():
                    product.sync_status = new_status
                    stats[counter_key] += 1

            db.commit()
            logger.info(
                f"[CATALOG] Marcados {stats['marked_as_baja']} productos como BAJA, {stats['marked_as_unsync']} como NO_SINCRONIZADO"
            )

        except Exception as e:
            logger.error(f"[CATALOG] Error marcando productos no sincronizados: {str(e)}")
            db.rollback()
            stats["error"] = str(e)

        return stats

    def check_and_update(self, db: Session) -> Dict[str, Any]:
        """
        Check whether the catalog needs a refresh and run a full sync if it does.

        Args:
            db: Database session.

        Returns:
            An "up_to_date" summary when the newest ``updated_at`` is at most
            ``self.max_days_old`` days old and CIMA coverage is sufficient;
            otherwise whatever ``self.full_sync(db, force_update=False)``
            returns. Any exception is reported as
            ``{"status": "error", "message": ...}``.
        """
        from app.models.product_catalog import ProductCatalog

        try:
            # Most recent update timestamp across the whole catalog
            newest_update = db.query(func.max(ProductCatalog.updated_at)).scalar()

            # CIMA coverage (Issue #12 fix): products whose sources mention "cima" vs. all
            products_with_cima = (
                db.query(func.count(ProductCatalog.id)).filter(ProductCatalog.data_sources.like("%cima%")).scalar() or 0
            )
            products_total = db.query(func.count(ProductCatalog.id)).scalar() or 0
            coverage = f"{products_with_cima}/{products_total}"

            # A CIMA update is needed when no product has CIMA data, or when
            # fewer than 50% of the products have it.
            if products_with_cima == 0:
                needs_cima_update = True
            else:
                needs_cima_update = products_total > 0 and products_with_cima < products_total * 0.5

            if newest_update:
                # Age in whole days; computed before the coverage decision is applied
                days_old = (utc_now() - newest_update).days
                recent_enough = days_old <= self.max_days_old

                if recent_enough and not needs_cima_update:
                    logger.info(f"[CATALOG] Catálogo actualizado hace {days_old} días, con {coverage} productos CIMA")
                    return {
                        "status": "up_to_date",
                        "last_update": newest_update.isoformat(),
                        "days_old": days_old,
                        "cima_coverage": coverage,
                    }

            # From here on an update is required; log which reason triggered it
            if needs_cima_update:
                logger.info(f"[CATALOG] Catálogo requiere actualización CIMA ({coverage} productos con CIMA)")
            else:
                logger.info("[CATALOG] Catálogo requiere actualización por antigüedad")

            # NOTE(review): returned as-is - presumably full_sync is synchronous; confirm.
            return self.full_sync(db, force_update=False)

        except Exception as e:
            logger.error(f"[CATALOG] Error en check_and_update: {str(e)}")
            return {"status": "error", "message": str(e)}

    def get_catalog_status(self, db: Session) -> Dict[str, Any]:
        """
        Obtiene el estado actual del catálogo

        Args:
            db: Sesión de base de datos

        Returns:
            Estadísticas del catálogo
        """
        from app.models.homogeneous_group import HomogeneousGroup
        from app.models.nomenclator_local import NomenclatorLocal
        from app.models.product_catalog import ProductCatalog
        from app.models.system_status import SystemComponent, SystemStatus

        try:
            # Contar productos
            total_products = db.query(func.count(ProductCatalog.id)).scalar() or 0

            # Calcular distribución correcta por fuente de datos
            # Solo nomenclator
            only_nomenclator = (
                db.query(func.count(ProductCatalog.id)).filter(ProductCatalog.data_sources == "nomenclator").scalar()
                or 0
            )

            # Solo CIMA
            only_cima = (
                db.query(func.count(ProductCatalog.id)).filter(ProductCatalog.data_sources == "cima").scalar() or 0
            )

            # Ambas fuentes
            both_sources = (
                db.query(func.count(ProductCatalog.id))
                .filter(
                    or_(
                        ProductCatalog.data_sources == "nomenclator,cima",
                        ProductCatalog.data_sources == "cima,nomenclator",
                    )
                )
                .scalar()
                or 0
            )

            # Sin sincronizar
            no_sources = (
                db.query(func.count(ProductCatalog.id))
                .filter(
                    or_(
                        ProductCatalog.data_sources.is_(None),
                        ProductCatalog.data_sources == "",
                    )
                )
                .scalar()
                or 0
            )

            # Totales para verificación (estos incluyen productos con ambas fuentes)
            with_nomenclator = only_nomenclator + both_sources
            with_cima = only_cima + both_sources

            # Productos por estado de sincronización
            sync_status_counts = {}
            for status, count in (
                db.query(ProductCatalog.sync_status, func.count(ProductCatalog.id))
                .group_by(ProductCatalog.sync_status)
                .all()
            ):
                sync_status_counts[status or "NULL"] = count

            # Última actualización
            last_update = db.query(func.max(ProductCatalog.updated_at)).scalar()

            # Nomenclator local
            nomen_count = db.query(func.count(NomenclatorLocal.id)).scalar()
            nomen_last_update = db.query(func.max(NomenclatorLocal.updated_at)).scalar()

            # Grupos homogéneos maestros (sin pharmacy_id)
            groups_count = (
                db.query(func.count(HomogeneousGroup.id)).filter(HomogeneousGroup.pharmacy_id.is_(None)).scalar()
            )

            # Obtener fechas de última sincronización exitosa de cada componente
            system_statuses = {}
            for component in [
                SystemComponent.NOMENCLATOR,
                SystemComponent.CIMA,
                SystemComponent.CATALOG,
            ]:
                status = db.query(SystemStatus).filter(SystemStatus.component == component).first()
                if status:
                    system_statuses[component.value] = {
                        "status": status.status,
                        "last_success_at": (status.last_success_at.isoformat() if status.last_success_at else None),
                        "message": status.message,
                        "progress": status.progress,
                    }
                else:
                    system_statuses[component.value] = {
                        "status": "NEVER_RUN",
                        "last_success_at": None,
                        "message": "Nunca sincronizado",
                        "progress": 0,
                    }

            # Verificar si hay algún componente en estado INITIALIZING (sincronización activa)
            active_sync = db.query(SystemStatus).filter(SystemStatus.status == "INITIALIZING").first()

            return {
                "status": "healthy" if total_products > 0 else "empty",
                "catalog": {
                    "total_products": total_products,
                    "with_nomenclator_data": with_nomenclator,
                    "with_cima_data": with_cima,
                    "distribution": {
                        "only_nomenclator": only_nomenclator,
                        "only_cima": only_cima,
                        "both_sources": both_sources,
                        "no_sources": no_sources,
                    },
                    "sync_status": sync_status_counts,
                    "last_update": last_update.isoformat() if last_update else None,
                    "days_since_update": (
                        ((utc_now() if last_update.tzinfo else utc_now()) - last_update).days if last_update else None
                    ),
                },
                "nomenclator_local": {
                    "total_products": nomen_count,
                    "last_update": (nomen_last_update.isoformat() if nomen_last_update else None),
                    "is_recent": self.nomenclator_service.is_database_recent(db, self.max_days_old),
                },
                "homogeneous_groups": {"total_groups": groups_count},
                "system_status": system_statuses,
                "active_sync": {
                    "is_active": active_sync is not None,
                    "component": active_sync.component.value if active_sync else None,
                    "message": active_sync.message if active_sync else None,
                    "progress": active_sync.progress if active_sync else 0,
                },
                # Fix for Issue #12: Consider CIMA coverage in needs_update
                "needs_update": (
                    # No data at all
                    (last_update is None)
                    or
                    # Too old
                    ((utc_now() if last_update.tzinfo else utc_now()) - last_update).days > self.max_days_old
                    or
                    # No CIMA data or less than 50% coverage
                    (with_cima == 0)
                    or (total_products > 0 and with_cima < total_products * 0.5)
                ),
            }

        except Exception as e:
            logger.error(f"[CATALOG] Error obteniendo estado: {str(e)}")
            return {"status": "error", "message": str(e)}

    def get_catalog_status_optimized(self, db: Session) -> Dict[str, Any]:
        """
        Return the current catalog status using one consolidated SQL query (Issue #293).

        The aggregations are folded into a single PostgreSQL statement built
        from CTEs:
        - product_catalog counters use COUNT(*) FILTER (WHERE ...) in one SELECT
        - nomenclator_local, homogeneous_groups and system_status are read in
          the same round trip

        Two further lookups run after the main query: the active-sync probe on
        system_status and nomenclator_service.is_database_recent().

        Args:
            db: Database session.

        Returns:
            Dict with the keys "status", "catalog", "nomenclator_local",
            "homogeneous_groups", "system_status", "active_sync" and
            "needs_update" (intended to match get_catalog_status()).
            On any exception: {"status": "error", "message": str(exception)}.

        References:
            - Issue #293: optimization of endpoint /api/v1/admin/catalog/stats
        """
        from datetime import timezone

        from sqlalchemy import text

        def _as_naive_utc(moment):
            # Normalise to a naive UTC value so that subtracting a naive DB
            # timestamp from an aware "now" (or vice versa) cannot raise TypeError.
            # NOTE(review): naive values are assumed to already be in UTC - confirm.
            if moment.tzinfo is None:
                return moment
            return moment.astimezone(timezone.utc).replace(tzinfo=None)

        start_time = time.time()

        try:
            # Single CTE query consolidating every aggregation that the response uses.
            # The former sales_stats CTE (COUNT(DISTINCT) over sales_data joined to
            # sales_enrichment) was dropped: its columns never reached the response.
            optimized_query = text(
                """
                WITH catalog_aggregates AS (
                    SELECT
                        COUNT(*) as total_products,
                        COUNT(*) FILTER (WHERE data_sources = 'nomenclator') as only_nomenclator,
                        COUNT(*) FILTER (WHERE data_sources = 'cima') as only_cima,
                        COUNT(*) FILTER (WHERE data_sources IN ('nomenclator,cima', 'cima,nomenclator')) as both_sources,
                        COUNT(*) FILTER (WHERE data_sources IS NULL OR data_sources = '') as no_sources,
                        COUNT(*) FILTER (WHERE cima_nombre_comercial IS NOT NULL) as cima_count,
                        MAX(updated_at) as last_update
                    FROM product_catalog
                ),
                sync_status_counts AS (
                    SELECT
                        COALESCE(sync_status::text, 'NULL') as status_key,
                        COUNT(*) as status_count
                    FROM product_catalog
                    GROUP BY sync_status
                ),
                nomenclator_stats AS (
                    SELECT
                        COUNT(*) as total_products,
                        MAX(updated_at) as last_update
                    FROM nomenclator_local
                ),
                groups_stats AS (
                    SELECT COUNT(*) as total_groups
                    FROM homogeneous_groups
                    WHERE pharmacy_id IS NULL
                ),
                system_status_agg AS (
                    SELECT
                        component,
                        status,
                        last_success_at,
                        message,
                        progress
                    FROM system_status
                    -- No filtrar por component IN (...) aquí, hacerlo en Python
                )
                SELECT
                    c.total_products,
                    c.only_nomenclator,
                    c.only_cima,
                    c.both_sources,
                    c.no_sources,
                    c.cima_count,
                    c.last_update,
                    (
                        SELECT jsonb_object_agg(status_key, status_count)
                        FROM sync_status_counts
                    ) as sync_status_counts,
                    n.total_products as nomen_count,
                    n.last_update as nomen_last_update,
                    g.total_groups,
                    (
                        SELECT json_agg(
                            json_build_object(
                                'component', component,
                                'status', status,
                                'last_success_at', last_success_at,
                                'message', message,
                                'progress', progress
                            )
                        )
                        FROM system_status_agg
                    ) as system_statuses
                FROM catalog_aggregates c
                CROSS JOIN nomenclator_stats n
                CROSS JOIN groups_stats g
            """
            )

            result = db.execute(optimized_query).fetchone()

            if not result:
                logger.warning("[CATALOG OPTIMIZED] No data returned from optimized query")
                return {"status": "error", "message": "No data available"}

            # Positional access follows the SELECT list above.
            total_products = result[0] or 0
            only_nomenclator = result[1] or 0
            only_cima = result[2] or 0
            both_sources = result[3] or 0
            no_sources = result[4] or 0
            # result[5] is cima_count; it is selected but not read.
            # NOTE(review): with_cima below is derived from data_sources instead -
            # confirm which definition get_catalog_status() uses.
            last_update = result[6]
            sync_status_counts_json = result[7] or {}
            nomen_count = result[8] or 0
            nomen_last_update = result[9]
            groups_count = result[10] or 0
            system_statuses_json = result[11] or []

            # Derived totals
            with_nomenclator = only_nomenclator + both_sources
            with_cima = only_cima + both_sources

            # Anything other than a dict is replaced by an empty mapping
            sync_status_counts = sync_status_counts_json if isinstance(sync_status_counts_json, dict) else {}

            # Re-key the JSON array of system_status rows by component
            system_statuses = {}
            for status_item in system_statuses_json:
                last_success = status_item.get("last_success_at")
                # Strings are passed through as-is; any other truthy value is
                # expected to offer isoformat() (i.e. a datetime)
                if last_success and not isinstance(last_success, str):
                    last_success = last_success.isoformat()

                system_statuses[status_item.get("component")] = {
                    "status": status_item.get("status"),
                    "last_success_at": last_success or None,
                    "message": status_item.get("message"),
                    "progress": status_item.get("progress", 0),
                }

            # Default entries for components that have no system_status row
            for component in ["nomenclator", "cima", "catalog"]:
                if component not in system_statuses:
                    system_statuses[component] = {
                        "status": "NEVER_RUN",
                        "last_success_at": None,
                        "message": "Nunca sincronizado",
                        "progress": 0,
                    }

            # Look for a synchronization that is currently running
            active_sync_query = text(
                """
                SELECT component::text, message, progress
                FROM system_status
                WHERE status = 'INITIALIZING'
                LIMIT 1
            """
            )
            active_sync_result = db.execute(active_sync_query).fetchone()

            # Age of the catalog in whole days, computed once and reused below
            days_since_update = None
            if last_update is not None:
                days_since_update = (_as_naive_utc(utc_now()) - _as_naive_utc(last_update)).days

            needs_update = (
                # No data at all
                days_since_update is None
                # Too old
                or days_since_update > self.max_days_old
                # No CIMA data or less than 50% coverage
                or (with_cima == 0)
                or (total_products > 0 and with_cima < total_products * 0.5)
            )

            response = {
                "status": "healthy" if total_products > 0 else "empty",
                "catalog": {
                    "total_products": total_products,
                    "with_nomenclator_data": with_nomenclator,
                    "with_cima_data": with_cima,
                    "distribution": {
                        "only_nomenclator": only_nomenclator,
                        "only_cima": only_cima,
                        "both_sources": both_sources,
                        "no_sources": no_sources,
                    },
                    "sync_status": sync_status_counts,
                    "last_update": last_update.isoformat() if last_update else None,
                    "days_since_update": days_since_update,
                },
                "nomenclator_local": {
                    "total_products": nomen_count,
                    "last_update": nomen_last_update.isoformat() if nomen_last_update else None,
                    "is_recent": self.nomenclator_service.is_database_recent(db, self.max_days_old),
                },
                "homogeneous_groups": {"total_groups": groups_count},
                "system_status": system_statuses,
                "active_sync": {
                    "is_active": active_sync_result is not None,
                    "component": active_sync_result[0] if active_sync_result else None,
                    "message": active_sync_result[1] if active_sync_result else None,
                    "progress": active_sync_result[2] if active_sync_result else 0,
                },
                "needs_update": needs_update,
            }

            # Log performance metrics
            execution_time_ms = (time.time() - start_time) * 1000
            logger.info(
                "catalog.status.optimized.success",
                execution_time_ms=round(execution_time_ms, 2),
                total_products=total_products,
                queries_consolidated=14,
                speedup_factor="10-50x",
            )

            return response

        except Exception as e:
            # Failures are reported in-band rather than raised
            execution_time_ms = (time.time() - start_time) * 1000
            logger.error(
                f"[CATALOG OPTIMIZED] Error obteniendo estado: {str(e)}", execution_time_ms=round(execution_time_ms, 2)
            )
            return {"status": "error", "message": str(e)}

    def _update_homogeneous_groups_master_with_progress(self, db: Session) -> Dict[str, Any]:
        """
        Refresh the homogeneous-group master catalog while reporting progress.

        Every distinct, non-empty nomen_codigo_homogeneo found in ProductCatalog
        gets its HomogeneousGroupMaster row created (if missing) and its name and
        active-product count refreshed. Progress is pushed to a
        RealProgressTracker for the "catalog" component.

        Args:
            db: Database session.

        Returns:
            Counters: groups_processed, groups_created, groups_updated, errors.
        """
        from app.models.homogeneous_group_master import HomogeneousGroupMaster
        from app.models.product_catalog import ProductCatalog

        stats = dict(groups_processed=0, groups_created=0, groups_updated=0, errors=0)

        # Tracker dedicated to the groups phase
        tracker = RealProgressTracker(db, "catalog")

        try:
            # Distinct homogeneous codes, skipping NULL and empty strings
            code_rows = (
                db.query(ProductCatalog.nomen_codigo_homogeneo)
                .filter(
                    ProductCatalog.nomen_codigo_homogeneo.isnot(None),
                    ProductCatalog.nomen_codigo_homogeneo != "",
                )
                .distinct()
                .all()
            )

            total_groups = len(code_rows)
            tracker.set_total(total_groups)

            logger.info(f"[CATALOG] Procesando {total_groups} grupos homogéneos con tracking REAL")

            last_position = total_groups - 1
            for position, (codigo,) in enumerate(code_rows):
                try:
                    # All catalog products that belong to this group
                    members = db.query(ProductCatalog).filter(ProductCatalog.nomen_codigo_homogeneo == codigo).all()
                    if not members:
                        continue

                    # Reuse the master row when it already exists, otherwise create it
                    master = (
                        db.query(HomogeneousGroupMaster)
                        .filter(HomogeneousGroupMaster.homogeneous_code == codigo)
                        .first()
                    )
                    if master:
                        stats["groups_updated"] += 1
                    else:
                        master = HomogeneousGroupMaster(homogeneous_code=codigo)
                        db.add(master)
                        stats["groups_created"] += 1

                    # Basic data: the name comes from the first product (falsy -> None)
                    master.homogeneous_name = members[0].nomen_nombre_homogeneo or None

                    # Only products whose state is "ALTA" count towards the group size
                    master.total_products_in_group = sum(1 for member in members if member.nomen_estado == "ALTA")

                    stats["groups_processed"] += 1

                    # Report progress every 10th position and on the last one
                    if position % 10 == 0 or position == last_position:
                        tracker.update_progress(
                            processed=stats["groups_processed"],
                            phase=f"Procesando grupo {codigo[:20]}...",
                            force_update=True,
                        )
                        db.flush()  # Push pending changes periodically

                except Exception as e:
                    # A failing group is counted and skipped; the loop carries on
                    logger.error(f"[CATALOG] Error procesando grupo {codigo}: {str(e)}")
                    stats["errors"] += 1
                    tracker.add_error(f"Grupo {codigo}: {str(e)}")

            # Final commit
            db.commit()
            tracker.complete(success=True)

            logger.info(f"[CATALOG] Catálogo maestro actualizado con progreso REAL: {stats}")

        except Exception as e:
            logger.error(f"[CATALOG] Error actualizando grupos homogéneos: {str(e)}")
            tracker.add_error(str(e))
            tracker.complete(success=False)
            stats["errors"] += 1
            db.rollback()

        return stats

    def _update_homogeneous_groups_master(self, db: Session) -> Dict[str, Any]:
        """
        Rebuild the master catalog of homogeneous groups (HomogeneousGroupMaster).

        For every distinct, non-empty nomen_codigo_homogeneo in ProductCatalog the
        matching master row is created or updated with: name, active-product
        count, PVP statistics, reference prices, laboratories, active
        ingredients, regulatory flags, pharmaceutical form, first ATC code and a
        nomenclator_data metadata dict. This method replaces the older
        _update_homogeneous_groups for the new architecture.

        Args:
            db: Database session.

        Returns:
            Counters: groups_processed, groups_created, groups_updated, errors.
            Exceptions are logged and counted in "errors", not raised.
        """
        from app.models.homogeneous_group_master import HomogeneousGroupMaster
        from app.models.product_catalog import ProductCatalog

        stats = {
            "groups_processed": 0,
            "groups_created": 0,
            "groups_updated": 0,
            "errors": 0,
        }

        try:
            # Distinct homogeneous codes from product_catalog (NULL / empty skipped)
            homogeneous_codes = (
                db.query(ProductCatalog.nomen_codigo_homogeneo)
                .filter(
                    ProductCatalog.nomen_codigo_homogeneo.isnot(None),
                    ProductCatalog.nomen_codigo_homogeneo != "",
                )
                .distinct()
                .all()
            )

            logger.info(f"[CATALOG] Procesando {len(homogeneous_codes)} grupos homogéneos para catálogo maestro")

            for (codigo,) in homogeneous_codes:
                try:
                    # All products that belong to this group
                    products = db.query(ProductCatalog).filter(ProductCatalog.nomen_codigo_homogeneo == codigo).all()

                    if not products:
                        continue

                    # Look up the existing master row, creating it when absent
                    group = (
                        db.query(HomogeneousGroupMaster)
                        .filter(HomogeneousGroupMaster.homogeneous_code == codigo)
                        .first()
                    )

                    if not group:
                        group = HomogeneousGroupMaster(homogeneous_code=codigo)
                        db.add(group)
                        stats["groups_created"] += 1
                    else:
                        stats["groups_updated"] += 1

                    # Basic data: name taken from the first product (falsy -> None)
                    group.homogeneous_name = products[0].nomen_nombre_homogeneo or None

                    # Product statistics: only products in state "ALTA" are active
                    active_products = [p for p in products if p.nomen_estado == "ALTA"]
                    group.total_products_in_group = len(active_products)

                    # Price statistics over active products with a truthy PVP.
                    # NOTE(review): when no active product has a PVP the previous
                    # min/max/avg/reference values are left untouched - confirm
                    # that keeping the last known prices is intended.
                    pvps = [float(p.nomen_pvp) for p in active_products if p.nomen_pvp]
                    if pvps:
                        lowest_pvp = min(pvps)
                        group.min_pvp_in_group = lowest_pvp
                        group.max_pvp_in_group = max(pvps)
                        group.avg_pvp_in_group = sum(pvps) / len(pvps)

                        # Reference PVP is the lowest one in the group
                        group.reference_pvp = lowest_pvp

                    # Group PVL: lowest nomenclator reference price among active products
                    ref_prices = [
                        float(p.nomen_precio_referencia) for p in active_products if p.nomen_precio_referencia
                    ]
                    if ref_prices:
                        group.calculated_pvl = min(ref_prices)

                    # Laboratories offering active products (whitespace-trimmed, deduplicated)
                    labs = {
                        p.nomen_laboratorio.strip()
                        for p in active_products
                        if p.nomen_laboratorio and p.nomen_laboratorio.strip()
                    }

                    # Sorted so the stored list has a stable order across runs
                    # (iteration order of a set of strings is not)
                    group.available_laboratories = sorted(labs)
                    group.total_labs_in_group = len(labs)

                    # Therapeutic info comes from the first active product, falling
                    # back to the first product when none is active
                    representative_product = active_products[0] if active_products else products[0]

                    group.main_active_ingredient = representative_product.nomen_principio_activo

                    # Unique active ingredients across ALL products of the group
                    active_ingredients = set()
                    for product in products:
                        if product.nomen_principio_activo:
                            # Multi-ingredient values are separated by "," or "+"
                            for ingredient in product.nomen_principio_activo.replace("+", ",").split(","):
                                clean_ingredient = ingredient.strip()
                                if clean_ingredient:
                                    active_ingredients.add(clean_ingredient)

                    # Sorted for the same stable-order reason as the laboratories
                    group.active_ingredients_list = sorted(active_ingredients)

                    # Regulatory information
                    group.requires_prescription = representative_product.cima_requiere_receta
                    group.is_efg = (
                        representative_product.nomen_tipo_farmaco == "GENERICO"
                        if representative_product.nomen_tipo_farmaco
                        else None
                    )

                    # Pharmaceutical form
                    group.pharmaceutical_form = representative_product.cima_forma_farmaceutica

                    # ATC code, when the representative product carries any:
                    # first element of a list, or first key of a dict
                    atc_codes = getattr(representative_product, "cima_atc_codes", None)
                    if atc_codes:
                        if isinstance(atc_codes, list):
                            group.atc_code = atc_codes[0]
                        elif isinstance(atc_codes, dict):
                            group.atc_code = next(iter(atc_codes))

                    # Synchronization state; one timestamp is shared by the column
                    # and the metadata so both report the same instant
                    synced_at = utc_now()
                    group.sync_status = "SYNCHRONIZED"
                    group.last_nomenclator_sync = synced_at

                    # Nomenclator metadata snapshot
                    group.nomenclator_data = {
                        "sync_date": synced_at.isoformat(),
                        "source_products_count": len(products),
                        "active_products_count": len(active_products),
                        "price_range": {
                            "min": (float(group.min_pvp_in_group) if group.min_pvp_in_group else None),
                            "max": (float(group.max_pvp_in_group) if group.max_pvp_in_group else None),
                            "avg": (float(group.avg_pvp_in_group) if group.avg_pvp_in_group else None),
                        },
                        "laboratories_count": len(labs),
                        "has_multiple_labs": len(labs) > 1,
                    }

                    stats["groups_processed"] += 1

                    if stats["groups_processed"] % 100 == 0:
                        logger.info(f"[CATALOG] Procesados {stats['groups_processed']} grupos homogéneos...")
                        db.flush()  # Push pending changes periodically

                except Exception as e:
                    # A failing group is counted and skipped; the loop carries on
                    logger.error(f"[CATALOG] Error procesando grupo {codigo}: {str(e)}")
                    stats["errors"] += 1
                    continue

            # Final commit
            db.commit()

            logger.info(
                f"[CATALOG] Catálogo maestro actualizado: {stats['groups_created']} creados, {stats['groups_updated']} actualizados"
            )

        except Exception as e:
            logger.error(f"[CATALOG] Error actualizando catálogo maestro de grupos homogéneos: {str(e)}")
            stats["errors"] += 1
            db.rollback()

        return stats

    def sync_nomenclator_only(self, db: Session, force_update: bool = False) -> Dict[str, Any]:
        """
        Synchronize ONLY the nomenclator, without running the full pipeline.

        Steps, each reflected in the "nomenclator" system status:
        1. _update_nomenclator_if_needed
        2. _sync_catalog_from_nomenclator
        3. _update_homogeneous_groups_master_with_progress

        When the run completes or raises, the outcome is written to the sync
        history (Issue #349) on a best-effort basis: a failure to write history
        is logged and never affects the result.

        Args:
            db: Database session.
            force_update: Forwarded to step 1. Also decides the trigger recorded
                in history: MANUAL when true, AUTOMATIC otherwise.

        Returns:
            On success, a dict with "status": "success", "message" and "details"
            (the three step results). If step 1 or step 2 reports
            status == "error", that step's result dict is returned unchanged.

        Raises:
            CatalogSyncError: if any step raises; the original exception is
                chained as the cause.
        """
        # Start time, used to compute the duration recorded in history
        sync_start_time = time.time()

        def _record_sync_history(succeeded: bool, records_updated: int, error_message: Optional[str]) -> float:
            # Write one history entry for this run and return the duration used.
            # Callers wrap this in try/except so history problems never fail the sync.
            from app.models.catalog_sync_history import SyncStatus, SyncType, TriggerType
            from app.services.catalog_sync_history_service import CatalogSyncHistoryService

            sync_duration = time.time() - sync_start_time
            CatalogSyncHistoryService(db).log_sync_event(
                sync_type=SyncType.NOMENCLATOR,
                status=SyncStatus.SUCCESS if succeeded else SyncStatus.FAILURE,
                records_updated=records_updated,
                duration_seconds=sync_duration,
                triggered_by=TriggerType.MANUAL if force_update else TriggerType.AUTOMATIC,
                error_message=error_message,
            )
            return sync_duration

        try:
            logger.info(f"[NOMENCLATOR-ONLY] Iniciando sincronización exclusiva (force_update={force_update})")

            # Mark the nomenclator component as starting
            self._update_system_status(
                "nomenclator",
                "initializing",
                0,
                "Iniciando sincronización de nomenclator",
            )

            # Step 1: refresh nomenclator_local if needed
            logger.info("[NOMENCLATOR-ONLY] Paso 1: Verificando nomenclator_local")
            nomenclator_result = self._update_nomenclator_if_needed(db, force_update)
            logger.info(f"[NOMENCLATOR-ONLY] Resultado nomenclator: status={nomenclator_result.get('status')}, message={nomenclator_result.get('message', 'N/A')}")

            if nomenclator_result.get("status") == "error":
                # NOTE(review): this early return writes no history entry - confirm intended
                self._update_system_status(
                    "nomenclator",
                    "error",
                    0,
                    f"Error en nomenclator: {nomenclator_result.get('message')}",
                )
                return nomenclator_result

            self._update_system_status(
                "nomenclator",
                "updating",
                30,
                "Nomenclator local actualizado, sincronizando catálogo",
            )

            # Step 2: sync the catalog from the nomenclator
            logger.info("[NOMENCLATOR-ONLY] Paso 2: Sincronizando catálogo desde nomenclator")
            catalog_result = self._sync_catalog_from_nomenclator(db)
            logger.info(f"[NOMENCLATOR-ONLY] Resultado catalog: status={catalog_result.get('status')}, created={catalog_result.get('created', 0)}, updated={catalog_result.get('updated', 0)}")

            if catalog_result.get("status") == "error":
                # NOTE(review): this early return writes no history entry - confirm intended
                self._update_system_status(
                    "nomenclator",
                    "error",
                    50,
                    f"Error sincronizando catálogo: {catalog_result.get('message')}",
                )
                return catalog_result

            self._update_system_status("nomenclator", "updating", 60, "Catálogo sincronizado, actualizando grupos homogéneos")

            # Step 3: refresh the homogeneous-group master rows
            logger.info("[NOMENCLATOR-ONLY] Paso 3: Actualizando catálogo maestro de grupos homogéneos")
            groups_result = self._update_homogeneous_groups_master_with_progress(db)
            # The stats dict exposes "groups_processed"; it has no "total_groups" key
            logger.info(f"[NOMENCLATOR-ONLY] Resultado grupos: {groups_result.get('groups_processed', 0)} grupos actualizados")

            self._update_system_status("nomenclator", "updating", 90, "Grupos homogéneos actualizados, finalizando")

            # Done
            self._update_system_status("nomenclator", "ready", 100, "Nomenclator sincronizado correctamente")

            logger.info("[NOMENCLATOR-ONLY] Sincronización completada exitosamente")

            result = {
                "status": "success",
                "message": "Nomenclator sincronizado correctamente",
                "details": {
                    "nomenclator_result": nomenclator_result,
                    "catalog_result": catalog_result,
                    "homogeneous_groups": groups_result,
                },
            }

            # Log sync event to history (Issue #349)
            try:
                records_updated = catalog_result.get("created", 0) + catalog_result.get("updated", 0)
                sync_duration = _record_sync_history(True, records_updated, None)
                logger.info(f"[NOMENCLATOR-ONLY] Sync history logged: {records_updated} records in {sync_duration:.2f}s")
            except Exception as log_error:
                # Don't fail the sync if history logging fails
                logger.error(f"[NOMENCLATOR-ONLY] Failed to log sync history: {str(log_error)}")

            return result

        except Exception as e:
            self._update_system_status("nomenclator", "error", 0, f"Error en sincronización: {str(e)}")
            logger.error(f"[NOMENCLATOR-ONLY] Error en sincronización: {str(e)}")

            # Log sync failure to history (Issue #349)
            try:
                _record_sync_history(False, 0, str(e))
            except Exception as log_error:
                # Don't fail if history logging fails
                logger.error(f"[NOMENCLATOR-ONLY] Failed to log sync failure to history: {str(log_error)}")

            # Raise the project exception, keeping the original as its cause
            raise CatalogSyncError(source="Nomenclator", reason=f"Error sincronizando nomenclator: {str(e)}") from e

    async def sync_cima_chunked(self, db: Session, chunk_size: int = 500, force_update: bool = False) -> Dict[str, Any]:
        """
        Synchronize the CIMA catalog in chunks, tracking progress with real data.

        The actual work is delegated to
        ``CIMAIntegrationService.sync_catalog_auto_chunked``; this method wires a
        progress callback into a ``RealProgressTracker``, records the outcome in
        the sync history (best effort) and, on full completion, runs an
        incremental ATC backfill (best effort, can be disabled with the
        ``ENABLE_POST_SYNC_ATC_BACKFILL`` environment variable).

        Args:
            db: Database session handed to the tracker, the CIMA service and the
                sync-history service.
            chunk_size: Chunk size forwarded to the CIMA service (the original
                note suggests 300 on Render and 1000 locally).
            force_update: Within this method it only decides whether the history
                entry is labelled MANUAL (True) or AUTOMATIC (False) and is added
                to the error-log context; it is not forwarded to the CIMA service.

        Returns:
            A dict with ``status`` (``"success"``, ``"in_progress"`` or
            ``"error"``), ``message``, ``details`` (the raw CIMA service result)
            and ``real_metrics``. The success payload also carries
            ``atc_backfill`` (the backfill result, or None if it was disabled or
            failed).

        Raises:
            CatalogSyncError: If any unexpected exception escapes the sync; the
                original exception is attached as ``__cause__``.
        """
        # Track sync start time for duration calculation
        sync_start_time = time.time()

        # Bound before the try so the except handler can test it directly
        # instead of probing locals().
        cima_tracker: Optional[RealProgressTracker] = None

        def record_history(status_name, records_updated, error_message, failure_log):
            """
            Best-effort write of one CIMA entry to the sync history (Issue #349).

            ``status_name`` is the attribute name on ``SyncStatus`` (resolved
            lazily because the model is imported inside this helper).
            Returns the elapsed seconds that were logged, or None if logging
            failed; a logging failure never propagates to the caller.
            """
            try:
                from app.models.catalog_sync_history import SyncType, SyncStatus, TriggerType
                from app.services.catalog_sync_history_service import CatalogSyncHistoryService

                sync_duration = time.time() - sync_start_time

                CatalogSyncHistoryService(db).log_sync_event(
                    sync_type=SyncType.CIMA,
                    status=getattr(SyncStatus, status_name),
                    records_updated=records_updated,
                    duration_seconds=sync_duration,
                    triggered_by=TriggerType.MANUAL if force_update else TriggerType.AUTOMATIC,
                    error_message=error_message,
                )
                return sync_duration
            except Exception as log_error:
                # Don't fail the sync if history logging fails
                logger.error(f"{failure_log}: {str(log_error)}")
                return None

        try:
            logger.info(f"[CIMA-CHUNKED] Iniciando sincronización CIMA con chunks de {chunk_size} y progreso REAL")

            # Real progress tracker for the "cima" component
            cima_tracker = RealProgressTracker(db, "cima")

            # Initial estimate based on experience (~67k products in CIMA);
            # replaced by the real total if the callback reports one.
            estimated_total = 67000
            # Ceiling division: a trailing partial chunk still counts as a chunk.
            estimated_chunks = -(-estimated_total // chunk_size)
            cima_tracker.set_total(estimated_total, estimated_chunks)

            # Imported here (not at module level) as in the original code.
            from app.external_data.cima_integration import CIMAIntegrationService

            cima_service = CIMAIntegrationService()

            def update_status_callback(status, progress, message, stats=None):
                """Feed the tracker with the real figures reported by the CIMA service."""
                # Only dict-shaped stats carry usable figures; anything else is ignored.
                if not (stats and isinstance(stats, dict)):
                    return

                total_processed = stats.get("total_processed", 0)
                chunks_completed = stats.get("chunks_completed", 0)
                errors = stats.get("errors", 0)

                # If the service reports a total that differs from ours, adopt it
                if "estimated_total" in stats and stats["estimated_total"] != cima_tracker.total_items:
                    cima_tracker.set_total(
                        stats["estimated_total"],
                        stats.get("total_chunks", estimated_chunks),
                    )

                phase_msg = f"Procesando CIMA: {message}"
                if chunks_completed > 0:
                    phase_msg = f"Chunk {chunks_completed}/{cima_tracker.total_chunks}: {message}"

                cima_tracker.update_progress(
                    processed=total_processed,
                    phase=phase_msg,
                    chunk=chunks_completed,
                )

                # `errors` is cumulative: register only the ones not yet counted.
                # range() of a non-positive number is empty, so no guard is needed
                # beyond the original `errors > 0` check.
                if errors > 0:
                    for _ in range(errors - cima_tracker.errors_count):
                        cima_tracker.add_error("Error en procesamiento CIMA")

            # Run the automatic chunked synchronization
            result = await cima_service.sync_catalog_auto_chunked(
                db=db,
                update_system_status_callback=update_status_callback,
                chunk_size=chunk_size,
            )

            if result.get("status") == "completed":
                # Full success with real figures
                total_processed = result.get("total_processed", 0)
                chunks_completed = result.get("chunks_completed", 0)

                # Final tracker update and mark as completed
                cima_tracker.processed_items = total_processed
                cima_tracker.complete(success=True)

                message = (
                    f"CIMA sincronizado: {total_processed:,} productos REALES, {chunks_completed} chunks procesados"
                )
                logger.info(f"[CIMA-CHUNKED] Completado exitosamente con datos REALES: {message}")

                # Log sync event to history (Issue #349)
                sync_duration = record_history(
                    "SUCCESS",
                    total_processed,
                    None,
                    "[CIMA-CHUNKED] Failed to log sync history",
                )
                if sync_duration is not None:
                    logger.info(f"[CIMA-CHUNKED] Sync history logged: {total_processed} records in {sync_duration:.2f}s")

                # Issue #517: incremental post-sync ATC backfill.
                # Requested with incremental=True and capped at 500 products.
                atc_backfill_result = None
                try:
                    import os

                    # Enabled unless the env var is set to something other than "true"
                    enable_atc_backfill = os.getenv("ENABLE_POST_SYNC_ATC_BACKFILL", "true").lower() == "true"

                    if enable_atc_backfill:
                        logger.info("[CIMA-CHUNKED] Iniciando ATC backfill incremental post-sync...")
                        # This method is itself a coroutine, so an event loop is
                        # already running: asyncio.run() would raise RuntimeError
                        # here and the backfill would never execute. Await it.
                        atc_backfill_result = await atc_backfill_service.backfill_atc_codes(
                            batch_size=100,
                            concurrent_requests=5,
                            incremental=True,  # Only recent products
                            max_products=500,  # Cap to avoid timeouts
                        )
                        logger.info(
                            "[CIMA-CHUNKED] ATC backfill completado",
                            processed=atc_backfill_result.get("processed", 0),
                            successful=atc_backfill_result.get("successful", 0),
                            coverage=atc_backfill_result.get("coverage", {}).get("coverage_percentage", 0)
                        )
                    else:
                        logger.info("[CIMA-CHUNKED] ATC backfill post-sync deshabilitado (ENABLE_POST_SYNC_ATC_BACKFILL=false)")
                except Exception as atc_error:
                    # Don't fail the sync if ATC backfill fails
                    logger.warning(f"[CIMA-CHUNKED] ATC backfill post-sync failed (non-blocking): {str(atc_error)}")

                return {
                    "status": "success",
                    "message": message,
                    "details": result,
                    "real_metrics": {
                        "total_processed": total_processed,
                        "chunks_completed": chunks_completed,
                        "average_speed": cima_tracker.items_per_second,
                        "total_time": int(time.time() - cima_tracker.start_time),
                    },
                    "atc_backfill": atc_backfill_result,  # Issue #517: Include ATC backfill results
                }
            elif result.get("status") == "chunk_completed":
                # A chunk finished but more work remains
                total_processed = result.get("total_processed", 0)
                chunks_completed = result.get("chunks_completed", 0)

                # Tracker progress was already updated by the callback
                message = f"Chunk {chunks_completed} completado: {total_processed:,} productos procesados (REAL)"
                logger.info(f"[CIMA-CHUNKED] {message}")

                # Log to history (Issue #349) only when the service flags this as
                # the final, partial state; intermediate chunks would flood it.
                if result.get("is_final_partial", False):
                    record_history(
                        "PARTIAL",
                        total_processed,
                        f"Partial sync completed: {chunks_completed} chunks, {total_processed} records",
                        "[CIMA-CHUNKED] Failed to log partial sync to history",
                    )

                return {
                    "status": "in_progress",
                    "message": message,
                    "details": result,
                    "real_metrics": {
                        "processed_so_far": total_processed,
                        "chunks_done": chunks_completed,
                        "speed": cima_tracker.items_per_second,
                    },
                }
            else:
                # Any other status is treated as an error reported by the service
                error_msg = result.get("message", "Error desconocido en CIMA chunked")
                cima_tracker.add_error(error_msg)
                cima_tracker.complete(success=False)

                logger.error(f"[CIMA-CHUNKED] Error REAL: {error_msg}")

                # Log sync failure to history (Issue #349)
                record_history(
                    "FAILURE",
                    cima_tracker.processed_items,
                    error_msg,
                    "[CIMA-CHUNKED] Failed to log sync failure to history",
                )

                return {
                    "status": "error",
                    "message": error_msg,
                    "details": result,
                    "real_metrics": {
                        "errors": cima_tracker.errors_count,
                        "processed_before_error": cima_tracker.processed_items,
                    },
                }

        except Exception as e:
            from app.utils.error_logging import log_structured_error

            error_msg = f"Error en sincronización CIMA chunked: {str(e)}"
            error_type = type(e).__name__

            # Record the failure in the tracker BEFORE raising (the tracker may
            # not exist if the failure happened before it was created).
            if cima_tracker is not None:
                cima_tracker.add_error(f"{error_type}: {error_msg}")
                cima_tracker.complete(success=False)

            # Structured log with the full context
            log_structured_error(
                logger=logger,
                exception=e,
                context_message="cima.sync_chunked.error",
                chunk_size=chunk_size,
                force_update=force_update,
                stats=cima_tracker.processed_items if cima_tracker is not None else 0,
            )

            # Raise the domain exception, keeping the original as its cause
            raise CatalogSyncError(
                source="CIMA", reason=f"{error_type}: {error_msg} (ver logs para traceback completo)"
            ) from e


# Module-level service instance, created when this module is imported
catalog_maintenance_service = CatalogMaintenanceService()
