# backend/app/services/file_processing_service.py
"""
Servicio de procesamiento de archivos en thread separado.

ISSUE #332: Solución a uploads atascados en Render.
Usa threading para sobrevivir al ciclo de vida del HTTP request.

MEJORAS CRÍTICAS (Code Review):
- Timeout enforcement real con threading.Timer
- Límite de threads concurrentes con threading.Semaphore (max 5)
- Session cleanup verificado en finally block
"""
import os
import threading
import time
from pathlib import Path
from typing import Optional

import pandas as pd
import structlog
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import NullPool

from app.models import FileUpload, ProductCatalog, SalesData, UploadStatus
from app.services.prescription_classification_service import PrescriptionClassificationService
from app.utils.datetime_utils import utc_now

logger = structlog.get_logger(__name__)

# Seconds after which the timeout timer started by the processing thread fires
# (10 minutes).
MAX_PROCESSING_TIME = 10 * 60  # seconds

# Size of the semaphore that bounds how many uploads are processed at once
# (protection for the single-worker Render deployment).
MAX_CONCURRENT_UPLOADS = 5

# Number of DataFrame rows handled per insert batch; read once at import time
# from the UPLOAD_BATCH_SIZE environment variable (default 500).
UPLOAD_BATCH_SIZE = int(os.getenv("UPLOAD_BATCH_SIZE", "500"))

# Sub-batch size for a fallback path used when a main batch fails.
# NOTE(review): not referenced in the visible part of this file — confirm it is
# still used elsewhere.
SUB_BATCH_SIZE = 50


class FileProcessingService:
    """
    Servicio para procesar archivos de upload en threads separados.

    Thread-safe: Cada procesamiento usa su propia sesión de DB independiente.

    PROTECCIONES:
    - Timeout enforcement: Procesamiento cancelado después de 10 minutos
    - Límite de concurrencia: Máximo 5 uploads simultáneos
    - Session cleanup: Siempre se cierra la sesión de DB
    """

    def __init__(self):
        """
        Set up the dedicated engine, the session factory and the concurrency guards.

        The engine is built with NullPool, so no connections are pooled or
        shared between the threads that use this service.
        """
        # NOTE(review): the fallback URL embeds development credentials; it is
        # only used when DATABASE_URL is not set — confirm this never applies
        # in production.
        dsn = os.getenv(
            "DATABASE_URL",
            "postgresql://xfarma_user:xfarma_dev_2024@localhost:5432/xfarma_db"
        )
        self.engine = create_engine(dsn, poolclass=NullPool, echo=False)
        self.SessionLocal = sessionmaker(
            bind=self.engine,
            autoflush=False,
            autocommit=False,
        )

        # Bounds the number of uploads processed at the same time.
        self.upload_semaphore = threading.Semaphore(MAX_CONCURRENT_UPLOADS)

        # Registry of running timeout timers (upload_id -> threading.Timer) so
        # they can be cancelled when processing ends first; the lock guards
        # every access to the dict.
        self._active_timers = {}
        self._timers_lock = threading.Lock()

    def process_file_in_thread(
        self,
        upload_id: str,
        file_path: str,
        contents: bytes,
        pharmacy_id: str,
        erp_type: Optional[str],
    ):
        """
        Run the whole upload pipeline for one file; meant to be a thread target.

        The method owns every resource it needs (DB session, timeout timer,
        semaphore slot), so it does not depend on the HTTP request that
        launched it.

        Safeguards:
        - Concurrency limit: waits up to 30 s for one of the
          MAX_CONCURRENT_UPLOADS semaphore slots. If none frees up, the upload
          is marked as ERROR instead of being left in its initial state.
        - Timeout: a threading.Timer calls `_handle_timeout` after
          MAX_PROCESSING_TIME seconds. That callback only marks the upload as
          ERROR in the database; it does not interrupt this thread.
        - Cleanup: the timer is cancelled, the session closed and the
          semaphore released on every exit path.

        Exceptions raised by the pipeline are logged and recorded on the
        upload row; they are not re-raised.

        Args:
            upload_id: ID of the upload record.
            file_path: Path where the file will be written.
            contents: File contents, already read into memory.
            pharmacy_id: ID of the pharmacy that owns the upload.
            erp_type: ERP type (optional).
        """
        # Guard 1: bound the number of uploads processed at the same time.
        acquired = self.upload_semaphore.acquire(blocking=True, timeout=30)
        if not acquired:
            logger.error(
                "[CONCURRENCY] No se pudo adquirir semaphore después de 30s",
                upload_id=upload_id,
                max_concurrent=MAX_CONCURRENT_UPLOADS,
            )
            # Without this the record would keep its initial status forever,
            # because nothing else is going to process it.
            busy_db = None
            try:
                busy_db = self.SessionLocal()
                upload = busy_db.query(FileUpload).filter(FileUpload.id == upload_id).first()
                if upload:
                    upload.status = UploadStatus.ERROR
                    upload.error_message = (
                        "Servidor ocupado: no se pudo iniciar el procesamiento. "
                        "Inténtalo de nuevo en unos minutos."
                    )
                    upload.processing_completed_at = utc_now()
                    busy_db.commit()
            except Exception as mark_error:
                logger.error(
                    "[CONCURRENCY] Error actualizando estado de error",
                    upload_id=upload_id,
                    error=str(mark_error),
                )
                if busy_db is not None:
                    busy_db.rollback()
            finally:
                if busy_db is not None:
                    busy_db.close()
            return

        start_time = time.time()
        db = None
        timeout_timer = None

        try:
            # Created inside the try so that a failure here still reaches the
            # finally block and releases the semaphore slot.
            db = self.SessionLocal()

            logger.info(
                "[THREAD] Iniciando procesamiento de archivo (semaphore acquired)",
                upload_id=upload_id,
                pharmacy_id=pharmacy_id,
                file_path=file_path,
            )

            # Guard 2: timer that flags the upload as ERROR if processing
            # takes longer than MAX_PROCESSING_TIME.
            timeout_timer = threading.Timer(
                MAX_PROCESSING_TIME,
                self._handle_timeout,
                args=[upload_id]
            )
            timeout_timer.start()

            # Register the running timer (thread-safe).
            with self._timers_lock:
                self._active_timers[upload_id] = timeout_timer

            # Make sure the upload record exists before doing any work.
            # NOTE(review): `_parse_file` stores the parsed DataFrame on the
            # ORM instance and `_save_data_to_database` reads it back from the
            # object returned by a fresh query; that presumably relies on this
            # reference keeping the same instance alive in the session.
            upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
            if not upload:
                logger.error("[THREAD] Upload no encontrado", upload_id=upload_id)
                return

            # PHASE 1: write the file to disk
            self._save_file_to_disk(db, upload_id, file_path, contents)

            # PHASE 2: parse the file
            self._parse_file(db, upload_id, file_path, pharmacy_id, erp_type)

            # PHASE 3: validate and store the data (batch processing)
            self._save_data_to_database(db, upload_id, pharmacy_id)

            # PHASE 4: enrichment
            self._trigger_enrichment(db, upload_id, pharmacy_id)

            elapsed = time.time() - start_time
            logger.info(
                "[THREAD] Procesamiento completado exitosamente",
                upload_id=upload_id,
                elapsed_seconds=elapsed,
            )

        except Exception as e:
            logger.error(
                "[THREAD] Error procesando archivo",
                upload_id=upload_id,
                error=str(e),
                error_type=type(e).__name__,
                exc_info=True,
            )

            # Record the failure on the upload row.
            if db is not None:
                try:
                    # A failed statement leaves the transaction unusable; roll
                    # it back first or the query below fails as well and the
                    # upload is never flagged as ERROR.
                    db.rollback()
                    upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
                    if upload:
                        upload.status = UploadStatus.ERROR
                        upload.error_message = f"Error procesando archivo: {str(e)}"
                        upload.processing_completed_at = utc_now()
                        db.commit()
                except Exception as commit_error:
                    logger.error(
                        "[THREAD] Error actualizando estado de error",
                        upload_id=upload_id,
                        error=str(commit_error),
                    )
                    db.rollback()

        finally:
            try:
                # Guard 3: cancel the timeout timer if processing ended first.
                if timeout_timer:
                    timeout_timer.cancel()
                    logger.debug("[THREAD] Timer de timeout cancelado", upload_id=upload_id)

                # Remove the timer from the registry (thread-safe).
                with self._timers_lock:
                    self._active_timers.pop(upload_id, None)

                # Guard 4: always close the DB session.
                if db is not None:
                    try:
                        db.close()
                        logger.debug("[THREAD] Sesión de DB cerrada", upload_id=upload_id)
                    except Exception as close_error:
                        logger.error(
                            "[THREAD] Error cerrando sesión de DB",
                            upload_id=upload_id,
                            error=str(close_error),
                        )
            finally:
                # Guard 5: release the slot even if the cleanup above failed,
                # otherwise the service slowly loses capacity.
                self.upload_semaphore.release()
                logger.debug("[THREAD] Semaphore liberado", upload_id=upload_id)

    def _handle_timeout(self, upload_id: str):
        """
        Timer callback: flag an upload as ERROR once the time limit has elapsed.

        Works on a session of its own so it never shares state with the
        session used by the processing thread. Only the database record is
        updated here; nothing in this method signals or stops the worker.

        Args:
            upload_id: ID of the upload that exceeded the time limit.
        """
        logger.error(
            "[TIMEOUT] Procesamiento excedió el tiempo límite",
            upload_id=upload_id,
            max_seconds=MAX_PROCESSING_TIME,
        )

        session = self.SessionLocal()
        try:
            record = session.query(FileUpload).filter(FileUpload.id == upload_id).first()
            if not record:
                # Nothing to update; the finally clause still closes the session.
                return

            record.status = UploadStatus.ERROR
            record.error_message = f"Procesamiento excedió el tiempo límite de {MAX_PROCESSING_TIME // 60} minutos"
            record.processing_completed_at = utc_now()
            session.commit()
            logger.info(
                "[TIMEOUT] Upload marcado como ERROR en BD",
                upload_id=upload_id,
            )
        except Exception as exc:
            logger.error(
                "[TIMEOUT] Error actualizando upload a ERROR",
                upload_id=upload_id,
                error=str(exc),
            )
            session.rollback()
        finally:
            session.close()

    def _save_file_to_disk(
        self,
        db: Session,
        upload_id: str,
        file_path: str,
        contents: bytes
    ):
        """
        Write the uploaded bytes to disk and record the progress on the upload.

        On failure the upload is marked as ERROR (best effort) and the
        original exception is re-raised.

        Args:
            db: Database session of the processing thread.
            upload_id: ID of the upload record.
            file_path: Destination path; missing parent directories are created.
            contents: Raw file contents.

        Raises:
            ValueError: If the upload record does not exist.
            Exception: Whatever the commit or the file write raised.
        """
        upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
        if not upload:
            raise ValueError(f"Upload {upload_id} no encontrado")

        try:
            logger.info(f"[SAVE] Guardando archivo en disco: {file_path}")
            upload.processing_notes = "📤 Subiendo archivo al servidor..."
            db.commit()

            # Write the file, creating the directory tree if needed.
            file_path_obj = Path(file_path)
            file_path_obj.parent.mkdir(parents=True, exist_ok=True)
            with open(file_path, "wb") as f:
                f.write(contents)

            logger.info(f"[SAVE] Archivo guardado exitosamente: {file_path}")

            # Store the final path and advance the progress note.
            upload.file_path = str(file_path)
            upload.processing_notes = "✓ Archivo recibido. Analizando formato..."
            db.commit()

        except Exception as e:
            logger.error(f"[SAVE] Error guardando archivo: {str(e)}")

            # Everything below is best effort: a failure while recording the
            # error must not replace the original exception.
            try:
                # If the failure came from the database the transaction is no
                # longer usable; roll it back before touching the row again.
                db.rollback()
                db.refresh(upload)

                upload.status = UploadStatus.ERROR
                upload.error_message = f"Error guardando archivo: {str(e)}"
                upload.processing_completed_at = utc_now()
                db.commit()
                logger.info(f"[SAVE] Upload {upload_id} marcado como ERROR exitosamente")
            except Exception as commit_err:
                logger.error(
                    f"[SAVE] CRITICAL: Failed to commit ERROR status for {upload_id}: {commit_err}",
                    exc_info=True
                )
                db.rollback()

            raise

    def _parse_file(
        self,
        db: Session,
        upload_id: str,
        file_path: str,
        pharmacy_id: str,
        erp_type: Optional[str]
    ):
        """
        Parse the ERP file and move the upload through PARSING to VALIDATING.

        The parsed DataFrame and the parser statistics are attached to the
        upload instance as `_temp_df` / `_temp_stats` for the next phase.

        Args:
            db: Database session of the processing thread.
            upload_id: ID of the upload record.
            file_path: Path of the file written in the previous phase.
            pharmacy_id: ID of the pharmacy that owns the upload.
            erp_type: ERP type hint passed to the parser (optional).

        Raises:
            ValueError: If the upload record does not exist or the parser
                returns an empty DataFrame.
            Exception: Whatever the parser or the database raised; the upload
                is marked as ERROR (best effort) before re-raising.
        """
        from app.parsers.parser_factory import ParserFactory

        upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
        if not upload:
            raise ValueError(f"Upload {upload_id} no encontrado")

        try:
            # Move to PARSING
            upload.status = UploadStatus.PARSING
            upload.processing_started_at = utc_now()
            upload.processing_notes = "🔍 Analizando formato del archivo..."
            db.commit()

            # The session is handed to the parser as well (the original note
            # says it auto-updates pharmacy.erp_type).
            df, stats = ParserFactory.parse_file(file_path, pharmacy_id, upload_id, erp_type, db)

            if df.empty:
                upload.status = UploadStatus.ERROR
                upload.error_message = "No se pudieron procesar datos del archivo"
                upload.processing_completed_at = utc_now()
                db.commit()
                raise ValueError("DataFrame vacío después de parseo")

            # Issue #412: store non-blocking warnings (e.g. encoding fallback).
            # Build a new list and assign it: in-place mutation of a JSON
            # column value is not detected by the ORM unless the column uses
            # mutation tracking, so an append alone could be lost on commit.
            warnings = list(upload.warnings or [])
            if stats.get("encoding_fallback"):
                # `or` also covers a key that is present with a None value.
                encoding_used = stats.get("encoding_used") or "desconocido"
                warnings.append({
                    "type": "encoding_fallback",
                    "message": f"Archivo detectado como {encoding_used.upper()} (encoding típico de ERPs españoles)",
                    "technical": f"UTF-8 falló, se usó {encoding_used}"
                })
                logger.info(f"[ENCODING] Fallback a {encoding_used} - warning agregado")
            upload.warnings = warnings

            # Move to VALIDATING
            upload.status = UploadStatus.VALIDATING
            upload.processing_notes = f"✓ Formato detectado. Validando {len(df):,} registros..."
            upload.rows_total = len(df)
            upload.rows_processed = 0
            db.commit()

            # Hand the parsed data to the next phase through the instance.
            upload._temp_df = df
            upload._temp_stats = stats

        except Exception as e:
            logger.error(f"[PARSE] Error parseando archivo: {str(e)}")

            # Everything below is best effort: a failure while recording the
            # error must not replace the original exception.
            try:
                # The parser shares this session, so the transaction may be in
                # a failed state; roll it back before touching the row again.
                db.rollback()
                db.refresh(upload)

                upload.status = UploadStatus.ERROR
                upload.error_message = f"Error parseando archivo: {str(e)}"
                upload.processing_completed_at = utc_now()
                db.commit()
                logger.info(f"[PARSE] Upload {upload_id} marcado como ERROR exitosamente")
            except Exception as commit_err:
                logger.error(
                    f"[PARSE] CRITICAL: Failed to commit ERROR status for {upload_id}: {commit_err}",
                    exc_info=True
                )
                db.rollback()

            raise

    def _save_data_to_database(
        self,
        db: Session,
        upload_id: str,
        pharmacy_id: str
    ):
        """
        Store the parsed sales rows in the database using batch inserts.

        Implements Issue #330: duplicate detection by time window.

        Pipeline:
        1. Read the parsed DataFrame from `upload._temp_df`.
        2. Query the time window (min/max sale datetime) already stored for
           the pharmacy.
        3. Process the rows in batches of UPLOAD_BATCH_SIZE, committing each
           batch and updating the progress fields.
        4. Write the final counters and set the next status: PROCESSING when
           rows were saved, ERROR when nothing was saved and at least one
           batch failed, COMPLETED when every row was skipped as duplicate.

        A missing or empty DataFrame marks the upload as ERROR and returns
        without raising.

        Args:
            db: Database session of the processing thread.
            upload_id: ID of the upload record.
            pharmacy_id: ID of the pharmacy that owns the upload.

        Raises:
            ValueError: If the upload record does not exist.
            Exception: If the window query or the final statistics update fails.
        """

        logger.info(f"[SAVE_DB] Iniciando guardado de datos para upload {upload_id}")

        upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
        if not upload:
            raise ValueError(f"Upload {upload_id} no encontrado")

        # Parsed data left on the instance by the parsing phase.
        df = getattr(upload, '_temp_df', None)
        stats = getattr(upload, '_temp_stats', {})

        # Reject anything that is not a DataFrame.
        if df is not None and not isinstance(df, pd.DataFrame):
            logger.error(f"[ERROR] _temp_df no es DataFrame: {type(df)}")
            upload.status = UploadStatus.ERROR
            upload.error_message = "Error interno: _temp_df no es DataFrame"
            upload.processing_completed_at = utc_now()
            db.commit()
            return

        if df is None or df.empty:
            logger.warning(f"[SAVE_DB] DataFrame vacío para upload {upload_id}")
            upload.status = UploadStatus.ERROR
            upload.error_message = "No hay datos para guardar"
            upload.processing_completed_at = utc_now()
            db.commit()
            return

        # ========================================================================
        # PHASE 1: time window used for duplicate detection
        # ========================================================================
        # NOTE(review): these two values are not used below. They are kept
        # because evaluating them raises right away when the "sale_date"
        # column is missing, instead of silently inserting rows without date.
        min_date = df["sale_date"].min()
        max_date = df["sale_date"].max()

        try:
            upload.processing_notes = "🔄 Verificando registros existentes..."
            db.commit()

            # Native SQL on purpose: the original note says func.min/max with
            # cast() returned None in production. pharmacy_id is bound as a
            # string and PostgreSQL casts it to UUID.
            result = db.execute(
                text("""
                    SELECT
                        MIN(sale_date + sale_time::INTERVAL) as min_dt,
                        MAX(sale_date + sale_time::INTERVAL) as max_dt
                    FROM sales_data
                    WHERE pharmacy_id = :pharmacy_id
                """),
                {"pharmacy_id": str(pharmacy_id)}
            ).first()

            # The aggregate query returns (None, None) when there are no rows.
            if result and result[0] is not None:
                bd_min_dt, bd_max_dt = result[0], result[1]
            else:
                bd_min_dt, bd_max_dt = None, None

            existing_sales_window = (bd_min_dt, bd_max_dt)

            if bd_min_dt and bd_max_dt:
                window_days = (bd_max_dt - bd_min_dt).days
                logger.info(
                    f"[DUPLICATE_CHECK] Ventana existente: {bd_min_dt} → {bd_max_dt} "
                    f"({window_days} días, {len(df)} registros nuevos a procesar)",
                    extra={
                        "pharmacy_id": pharmacy_id,
                        "window_start": bd_min_dt.isoformat() if bd_min_dt else None,
                        "window_end": bd_max_dt.isoformat() if bd_max_dt else None,
                        "window_days": window_days,
                        "new_records_count": len(df),
                    },
                )
                # Issue #412: informational, logged at INFO level.
                logger.info(
                    f"[VENTANA_TEMPORAL] Detectada ventana: {bd_min_dt.isoformat()} → {bd_max_dt.isoformat()} "
                    f"({window_days} días). Procesando {len(df)} registros."
                )
            else:
                logger.info("[DUPLICATE_CHECK] No hay ventas existentes - primera carga")
                # Issue #412: informational, logged at INFO level.
                logger.info(
                    f"[DUPLICATE_CHECK] PRIMERA CARGA: No se encontraron ventas existentes para pharmacy {pharmacy_id}. "
                    f"Todos los {len(df)} registros serán insertados sin filtrar."
                )

        except Exception as e:
            logger.error(f"[ERROR] Error cargando duplicados: {str(e)}")
            # Best effort: record the failure without replacing the original
            # exception.
            try:
                # A failed statement aborts the transaction; without this
                # rollback the status update below would fail too.
                db.rollback()
                upload = db.query(FileUpload).filter(FileUpload.id == upload_id).with_for_update().first()
                if upload:
                    upload.status = UploadStatus.ERROR
                    upload.error_message = f"Error cargando duplicados: {str(e)}"
                    upload.processing_completed_at = utc_now()
                    db.commit()
            except Exception as commit_err:
                logger.error(
                    f"[ERROR] No se pudo marcar el upload {upload_id} como ERROR: {commit_err}",
                    exc_info=True
                )
                db.rollback()
            raise

        # ========================================================================
        # PHASE 2: process the batches with duplicate detection
        # ========================================================================
        records_saved = 0
        records_duplicated = 0
        # Rows that belonged to a batch whose processing or insert failed.
        records_failed = 0
        # range() rejects a zero step, so guard against a bad env value.
        batch_size = max(1, UPLOAD_BATCH_SIZE)

        upload.status = UploadStatus.SAVING
        upload.processing_notes = f"💾 Guardando {len(df):,} registros en base de datos..."
        db.commit()

        for idx in range(0, len(df), batch_size):
            batch_df = df.iloc[idx : idx + batch_size]
            batch_records = None

            try:
                # Build the records for this batch, skipping duplicates.
                batch_records, batch_duplicates = self._process_sales_batch(
                    db, batch_df, existing_sales_window, pharmacy_id, upload_id
                )

                records_duplicated += batch_duplicates

                # Bulk insert of the batch
                if batch_records:
                    committed = False
                    try:
                        db.bulk_save_objects(batch_records)
                        db.commit()  # Commit FIRST
                        committed = True
                        records_saved += len(batch_records)

                        # Progress update (row locked with with_for_update).
                        upload = db.query(FileUpload).filter(FileUpload.id == upload_id).with_for_update().first()
                        if upload:
                            progress_pct = int((idx + len(batch_df)) / len(df) * 100)
                            upload.rows_processed = idx + len(batch_df)
                            upload.processing_notes = (
                                f"Guardando datos... {progress_pct}% "
                                f"({records_saved:,} nuevos, {records_duplicated:,} duplicados omitidos)"
                            )
                            db.commit()

                        logger.info(
                            f"[BATCH] Guardados {len(batch_records)} registros "
                            f"(total: {records_saved}, duplicados batch: {batch_duplicates})"
                        )

                    except Exception as e:
                        # The rows already passed the window filter, so this is
                        # a real failure, not a duplicate.
                        logger.error(
                            f"[BATCH] CRITICAL: bulk_save falló después de filtrar duplicados. "
                            f"Error: {e}. Esto NO debería pasar si la ventana temporal funciona correctamente."
                        )
                        if not committed:
                            # Only count the rows when the insert itself was
                            # lost (not when just the progress update failed).
                            records_failed += len(batch_records)
                        db.rollback()
                        # No fallback: the failure is surfaced in the final
                        # status instead of being hidden.

            except Exception as e:
                logger.error(f"[BATCH] Error procesando batch: {e}")
                if batch_records is None:
                    # The batch could not even be converted into records.
                    records_failed += len(batch_df)
                # Continue with the next batch even if this one fails.

        # ========================================================================
        # PHASE 3: final statistics
        # ========================================================================
        logger.info(
            f"[INFO] Guardado completado: {records_saved} nuevos registros guardados, "
            f"{records_duplicated} duplicados omitidos"
        )
        if records_failed:
            logger.error(
                f"[BATCH] {records_failed} registros no se guardaron por fallos de batch "
                f"(upload {upload_id})"
            )

        try:
            # Row locked with with_for_update while the final counters are written.
            upload = db.query(FileUpload).filter(FileUpload.id == upload_id).with_for_update().first()
            if not upload:
                logger.error(f"[ERROR] Upload {upload_id} not found after processing")
                return

            if records_saved > 0:
                # Rows were stored: move on to enrichment.
                upload.status = UploadStatus.PROCESSING
                upload.processing_notes = (
                    f"Datos guardados ({records_saved:,} nuevos, {records_duplicated:,} duplicados). "
                    f"Iniciando enriquecimiento con CIMA/nomenclator..."
                )
            elif records_failed > 0:
                # Nothing was stored and at least one batch failed: reporting
                # "no new data" here would hide a real data loss.
                upload.status = UploadStatus.ERROR
                upload.error_message = (
                    f"No se pudo guardar ningún registro nuevo: {records_failed:,} registros "
                    f"fallaron al insertarse en la base de datos"
                )
                upload.processing_completed_at = utc_now()
            else:
                # No new rows at all: the upload is finished.
                upload.status = UploadStatus.COMPLETED
                upload.processing_notes = (
                    f"Sin datos nuevos. Todos los {records_duplicated} registros ya existían en la base de datos."
                )
                upload.processing_completed_at = utc_now()

            upload.rows_total = stats.get("total_rows", 0)
            upload.rows_processed = records_saved + records_duplicated
            upload.rows_with_errors = stats.get("error_rows", 0)
            upload.rows_duplicates = records_duplicated

            # Record the detected ERP type when the parser provided one.
            if "erp_type" in df.columns and not df["erp_type"].empty:
                from app.models import FileType
                detected_type = df["erp_type"].iloc[0]
                if detected_type == "farmatic":
                    upload.file_type = FileType.FARMATIC
                elif detected_type == "farmanager":
                    upload.file_type = FileType.FARMANAGER

            db.commit()
            logger.info(f"[OK] Archivo procesado: {records_saved} registros guardados")

            # Read by the enrichment phase to decide whether there is work to do.
            upload._records_saved = records_saved

        except Exception as e:
            logger.error(f"[ERROR] Error actualizando stats finales: {str(e)}")
            db.rollback()
            raise

    def _process_sales_batch(
        self,
        db: Session,
        batch_df,
        existing_sales_window: tuple,
        pharmacy_id: str,
        upload_id: str
    ) -> tuple[list, int]:
        """
        Turn one batch of parsed rows into SalesData objects, skipping duplicates.

        Duplicate rule (deliberately coarse, no per-row query): a row whose
        sale datetime falls inside the window already stored for the pharmacy
        is counted as a duplicate and skipped; any other row is kept. When
        there is no window, or the row has no datetime, the row is kept.

        A row that raises while being converted is logged and skipped; it is
        counted neither as a record nor as a duplicate.

        NOTE(review): the date/time check relies on truthiness, and NaN is
        truthy, so a missing time stored as NaN takes the non-string branch,
        fails on `.hour` and the row is skipped by the generic handler —
        confirm the parser emits None for missing times.

        Args:
            db: Database session (not used by this method).
            batch_df: DataFrame holding the rows of this batch.
            existing_sales_window: Tuple (bd_min_dt, bd_max_dt) with the
                stored time window; both are None on a first load.
            pharmacy_id: ID of the pharmacy.
            upload_id: ID of the upload.

        Returns:
            Tuple (batch_records, batch_duplicates): the SalesData objects to
            insert and the number of rows skipped as duplicates.
        """
        from app.models.sales_data import SalesData
        from datetime import timedelta

        # Columns copied verbatim from the row into the record.
        passthrough_columns = (
            "ean13",
            "codigo_nacional",
            "product_name",
            "subcategory",
            "quantity",
            "unit_price",
            "purchase_price",
            "sale_price",
            "total_amount",
            "discount_amount",
            "margin_amount",
            "margin_percentage",
            "supplier",
            "employee_code",
            "employee_name",
            "client_type",
            "client_code",
            "client_name",
        )

        def _clock_parts(raw_time):
            """Return (hours, minutes, seconds) for a time string or time-like object."""
            if not isinstance(raw_time, str):
                # datetime/time object
                return raw_time.hour, raw_time.minute, raw_time.second
            try:
                # "HH:MM" or "HH:MM:SS"
                pieces = raw_time.split(":")
                hh = int(pieces[0]) if len(pieces) > 0 else 0
                mm = int(pieces[1]) if len(pieces) > 1 else 0
                ss = int(pieces[2]) if len(pieces) > 2 else 0
            except (ValueError, IndexError):
                # Unparseable string: fall back to midnight.
                logger.warning(f"[PARSE_TIME] Error parseando time string '{raw_time}', usando 00:00:00")
                return 0, 0, 0
            return hh, mm, ss

        window_start, window_end = existing_sales_window
        to_insert = []
        skipped = 0

        for _, row in batch_df.iterrows():
            try:
                sale_day = row.get("sale_date")
                sale_clock = row.get("sale_time")

                # Full timestamp = date + time of day when both are present.
                if sale_day and sale_clock:
                    hh, mm, ss = _clock_parts(sale_clock)
                    moment = sale_day + timedelta(hours=hh, minutes=mm, seconds=ss)
                else:
                    moment = sale_day

                # Inside the stored window -> assumed duplicate, skip it.
                if window_start and window_end and moment:
                    if window_start <= moment <= window_end:
                        skipped += 1
                        logger.debug(
                            f"[DUPLICATE_SKIP] {row.get('product_name')} @ {moment} "
                            f"→ SKIP (dentro de ventana [{window_start}, {window_end}])"
                        )
                        continue
                    logger.debug(
                        f"[INSERT] {row.get('product_name')} @ {moment} "
                        f"→ INSERT (fuera de ventana [{window_start}, {window_end}])"
                    )

                # Python weekday() is 0=Mon..6=Sun; stored as 1=Mon..7=Sun.
                if sale_day:
                    weekday_number = sale_day.weekday() + 1
                else:
                    weekday_number = None

                copied = {column: row.get(column) for column in passthrough_columns}
                to_insert.append(
                    SalesData(
                        pharmacy_id=pharmacy_id,
                        upload_id=upload_id,
                        sale_date=sale_day,
                        sale_time=sale_clock,
                        weekday=weekday_number,
                        is_return=row.get("is_return", False),
                        **copied,
                    )
                )

            except Exception as exc:
                # Best effort: one bad row must not drop the whole batch.
                logger.warning(
                    f"[BATCH] Error procesando registro individual: {str(exc)}"
                )

        return to_insert, skipped

    def _trigger_enrichment(
        self,
        db: Session,
        upload_id: str,
        pharmacy_id: str
    ):
        """
        Run the post-upload enrichment pipeline for one upload.

        Despite the name, nothing is dispatched asynchronously here: every step
        runs synchronously on the calling thread, so the pipeline has finished
        (or failed) by the time this method returns.

        Pipeline steps, in execution order:
        1. Enrich the sales rows (``enrichment_service.enrich_sales_batch``).
        2. Initialise partners
           (``PharmacyPartnersService.calculate_and_update_partners``).
        3. Classify catalog products referenced by this upload that have no
           prescription category yet (Issue #436).
        4. Re-run the enrichment once when fewer than 90% of the processed
           rows were enriched.
        5. Persist the final status through
           ``_update_upload_with_pipeline_results``.

        Every step has its own ``try``/``except``: a failure is recorded in that
        step's result dict and the following steps still run. After a failed
        step the session is rolled back, because SQLAlchemy refuses further
        work on a session whose flush/commit failed until ``rollback()`` is
        called; without it the later steps and the final status update would
        fail as well.

        Args:
            db: SQLAlchemy session used for every query and commit in the pipeline.
            upload_id: ID of the ``FileUpload`` row being processed.
            pharmacy_id: ID of the pharmacy that owns the upload.

        Returns:
            None. Returns early, without changing the upload, when the upload
            row is not found or when its ``_records_saved`` attribute is
            missing or 0.
        """
        # Function-scope imports are kept as in the original code.
        from app.services.enrichment_service import enrichment_service
        from app.services.pharmacy_partners_service import PharmacyPartnersService

        def _reset_session(task_tag: str) -> None:
            # Discard whatever the failed step left pending so the session is
            # usable again. The rollback itself is guarded so that a broken
            # connection cannot abort the remaining pipeline steps.
            try:
                db.rollback()
            except Exception as rollback_error:
                logger.error(
                    f"[{task_tag}] Error en rollback de sesión: {str(rollback_error)}",
                    extra={"upload_id": upload_id}
                )

        logger.info(f"[ENRICH] Iniciando enriquecimiento para upload {upload_id}")
        pipeline_start = time.time()

        upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
        if not upload:
            logger.error(f"[ENRICH] Upload {upload_id} no encontrado")
            return

        # `_records_saved` is read with a default of 0, so an upload object that
        # does not carry the attribute is treated as having nothing to enrich.
        records_saved = getattr(upload, '_records_saved', 0)

        if records_saved == 0:
            logger.info(f"[ENRICH] Sin registros para enriquecer (upload {upload_id})")
            return

        upload.status = UploadStatus.PROCESSING
        upload.processing_notes = "🔗 Enriqueciendo productos con catálogo CIMA..."
        db.commit()

        # ========================================================================
        # TASK 1: Data enrichment
        # ========================================================================
        try:
            logger.info(
                f"[ENRICHMENT_TASK] Iniciando enriquecimiento de {records_saved} registros",
                extra={"upload_id": upload_id}
            )

            enrichment_start = time.time()
            enrichment_stats = enrichment_service.enrich_sales_batch(db, pharmacy_id, upload_id)
            enrichment_time = time.time() - enrichment_start

            logger.info(
                f"[ENRICHMENT_TASK] Completado en {enrichment_time:.2f}s",
                extra={"upload_id": upload_id, "stats": enrichment_stats}
            )

            enrichment_result = {
                "success": True,
                "stats": enrichment_stats,
                "processing_time": enrichment_time
            }

        except Exception as e:
            logger.error(
                f"[ENRICHMENT_TASK] Error en enriquecimiento: {str(e)}",
                extra={"upload_id": upload_id, "error_type": type(e).__name__}
            )
            _reset_session("ENRICHMENT_TASK")
            enrichment_result = {
                "success": False,
                "error": str(e),
                "stats": {}
            }

        # ========================================================================
        # TASK 2: Partner initialisation (runs even if enrichment failed)
        # ========================================================================
        try:
            logger.info(
                "[PARTNERS_TASK] Iniciando inicialización automática",
                extra={"upload_id": upload_id, "pharmacy_id": pharmacy_id}
            )

            partners_start = time.time()
            partners_service = PharmacyPartnersService()
            result = partners_service.calculate_and_update_partners(db, pharmacy_id)
            partners_time = time.time() - partners_start

            suggested_count = result.get("auto_suggested_count", 0)

            logger.info(
                f"[PARTNERS_TASK] Completado en {partners_time:.2f}s - {suggested_count} partners sugeridos",
                extra={"upload_id": upload_id}
            )

            partners_result = {
                "success": result.get("success", False),
                "count": suggested_count,
                "message": result.get("message", ""),
                "processing_time": partners_time
            }

        except Exception as e:
            logger.error(
                f"[PARTNERS_TASK] Error en partners: {str(e)}",
                extra={"upload_id": upload_id, "error_type": type(e).__name__}
            )
            _reset_session("PARTNERS_TASK")
            partners_result = {
                "success": False,
                "error": str(e),
                "count": 0
            }

        # ========================================================================
        # TASK 3: Prescription classification (Issue #436)
        # ========================================================================
        classification_result = {"success": True, "classified": 0, "skipped": 0}
        try:
            logger.info(
                "[CLASSIFICATION_TASK] Iniciando clasificación de prescripción",
                extra={"upload_id": upload_id, "pharmacy_id": pharmacy_id}
            )

            classification_start = time.time()

            from app.services.prescription_classification_service import (
                PrescriptionClassificationService,
            )
            from app.models.prescription_reference_list import PrescriptionReferenceList
            from app.models.enums import PrescriptionCategory

            # Load the whole reference list once and hand it to the service as
            # a dict, instead of querying per product.
            reference_records = db.query(PrescriptionReferenceList).all()
            reference_map = {
                r.national_code: PrescriptionCategory(r.category)
                for r in reference_records
            }
            logger.debug(f"[CLASSIFICATION_TASK] Reference map construido: {len(reference_map)} entradas")

            from app.models.sales_data import SalesData
            from app.models.sales_enrichment import SalesEnrichment

            # Subquery: distinct non-null national codes sold in this upload.
            upload_codes = (
                db.query(SalesData.codigo_nacional)
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesData.upload_id == upload_id,
                    SalesData.codigo_nacional.isnot(None)
                )
                .distinct()
                .subquery()
            )

            # Catalog products sold in this upload that have no category yet.
            products_to_classify = (
                db.query(ProductCatalog)
                .filter(
                    ProductCatalog.national_code.in_(upload_codes),
                    ProductCatalog.xfarma_prescription_category.is_(None)
                )
                .all()
            )

            if products_to_classify:
                logger.info(
                    f"[CLASSIFICATION_TASK] Clasificando {len(products_to_classify)} productos sin categoría"
                )

                classification_service = PrescriptionClassificationService(
                    db=db,
                    reference_map=reference_map
                )

                classification_stats = classification_service.bulk_classify(
                    products_to_classify, dry_run=False
                )

                classification_result = {
                    "success": True,
                    "classified": classification_stats.get("classified_count", 0),
                    "skipped": classification_stats.get("skipped_otc_count", 0),
                    "total": classification_stats.get("total_products", 0),
                    "processing_time": time.time() - classification_start
                }

                db.commit()

                logger.info(
                    f"[CLASSIFICATION_TASK] Completado - Clasificados: {classification_result['classified']}, "
                    f"OTC: {classification_result['skipped']}",
                    extra={"upload_id": upload_id, "stats": classification_result}
                )
            else:
                logger.info(
                    "[CLASSIFICATION_TASK] No hay productos nuevos para clasificar",
                    extra={"upload_id": upload_id}
                )
                classification_result["processing_time"] = time.time() - classification_start

        except Exception as e:
            logger.error(
                f"[CLASSIFICATION_TASK] Error en clasificación: {str(e)}",
                extra={"upload_id": upload_id, "error_type": type(e).__name__}
            )
            _reset_session("CLASSIFICATION_TASK")
            classification_result = {
                "success": False,
                "error": str(e),
                "classified": 0
            }

        # ========================================================================
        # AUTO-RETRY when the enrichment rate is below 90%
        # ========================================================================
        if enrichment_result.get("success") and enrichment_result.get("stats"):
            first_pass_stats = enrichment_result["stats"]
            processed = first_pass_stats.get("processed", 0)
            enriched = first_pass_stats.get("enriched", 0)

            if processed > 0:
                enrichment_rate = enriched / processed

                if enrichment_rate < 0.90:
                    logger.warning(
                        f"⚠️ [AUTO_RETRY] Tasa de enriquecimiento baja ({enrichment_rate*100:.1f}%) - "
                        f"Iniciando retry automático...",
                        extra={
                            "upload_id": upload_id,
                            "pharmacy_id": pharmacy_id,
                            "processed": processed,
                            "enriched": enriched,
                            "enrichment_rate": enrichment_rate,
                        },
                    )

                    # The retry is best-effort: its stats are only logged and
                    # `enrichment_result` keeps the first-pass numbers.
                    try:
                        retry_stats = enrichment_service.enrich_sales_batch(db, pharmacy_id, upload_id)
                        logger.info(
                            f"✅ [AUTO_RETRY] Completado - Nuevos enriquecidos: {retry_stats.get('enriched', 0)}",
                            extra={"upload_id": upload_id, "retry_stats": retry_stats}
                        )
                    except Exception as retry_error:
                        logger.error(
                            f"[AUTO_RETRY] Error en retry: {str(retry_error)}",
                            extra={"upload_id": upload_id}
                        )
                        _reset_session("AUTO_RETRY")

        # ========================================================================
        # Persist the pipeline results on the upload
        # ========================================================================
        self._update_upload_with_pipeline_results(
            db, upload_id, enrichment_result, partners_result, classification_result, pipeline_start
        )

        total_pipeline_time = time.time() - pipeline_start
        logger.info(
            f"[ENRICH] Pipeline completado en {total_pipeline_time:.2f}s",
            extra={
                "upload_id": upload_id,
                "enrichment_success": enrichment_result.get("success"),
                "partners_success": partners_result.get("success"),
                "classification_success": classification_result.get("success"),
                "classification_count": classification_result.get("classified", 0),
            }
        )

    def _update_upload_with_pipeline_results(
        self,
        db: Session,
        upload_id: str,
        enrichment_result: dict,
        partners_result: dict,
        classification_result: dict,
        pipeline_start: float
    ):
        """
        Store the outcome of the enrichment pipeline on the upload row.

        A one-line summary of the three steps plus the elapsed time is appended
        to ``upload.error_message``. The final status is then chosen from the
        enrichment and partners results (classification only contributes to the
        summary and the log):

        - enrichment failed          -> ``UploadStatus.ERROR``
        - enrichment and partners OK -> ``UploadStatus.COMPLETED``
        - only partners failed       -> ``UploadStatus.PARTIAL``

        In the ERROR and PARTIAL cases ``error_message`` is replaced by the
        specific failure text, so the summary is not kept there.

        Any exception raised while updating is logged and the session is
        rolled back; nothing is re-raised to the caller.

        Args:
            db: Database session.
            upload_id: ID of the upload to update.
            enrichment_result: Result dict of the enrichment step.
            partners_result: Result dict of the partners initialisation step.
            classification_result: Result dict of the prescription classification step.
            pipeline_start: ``time.time()`` value captured when the pipeline started.
        """
        import time

        try:
            upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
            if not upload:
                logger.error(f"[PIPELINE_UPDATE] Upload {upload_id} no encontrado")
                return

            pipeline_time = round(time.time() - pipeline_start, 2)

            # Success flags are read once and reused for both the summary and
            # the status decision below.
            enrichment_ok = enrichment_result.get("success")
            partners_ok = partners_result.get("success")
            classification_ok = classification_result.get("success")

            # --- Enrichment segment of the summary ---
            if not enrichment_ok:
                enrichment_part = f"Enriquecimiento: ERROR - {enrichment_result.get('error', 'Unknown')}"
            else:
                stats = enrichment_result.get("stats", {})
                if isinstance(stats, dict):
                    enriched_count = stats.get("enriched", "OK")
                    enrichment_part = f"Enriquecimiento: {enriched_count} productos"
                else:
                    enrichment_part = f"Enriquecimiento: {stats}"

            # --- Partners segment ---
            partners_part = (
                f"Partners: {partners_result.get('count', 0)} inicializados"
                if partners_ok
                else f"Partners: ERROR - {partners_result.get('error', 'Unknown')}"
            )

            # --- Classification segment ---
            if classification_ok:
                classified = classification_result.get("classified", 0)
                skipped = classification_result.get("skipped", 0)
                classification_part = f"Clasificacion: {classified} productos ({skipped} OTC)"
            else:
                classification_part = (
                    f"Clasificacion: ERROR - {classification_result.get('error', 'Unknown')}"
                )

            summary = " | ".join(
                [enrichment_part, partners_part, classification_part, f"Pipeline: {pipeline_time}s"]
            )

            # Append the summary to whatever message the upload already carries.
            previous_message = upload.error_message or ""
            upload.error_message = f"{previous_message} | {summary}" if previous_message else summary

            # Decide the final status; every outcome stamps the completion time.
            if not enrichment_ok:
                upload.status = UploadStatus.ERROR
                upload.error_message = f"Error en enriquecimiento: {enrichment_result.get('error', 'Unknown')}"
            elif partners_ok:
                upload.status = UploadStatus.COMPLETED
                upload.processing_notes = f"Pipeline completado exitosamente en {pipeline_time}s. " + (
                    upload.processing_notes or ""
                )
            else:
                # Enrichment succeeded but partners did not.
                upload.status = UploadStatus.PARTIAL
                upload.error_message = f"Partners failed: {partners_result.get('error', 'Unknown')}"
            upload.processing_completed_at = utc_now()

            db.commit()

            log_fields = {
                "upload_id": upload_id,
                "pipeline_time_seconds": pipeline_time,
                "enrichment_success": enrichment_ok,
                "partners_success": partners_ok,
                "classification_success": classification_ok,
                "classification_count": classification_result.get("classified", 0),
                "final_status": upload.status.value,
            }
            logger.info(
                "[PIPELINE_UPDATE] Upload actualizado con resultados del pipeline",
                extra=log_fields,
            )

        except Exception as e:
            logger.error(
                f"[PIPELINE_UPDATE] Error actualizando upload: {str(e)}",
                extra={"upload_id": upload_id}
            )
            db.rollback()


# Module-level singleton: one shared FileProcessingService, created when this
# module is first imported (so FileProcessingService.__init__ runs at import time).
file_processing_service = FileProcessingService()
