# backend/app/services/inventory_processing_service.py
"""
Servicio de procesamiento de ficheros de inventario.

Issue #476: Carga de ficheros de inventario desde ERPs.

Similar a file_processing_service.py pero especializado para inventario:
- Sin enriquecimiento CIMA (no aplica a inventario)
- Detección de duplicados por (pharmacy_id, snapshot_date)
- Reemplazo de snapshot existente si hay conflicto
"""

import os
import threading
import time
from datetime import date
from pathlib import Path
from typing import Optional
from uuid import UUID

import structlog
from sqlalchemy import create_engine, delete
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import NullPool

from app.models import (
    FileUpload,
    InventorySnapshot,
    ProductCatalog,
    ProductCatalogVentaLibre,
    UploadStatus,
)
from app.parsers.inventory import FarmanagerInventoryParser
from app.services.venta_libre_catalog_service import VentaLibreCatalogService
from app.utils.datetime_utils import utc_now
from app.services.necesidad_classifier_service import classify_product_by_keywords

# Module-level structured logger shared by every function in this file.
logger = structlog.get_logger(__name__)

# Maximum processing time (5 minutes - inventory processing is simpler)
MAX_PROCESSING_TIME = 5 * 60  # seconds

# Maximum concurrent uploads
MAX_CONCURRENT_UPLOADS = 3

# Batch size for database inserts; overridable through the
# INVENTORY_BATCH_SIZE environment variable (defaults to 500).
INVENTORY_BATCH_SIZE = int(os.getenv("INVENTORY_BATCH_SIZE", "500"))


class InventoryProcessingService:
    """
    Service that processes inventory files in separate threads.

    Every processing run opens its own database session from the engine
    created in ``__init__``; sessions are not shared between runs.

    Flow:
    1. Save the file to disk
    2. Parse it with FarmanagerInventoryParser
    3. Delete the previous snapshot if one exists (same pharmacy_id + snapshot_date)
    4. Store the records in InventorySnapshot
    5. Auto-populate the VentaLibre catalog for products still classified as unknown

    Safeguards:
    - Timeout: a timer fires after MAX_PROCESSING_TIME seconds (5 minutes) and
      marks the upload as ERROR unless it is already COMPLETED/ERROR. The
      worker thread itself is not interrupted.
    - Concurrency limit: a semaphore allows 3 simultaneous uploads; a run
      waits up to 30 seconds for a free slot.
    - Session cleanup: the session is closed in a ``finally`` block.
    """

    def __init__(self):
        """Create the dedicated engine, session factory and concurrency guards."""
        # NOTE(review): the fallback URL embeds development credentials; it is
        # only used when DATABASE_URL is not set in the environment.
        fallback_url = "postgresql://xfarma_user:xfarma_dev_2024@localhost:5432/xfarma_db"

        # NullPool: no connection pooling, so each session opens its own
        # connection and releases it when closed.
        self.engine = create_engine(
            os.getenv("DATABASE_URL", fallback_url),
            poolclass=NullPool,
            echo=False,
        )
        self.SessionLocal = sessionmaker(
            bind=self.engine,
            autocommit=False,
            autoflush=False,
        )

        # Timeout timers currently running, keyed by upload_id and guarded by
        # the lock below.
        self._timers_lock = threading.Lock()
        self._active_timers = {}

        # Caps how many uploads are processed at the same time.
        self.upload_semaphore = threading.Semaphore(MAX_CONCURRENT_UPLOADS)
    def process_inventory_in_thread(
        self,
        upload_id: str,
        file_path: str,
        contents: bytes,
        pharmacy_id: str,
        erp_type: Optional[str] = None,
        snapshot_date: Optional[date] = None,
    ):
        """
        Process an inventory file; intended to run in a worker thread.

        Runs the phases save-to-disk -> parse -> save-to-database ->
        VentaLibre auto-population, then marks the upload as COMPLETED.
        Any exception is logged, the session is rolled back and the upload is
        marked as ERROR; nothing is re-raised to the caller.

        Args:
            upload_id: ID of the FileUpload record
            file_path: Path where the file will be written
            contents: Raw file contents
            pharmacy_id: ID of the pharmacy
            erp_type: ERP type (farmanager, etc.). Accepted for interface
                compatibility; this method does not currently use it.
            snapshot_date: Inventory date (defaults to today when saving)
        """
        acquired = self.upload_semaphore.acquire(blocking=True, timeout=30)
        if not acquired:
            logger.error(
                "[INVENTORY] Semaphore timeout",
                upload_id=upload_id,
            )
            # Record the failure: otherwise the FileUpload row would keep its
            # initial status forever and nobody would learn it was dropped.
            self._mark_upload_failed(
                upload_id,
                "Servidor ocupado: no se pudo iniciar el procesamiento del inventario",
            )
            return

        start_time = time.time()
        db = None
        timeout_timer = None

        try:
            # Created inside the try so that a failure here still reaches the
            # finally block and releases the semaphore slot.
            db = self.SessionLocal()

            logger.info(
                "[INVENTORY] Iniciando procesamiento",
                upload_id=upload_id,
                pharmacy_id=pharmacy_id,
            )

            # Setup timeout. The timer only marks the upload as ERROR; it does
            # not interrupt this thread.
            timeout_timer = threading.Timer(
                MAX_PROCESSING_TIME,
                self._handle_timeout,
                args=[upload_id]
            )
            timeout_timer.start()

            with self._timers_lock:
                self._active_timers[upload_id] = timeout_timer

            # Get upload record
            upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
            if not upload:
                logger.error("[INVENTORY] Upload not found", upload_id=upload_id)
                return

            # PHASE 1: save the file to disk
            self._save_file(db, upload, file_path, contents)

            # PHASE 2: parse the file
            df, stats = self._parse_file(db, upload, file_path, pharmacy_id, snapshot_date)

            # PHASE 3: store in the database
            self._save_to_database(
                db, upload, df, pharmacy_id, snapshot_date or date.today()
            )

            # PHASE 4: auto-populate VentaLibre for unknown products
            # (Issue #476: reduce unknown rate by creating VL entries)
            vl_created = self._populate_venta_libre_for_unknowns(
                db, upload.id, pharmacy_id
            )
            if vl_created > 0:
                logger.info(
                    "[INVENTORY] Auto-poblado VentaLibre",
                    created=vl_created,
                    upload_id=upload_id,
                )

            # Finalize
            upload.status = UploadStatus.COMPLETED
            upload.processing_completed_at = utc_now()
            elapsed = time.time() - start_time
            upload.processing_notes = (
                f"Inventario procesado en {elapsed:.1f}s. "
                f"Productos: {stats.get('valid_rows', 0)}"
            )
            db.commit()

            logger.info(
                "[INVENTORY] Completado",
                upload_id=upload_id,
                products=stats.get('valid_rows', 0),
                elapsed=f"{elapsed:.1f}s",
            )

        except Exception as e:
            logger.exception(
                "[INVENTORY] Error procesando",
                upload_id=upload_id,
                error=str(e),
            )
            try:
                if db is not None:
                    # Discard whatever the failed phase left pending, then
                    # record the error on a freshly loaded upload row.
                    db.rollback()
                    upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
                    if upload:
                        upload.status = UploadStatus.ERROR
                        upload.error_message = str(e)[:500]
                        db.commit()
            except Exception as status_error:
                # Best effort: never let status bookkeeping mask the original
                # failure, but do leave a trace of it.
                logger.error(
                    "[INVENTORY] No se pudo registrar el error del upload",
                    upload_id=upload_id,
                    error=str(status_error),
                )

        finally:
            # Cleanup timeout timer
            if timeout_timer:
                timeout_timer.cancel()
                with self._timers_lock:
                    self._active_timers.pop(upload_id, None)

            # Cleanup session
            if db is not None:
                try:
                    db.close()
                except Exception as close_error:
                    logger.warning(
                        "[INVENTORY] Error cerrando sesión",
                        upload_id=upload_id,
                        error=str(close_error),
                    )

            # Release semaphore
            self.upload_semaphore.release()

    def _mark_upload_failed(self, upload_id: str, message: str) -> None:
        """Best-effort: mark an upload as ERROR using a short-lived session.

        Does nothing when the upload is missing or already COMPLETED/ERROR.
        Failures are logged and never raised.
        """
        db = None
        try:
            db = self.SessionLocal()
            upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
            if upload and upload.status not in (UploadStatus.COMPLETED, UploadStatus.ERROR):
                upload.status = UploadStatus.ERROR
                upload.error_message = message[:500]
                db.commit()
        except Exception as exc:
            logger.error(
                "[INVENTORY] No se pudo registrar el error del upload",
                upload_id=upload_id,
                error=str(exc),
            )
        finally:
            if db is not None:
                try:
                    db.close()
                except Exception as close_error:
                    logger.warning(
                        "[INVENTORY] Error cerrando sesión",
                        upload_id=upload_id,
                        error=str(close_error),
                    )

    def _save_file(
        self,
        db: Session,
        upload: FileUpload,
        file_path: str,
        contents: bytes
    ):
        """Write the uploaded bytes to disk and record path and size on the upload.

        Commits twice: once to publish the status change before the write,
        and once after the file path and size have been stored.
        """
        upload.status = UploadStatus.PARSING
        upload.processing_notes = "Guardando fichero..."
        db.commit()

        target = Path(file_path)
        # Make sure the destination directory exists before writing.
        target.parent.mkdir(parents=True, exist_ok=True)

        with target.open("wb") as handle:
            handle.write(contents)

        saved_path = str(target)
        upload.file_path = saved_path
        upload.file_size = len(contents)
        db.commit()

        logger.info("[INVENTORY] Fichero guardado", path=saved_path)

    def _parse_file(
        self,
        db: Session,
        upload: FileUpload,
        file_path: str,
        pharmacy_id: str,
        snapshot_date: Optional[date],
    ):
        """Parse the inventory file and record row counts/warnings on the upload.

        Returns:
            The ``(df, stats)`` pair produced by the parser's ``parse_file``.
        """
        upload.processing_notes = "Parseando fichero..."
        db.commit()

        # Only Farmanager is supported for now (Issue #476 Phase 1).
        parser = FarmanagerInventoryParser(
            pharmacy_id=pharmacy_id,
            upload_id=str(upload.id),
        )

        # When the upload's file type already names Farmanager, flag the
        # parser as a manual selection.
        declared_type = upload.file_type
        if declared_type and "farmanager" in declared_type.value.lower():
            parser.manual_selection = True

        df, stats = parser.parse_file(file_path, snapshot_date=snapshot_date)

        total_rows = stats.get("total_rows", 0)
        valid_rows = len(df)

        upload.rows_total = total_rows
        upload.processing_notes = f"Parseados {valid_rows} productos válidos"

        parse_warnings = stats.get("warnings")
        if parse_warnings:
            upload.warnings = parse_warnings

        db.commit()

        logger.info(
            "[INVENTORY] Fichero parseado",
            total_rows=total_rows,
            valid_rows=valid_rows,
        )

        return df, stats

    def _build_classification_lookups(
        self,
        db: Session,
        df,
    ) -> tuple[dict, dict, dict]:
        """
        Build the lookup dictionaries used to classify rows in bulk.

        Collects the distinct codes/names present in the DataFrame and
        resolves them with a handful of batch queries (at most four) instead
        of one query per product.

        Returns:
            Tuple of (cn_to_catalog_info, ean_to_vl_id, name_to_vl_id)
            cn_to_catalog_info: CN → (ProductCatalog.id,
                xfarma_prescription_category, cima_requiere_receta)
            ean_to_vl_id: EAN → ProductCatalogVentaLibre.id (active rows only)
            name_to_vl_id: normalized name → ProductCatalogVentaLibre.id
                (active rows only)
        """
        placeholders = ("nan", "none", "")

        all_cns = set()
        all_eans = set()
        all_names = set()

        for _, record in df.iterrows():
            code = str(record.get("product_code", ""))[:20]
            barcode = str(record.get("ean13", ""))[:20]
            label = str(record.get("product_name", ""))[:500].lower().strip()

            if len(code) >= 6 and code.lower() not in placeholders:
                all_cns.add(code)

            if len(barcode) >= 12 and barcode.lower() not in placeholders:
                all_eans.add(barcode)
                # EANs starting with 847000 carry a national code in
                # positions 6-11; look that up in the catalog as well.
                if barcode.startswith("847000"):
                    all_cns.add(barcode[6:12])

            if label and label not in ("nan", "none", "null", ""):
                all_names.add(label)

        logger.info(
            "[INVENTORY] Building classification lookups",
            unique_cns=len(all_cns),
            unique_eans=len(all_eans),
            unique_names=len(all_names),
        )

        # --- ProductCatalog by national code --------------------------------
        cn_to_catalog_info = {}
        if all_cns:
            catalog_rows = (
                db.query(
                    ProductCatalog.national_code,
                    ProductCatalog.id,
                    ProductCatalog.xfarma_prescription_category,
                    ProductCatalog.cima_requiere_receta,
                )
                .filter(ProductCatalog.national_code.in_(all_cns))
                .all()
            )
            for entry in catalog_rows:
                cn_to_catalog_info[entry.national_code] = (
                    entry.id,
                    entry.xfarma_prescription_category,
                    entry.cima_requiere_receta,
                )
            logger.info(f"[INVENTORY] Found {len(cn_to_catalog_info)} CIMA matches by CN")

        # --- VentaLibre by EAN (Issue #481: also search the ean_codes array) --
        ean_to_vl_id = {}
        if all_eans:
            # 1. Primary ean13 column first.
            primary_rows = (
                db.query(
                    ProductCatalogVentaLibre.ean13,
                    ProductCatalogVentaLibre.id,
                )
                .filter(
                    ProductCatalogVentaLibre.ean13.in_(all_eans),
                    ProductCatalogVentaLibre.is_active.is_(True),
                )
                .all()
            )
            for entry in primary_rows:
                ean_to_vl_id[entry.ean13] = entry.id

            # 2. Whatever is still unmatched is searched in the ean_codes array.
            remaining_eans = all_eans.difference(ean_to_vl_id)
            if remaining_eans:
                array_rows = (
                    db.query(
                        ProductCatalogVentaLibre.ean_codes,
                        ProductCatalogVentaLibre.id,
                    )
                    .filter(
                        ProductCatalogVentaLibre.ean_codes.overlap(list(remaining_eans)),
                        ProductCatalogVentaLibre.is_active.is_(True),
                    )
                    .all()
                )
                # Map every requested EAN found inside an array to its product.
                for entry in array_rows:
                    for code in entry.ean_codes or ():
                        if code in remaining_eans:
                            ean_to_vl_id[code] = entry.id

            logger.info(f"[INVENTORY] Found {len(ean_to_vl_id)} venta_libre matches by EAN")

        # --- VentaLibre by normalized name ----------------------------------
        name_to_vl_id = {}
        if all_names:
            name_rows = (
                db.query(
                    ProductCatalogVentaLibre.product_name_normalized,
                    ProductCatalogVentaLibre.id,
                )
                .filter(
                    ProductCatalogVentaLibre.product_name_normalized.in_(all_names),
                    ProductCatalogVentaLibre.is_active.is_(True),
                )
                .all()
            )
            for entry in name_rows:
                name_to_vl_id[entry.product_name_normalized] = entry.id
            logger.info(f"[INVENTORY] Found {len(name_to_vl_id)} venta_libre matches by name")

        return cn_to_catalog_info, ean_to_vl_id, name_to_vl_id

    def _derive_product_type_from_catalog(
        self,
        xfarma_prescription_category: str | None,
        cima_requiere_receta: bool | None,
    ) -> str:
        """
        Determina el tipo de venta basado en clasificación de prescripción.

        Replica la lógica de EnrichmentService._derive_product_type para consistencia.
        Issue #487: Fix para clasificar correctamente productos OTC de CIMA.

        Regla:
        - Tiene xfarma_prescription_category (excepto VETERINARIA) → "prescription"
        - VETERINARIA → depende de cima_requiere_receta
        - NO tiene xfarma_prescription_category (NULL) → "venta_libre"
        """
        # EXCEPCIÓN: Categoría VETERINARIA
        if xfarma_prescription_category == "VETERINARIA":
            if cima_requiere_receta is True:
                return "prescription"
            elif cima_requiere_receta is False:
                return "venta_libre"
            else:
                return "prescription"  # NULL: asumir prescription por seguridad

        # REGLA GENERAL: Tener categoría → prescription
        elif xfarma_prescription_category is not None:
            return "prescription"

        # Sin categoría (NULL) → Venta Libre
        else:
            return "venta_libre"

    def _classify_product(
        self,
        cn_lookup: dict,
        ean_vl_lookup: dict,
        name_vl_lookup: dict,
        product_code: str | None,
        ean13: str | None,
        product_name: str | None,
    ) -> tuple[str, UUID | None, UUID | None]:
        """
        Classify a product using pre-built lookup dictionaries.

        Performance: O(1) dictionary lookups instead of O(n) DB queries.

        Issue #487: Now uses _derive_product_type_from_catalog to correctly
        classify OTC products from CIMA (like FRENADOL, VOLTADOL) as "venta_libre".

        Args:
            cn_lookup: Dict mapping CN → (id, xfarma_prescription_category, cima_requiere_receta)
            ean_vl_lookup: Dict mapping EAN → ProductCatalogVentaLibre.id
            name_vl_lookup: Dict mapping normalized_name → ProductCatalogVentaLibre.id

        Returns:
            Tuple of (product_type, product_catalog_id, venta_libre_product_id)
        """
        cn = str(product_code)[:20] if product_code else None
        ean = str(ean13)[:20] if ean13 else None
        name = str(product_name)[:500].lower().strip() if product_name else None

        # Skip invalid values
        if cn and cn.lower() in ("nan", "none", "null", ""):
            cn = None
        if name and name in ("nan", "none", "null", ""):
            name = None

        # 1. Try ProductCatalog by CN (includes OTC from CIMA)
        if cn and len(cn) >= 6 and cn in cn_lookup:
            catalog_id, prescription_category, requiere_receta = cn_lookup[cn]
            product_type = self._derive_product_type_from_catalog(
                prescription_category, requiere_receta
            )
            return (product_type, catalog_id, None)

        # 2. Try ProductCatalog by CN extracted from Spanish pharmacy EAN
        if ean and len(ean) >= 12 and ean.startswith("847000"):
            cn_from_ean = ean[6:12]
            if cn_from_ean in cn_lookup:
                catalog_id, prescription_category, requiere_receta = cn_lookup[cn_from_ean]
                product_type = self._derive_product_type_from_catalog(
                    prescription_category, requiere_receta
                )
                return (product_type, catalog_id, None)

        # 3. Try venta libre by EAN
        if ean and ean in ean_vl_lookup:
            return ("venta_libre", None, ean_vl_lookup[ean])

        # 4. Try venta libre by normalized name
        if name and name in name_vl_lookup:
            return ("venta_libre", None, name_vl_lookup[name])

        return ("unknown", None, None)

    def _save_to_database(
        self,
        db: Session,
        upload: FileUpload,
        df,
        pharmacy_id: str,
        snapshot_date: date,
    ):
        """
        Classify every parsed row and replace the snapshot atomically.

        The previous snapshot for (pharmacy_id, snapshot_date) is deleted and
        the new rows are inserted inside ONE transaction that is committed
        only after the last batch. If anything fails midway the exception
        propagates with the transaction still open, so the caller's rollback
        restores the previous snapshot instead of leaving a partial one.

        Trade-off: because nothing is committed between batches, per-batch
        progress is not visible to other sessions while saving;
        ``rows_processed`` is published together with the data.

        Args:
            db: Session used for the whole operation
            upload: FileUpload row whose status/progress fields are updated
            df: DataFrame of parsed inventory rows
            pharmacy_id: Pharmacy ID (string or UUID)
            snapshot_date: Date the snapshot belongs to
        """
        upload.status = UploadStatus.SAVING
        upload.processing_notes = "Construyendo lookups de clasificación..."
        db.commit()

        pharmacy_uuid = UUID(pharmacy_id) if isinstance(pharmacy_id, str) else pharmacy_id
        upload_uuid = upload.id

        # Build classification lookups with a few batch queries up front
        cn_lookup, ean_vl_lookup, name_vl_lookup = self._build_classification_lookups(db, df)

        upload.processing_notes = "Clasificando y guardando..."
        db.commit()

        # IMPORTANT: from here until the final commit everything belongs to a
        # single transaction (delete + inserts). Do NOT commit in between.
        existing_deleted = db.execute(
            delete(InventorySnapshot).where(
                InventorySnapshot.pharmacy_id == pharmacy_uuid,
                InventorySnapshot.snapshot_date == snapshot_date,
            )
        ).rowcount

        if existing_deleted > 0:
            logger.info(
                "[INVENTORY] Snapshot anterior marcado para reemplazo",
                pharmacy_id=pharmacy_id,
                snapshot_date=str(snapshot_date),
                deleted_rows=existing_deleted,
            )

        # Classification stats
        stats = {"prescription": 0, "venta_libre": 0, "unknown": 0}

        # Insert the new records in batches (same transaction)
        total_inserted = 0
        batch = []

        for _, row in df.iterrows():
            product_name = row.get("product_name")
            product_code = row.get("product_code")
            ean13 = row.get("ean13")

            # Classify product using pre-built lookups (dict access only)
            product_type, catalog_id, vl_id = self._classify_product(
                cn_lookup, ean_vl_lookup, name_vl_lookup,
                product_code, ean13, product_name
            )
            stats[product_type] += 1

            snapshot = InventorySnapshot(
                pharmacy_id=pharmacy_uuid,
                file_upload_id=upload_uuid,
                product_catalog_id=catalog_id,
                venta_libre_product_id=vl_id,
                # Strings are truncated to 20/500 characters (presumably the
                # model column lengths) to prevent DB errors.
                product_code=str(product_code)[:20] if product_code else None,
                product_name=str(product_name)[:500] if product_name else None,
                ean13=str(ean13)[:20] if ean13 else None,
                product_type=product_type,
                stock_quantity=int(row.get("stock_quantity", 0)),
                stock_value=row.get("stock_value"),
                cost_value=row.get("cost_value"),
                unit_price=row.get("unit_price"),
                unit_cost=row.get("unit_cost"),
                last_sale_date=row.get("last_sale_date"),
                last_purchase_date=row.get("last_purchase_date"),
                snapshot_date=snapshot_date,
            )
            batch.append(snapshot)

            if len(batch) >= INVENTORY_BATCH_SIZE:
                # Emits the INSERTs now but keeps the transaction open.
                db.bulk_save_objects(batch)
                total_inserted += len(batch)
                batch = []

        # Save the last (partial) batch
        if batch:
            db.bulk_save_objects(batch)
            total_inserted += len(batch)

        # Single commit: the delete, every insert and the row counter become
        # visible together.
        upload.rows_processed = total_inserted
        db.commit()

        logger.info(
            "[INVENTORY] Registros guardados con clasificación",
            total=total_inserted,
            prescription=stats["prescription"],
            venta_libre=stats["venta_libre"],
            unknown=stats["unknown"],
        )

    # Upper-case substrings that mark a product name as an internal/logistics
    # entry (discounts, shipping, adjustments, ...). Names containing any of
    # them are excluded from VentaLibre auto-population.
    INTERNAL_CODE_PATTERNS = [
        'DESCUENTO', 'RECOGIDA', 'ALMACEN', 'CODIGO APP', 'CAJAS DE',
        'ENVIO', 'TRANSPORTE', 'LOGISTICA', 'INTERNO', 'AJUSTE',
        'DEVOLUCION', 'BONIFICACION', 'PROMOCION', 'VALE', 'COFARES'
    ]

    def _is_internal_code(self, product_name: str) -> bool:
        """Check if product name indicates an internal/logistics code."""
        if not product_name:
            return True
        upper_name = product_name.upper()
        return any(pattern in upper_name for pattern in self.INTERNAL_CODE_PATTERNS)

    def _populate_venta_libre_for_unknowns(
        self,
        db: Session,
        upload_id: UUID,
        pharmacy_id: str,
    ) -> int:
        """
        Create VentaLibre catalog entries for this upload's unknown products.

        Issue #476: reduce the unknown rate by registering real products.

        Steps:
        1. Load the snapshots of this upload whose product_type is "unknown"
        2. Skip internal codes and names shorter than 3 characters
        3. Classify by keywords and call find_or_create() on the catalog
        4. Link the snapshot to the VL product, or mark it "prescription"
           when find_or_create() yields no product

        Per-product failures are logged as warnings and skipped. All changes
        are committed once at the end.

        Returns:
            Number of VentaLibre products newly created (not the number of
            snapshots updated).
        """
        if isinstance(pharmacy_id, str):
            pharmacy_uuid = UUID(pharmacy_id)
        else:
            pharmacy_uuid = pharmacy_id

        pending = (
            db.query(InventorySnapshot)
            .filter(
                InventorySnapshot.file_upload_id == upload_id,
                InventorySnapshot.product_type == "unknown",
            )
            .all()
        )
        if not pending:
            return 0

        catalog_service = VentaLibreCatalogService(db)
        created_count = 0

        for item in pending:
            # Internal/logistics entries never go into the catalog.
            if self._is_internal_code(item.product_name):
                continue

            # Names that are missing or too short are not usable either.
            if not item.product_name or len(item.product_name.strip()) < 3:
                continue

            try:
                # Keyword-based classification, falling back to "unknown".
                ml_category, ml_confidence = classify_product_by_keywords(
                    item.product_name,
                    default_category="unknown",
                    default_confidence=0.0,
                )

                # Only pass identifiers that have the expected length.
                valid_ean = item.ean13 if item.ean13 and len(item.ean13) == 13 else None
                valid_cn = (
                    item.product_code
                    if item.product_code and len(item.product_code) in (6, 7)
                    else None
                )

                vl_product, was_created = catalog_service.find_or_create(
                    product_name=item.product_name,
                    pharmacy_id=pharmacy_uuid,
                    ean13=valid_ean,
                    cn=valid_cn,
                    ml_category=ml_category,
                    ml_confidence=ml_confidence,
                )

                # Per the original author's note, find_or_create() returns
                # None for products known to be prescription drugs.
                if vl_product is None:
                    item.product_type = "prescription"
                    continue

                item.venta_libre_product_id = vl_product.id
                item.product_type = "venta_libre"

                if was_created:
                    created_count += 1

            except Exception as e:
                logger.warning(
                    "[INVENTORY] Error adding to VentaLibre",
                    product_name=item.product_name[:50] if item.product_name else None,
                    error=str(e),
                )
                continue

        # Persist every snapshot update in one commit.
        db.commit()

        return created_count

    def _handle_timeout(self, upload_id: str):
        """Timer callback: mark the upload as ERROR once the time limit passes.

        Uses its own short-lived session. Uploads that are already COMPLETED
        or ERROR are left untouched. The processing thread is not interrupted
        by this handler. Failures are logged and never raised.

        Args:
            upload_id: ID of the FileUpload record that timed out
        """
        logger.error(
            "[INVENTORY] Timeout de procesamiento",
            upload_id=upload_id,
            max_time=MAX_PROCESSING_TIME,
        )

        db = None
        try:
            db = self.SessionLocal()
            upload = db.query(FileUpload).filter(FileUpload.id == upload_id).first()
            if upload and upload.status not in [UploadStatus.COMPLETED, UploadStatus.ERROR]:
                upload.status = UploadStatus.ERROR
                upload.error_message = f"Timeout: procesamiento excedió {MAX_PROCESSING_TIME}s"
                db.commit()
        except Exception as e:
            logger.error("[INVENTORY] Error en timeout handler", error=str(e))
        finally:
            # Close in finally so a failing query/commit cannot leak the
            # session (and its connection).
            if db is not None:
                try:
                    db.close()
                except Exception as close_error:
                    logger.error(
                        "[INVENTORY] Error en timeout handler",
                        error=str(close_error),
                    )


# Lazily created module-wide service instance; creation is serialized by the
# lock below.
_inventory_processing_service: Optional[InventoryProcessingService] = None
_service_lock = threading.Lock()


def get_inventory_processing_service() -> InventoryProcessingService:
    """Return the shared service instance, creating it on first use.

    The instance is checked once without the lock (fast path) and once more
    after acquiring it, so concurrent first callers build only one service.
    """
    global _inventory_processing_service

    existing = _inventory_processing_service
    if existing is not None:
        return existing

    with _service_lock:
        # Another thread may have created it while we waited for the lock.
        if _inventory_processing_service is None:
            _inventory_processing_service = InventoryProcessingService()
        return _inventory_processing_service
