# backend/app/measures/base.py
"""
Clases base para el sistema de medidas xFarma.
Inspirado en Power BI: contexto de datos + medidas reutilizables.
"""

import hashlib
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import date
from threading import Lock
from typing import Any, Dict, List, Optional, Union

from sqlalchemy.orm import Session

from app.models.inventory_snapshot import InventorySnapshot
from app.models.product_catalog import ProductCatalog
from app.models.sales_data import SalesData
from app.models.sales_enrichment import SalesEnrichment

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


@dataclass
class FilterContext:
    """Set of filters that can be applied to the measures.

    Only ``pharmacy_id`` is required; every other filter defaults to ``None``.
    """

    pharmacy_id: str
    start_date: Optional[date] = None
    end_date: Optional[date] = None
    product_codes: Optional[List[str]] = None
    therapeutic_categories: Optional[List[str]] = None
    laboratories: Optional[List[str]] = None
    requires_prescription: Optional[bool] = None
    is_generic: Optional[bool] = None
    min_amount: Optional[float] = None
    max_amount: Optional[float] = None

    # Filters specific to the generics measures
    partner_laboratories: Optional[List[str]] = None
    discount_percentage: Optional[float] = None
    top_groups: Optional[int] = None
    months_back: Optional[int] = None
    homogeneous_group: Optional[str] = None

    # Additional filters (Issue #469)
    product_type: Optional[str] = None  # "medicamento", "venta_libre", "veterinario"
    min_sales_count: Optional[int] = None  # Minimum number of sales for a product to be included

    # Inventory filters (Issue #470)
    snapshot_date: Optional[date] = None  # Specific snapshot date
    min_stock: Optional[int] = None  # Minimum stock
    max_stock: Optional[int] = None  # Maximum stock
    days_without_sale: Optional[int] = None  # Days without a sale (for dead stock)
    abc_class: Optional[str] = None  # ABC classification (A, B, C)

    # Employee filter (Issue #472 - P1 review)
    employee_names: Optional[List[str]] = None  # Restrict to specific employees

    # Generic segmentation filters (Issue #472 - Treemap migration)
    is_prescription: Optional[bool] = None  # True=prescription required, False=over the counter
    is_substitutable: Optional[bool] = None  # True=has a homogeneous group with generics
    partner_laboratory_codes: Optional[List[str]] = None  # Partner laboratory codes

    # Prescription filters (Issue #484)
    prescription_categories: Optional[List[str]] = None  # Prescription categories
    atc_codes: Optional[List[str]] = None  # ATC codes to filter by
    atc_level: Optional[int] = None  # ATC aggregation level (1-5)

    def to_dict(self) -> Dict[str, Any]:
        """Return the filters as a plain dictionary, intended for cache keys.

        Dates are rendered as ISO-format strings and list filters as tuples.
        A falsy date or list (``None`` or an empty list) is reported as
        ``None``; all other values are passed through unchanged.
        """

        def iso_or_none(day: Optional[date]) -> Optional[str]:
            return day.isoformat() if day else None

        def tuple_or_none(values: Optional[List[str]]) -> Optional[tuple]:
            return tuple(values) if values else None

        return dict(
            pharmacy_id=self.pharmacy_id,
            start_date=iso_or_none(self.start_date),
            end_date=iso_or_none(self.end_date),
            product_codes=tuple_or_none(self.product_codes),
            therapeutic_categories=tuple_or_none(self.therapeutic_categories),
            laboratories=tuple_or_none(self.laboratories),
            requires_prescription=self.requires_prescription,
            is_generic=self.is_generic,
            min_amount=self.min_amount,
            max_amount=self.max_amount,
            # Generics-specific fields
            partner_laboratories=tuple_or_none(self.partner_laboratories),
            discount_percentage=self.discount_percentage,
            top_groups=self.top_groups,
            months_back=self.months_back,
            homogeneous_group=self.homogeneous_group,
            # Additional fields (Issue #469)
            product_type=self.product_type,
            min_sales_count=self.min_sales_count,
            # Inventory fields (Issue #470)
            snapshot_date=iso_or_none(self.snapshot_date),
            min_stock=self.min_stock,
            max_stock=self.max_stock,
            days_without_sale=self.days_without_sale,
            abc_class=self.abc_class,
            # Employee filter (Issue #472)
            employee_names=tuple_or_none(self.employee_names),
            # Generic segmentation filters (Issue #472 - Treemap)
            is_prescription=self.is_prescription,
            is_substitutable=self.is_substitutable,
            partner_laboratory_codes=tuple_or_none(self.partner_laboratory_codes),
            # Prescription filters (Issue #484)
            prescription_categories=tuple_or_none(self.prescription_categories),
            atc_codes=tuple_or_none(self.atc_codes),
            atc_level=self.atc_level,
        )


class QueryContext:
    """
    Master query context - the counterpart of the Power BI data model.

    Provides the "master dataframe" obtained by joining:
    - SalesData (sales)
    - SalesEnrichment (enrichment)
    - ProductCatalog (product data)

    It also exposes a query over InventorySnapshot (Issue #470).
    Every query is exposed as a property that is built on first access and
    then reused for the lifetime of this context.
    """

    def __init__(self, db_session: Session, filters: FilterContext):
        """
        Args:
            db_session: SQLAlchemy session used to build every query.
            filters: Filters applied when the queries are built.
        """
        self.db = db_session
        self.filters = filters
        # Lazily built queries; None means "not built yet".
        self._base_query = None
        self._sales_enrichment_query = None  # Issue #472: JOIN to SalesEnrichment only
        self._enriched_query = None
        self._inventory_query = None

    @property
    def base_query(self):
        """Base query over sales_data with the sales-level filters applied."""
        if self._base_query is None:
            self._base_query = self._build_base_query()
        return self._base_query

    @property
    def sales_enrichment_query(self):
        """
        Query joined to SalesEnrichment only (no ProductCatalog).

        Issue #472: many venta_libre records do NOT have a product_catalog_id.
        For filters that only need SalesEnrichment (product_type), this query
        keeps every enriched record.
        """
        if self._sales_enrichment_query is None:
            self._sales_enrichment_query = self._build_sales_enrichment_query()
        return self._sales_enrichment_query

    @property
    def enriched_query(self):
        """Enriched query joined to SalesEnrichment and product_catalog."""
        if self._enriched_query is None:
            self._enriched_query = self._build_enriched_query()
        return self._enriched_query

    @property
    def inventory_query(self):
        """Query over inventory_snapshots with filters applied (Issue #470)."""
        if self._inventory_query is None:
            self._inventory_query = self._build_inventory_query()
        return self._inventory_query

    def _build_base_query(self):
        """Build the base sales query for the pharmacy with the sales-level filters."""
        filters = self.filters
        query = self.db.query(SalesData).filter(SalesData.pharmacy_id == filters.pharmacy_id)

        # Date filters
        if filters.start_date:
            query = query.filter(SalesData.sale_date >= filters.start_date)
        if filters.end_date:
            query = query.filter(SalesData.sale_date <= filters.end_date)

        # Product filters
        if filters.product_codes:
            query = query.filter(SalesData.codigo_nacional.in_(filters.product_codes))

        # Amount filters. Compared against None (not truthiness) so that an
        # explicit bound of 0 is honored instead of being silently dropped.
        if filters.min_amount is not None:
            query = query.filter(SalesData.total_amount >= filters.min_amount)
        if filters.max_amount is not None:
            query = query.filter(SalesData.total_amount <= filters.max_amount)

        # Employee filter (Issue #472 - P1 review)
        if filters.employee_names:
            query = query.filter(SalesData.employee_name.in_(filters.employee_names))

        return query

    def _apply_product_type_filters(self, query):
        """Apply the filters that target SalesEnrichment.product_type.

        Shared by the sales-enrichment and the enriched query builders.
        """
        # Prescription / over-the-counter filter (Issue #472 - Treemap)
        if self.filters.is_prescription is not None:
            wanted_type = "prescription" if self.filters.is_prescription else "venta_libre"
            query = query.filter(SalesEnrichment.product_type == wanted_type)

        # Filter by product type (Issue #469)
        # NOTE(review): the value is compared as-is here, whereas the inventory
        # query maps aliases such as "medicamento" -> "prescription" first.
        # Confirm whether the same mapping should apply to sales queries.
        if self.filters.product_type:
            query = query.filter(SalesEnrichment.product_type == self.filters.product_type)

        return query

    def _build_sales_enrichment_query(self):
        """
        Build the query joined to SalesEnrichment only (no ProductCatalog).

        Issue #472: for product_type filters (prescription/venta_libre), many
        records do NOT have a product_catalog_id. This query keeps them.
        """
        query = self.base_query.join(SalesEnrichment, SalesData.id == SalesEnrichment.sales_data_id)
        return self._apply_product_type_filters(query)

    def _build_enriched_query(self):
        """Build the enriched query with product data (inner joins on both tables)."""
        query = self.base_query.join(SalesEnrichment, SalesData.id == SalesEnrichment.sales_data_id).join(
            ProductCatalog, SalesEnrichment.product_catalog_id == ProductCatalog.id
        )

        # Filters specific to enriched products
        if self.filters.therapeutic_categories:
            query = query.filter(SalesEnrichment.therapeutic_category.in_(self.filters.therapeutic_categories))

        if self.filters.laboratories:
            query = query.filter(ProductCatalog.nomen_laboratorio.in_(self.filters.laboratories))

        if self.filters.requires_prescription is not None:
            query = query.filter(ProductCatalog.cima_requiere_receta == self.filters.requires_prescription)

        if self.filters.is_generic is not None:
            query = query.filter(
                ProductCatalog.nomen_tipo_farmaco == ("GENERICO" if self.filters.is_generic else "MARCA")
            )

        # product_type / is_prescription filters (Issues #469 and #472)
        query = self._apply_product_type_filters(query)

        # Substitutable filter - homogeneous groups with generics (Issue #472 - Treemap)
        if self.filters.is_substitutable is not None:
            substitutable_groups = self.get_substitutable_groups_subquery()
            group_code = ProductCatalog.nomen_codigo_homogeneo
            if self.filters.is_substitutable:
                query = query.filter(group_code.in_(substitutable_groups))
            else:
                # Rows without a homogeneous group are matched explicitly,
                # because NOT IN alone never matches a NULL value.
                query = query.filter((~group_code.in_(substitutable_groups)) | group_code.is_(None))

        # Partner coverage filter (Issue #472 - Treemap)
        if self.filters.partner_laboratory_codes:
            covered_groups = self.get_partner_covered_groups_subquery(
                self.filters.partner_laboratory_codes
            )
            query = query.filter(ProductCatalog.nomen_codigo_homogeneo.in_(covered_groups))

        return query

    def _latest_snapshot_date_subquery(self):
        """Scalar subquery with the most recent snapshot_date of the pharmacy."""
        from sqlalchemy import func

        return (
            self.db.query(func.max(InventorySnapshot.snapshot_date))
            .filter(InventorySnapshot.pharmacy_id == self.filters.pharmacy_id)
            .scalar_subquery()
        )

    def get_latest_inventory_costs_subquery(self):
        """
        Return a subquery with the unit cost of each product in the latest snapshot.

        Issue #496: used by GMROI to compute COGS with real costs (PMC).

        The subquery exposes:
        - product_code: national code of the product
        - ean13: alternative EAN-13
        - inventory_unit_cost: unit cost (PMC), falling back to the price (PVP)

        According to the original notes, profitability measures join on:
        1. product_code = codigo_nacional (primary)
        2. ean13 = ean13 (fallback when there is no match by national code)

        Note: every product with a recorded cost is included, regardless of
        its current stock, to maximize cost coverage for products that sold
        out completely (high rotation).
        """
        from sqlalchemy import func, or_
        from sqlalchemy.sql import literal_column

        latest_date_subq = self._latest_snapshot_date_subquery()

        # Fallback chain: unit_cost -> unit_price -> 0.
        # The final 0 is only a safety net: the filter below already requires
        # at least one of unit_cost / unit_price to be present.
        # Issue #496 code review: include products with a cost even if stock=0.
        return (
            self.db.query(
                InventorySnapshot.product_code,
                InventorySnapshot.ean13,
                func.coalesce(
                    InventorySnapshot.unit_cost,
                    InventorySnapshot.unit_price,
                    literal_column("0"),
                ).label("inventory_unit_cost"),
            )
            .filter(
                InventorySnapshot.pharmacy_id == self.filters.pharmacy_id,
                InventorySnapshot.snapshot_date == latest_date_subq,
                # Only products with a recorded cost (PMC or PVP)
                or_(
                    InventorySnapshot.unit_cost.isnot(None),
                    InventorySnapshot.unit_price.isnot(None),
                ),
            )
            .subquery()
        )

    def _build_inventory_query(self):
        """Build the query over inventory_snapshots with filters (Issue #470)."""
        query = self.db.query(InventorySnapshot).filter(
            InventorySnapshot.pharmacy_id == self.filters.pharmacy_id
        )

        # Snapshot date filter (the most recent snapshot is used when unspecified)
        if self.filters.snapshot_date:
            query = query.filter(InventorySnapshot.snapshot_date == self.filters.snapshot_date)
        else:
            query = query.filter(InventorySnapshot.snapshot_date == self._latest_snapshot_date_subquery())

        # Stock filters
        if self.filters.min_stock is not None:
            query = query.filter(InventorySnapshot.stock_quantity >= self.filters.min_stock)
        if self.filters.max_stock is not None:
            query = query.filter(InventorySnapshot.stock_quantity <= self.filters.max_stock)

        # Product filters
        if self.filters.product_codes:
            query = query.filter(InventorySnapshot.product_code.in_(self.filters.product_codes))

        # product_type filter (Issue #500: split inventory into prescription/venta_libre)
        if self.filters.product_type:
            # Type mapping: the frontend uses "medicamento" or "venta_libre",
            # the DB uses "prescription" or "venta_libre".
            type_map = {
                "medicamento": "prescription",
                "prescription": "prescription",
                "venta_libre": "venta_libre",
                "otc": "venta_libre",
            }
            # Unknown values are passed through unchanged.
            db_type = type_map.get(self.filters.product_type, self.filters.product_type)
            query = query.filter(InventorySnapshot.product_type == db_type)

        return query

    # =========================================================================
    # Dimension Helpers - Issue #472 (Power BI-style segmentation)
    # =========================================================================

    def get_substitutable_groups_subquery(self):
        """
        Subquery of homogeneous groups that have AT LEAST one generic available.

        Equivalent to the `generic_homogeneous` CTE of the partner_analysis service:
        - nomen_codigo_homogeneo IS NOT NULL (and not an empty string)
        - nomen_tipo_farmaco = 'GENERICO'
        - nomen_estado = 'ALTA'

        Returns:
            Subquery of substitutable homogeneous group codes
        """
        from sqlalchemy import distinct

        return (
            self.db.query(distinct(ProductCatalog.nomen_codigo_homogeneo))
            .filter(
                ProductCatalog.nomen_codigo_homogeneo.isnot(None),
                ProductCatalog.nomen_codigo_homogeneo != "",
                ProductCatalog.nomen_tipo_farmaco == "GENERICO",
                ProductCatalog.nomen_estado == "ALTA",
            )
            .scalar_subquery()
        )

    def get_partner_covered_groups_subquery(self, partner_codes: List[str]):
        """
        Subquery of homogeneous groups covered by partner laboratories.

        Equivalent to the `partner_availability` CTE of the partner_analysis service:
        - nomen_codigo_laboratorio IN (partner_codes)
        - nomen_estado = 'ALTA'

        Args:
            partner_codes: List of laboratory codes of the partners

        Returns:
            Subquery of homogeneous group codes covered by partners
        """
        from sqlalchemy import distinct

        if not partner_codes:
            # No partners means no coverage: return a subquery that yields no rows
            return self.db.query(ProductCatalog.nomen_codigo_homogeneo).filter(False).scalar_subquery()

        return (
            self.db.query(distinct(ProductCatalog.nomen_codigo_homogeneo))
            .filter(
                ProductCatalog.nomen_codigo_homogeneo.isnot(None),
                ProductCatalog.nomen_codigo_laboratorio.in_(partner_codes),
                ProductCatalog.nomen_estado == "ALTA",
            )
            .scalar_subquery()
        )

    def get_aggregated_data(self, group_by_fields: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Return a summary of the context for advanced analysis.

        OPTIMIZED (Issue #547): the expensive query.count() that added ~2-3s
        was removed. total_records is now None - individual measures already
        return their own metadata with specific counts when needed.

        Args:
            group_by_fields: Reserved for dynamic grouping; currently unused.

        Returns:
            Dict with a "context_summary" entry holding total_records (always
            None), the filtered date range and the number of non-None filters.
        """
        # pharmacy_id is always set, so it is counted as an applied filter too.
        applied = sum(1 for value in self.filters.to_dict().values() if value is not None)

        return {
            "context_summary": {
                "total_records": None,  # Removed: query.count() was adding ~2-3s latency
                "date_range": {
                    "start": self.filters.start_date,
                    "end": self.filters.end_date,
                },
                "filters_applied": applied,
            }
        }


class BaseMeasure(ABC):
    """
    Base class for pharmaceutical measures - the counterpart of Power BI measures.

    Each measure:
    1. Receives a FilterContext
    2. Operates on the QueryContext (master dataframe)
    3. Returns a computed value
    4. Is cacheable and reusable
    """

    def __init__(self):
        self.name = self.__class__.__name__
        self.description = ""
        self.unit = ""  # €, units, %, etc.
        self.dependencies: List[str] = []  # Other measures this one needs

    @abstractmethod
    def calculate(self, context: QueryContext) -> Union[float, int, Dict[str, Any]]:
        """
        Compute the measure using the given context.

        Args:
            context: QueryContext with filters applied

        Returns:
            Computed value of the measure
        """

    def get_cache_key(self, filters: FilterContext) -> str:
        """Build a unique cache key for this measure and set of filters.

        The key is ``"<measure name>_<sha256 hex digest>"``, where the digest
        covers the filters sorted by name. Unlike the builtin ``hash()``, which
        is salted per process for strings, the digest is identical across
        processes and restarts, and collisions are not a practical concern.

        Args:
            filters: Filters whose ``to_dict()`` output identifies the request.

        Returns:
            Cache key string.
        """
        canonical = str(sorted(filters.to_dict().items()))
        digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
        return f"{self.name}_{digest}"

    def validate_context(self, context: QueryContext) -> bool:
        """Check that the context has the data this measure needs.

        The base implementation accepts every context; subclasses may override.
        """
        return True

    def get_metadata(self) -> Dict[str, Any]:
        """Return the measure metadata for documentation/UI.

        ``category`` falls back to "General" when the measure does not define
        a ``category`` attribute.
        """
        return {
            "name": self.name,
            "description": self.description,
            "unit": self.unit,
            "dependencies": self.dependencies,
            "category": getattr(self, "category", "General"),
        }


class MeasureRegistry:
    """Global registry of the available measures - as in Power BI.

    Results are memoized in an in-memory dict keyed by the measure's cache key.
    The dict is never pruned automatically; call ``clear_cache()`` to empty it.
    """

    def __init__(self):
        self._measures: Dict[str, BaseMeasure] = {}
        self._cache: Dict[str, Any] = {}
        # Serializes access to _cache between threads (REGLA #7.6). The lock
        # and the cache are plain in-memory objects of this registry instance.
        self._cache_lock = Lock()

    def register(self, name: str, measure: BaseMeasure):
        """Register a measure under ``name`` (replaces any previous entry)."""
        self._measures[name] = measure
        logger.info("Registered measure: %s", name)

    def get_measure(self, name: str) -> Optional[BaseMeasure]:
        """Return the measure registered under ``name``, or None."""
        return self._measures.get(name)

    def list_measures(self) -> List[Dict[str, Any]]:
        """List the metadata of every registered measure."""
        return [measure.get_metadata() for measure in self._measures.values()]

    def calculate_measure(self, measure_name: str, context: QueryContext, use_cache: bool = True) -> Any:
        """
        Compute a specific measure, optionally using the cache.

        Args:
            measure_name: Name of the measure
            context: Query context
            use_cache: Whether to read from / write to the cache

        Returns:
            Computed value of the measure

        Raises:
            ValueError: If the measure is not registered or the measure
                rejects the context.
        """
        measure = self.get_measure(measure_name)
        if measure is None:
            raise ValueError(f"Measure '{measure_name}' not found")

        # The key is only needed when the cache is in use.
        cache_key = measure.get_cache_key(context.filters) if use_cache else None

        if use_cache:
            with self._cache_lock:
                if cache_key in self._cache:
                    logger.debug("Cache hit for measure: %s", measure_name)
                    return self._cache[cache_key]

        if not measure.validate_context(context):
            raise ValueError(f"Invalid context for measure '{measure_name}'")

        # The calculation runs outside the lock so a slow measure does not
        # block other threads. Two threads that miss on the same key may both
        # compute it; the last one to finish overwrites the cached entry.
        result = measure.calculate(context)
        if use_cache:
            with self._cache_lock:
                self._cache[cache_key] = result

        # Lazy %-style arguments: the result is only rendered when DEBUG is enabled.
        logger.debug("Calculated measure '%s': %s", measure_name, result)
        return result

    def clear_cache(self):
        """Empty the measure cache (under the cache lock)."""
        with self._cache_lock:
            self._cache.clear()
        logger.info("Measure cache cleared")
