"""
VentaLibreService - Servicio para dashboard de ventas OTC por NECESIDAD (Issue #461)

Proporciona agregaciones y listados de productos venta libre/parafarmacia
clasificados por categoría NECESIDAD (ml_category).
"""

from datetime import date, timedelta
from typing import Dict, List, Optional, Tuple
from uuid import UUID

import structlog
from sqlalchemy import and_, case, cast, distinct, func, or_, String
from sqlalchemy.orm import Session

from app.models.sales_data import SalesData
from app.models.sales_enrichment import SalesEnrichment
from app.models.product_catalog_venta_libre import ProductCatalogVentaLibre
from app.models.inventory_snapshot import InventorySnapshot
from app.schemas.symptom_taxonomy import (
    NecesidadPrincipal,
    get_all_subcategories,
)

logger = structlog.get_logger(__name__)


def _expand_to_specific_categories(necesidad: str) -> List[str]:
    """
    Resolve a category filter into the list of categories to match against.

    ``necesidad`` is first checked against ``NecesidadPrincipal``. When it is a
    principal category and ``get_all_subcategories`` returns a non-empty
    result, that result is returned. In every other case (the value is not a
    principal category, or the principal has no subcategories) the input is
    returned unchanged inside a single-item list.

    Args:
        necesidad: Category string (principal or specific).

    Returns:
        The categories to filter by.
    """
    resolved = [necesidad]
    try:
        # Constructing the enum is the membership test: a ValueError here
        # means the value is not a principal category.
        NecesidadPrincipal(necesidad)
        expanded = get_all_subcategories(necesidad)
        if expanded:
            logger.debug(
                "ventalibre.expand_principal_category",
                principal=necesidad,
                subcategories_count=len(expanded),
            )
            resolved = expanded
    except ValueError:
        # Keep the single-item fallback (specific or unknown category).
        pass
    return resolved

# Fallback categories that require manual review (W14b)
FALLBACK_CATEGORIES = {
    "otros",
    "parafarmacia_otros",
    "sin_clasificar",
    "unknown",
    "desconocido",
}

# Confidence threshold for manual review: an ml_confidence below this value
# marks a classification as low-confidence
CONFIDENCE_THRESHOLD = 0.60


class VentaLibreService:
    """
    Servicio para análisis de ventas de productos venta libre por categoría NECESIDAD.

    Funcionalidades:
    - Agregación de ventas por ml_category (para treemap)
    - Listado paginado de productos con filtros
    - KPIs del dashboard (total ventas, productos, categorías, cobertura)
    - Listado de categorías disponibles
    - Indicadores de salud del dato (pending_corrections) - W14b
    """

    def _get_pending_corrections_by_category(
        self,
        db: Session,
        pharmacy_id: UUID
    ) -> Dict[str, int]:
        """
        Count products pending manual correction, grouped by category (W14b).

        A sale row counts as pending when both of these hold:
        - its catalog entry has human_verified=False, or it has no catalog
          entry at all
        - its ml_category is one of FALLBACK_CATEGORIES, or its ml_confidence
          is below CONFIDENCE_THRESHOLD

        Within each category, products are counted by distinct codigo_nacional.
        Any exception is logged as a warning and an empty dict is returned.

        Args:
            db: Database session
            pharmacy_id: Pharmacy ID

        Returns:
            Dict mapping ml_category -> number of pending products
        """
        try:
            # Not reviewed by a human: the flag is false, or the outer join
            # found no catalog row for the sale.
            not_verified = or_(
                ProductCatalogVentaLibre.human_verified == False,  # noqa: E712
                ProductCatalogVentaLibre.id.is_(None)
            )
            # Doubtful classification: fallback bucket or low confidence.
            doubtful = or_(
                SalesEnrichment.ml_category.in_(FALLBACK_CATEGORIES),
                SalesEnrichment.ml_confidence < CONFIDENCE_THRESHOLD
            )
            pending_products = func.count(func.distinct(SalesData.codigo_nacional))

            rows = (
                db.query(
                    SalesEnrichment.ml_category,
                    pending_products.label("pending_count")
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .outerjoin(
                    ProductCatalogVentaLibre,
                    SalesEnrichment.venta_libre_product_id == ProductCatalogVentaLibre.id
                )
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesEnrichment.product_type == "venta_libre",
                    SalesEnrichment.ml_category.isnot(None),
                    not_verified,
                    doubtful
                )
                .group_by(SalesEnrichment.ml_category)
                .all()
            )

            pending_by_category = {}
            for row in rows:
                pending_by_category[row.ml_category] = int(row.pending_count)
            return pending_by_category

        except Exception as e:
            # Best effort: this is a data-health indicator, so a failure
            # degrades to "no pending corrections" instead of propagating.
            logger.warning(
                "ventalibre.pending_corrections.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {}

    def get_sales_by_necesidad(
        self,
        db: Session,
        pharmacy_id: UUID,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        employee_ids: Optional[List[str]] = None
    ) -> Dict:
        """
        Aggregate venta libre sales by ml_category for the treemap.

        Only rows with a non-null, non-empty ml_category are included. Any
        exception is logged and an empty result is returned.

        Args:
            db: Database session
            pharmacy_id: Pharmacy ID
            date_from: Inclusive start of the date range (optional)
            date_to: Inclusive end of the date range (optional)
            employee_ids: Employee filter, matched against employee_name
                (None/empty = all). The special value "__sin_empleado__"
                selects sales with no employee name.

        Returns:
            Dict with the treemap nodes, sorted by sales descending:
            {
                "nodes": [
                    {"category": "proteccion_solar", "sales": 15000.0, "count": 234,
                     "percentage": 25.3, "pending_corrections": 0},
                    ...
                ],
                "total_sales": 60000.0,
                "total_products": 1247
            }
            total_products is the sum of the per-category distinct product
            counts.
        """
        try:
            # Base scope: venta libre sales of this pharmacy with a category
            conditions = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                SalesEnrichment.ml_category.isnot(None),
                SalesEnrichment.ml_category != ""
            ]

            # Optional date range (both bounds inclusive)
            if date_from:
                conditions.append(SalesData.sale_date >= date_from)
            if date_to:
                conditions.append(SalesData.sale_date <= date_to)

            # Optional employee filter (uses employee_name, not employee_id)
            if employee_ids:
                named = [name for name in employee_ids if name != "__sin_empleado__"]
                employee_clauses = []
                if named:
                    employee_clauses.append(SalesData.employee_name.in_(named))
                if "__sin_empleado__" in employee_ids:
                    employee_clauses.append(SalesData.employee_name.is_(None))
                conditions.append(
                    employee_clauses[0]
                    if len(employee_clauses) == 1
                    else or_(*employee_clauses)
                )

            rows = (
                db.query(
                    SalesEnrichment.ml_category,
                    func.sum(SalesData.total_amount).label("total_sales"),
                    func.count(func.distinct(SalesData.codigo_nacional)).label("product_count"),
                    func.sum(SalesData.quantity).label("total_units")
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(*conditions)
                .group_by(SalesEnrichment.ml_category)
                .all()
            )

            # Normalise the aggregates once (NULL sums/counts become 0)
            per_category = [
                (r.ml_category, float(r.total_sales or 0), int(r.product_count or 0))
                for r in rows
            ]
            total_sales = sum(sales for _, sales, _ in per_category)
            total_products = sum(count for _, _, count in per_category)

            # W14b: pending corrections per category (data-health indicator)
            pending_by_category = self._get_pending_corrections_by_category(db, pharmacy_id)

            # Treemap nodes, largest category first
            nodes = sorted(
                (
                    {
                        "category": category,
                        "sales": round(sales, 2),
                        "count": count,
                        "percentage": round(
                            (sales / total_sales * 100) if total_sales > 0 else 0, 2
                        ),
                        "pending_corrections": pending_by_category.get(category, 0)
                    }
                    for category, sales, count in per_category
                ),
                key=lambda node: node["sales"],
                reverse=True
            )

            logger.info(
                "ventalibre.sales_by_necesidad.success",
                pharmacy_id=str(pharmacy_id),
                categories_count=len(nodes),
                total_sales=total_sales
            )

            return {
                "nodes": nodes,
                "total_sales": round(total_sales, 2),
                "total_products": total_products
            }

        except Exception as e:
            logger.error(
                "ventalibre.sales_by_necesidad.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {
                "nodes": [],
                "total_sales": 0,
                "total_products": 0
            }

    def get_products_list(
        self,
        db: Session,
        pharmacy_id: UUID,
        necesidad: Optional[str] = None,
        search: Optional[str] = None,
        employee_ids: Optional[List[str]] = None,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        limit: int = 50,
        offset: int = 0
    ) -> Dict:
        """
        Paginated venta libre products with aggregated sales data.

        Sales are grouped by (codigo_nacional, product_name) and ordered by
        total sales descending, with the grouping keys as tiebreakers so that
        pages are stable when several products have the same total. Any
        exception is logged and an empty first page is returned.

        Args:
            db: Database session
            pharmacy_id: Pharmacy ID
            necesidad: NECESIDAD category filter; a principal category is
                expanded to all of its specific subcategories
            search: Text to look for in the product name (case-insensitive,
                matched literally: "%" and "_" are not wildcards)
            employee_ids: Employee filter, matched against employee_name
                (None/empty = all). The special value "__sin_empleado__"
                selects sales with no employee name.
            date_from: Inclusive start of the date range
            date_to: Inclusive end of the date range
            limit: Page size
            offset: Pagination offset

        Returns:
            Dict with the requested page:
            {
                "products": [...],
                "total": 1247,
                "page": 1,
                "pages": 25
            }
        """
        try:
            # Per-product aggregation of sales and enrichment attributes
            sales_subq = (
                db.query(
                    SalesData.codigo_nacional,
                    SalesData.product_name,
                    func.sum(SalesData.total_amount).label("total_sales"),
                    func.sum(SalesData.quantity).label("total_units"),
                    func.max(SalesEnrichment.ml_category).label("ml_category"),
                    func.max(SalesEnrichment.ml_subcategory).label("ml_subcategory"),  # Issue #523: L2 subcategory
                    func.max(SalesEnrichment.ml_confidence).label("ml_confidence"),
                    func.max(SalesEnrichment.detected_brand).label("detected_brand"),
                    func.max(cast(SalesEnrichment.venta_libre_product_id, String)).label("venta_libre_product_id")
                )
                .join(SalesEnrichment, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesEnrichment.product_type == "venta_libre"
                )
            )

            # Date range (both bounds inclusive)
            if date_from:
                sales_subq = sales_subq.filter(SalesData.sale_date >= date_from)
            if date_to:
                sales_subq = sales_subq.filter(SalesData.sale_date <= date_to)

            # Category filter (supports NecesidadPrincipal and NecesidadEspecifica)
            if necesidad:
                categories_to_filter = _expand_to_specific_categories(necesidad)
                if len(categories_to_filter) == 1:
                    sales_subq = sales_subq.filter(SalesEnrichment.ml_category == categories_to_filter[0])
                else:
                    sales_subq = sales_subq.filter(SalesEnrichment.ml_category.in_(categories_to_filter))

            # Search filter. LIKE metacharacters in the user's text are escaped
            # so that e.g. "50%" or "a_b" match literally instead of acting as
            # wildcards; the escape character itself is escaped first.
            if search:
                escaped_search = (
                    search.replace("\\", "\\\\")
                    .replace("%", "\\%")
                    .replace("_", "\\_")
                )
                sales_subq = sales_subq.filter(
                    SalesData.product_name.ilike(f"%{escaped_search}%", escape="\\")
                )

            # Employee filter (uses employee_name, not employee_id)
            if employee_ids:
                has_null = "__sin_empleado__" in employee_ids
                other_names = [e for e in employee_ids if e != "__sin_empleado__"]

                if has_null and other_names:
                    sales_subq = sales_subq.filter(
                        or_(
                            SalesData.employee_name.in_(other_names),
                            SalesData.employee_name.is_(None)
                        )
                    )
                elif has_null:
                    sales_subq = sales_subq.filter(SalesData.employee_name.is_(None))
                else:
                    sales_subq = sales_subq.filter(SalesData.employee_name.in_(other_names))

            # One row per product
            sales_subq = sales_subq.group_by(SalesData.codigo_nacional, SalesData.product_name)

            # Total number of products, counted before limit/offset
            count_subq = sales_subq.subquery()
            total = db.query(func.count()).select_from(count_subq).scalar() or 0

            # Order and paginate. The grouping keys are added as tiebreakers:
            # with LIMIT/OFFSET, a non-unique ORDER BY lets the database return
            # tied rows in a different order on each page request, which shows
            # up as duplicated or missing products while paging.
            sales_subq = sales_subq.order_by(
                func.sum(SalesData.total_amount).desc(),
                SalesData.codigo_nacional,
                SalesData.product_name
            )
            sales_subq = sales_subq.limit(limit).offset(offset)

            results = sales_subq.all()

            # Serialise rows; the catalog id is preferred as "id" and the
            # national code is used when the product is not in the catalog
            products = [
                {
                    "id": str(row.venta_libre_product_id) if row.venta_libre_product_id else row.codigo_nacional,
                    "product_code": row.codigo_nacional,
                    "product_name": row.product_name,
                    "ml_category": row.ml_category,
                    "ml_subcategory": row.ml_subcategory,  # Issue #523: L2 subcategory
                    "ml_confidence": float(row.ml_confidence) if row.ml_confidence else 0.0,
                    "detected_brand": row.detected_brand,
                    "total_sales": round(float(row.total_sales or 0), 2),
                    "total_units": int(row.total_units or 0)
                }
                for row in results
            ]

            # Page arithmetic (ceil division); a non-positive limit yields 0 pages
            pages = (total + limit - 1) // limit if limit > 0 else 0
            page = (offset // limit) + 1 if limit > 0 else 1

            logger.info(
                "ventalibre.products_list.success",
                pharmacy_id=str(pharmacy_id),
                total=total,
                returned=len(products)
            )

            return {
                "products": products,
                "total": total,
                "page": page,
                "pages": pages
            }

        except Exception as e:
            logger.error(
                "ventalibre.products_list.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {
                "products": [],
                "total": 0,
                "page": 1,
                "pages": 0
            }

    def get_kpis(
        self,
        db: Session,
        pharmacy_id: UUID,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        employee_ids: Optional[List[str]] = None
    ) -> Dict:
        """
        KPIs for the venta libre dashboard.

        Any exception is logged and an all-zero result is returned.

        Args:
            db: Database session
            pharmacy_id: Pharmacy ID
            date_from: Inclusive start of the date range
            date_to: Inclusive end of the date range
            employee_ids: Employee filter, matched against employee_name
                (None/empty = all). The special value "__sin_empleado__"
                selects sales with no employee name.

        Returns:
            Dict with the KPIs:
            {
                "total_sales": 45230.50,
                "total_products": 1247,
                "categories_count": 32,
                "coverage_percent": 94.2,
                "yoy_growth": 12.3
            }
            yoy_growth is only computed when both dates are given and the
            comparison period has sales above zero; otherwise it is 0.
        """
        try:
            # Employee restriction (uses employee_name, not employee_id).
            # Built once and applied to both the current and comparison period.
            employee_condition = None
            if employee_ids:
                named = [name for name in employee_ids if name != "__sin_empleado__"]
                employee_clauses = []
                if named:
                    employee_clauses.append(SalesData.employee_name.in_(named))
                if "__sin_empleado__" in employee_ids:
                    employee_clauses.append(SalesData.employee_name.is_(None))
                employee_condition = (
                    employee_clauses[0]
                    if len(employee_clauses) == 1
                    else or_(*employee_clauses)
                )

            # Filter for the requested period
            base_filter = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre"
            ]
            if date_from:
                base_filter.append(SalesData.sale_date >= date_from)
            if date_to:
                base_filter.append(SalesData.sale_date <= date_to)
            if employee_condition is not None:
                base_filter.append(employee_condition)

            # Total sales and distinct products
            totals = (
                db.query(
                    func.sum(SalesData.total_amount).label("total_sales"),
                    func.count(func.distinct(SalesData.codigo_nacional)).label("total_products")
                )
                .join(SalesEnrichment, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(*base_filter)
                .first()
            )
            if totals:
                total_sales = float(totals.total_sales or 0)
                total_products = int(totals.total_products or 0)
            else:
                total_sales = 0
                total_products = 0

            # Distinct non-empty categories that have sales
            categories_count = (
                db.query(func.count(func.distinct(SalesEnrichment.ml_category)))
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(
                    *base_filter,
                    SalesEnrichment.ml_category.isnot(None),
                    SalesEnrichment.ml_category != ""
                )
                .scalar()
            ) or 0

            # Coverage: products with a meaningful category vs. all products.
            # NOTE(review): only "otros" and "sin_clasificar" are excluded
            # here, while FALLBACK_CATEGORIES lists three more values -
            # confirm whether the difference is intended.
            classified_count = (
                db.query(func.count(func.distinct(SalesData.codigo_nacional)))
                .join(SalesEnrichment, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(
                    *base_filter,
                    SalesEnrichment.ml_category.isnot(None),
                    SalesEnrichment.ml_category != "",
                    SalesEnrichment.ml_category != "otros",
                    SalesEnrichment.ml_category != "sin_clasificar"
                )
                .scalar()
            ) or 0

            if total_products > 0:
                coverage_percent = classified_count / total_products * 100
            else:
                coverage_percent = 0

            # Growth against the comparison period.
            # NOTE(review): the comparison window is the period of equal
            # length that ends the day before date_from, not the same dates
            # one year earlier - confirm this matches the "yoy" naming.
            yoy_growth = 0.0
            if date_from and date_to:
                prev_date_to = date_from - timedelta(days=1)
                prev_date_from = prev_date_to - timedelta(days=(date_to - date_from).days)

                prev_filter = [
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesEnrichment.product_type == "venta_libre",
                    SalesData.sale_date >= prev_date_from,
                    SalesData.sale_date <= prev_date_to
                ]
                if employee_condition is not None:
                    prev_filter.append(employee_condition)

                prev_sales = float(
                    db.query(func.sum(SalesData.total_amount))
                    .join(SalesEnrichment, SalesEnrichment.sales_data_id == SalesData.id)
                    .filter(*prev_filter)
                    .scalar()
                    or 0
                )

                # Without a positive baseline the growth stays at 0.0
                if prev_sales > 0:
                    yoy_growth = ((total_sales - prev_sales) / prev_sales) * 100

            logger.info(
                "ventalibre.kpis.success",
                pharmacy_id=str(pharmacy_id),
                total_sales=total_sales,
                total_products=total_products
            )

            return {
                "total_sales": round(total_sales, 2),
                "total_products": total_products,
                "categories_count": categories_count,
                "coverage_percent": round(coverage_percent, 1),
                "yoy_growth": round(yoy_growth, 1)
            }

        except Exception as e:
            logger.error(
                "ventalibre.kpis.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {
                "total_sales": 0,
                "total_products": 0,
                "categories_count": 0,
                "coverage_percent": 0,
                "yoy_growth": 0
            }

    def get_categories(
        self,
        db: Session,
        pharmacy_id: UUID
    ) -> Dict:
        """
        List the NECESIDAD categories that have sales, with product counts.

        Categories are ordered by distinct product count, descending. The
        label is derived from the category value by replacing underscores
        with spaces and title-casing. Any exception is logged and an empty
        list is returned.

        Args:
            db: Database session
            pharmacy_id: Pharmacy ID

        Returns:
            Dict with the categories:
            {
                "categories": [
                    {"value": "proteccion_solar", "label": "Proteccion Solar", "count": 234},
                    ...
                ]
            }
        """
        try:
            # The aggregate is labelled "product_count" rather than "count":
            # result rows are sequence-like, so on recent SQLAlchemy versions
            # `row.count` resolves to the inherited sequence method instead of
            # the column value.
            product_count = func.count(func.distinct(SalesData.codigo_nacional))

            query = (
                db.query(
                    SalesEnrichment.ml_category,
                    product_count.label("product_count")
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesEnrichment.product_type == "venta_libre",
                    SalesEnrichment.ml_category.isnot(None),
                    SalesEnrichment.ml_category != ""
                )
                .group_by(SalesEnrichment.ml_category)
                .order_by(product_count.desc())
            )

            categories = [
                {
                    "value": row.ml_category,
                    "label": row.ml_category.replace("_", " ").title(),
                    "count": int(row.product_count or 0)
                }
                for row in query.all()
            ]

            logger.info(
                "ventalibre.categories.success",
                pharmacy_id=str(pharmacy_id),
                count=len(categories)
            )

            return {"categories": categories}

        except Exception as e:
            logger.error(
                "ventalibre.categories.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {"categories": []}

    def get_time_series(
        self,
        db: Session,
        pharmacy_id: UUID,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        employee_ids: Optional[List[str]] = None,
        period_months: int = 12
    ) -> Dict:
        """
        Monthly sales time series per NECESIDAD category (Issue #491).

        Returns the data in long/flat format for the evolution chart, grouped
        by month (YYYY-MM) and ml_category. Only rows with a non-null,
        non-empty ml_category are included. Any exception is logged and an
        empty result is returned.

        Args:
            db: Database session
            pharmacy_id: Pharmacy ID
            date_from: Inclusive start date (if None, period_months * 30 days
                before date_to)
            date_to: Inclusive end date (if None, today)
            employee_ids: Employee filter, matched against employee_name
                (None/empty = all). The special value "__sin_empleado__"
                selects sales with no employee name.
            period_months: Months to look back when date_from is not given

        Returns:
            Dict with the series, the per-category summary and the effective
            date range (ISO strings; None on error):
            {
                "time_series": [
                    {"period": "2024-01", "category": "proteccion_solar", "sales": 1500.0, "units": 234},
                    ...
                ],
                "category_summary": [
                    {"category": "proteccion_solar", "total_sales": 18000.0, "total_units": 2800},
                    ...
                ],
                "total_sales": 60000.0,
                "date_from": "2023-01-06",
                "date_to": "2024-01-01"
            }
        """
        try:
            # Resolve the effective date range
            if not date_to:
                date_to = date.today()
            if not date_from:
                date_from = date_to - timedelta(days=period_months * 30)

            # A single expression object is shared by SELECT and GROUP BY so
            # both render the same bound parameter for the format string.
            # Building it twice yields two distinct parameters, which drivers
            # using server-side binds can reject as a non-grouped column.
            month_bucket = func.to_char(SalesData.sale_date, 'YYYY-MM')

            # Venta libre sales per month and category
            query = (
                db.query(
                    month_bucket.label('period'),
                    SalesEnrichment.ml_category.label('category'),
                    func.sum(SalesData.total_amount).label('sales'),
                    func.sum(SalesData.quantity).label('units')
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesEnrichment.product_type == "venta_libre",
                    SalesEnrichment.ml_category.isnot(None),
                    SalesEnrichment.ml_category != "",
                    SalesData.sale_date >= date_from,
                    SalesData.sale_date <= date_to
                )
            )

            # Employee filter (uses employee_name, not employee_id)
            if employee_ids:
                has_null = "__sin_empleado__" in employee_ids
                other_names = [e for e in employee_ids if e != "__sin_empleado__"]

                if has_null and other_names:
                    query = query.filter(
                        or_(
                            SalesData.employee_name.in_(other_names),
                            SalesData.employee_name.is_(None)
                        )
                    )
                elif has_null:
                    query = query.filter(SalesData.employee_name.is_(None))
                else:
                    query = query.filter(SalesData.employee_name.in_(other_names))

            query = query.group_by(month_bucket, SalesEnrichment.ml_category)

            results = query.all()

            # Build the flat series and accumulate per-category totals
            time_series = []
            category_totals = {}

            for row in results:
                category = row.category
                sales = float(row.sales or 0)
                units = int(row.units or 0)

                time_series.append({
                    "period": row.period,
                    "category": category,
                    "sales": round(sales, 2),
                    "units": units
                })

                totals = category_totals.setdefault(category, {"sales": 0, "units": 0})
                totals["sales"] += sales
                totals["units"] += units

            # Chronological order, then by category within each month
            time_series.sort(key=lambda x: (x["period"], x["category"]))

            # Per-category summary, best-selling category first
            category_summary = [
                {
                    "category": cat,
                    "total_sales": round(totals["sales"], 2),
                    "total_units": totals["units"]
                }
                for cat, totals in sorted(
                    category_totals.items(),
                    key=lambda x: x[1]["sales"],
                    reverse=True
                )
            ]

            # Sum of the rounded per-category totals, so the grand total
            # matches the summary rows as returned
            total_sales = sum(c["total_sales"] for c in category_summary)

            logger.info(
                "ventalibre.time_series.success",
                pharmacy_id=str(pharmacy_id),
                periods=len(set(ts["period"] for ts in time_series)),
                categories=len(category_summary),
                total_sales=total_sales
            )

            return {
                "time_series": time_series,
                "category_summary": category_summary,
                "total_sales": round(total_sales, 2),
                "date_from": date_from.isoformat(),
                "date_to": date_to.isoformat()
            }

        except Exception as e:
            logger.error(
                "ventalibre.time_series.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {
                "time_series": [],
                "category_summary": [],
                "total_sales": 0,
                "date_from": None,
                "date_to": None
            }

    def get_yoy_comparison(
        self,
        db: Session,
        pharmacy_id: UUID,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        employee_ids: Optional[List[str]] = None
    ) -> Dict:
        """
        Compare sales per NECESIDAD category between two periods (Issue #491).

        Sums ``SalesData.total_amount`` of "venta_libre" lines with a non-null
        ``ml_category`` for the current period and for a comparison period,
        and reports the variation of every category seen in either period.

        NOTE(review): despite the "YoY" name, the comparison period is the
        window of equal length that ends the day before ``date_from``. With the
        default 365-day range this is the previous year, but for a shorter
        custom range it is the immediately preceding period, not the same
        dates one year earlier. Confirm the intended semantics.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            date_from: Start of the current period (default: ``date_to`` - 365 days).
            date_to: End of the current period (default: today).
            employee_ids: Optional values matched against
                ``SalesData.employee_name``; the sentinel ``"__sin_empleado__"``
                selects rows without an employee.

        Returns:
            Dict with:
            {
                "categories": [
                    {
                        "category": "proteccion_solar",
                        "current_sales": 15000.0,
                        "previous_sales": 12000.0,
                        "variation_euros": 3000.0,
                        "variation_percent": 25.0,
                        "trend": "up",            # "up" > +5 %, "down" < -5 %, else "stable"
                        "sparkline_data": [...]   # monthly sales, ~last 180 days
                    },
                    ...
                ],
                "total_current": 60000.0,
                "total_previous": 50000.0,
                "total_variation_percent": 20.0,
                "current_period": {"from": "...", "to": "..."},
                "previous_period": {"from": "...", "to": "..."}
            }
            Categories are sorted by absolute euro variation, largest first.
            On any exception the error is logged and a payload with an empty
            ``categories`` list and zeroed totals (no period keys) is returned.
        """
        try:
            # Resolve the current period
            if not date_to:
                date_to = date.today()
            if not date_from:
                # Default window: the 365 days before date_to
                date_from = date_to - timedelta(days=365)

            # Comparison period: same length, ending the day before date_from
            days_in_period = (date_to - date_from).days
            prev_date_to = date_from - timedelta(days=1)
            prev_date_from = prev_date_to - timedelta(days=days_in_period)

            def _filter_by_employee(query):
                """Restrict a query to the requested employees (no-op without filter)."""
                if not employee_ids:
                    return query
                include_unassigned = "__sin_empleado__" in employee_ids
                named = [e for e in employee_ids if e != "__sin_empleado__"]
                if include_unassigned and named:
                    return query.filter(
                        or_(SalesData.employee_name.in_(named), SalesData.employee_name.is_(None))
                    )
                if include_unassigned:
                    return query.filter(SalesData.employee_name.is_(None))
                return query.filter(SalesData.employee_name.in_(named))

            def _get_sales_by_category(start_date: date, end_date: date) -> Dict[str, float]:
                """Return total sales per category for the inclusive date range."""
                query = (
                    db.query(
                        SalesEnrichment.ml_category,
                        func.sum(SalesData.total_amount).label('sales')
                    )
                    .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                    .filter(
                        SalesData.pharmacy_id == pharmacy_id,
                        SalesEnrichment.product_type == "venta_libre",
                        SalesEnrichment.ml_category.isnot(None),
                        SalesData.sale_date >= start_date,
                        SalesData.sale_date <= end_date
                    )
                )
                query = _filter_by_employee(query).group_by(SalesEnrichment.ml_category)
                return {row.ml_category: float(row.sales or 0) for row in query.all()}

            # Sales for both periods
            current_sales = _get_sales_by_category(date_from, date_to)
            previous_sales = _get_sales_by_category(prev_date_from, prev_date_to)

            # Build one comparison entry per category present in either period
            categories = []
            for category in set(current_sales) | set(previous_sales):
                current = current_sales.get(category, 0)
                previous = previous_sales.get(category, 0)
                variation_euros = current - previous
                if previous > 0:
                    variation_percent = variation_euros / previous * 100
                else:
                    # No baseline: report +100 % for new sales, 0 % otherwise
                    variation_percent = 100 if current > 0 else 0

                if variation_percent > 5:
                    trend = "up"
                elif variation_percent < -5:
                    trend = "down"
                else:
                    trend = "stable"

                categories.append({
                    "category": category,
                    "current_sales": round(current, 2),
                    "previous_sales": round(previous, 2),
                    "variation_euros": round(variation_euros, 2),
                    "variation_percent": round(variation_percent, 1),
                    "trend": trend,
                    "sparkline_data": []  # filled in below
                })

            # Largest absolute impact first
            categories.sort(key=lambda x: abs(x["variation_euros"]), reverse=True)

            # Sparkline: monthly sales per category over the ~6 months before date_to.
            # The window starts 180 days back, so the first month may be partial.
            sparkline_start = date_to - timedelta(days=180)
            month_bucket = func.date_trunc('month', SalesData.sale_date)
            sparkline_query = (
                db.query(
                    month_bucket.label('month'),
                    SalesEnrichment.ml_category,
                    func.sum(SalesData.total_amount).label('sales')
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesEnrichment.product_type == "venta_libre",
                    SalesEnrichment.ml_category.isnot(None),
                    SalesData.sale_date >= sparkline_start,
                    SalesData.sale_date <= date_to
                )
            )
            sparkline_query = (
                _filter_by_employee(sparkline_query)
                .group_by(month_bucket, SalesEnrichment.ml_category)
                .order_by(month_bucket)
            )
            sparkline_rows = sparkline_query.all()

            # Index by category and month so every series can be aligned on the
            # same month axis; a month without sales becomes 0.0 instead of
            # being silently dropped (which would compress the sparkline).
            months = sorted({row.month for row in sparkline_rows})
            monthly_sales_by_category: Dict[str, Dict] = {}
            for row in sparkline_rows:
                monthly_sales_by_category.setdefault(row.ml_category, {})[row.month] = float(row.sales or 0)

            for cat_data in categories:
                monthly = monthly_sales_by_category.get(cat_data["category"])
                # Categories with no sales at all in the window keep an empty series
                cat_data["sparkline_data"] = (
                    [monthly.get(month, 0.0) for month in months] if monthly else []
                )

            # Totals (computed from the rounded per-category figures)
            total_current = sum(c["current_sales"] for c in categories)
            total_previous = sum(c["previous_sales"] for c in categories)
            total_variation = ((total_current - total_previous) / total_previous * 100) if total_previous > 0 else 0

            logger.info(
                "ventalibre.yoy_comparison.success",
                pharmacy_id=str(pharmacy_id),
                categories_count=len(categories),
                total_variation=round(total_variation, 1)
            )

            return {
                "categories": categories,
                "total_current": round(total_current, 2),
                "total_previous": round(total_previous, 2),
                "total_variation_percent": round(total_variation, 1),
                "current_period": {"from": date_from.isoformat(), "to": date_to.isoformat()},
                "previous_period": {"from": prev_date_from.isoformat(), "to": prev_date_to.isoformat()}
            }

        except Exception as e:
            # Best-effort endpoint: log and return an empty payload instead of raising
            logger.error(
                "ventalibre.yoy_comparison.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {
                "categories": [],
                "total_current": 0,
                "total_previous": 0,
                "total_variation_percent": 0
            }

    def get_top_contributors(
        self,
        db: Session,
        pharmacy_id: UUID,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        employee_ids: Optional[List[str]] = None,
        limit: int = 10,
        direction: str = "all"
    ) -> Dict:
        """
        Top products driving the sales change between two periods (Issue #491).

        Aggregates "venta_libre" sales per product for the current period and
        for the window of equal length ending the day before ``date_from``,
        then ranks products by absolute euro variation.

        NOTE(review): the comparison window is the immediately preceding
        period of the same length; it only equals "same period last year" for
        the default 365-day range. Confirm the intended semantics.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            date_from: Start of the current period (default: ``date_to`` - 365 days).
            date_to: End of the current period (default: today).
            employee_ids: Optional values matched against
                ``SalesData.employee_name``; the sentinel ``"__sin_empleado__"``
                selects rows without an employee.
            limit: Maximum number of products returned.
            direction: "up" keeps only growing products, "down" only shrinking
                ones; any other value keeps both.

        Returns:
            Dict with:
            {
                "contributors": [
                    {
                        "product_name": "ISDIN FOTOPROTECTOR SPF50+",
                        "category": "proteccion_solar",
                        "current_sales": 2500.0,
                        "previous_sales": 1500.0,
                        "variation_euros": 1000.0,
                        "variation_percent": 66.7,
                        "variation_units": 40,
                        "impact_percent": 33.3
                    },
                    ...
                ],
                "total_change": 3000.0,
                "direction": "all"
            }
            ``total_change`` is the net variation of all products that passed
            the direction filter (before ``limit``). ``impact_percent`` is each
            product's absolute variation relative to that net figure, so with
            mixed gains and losses it can exceed 100. On any exception the
            error is logged and an empty payload is returned.
        """
        try:
            # Resolve the current period
            if not date_to:
                date_to = date.today()
            if not date_from:
                date_from = date_to - timedelta(days=365)

            # Comparison period: same length, ending the day before date_from
            days_in_period = (date_to - date_from).days
            prev_date_to = date_from - timedelta(days=1)
            prev_date_from = prev_date_to - timedelta(days=days_in_period)

            def _filter_by_employee(query):
                """Restrict a query to the requested employees (no-op without filter)."""
                if not employee_ids:
                    return query
                include_unassigned = "__sin_empleado__" in employee_ids
                named = [e for e in employee_ids if e != "__sin_empleado__"]
                if include_unassigned and named:
                    return query.filter(
                        or_(SalesData.employee_name.in_(named), SalesData.employee_name.is_(None))
                    )
                if include_unassigned:
                    return query.filter(SalesData.employee_name.is_(None))
                return query.filter(SalesData.employee_name.in_(named))

            def _get_product_sales(start_date: date, end_date: date) -> Dict[str, Dict]:
                """Return sales and units per product for the inclusive date range."""
                query = (
                    db.query(
                        SalesData.codigo_nacional,
                        SalesData.product_name,
                        SalesEnrichment.ml_category,
                        func.sum(SalesData.total_amount).label('sales'),
                        func.sum(SalesData.quantity).label('units')
                    )
                    .join(SalesEnrichment, SalesEnrichment.sales_data_id == SalesData.id)
                    .filter(
                        SalesData.pharmacy_id == pharmacy_id,
                        SalesEnrichment.product_type == "venta_libre",
                        SalesData.sale_date >= start_date,
                        SalesData.sale_date <= end_date
                    )
                )
                query = _filter_by_employee(query).group_by(
                    SalesData.codigo_nacional,
                    SalesData.product_name,
                    SalesEnrichment.ml_category
                )

                # The query groups by (code, name, category), so one product can
                # come back as several rows. Accumulate them instead of letting
                # later rows overwrite earlier ones, and fall back to the name
                # as key so products without a national code stay separate.
                products: Dict[str, Dict] = {}
                top_variant_sales: Dict[str, float] = {}
                for row in query.all():
                    key = row.codigo_nacional or row.product_name
                    sales = float(row.sales or 0)
                    entry = products.get(key)
                    if entry is None:
                        entry = products[key] = {
                            "name": row.product_name,
                            "category": row.ml_category,
                            "sales": 0.0,
                            "units": 0
                        }
                        top_variant_sales[key] = sales
                    elif sales > top_variant_sales[key]:
                        # Label the product with its best-selling variant
                        entry["name"] = row.product_name
                        entry["category"] = row.ml_category
                        top_variant_sales[key] = sales
                    entry["sales"] += sales
                    entry["units"] += int(row.units or 0)
                return products

            # Sales for both periods
            current_products = _get_product_sales(date_from, date_to)
            previous_products = _get_product_sales(prev_date_from, prev_date_to)

            contributors = []
            total_change = 0

            for codigo in set(current_products) | set(previous_products):
                current = current_products.get(codigo)
                previous = previous_products.get(codigo)
                # A product sold only in the previous period still has a real
                # name/category there; use it instead of a placeholder.
                descriptor = current or previous

                current_sales = current["sales"] if current else 0
                previous_sales = previous["sales"] if previous else 0
                variation_euros = current_sales - previous_sales

                # Direction filter
                if direction == "up" and variation_euros <= 0:
                    continue
                if direction == "down" and variation_euros >= 0:
                    continue

                if previous_sales > 0:
                    variation_percent = variation_euros / previous_sales * 100
                else:
                    # No baseline: report +100 % for new sales, 0 % otherwise
                    variation_percent = 100 if current_sales > 0 else 0

                current_units = current["units"] if current else 0
                previous_units = previous["units"] if previous else 0

                contributors.append({
                    "product_name": descriptor["name"],
                    "category": descriptor["category"],
                    "current_sales": round(current_sales, 2),
                    "previous_sales": round(previous_sales, 2),
                    "variation_euros": round(variation_euros, 2),
                    "variation_percent": round(variation_percent, 1),
                    "variation_units": current_units - previous_units
                })
                total_change += variation_euros

            # Largest absolute variation first, then truncate
            contributors.sort(key=lambda x: abs(x["variation_euros"]), reverse=True)
            contributors = contributors[:limit]

            # Share of the net change attributable to each listed product
            if abs(total_change) > 0:
                for c in contributors:
                    c["impact_percent"] = round(abs(c["variation_euros"]) / abs(total_change) * 100, 1)
            else:
                for c in contributors:
                    c["impact_percent"] = 0

            logger.info(
                "ventalibre.top_contributors.success",
                pharmacy_id=str(pharmacy_id),
                contributors_count=len(contributors),
                total_change=round(total_change, 2)
            )

            return {
                "contributors": contributors,
                "total_change": round(total_change, 2),
                "direction": direction
            }

        except Exception as e:
            # Best-effort endpoint: log and return an empty payload instead of raising
            logger.error(
                "ventalibre.top_contributors.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e)
            )
            return {
                "contributors": [],
                "total_change": 0,
                "direction": direction
            }


    # ========================================================================
    # Issue #493: Métodos para Tab "Categorías y Marcas"
    # ========================================================================

    def _calculate_hhi(self, brand_shares: List[float]) -> float:
        """
        Compute the Herfindahl-Hirschman Index (HHI) of a set of market shares.

        Each share is a fraction (0.25 means 25 %). It is converted to
        percentage points, squared, and the squares are added up, so the
        result runs from 0 (fragmented market) to 10000 (single brand).

        Args:
            brand_shares: Market share of every brand, as fractions.

        Returns:
            The HHI, or 0.0 when no shares are given.
        """
        if brand_shares:
            squared_points = [(fraction * 100) ** 2 for fraction in brand_shares]
            return sum(squared_points)
        return 0.0

    def _interpret_hhi(self, hhi: float) -> Dict:
        """
        Translate an HHI value into a pharmacist-facing concentration label.

        Bands: below 1500 is "low", from 1500 up to (not including) 2500 is
        "medium", anything else is "high".

        Args:
            hhi: Herfindahl-Hirschman Index value.

        Returns:
            A new dict with "level", "color", "title" and "message".
        """
        if hhi < 1500:
            band = (
                "low",
                "success",
                "Mercado atomizado",
                "Categoría muy competitiva. Libertad para probar marcas nuevas.",
            )
        elif hhi < 2500:
            band = (
                "medium",
                "warning",
                "Concentración moderada",
                "Evalúa alternativas. Algunas marcas dominan.",
            )
        else:
            band = (
                "high",
                "danger",
                "Oligopolio",
                "Líderes fuertes. Cuidado al eliminar marca top.",
            )

        level, color, title, message = band
        return {
            "level": level,
            "color": color,
            "title": title,
            "message": message
        }

    def _get_margin_from_inventory(
        self,
        db: Session,
        pharmacy_id: UUID,
    ):
        """
        Build a subquery with the latest inventory cost per product (Issue #538).

        Snapshots of the given pharmacy that have a positive, non-null
        ``unit_cost`` are ranked per ``product_code`` by ``snapshot_date``
        (newest first) with a ROW_NUMBER window; only rank 1 is kept.

        The caller-side intent (see the methods using this subquery) is to
        derive a margin from inventory when ``SalesData.margin_percentage`` is
        NULL. NOTE(review): the original notes mention the index
        ``idx_inventory_pharmacy_date`` as supporting this query; that cannot
        be confirmed from this file.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose snapshots are considered.

        Returns:
            Subquery exposing ``product_code``, ``unit_cost`` and ``unit_price``
            of the most recent qualifying snapshot of each product.
        """
        snapshot = InventorySnapshot

        # 1 = newest snapshot of each product
        recency_rank = func.row_number().over(
            partition_by=snapshot.product_code,
            order_by=snapshot.snapshot_date.desc()
        ).label("rn")

        ranked_snapshots = (
            db.query(
                snapshot.product_code,
                snapshot.unit_cost,
                snapshot.unit_price,
                recency_rank
            )
            .filter(
                snapshot.pharmacy_id == pharmacy_id,
                snapshot.unit_cost.isnot(None),
                snapshot.unit_cost > 0,
            )
            .subquery()
        )

        ranked = ranked_snapshots.c
        return (
            db.query(ranked.product_code, ranked.unit_cost, ranked.unit_price)
            .filter(ranked.rn == 1)
            .subquery()
        )

    def _calculate_margin_expression(self, inventory_subquery):
        """
        Build the SQL expression that derives a margin percentage from inventory.

        Formula: ((unit_price - unit_cost) / unit_price) * 100

        The expression evaluates to NULL unless ``unit_price`` is non-null and
        positive and ``unit_cost`` is non-null, which also guards the division
        against a zero price.

        Args:
            inventory_subquery: Subquery exposing ``unit_price`` and
                ``unit_cost`` columns.

        Returns:
            A SQL CASE expression yielding the margin percentage or NULL.
        """
        price = inventory_subquery.c.unit_price
        cost = inventory_subquery.c.unit_cost

        has_usable_prices = and_(
            price.isnot(None),
            price > 0,
            cost.isnot(None),
        )
        margin_pct = ((price - cost) / price) * 100

        return case((has_usable_prices, margin_pct), else_=None)

    def get_brands_by_necesidad(
        self,
        db: Session,
        pharmacy_id: UUID,
        necesidad: str,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        employee_name: Optional[str] = None,
    ) -> Dict:
        """
        Brand breakdown of a NECESIDAD category, with its HHI concentration index.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            necesidad: NECESIDAD category; a principal category is expanded to
                all of its specific subcategories.
            date_from: Optional inclusive start date.
            date_to: Optional inclusive end date.
            employee_name: Optional exact ``SalesData.employee_name`` filter.

        Returns:
            Dict with:
            - "brands": per-brand sales, units, share (%), avg_margin and
              transactions, ordered by sales descending. The margin falls back
              to the inventory-derived value when the sale has none, and a
              missing margin is reported as 0.
            - "total_sales" / "total_units": totals over branded sales.
            - "hhi" / "hhi_interpretation": concentration among the brands.
            - "coverage_percent": share of the category's sales lines (under
              the same date/employee filters) that have a detected brand.
            - "brand_duel_available": True when at least two brands exist.
            When there is no branded data a "message" key is included; on any
            exception the error is logged and an "error" key is included.
        """
        try:
            # Expand a principal category into its specific subcategories
            categories = _expand_to_specific_categories(necesidad)

            # Filters shared by the branded query and the coverage denominator,
            # so both are measured over exactly the same population.
            base_filters = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                SalesEnrichment.ml_category.in_(categories),
            ]
            if date_from:
                base_filters.append(SalesData.sale_date >= date_from)
            if date_to:
                base_filters.append(SalesData.sale_date <= date_to)
            if employee_name:
                base_filters.append(SalesData.employee_name == employee_name)

            brand_filters = base_filters + [
                SalesEnrichment.detected_brand.isnot(None),
                SalesEnrichment.detected_brand != "",
            ]

            # Issue #538: inventory costs used to derive a margin when the sale has none
            inventory_costs = self._get_margin_from_inventory(db, pharmacy_id)
            inventory_margin = self._calculate_margin_expression(inventory_costs)

            # Sales per brand; the inventory subquery has one row per product
            # code, so the outer join does not multiply sales rows.
            query = (
                db.query(
                    SalesEnrichment.detected_brand.label("brand"),
                    func.sum(SalesData.total_amount).label("sales"),
                    func.sum(SalesData.quantity).label("units"),
                    func.avg(
                        func.coalesce(SalesData.margin_percentage, inventory_margin)
                    ).label("avg_margin"),
                    func.count(distinct(SalesData.id)).label("transactions"),
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .outerjoin(
                    inventory_costs,
                    SalesData.codigo_nacional == inventory_costs.c.product_code
                )
                .filter(and_(*brand_filters))
                .group_by(SalesEnrichment.detected_brand)
                .order_by(func.sum(SalesData.total_amount).desc())
            )

            results = query.all()

            if not results:
                return {
                    "brands": [],
                    "total_sales": 0,
                    "total_units": 0,
                    "hhi": 0,
                    "hhi_interpretation": self._interpret_hhi(0),
                    "coverage_percent": 0,
                    "brand_duel_available": False,
                    "message": "No hay datos de marcas para esta categoría"
                }

            # Totals (converted to plain float/int to avoid Decimal mixing)
            total_sales = float(sum(r.sales or 0 for r in results))
            total_units = int(sum(r.units or 0 for r in results))

            # Shares (fractions) feed the HHI; the payload reports percentages
            brands = []
            brand_shares = []
            for r in results:
                sales = float(r.sales or 0)
                share = sales / total_sales if total_sales > 0 else 0
                brand_shares.append(share)

                brands.append({
                    "brand": r.brand,
                    "sales": round(sales, 2),
                    "units": int(r.units or 0),
                    "share": round(share * 100, 2),
                    "avg_margin": round(float(r.avg_margin or 0), 1),
                    "transactions": int(r.transactions or 0),
                })

            hhi = self._calculate_hhi(brand_shares)

            # Coverage: branded sales lines vs all sales lines of the category.
            # IMPORTANT: always join SalesData to filter by pharmacy_id.
            # Assumes one SalesEnrichment row per SalesData row, so both
            # counts use the same unit (TODO confirm against the model).
            total_vl = (
                db.query(func.count(distinct(SalesEnrichment.id)))
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(and_(*base_filters))
                .scalar()
            ) or 0
            with_brand = sum(int(r.transactions or 0) for r in results)
            coverage = (with_brand / total_vl * 100) if total_vl > 0 else 0

            logger.info(
                "ventalibre.brands_by_necesidad.success",
                pharmacy_id=str(pharmacy_id),
                necesidad=necesidad,
                brands_count=len(brands),
                hhi=round(hhi, 1),
                coverage=round(coverage, 1),
            )

            return {
                "brands": brands,
                "total_sales": round(total_sales, 2),
                "total_units": total_units,
                "hhi": round(hhi, 1),
                "hhi_interpretation": self._interpret_hhi(hhi),
                "coverage_percent": round(coverage, 1),
                "brand_duel_available": len(brands) >= 2,
            }

        except Exception as e:
            # Best-effort endpoint: log and return an empty payload instead of raising
            logger.error(
                "ventalibre.brands_by_necesidad.error",
                pharmacy_id=str(pharmacy_id),
                necesidad=necesidad,
                error=str(e),
            )
            return {
                "brands": [],
                "total_sales": 0,
                "total_units": 0,
                "hhi": 0,
                "hhi_interpretation": self._interpret_hhi(0),
                "coverage_percent": 0,
                "brand_duel_available": False,
                "error": str(e),
            }

    def get_brand_market_share_evolution(
        self,
        db: Session,
        pharmacy_id: UUID,
        necesidad: str,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        top_n: int = 5,
    ) -> Dict:
        """
        Monthly market-share evolution per brand within a NECESIDAD category.

        Produces a monthly series where each entry holds the percentage of the
        month's branded sales for each of the top brands plus an "Otras"
        bucket, suitable for a 100 % stacked area chart.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            necesidad: NECESIDAD category; a principal category is expanded to
                all of its specific subcategories.
            date_from: Inclusive start date (default: ``date_to`` - 365 days).
            date_to: Inclusive end date (default: today).
            top_n: Number of brands shown individually, ranked by total sales
                over the whole range; negative values are treated as 0.

        Returns:
            Dict with "time_series" (one dict per month: "month" as 'YYYY-MM'
            plus one percentage per top brand and "Otras") and "top_brands".
            Months without any branded sale are absent from the series. A
            "message" key is added when there is no data and an "error" key
            when an exception was caught.
        """
        try:
            categories = _expand_to_specific_categories(necesidad)

            # Default: last 12 months
            if not date_to:
                date_to = date.today()
            if not date_from:
                date_from = date_to - timedelta(days=365)

            filters = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                SalesEnrichment.ml_category.in_(categories),
                SalesEnrichment.detected_brand.isnot(None),
                # Same rule as get_brands_by_necesidad: an empty string is not a brand
                SalesEnrichment.detected_brand != "",
                SalesData.sale_date >= date_from,
                SalesData.sale_date <= date_to,
            ]

            # Sales per month and brand
            month_label = func.to_char(SalesData.sale_date, 'YYYY-MM')
            query = (
                db.query(
                    month_label.label("month"),
                    SalesEnrichment.detected_brand.label("brand"),
                    func.sum(SalesData.total_amount).label("sales"),
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(and_(*filters))
                .group_by(month_label, SalesEnrichment.detected_brand)
                .order_by(month_label)
            )

            results = query.all()

            if not results:
                return {"time_series": [], "top_brands": [], "message": "No hay datos"}

            # Rank brands by total sales over the whole range
            brand_totals: Dict[str, float] = {}
            for r in results:
                brand_totals[r.brand] = brand_totals.get(r.brand, 0) + float(r.sales or 0)

            sorted_brands = sorted(brand_totals.items(), key=lambda x: x[1], reverse=True)
            top_brands = [name for name, _ in sorted_brands[:max(top_n, 0)]]
            top_brand_set = set(top_brands)  # O(1) membership in the loop below

            # Monthly sales, folding every non-top brand into "Otras"
            monthly_data: Dict[str, Dict[str, float]] = {}
            for r in results:
                series_name = r.brand if r.brand in top_brand_set else "Otras"
                month_sales = monthly_data.setdefault(r.month, {})
                month_sales[series_name] = month_sales.get(series_name, 0) + float(r.sales or 0)

            # Convert each month to percentages of that month's total
            series_names = top_brands + ["Otras"]
            time_series = []
            for month in sorted(monthly_data):
                month_sales = monthly_data[month]
                month_total = sum(month_sales.values())
                entry = {"month": month}
                for series_name in series_names:
                    sales = month_sales.get(series_name, 0)
                    entry[series_name] = round(sales / month_total * 100, 1) if month_total > 0 else 0
                time_series.append(entry)

            return {
                "time_series": time_series,
                "top_brands": top_brands,
            }

        except Exception as e:
            # Best-effort endpoint: log and return an empty payload instead of raising
            logger.error(
                "ventalibre.market_share_evolution.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e),
            )
            return {"time_series": [], "top_brands": [], "error": str(e)}

    def get_brand_value_quadrant(
        self,
        db: Session,
        pharmacy_id: UUID,
        necesidad: str,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
    ) -> Dict:
        """
        Data for the Margin (%) vs Volume (EUR) scatter plot - Value Quadrant.

        Every brand of the category is placed in one of four quadrants using
        the category's own medians of sales and margin as dynamic thresholds:
        "star" (high volume, high margin), "traffic" (high volume, low
        margin), "opportunity" (low volume, high margin) and "review" (both
        low). "High" means greater than or equal to the median.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            necesidad: NECESIDAD category; a principal category is expanded to
                all of its specific subcategories.
            date_from: Optional inclusive start date.
            date_to: Optional inclusive end date.

        Returns:
            Dict with "brands" (brand, sales, margin_pct, quadrant,
            quadrant_label) and "thresholds" (median_sales, median_margin).
            A brand without margin data is treated as 0 % margin. A "message"
            key is added when there is no data and an "error" key when an
            exception was caught.
        """
        # Function-scope import: keeps the file's import block untouched
        from statistics import median

        try:
            categories = _expand_to_specific_categories(necesidad)

            filters = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                SalesEnrichment.ml_category.in_(categories),
                SalesEnrichment.detected_brand.isnot(None),
                # Same rule as get_brands_by_necesidad: an empty string is not a brand
                SalesEnrichment.detected_brand != "",
            ]

            if date_from:
                filters.append(SalesData.sale_date >= date_from)
            if date_to:
                filters.append(SalesData.sale_date <= date_to)

            # Issue #538: inventory costs used to derive a margin when the sale has none
            inventory_costs = self._get_margin_from_inventory(db, pharmacy_id)
            inventory_margin = self._calculate_margin_expression(inventory_costs)

            # Volume and average margin per brand
            query = (
                db.query(
                    SalesEnrichment.detected_brand.label("brand"),
                    func.sum(SalesData.total_amount).label("sales"),
                    func.avg(
                        func.coalesce(SalesData.margin_pct if False else SalesData.margin_percentage, inventory_margin)
                    ).label("margin_pct"),
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .outerjoin(
                    inventory_costs,
                    SalesData.codigo_nacional == inventory_costs.c.product_code
                )
                .filter(and_(*filters))
                .group_by(SalesEnrichment.detected_brand)
            )

            results = query.all()

            if not results:
                return {"brands": [], "thresholds": {}, "message": "No hay datos"}

            # Keep unrounded figures for the threshold comparison; rounding is
            # applied only to the values placed in the payload.
            measured = [
                (r.brand, float(r.sales or 0), float(r.margin_pct or 0))
                for r in results
            ]

            # Dynamic thresholds: true medians (mean of the two middle values
            # when the number of brands is even)
            median_sales = median(sales for _, sales, _ in measured)
            median_margin = median(margin for _, _, margin in measured)

            # (high volume?, high margin?) -> (quadrant, label)
            quadrants = {
                (True, True): ("star", "Estrellas"),
                (True, False): ("traffic", "Generadores de Tráfico"),
                (False, True): ("opportunity", "Oportunidades"),
                (False, False): ("review", "Revisar"),
            }

            brands_data = []
            for brand_name, sales, margin in measured:
                quadrant, quadrant_label = quadrants[
                    (sales >= median_sales, margin >= median_margin)
                ]
                brands_data.append({
                    "brand": brand_name,
                    "sales": round(sales, 2),
                    "margin_pct": round(margin, 1),
                    "quadrant": quadrant,
                    "quadrant_label": quadrant_label,
                })

            return {
                "brands": brands_data,
                "thresholds": {
                    "median_sales": round(median_sales, 2),
                    "median_margin": round(median_margin, 1),
                },
            }

        except Exception as e:
            # Best-effort endpoint: log and return an empty payload instead of raising
            logger.error(
                "ventalibre.value_quadrant.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e),
            )
            return {"brands": [], "thresholds": {}, "error": str(e)}

    def get_brand_duel(
        self,
        db: Session,
        pharmacy_id: UUID,
        brand_a: str,
        brand_b: str,
        necesidad: str,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
    ) -> Dict:
        """
        Head-to-head comparison of two brands inside one NECESIDAD (Brand Duel).

        For each brand the method aggregates sales, units, average margin,
        average ticket, total margin and transaction count, then adds the
        brand's share of the category's total sales and picks a winner per
        metric.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            brand_a: First brand (reported under ``"brand_a"``, winner code "a").
            brand_b: Second brand (reported under ``"brand_b"``, winner code "b").
            necesidad: Category; expanded through
                ``_expand_to_specific_categories`` before filtering.
            date_from: Optional inclusive lower bound on the sale date.
            date_to: Optional inclusive upper bound on the sale date.

        Returns:
            Dict with ``brand_a``/``brand_b`` metric dicts, ``winners``
            (metric -> "a" | "b" | "tie") and ``total_category_sales``.
            On any exception the error is logged and a reduced dict with an
            ``error`` message is returned instead.
        """
        try:
            specific_categories = _expand_to_specific_categories(necesidad)

            # Issue #538: margin falls back to a value derived from inventory
            # costs when the sale row itself carries no margin percentage.
            cost_source = self._get_margin_from_inventory(db, pharmacy_id)
            fallback_margin = self._calculate_margin_expression(cost_source)
            row_margin_pct = func.coalesce(
                SalesData.margin_percentage, fallback_margin
            )

            # Conditions shared by the per-brand queries and the category total.
            category_scope = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                SalesEnrichment.ml_category.in_(specific_categories),
            ]
            period_scope = []
            if date_from:
                period_scope.append(SalesData.sale_date >= date_from)
            if date_to:
                period_scope.append(SalesData.sale_date <= date_to)

            def summarize(brand_name: str) -> Dict:
                """Run the aggregate query for one brand and normalise the row."""
                conditions = [
                    *category_scope,
                    SalesEnrichment.detected_brand == brand_name,
                    *period_scope,
                ]

                # total_margin uses a per-row COALESCE so rows with and
                # without a stored margin amount can be mixed.
                row_margin_amount = func.coalesce(
                    SalesData.margin_amount,
                    SalesData.total_amount * row_margin_pct / 100
                )

                row = (
                    db.query(
                        func.sum(SalesData.total_amount).label("sales"),
                        func.sum(SalesData.quantity).label("units"),
                        func.avg(row_margin_pct).label("margin_pct"),
                        func.avg(SalesData.total_amount).label("avg_ticket"),
                        func.sum(row_margin_amount).label("total_margin"),
                        func.count(distinct(SalesData.id)).label("transactions"),
                    )
                    .select_from(SalesEnrichment)
                    .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                    .outerjoin(
                        cost_source,
                        SalesData.codigo_nacional == cost_source.c.product_code
                    )
                    .filter(and_(*conditions))
                    .first()
                )

                return {
                    "brand": brand_name,
                    "sales": round(float(row.sales or 0), 2),
                    "units": int(row.units or 0),
                    "margin_pct": round(float(row.margin_pct or 0), 1),
                    "avg_ticket": round(float(row.avg_ticket or 0), 2),
                    "total_margin": round(float(row.total_margin or 0), 2),
                    "transactions": int(row.transactions or 0),
                }

            metrics_a = summarize(brand_a)
            metrics_b = summarize(brand_b)

            # Category-wide sales (all brands) are the denominator for share.
            category_sum = (
                db.query(func.sum(SalesData.total_amount))
                .join(SalesEnrichment, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(and_(*category_scope, *period_scope))
                .scalar()
            )
            total_sales = float(category_sum or 0)

            for side in (metrics_a, metrics_b):
                if total_sales > 0:
                    side["share"] = round(side["sales"] / total_sales * 100, 1)
                else:
                    side["share"] = 0

            # Every metric compared today is "higher is better". The "lower"
            # direction is kept for metrics such as rotation, which cannot be
            # computed here because no inventory data is loaded.
            comparison_rules = [
                ("share", "higher"),
                ("margin_pct", "higher"),
                ("avg_ticket", "higher"),
                ("units", "higher"),
                ("total_margin", "higher"),
            ]

            def decide(metric_name: str, better: str) -> str:
                """Return "a", "b" or "tie" for one metric."""
                left = metrics_a.get(metric_name, 0)
                right = metrics_b.get(metric_name, 0)
                if better == "higher":
                    a_leads, b_leads = left > right, right > left
                else:
                    a_leads, b_leads = left < right, right < left
                if a_leads:
                    return "a"
                if b_leads:
                    return "b"
                return "tie"

            winners = {
                metric_name: decide(metric_name, better)
                for metric_name, better in comparison_rules
            }

            return {
                "brand_a": metrics_a,
                "brand_b": metrics_b,
                "winners": winners,
                "total_category_sales": round(float(total_sales), 2),
            }

        except Exception as exc:
            failure = str(exc)
            logger.error(
                "ventalibre.brand_duel.error",
                pharmacy_id=str(pharmacy_id),
                brand_a=brand_a,
                brand_b=brand_b,
                error=failure,
            )
            return {
                "brand_a": {"brand": brand_a},
                "brand_b": {"brand": brand_b},
                "winners": {},
                "error": failure,
            }

    def get_price_distribution(
        self,
        db: Session,
        pharmacy_id: UUID,
        necesidad: str,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        top_n: int = 10,
    ) -> Dict:
        """
        Price distribution per brand for a boxplot (cannibalisation check).

        Selects the ``top_n`` brands by sales within the category and, for
        each, computes five-number statistics over the distinct
        (price, product code, product name) combinations sold.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            necesidad: Category; expanded through
                ``_expand_to_specific_categories`` before filtering.
            date_from: Optional inclusive lower bound on the sale date.
            date_to: Optional inclusive upper bound on the sale date.
            top_n: Number of brands (ranked by total sales) to include.

        Returns:
            ``{"brands": [...]}`` where each entry has ``brand``, ``count``,
            ``min``, ``q1``, ``median``, ``q3``, ``max``, ``iqr`` and up to
            five ``sample_products``. If no brand matches, ``brands`` is empty
            and a ``message`` is included. On any exception the error is
            logged and ``{"brands": [], "error": ...}`` is returned.
        """
        try:
            categories = _expand_to_specific_categories(necesidad)

            filters = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                SalesEnrichment.ml_category.in_(categories),
                SalesEnrichment.detected_brand.isnot(None),
                SalesData.sale_price > 0,
            ]

            if date_from:
                filters.append(SalesData.sale_date >= date_from)
            if date_to:
                filters.append(SalesData.sale_date <= date_to)

            # Top N brands by sales
            top_brands_query = (
                db.query(
                    SalesEnrichment.detected_brand,
                    func.sum(SalesData.total_amount).label("total_sales"),
                )
                .select_from(SalesEnrichment)
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(and_(*filters))
                .group_by(SalesEnrichment.detected_brand)
                .order_by(func.sum(SalesData.total_amount).desc())
                .limit(top_n)
            )

            top_brands = [r[0] for r in top_brands_query.all()]

            if not top_brands:
                return {"brands": [], "message": "No hay datos de marcas"}

            brands_data = []

            for brand in top_brands:
                # Distinct price points per product for this brand
                price_query = (
                    db.query(
                        SalesData.sale_price,
                        SalesData.codigo_nacional,
                        SalesData.product_name,
                    )
                    .select_from(SalesData)
                    .join(SalesEnrichment, SalesEnrichment.sales_data_id == SalesData.id)
                    .filter(
                        and_(
                            *filters,
                            SalesEnrichment.detected_brand == brand,
                        )
                    )
                    .distinct()
                )

                prices_results = price_query.all()
                prices = [float(r.sale_price) for r in prices_results if r.sale_price]

                if not prices:
                    continue

                prices.sort()
                n = len(prices)

                # Quartiles are taken by index into the sorted list (no
                # interpolation): positions n//4, n//2 and 3n//4.
                q1_idx = n // 4
                q3_idx = (3 * n) // 4

                brand_stats = {
                    "brand": brand,
                    "count": n,
                    "min": round(prices[0], 2),
                    "q1": round(prices[q1_idx], 2),
                    "median": round(prices[n // 2], 2),
                    "q3": round(prices[q3_idx], 2),
                    "max": round(prices[-1], 2),
                    "iqr": round(prices[q3_idx] - prices[q1_idx], 2),
                    # Hover sample: the first five rows as returned by the
                    # query (no ORDER BY, and not restricted to outliers).
                    # product_name may be NULL; fall back to "" so a single
                    # unnamed product does not abort the whole distribution.
                    "sample_products": [
                        {
                            "code": r.codigo_nacional,
                            "name": (r.product_name or "")[:50],
                            "price": float(r.sale_price),
                        }
                        for r in prices_results[:5]
                    ],
                }

                brands_data.append(brand_stats)

            return {"brands": brands_data}

        except Exception as e:
            logger.error(
                "ventalibre.price_distribution.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e),
            )
            return {"brands": [], "error": str(e)}

    # =========================================================================
    # Issue #505: L2 Subcategories Methods
    # =========================================================================

    def get_sales_by_l2(
        self,
        db: Session,
        pharmacy_id: UUID,
        l1_category: str,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
        employee_ids: Optional[List[str]] = None,
    ) -> Dict:
        """
        Aggregate venta-libre sales by ``ml_subcategory_l2`` within one L1 category.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are aggregated.
            l1_category: L1 category matched against the catalog's
                ``ml_category`` (e.g. dermocosmetica, suplementos, higiene_bucal).
            date_from: Optional inclusive lower bound on the sale date.
            date_to: Optional inclusive upper bound on the sale date.
            employee_ids: Optional values matched against
                ``SalesData.employee_name``; the sentinel
                ``"__sin_empleado__"`` selects rows with no employee.

        Returns:
            Dict with ``nodes`` (one per L2 subcategory, sorted by sales
            descending), ``total_sales``, ``total_products`` and
            ``l1_category``. On any exception the error is logged and an
            empty result with an ``error`` message is returned.
        """
        from app.schemas.symptom_taxonomy import (
            L2_ARCHETYPES,
            L2_DISPLAY_NAMES,
        )

        try:
            l2_column = ProductCatalogVentaLibre.ml_subcategory_l2

            # Collect every WHERE condition first, then build the query once.
            conditions = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                ProductCatalogVentaLibre.ml_category == l1_category,
                l2_column.isnot(None),
            ]

            if date_from:
                conditions.append(SalesData.sale_date >= date_from)
            if date_to:
                conditions.append(SalesData.sale_date <= date_to)

            if employee_ids:
                named_employees = [
                    e for e in employee_ids if e != "__sin_empleado__"
                ]
                include_unassigned = "__sin_empleado__" in employee_ids

                employee_clauses = []
                if named_employees:
                    employee_clauses.append(
                        SalesData.employee_name.in_(named_employees)
                    )
                if include_unassigned:
                    employee_clauses.append(SalesData.employee_name.is_(None))

                if len(employee_clauses) > 1:
                    conditions.append(or_(*employee_clauses))
                elif employee_clauses:
                    conditions.append(employee_clauses[0])

            rows = (
                db.query(
                    l2_column,
                    func.sum(SalesData.total_amount).label("total_sales"),
                    func.count(func.distinct(SalesData.codigo_nacional)).label("product_count"),
                    func.sum(SalesData.quantity).label("total_units"),
                )
                .join(SalesEnrichment, SalesEnrichment.venta_libre_product_id == ProductCatalogVentaLibre.id)
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(*conditions)
                .group_by(l2_column)
                .all()
            )

            # Totals span every returned row, including any skipped below.
            total_sales = sum(float(row.total_sales or 0) for row in rows)
            total_products = sum(row.product_count for row in rows)

            unsorted_nodes = []
            for row in rows:
                l2_key = row.ml_subcategory_l2
                if not l2_key:
                    continue

                l2_sales = float(row.total_sales or 0)
                if total_sales > 0:
                    share = l2_sales / total_sales * 100
                else:
                    share = 0

                # Fall back to a title-cased key when no display name exists.
                label = L2_DISPLAY_NAMES.get(
                    l2_key, l2_key.replace("_", " ").title()
                )

                unsorted_nodes.append({
                    "category": l2_key,
                    "display_name": label,
                    "archetype": L2_ARCHETYPES.get(l2_key, ""),
                    "sales": round(l2_sales, 2),
                    "count": row.product_count,
                    "units": row.total_units or 0,
                    "percentage": round(share, 1),
                })

            nodes = sorted(
                unsorted_nodes, key=lambda node: node["sales"], reverse=True
            )

            logger.info(
                "ventalibre.sales_by_l2.success",
                pharmacy_id=str(pharmacy_id),
                l1_category=l1_category,
                l2_categories=len(nodes),
                total_sales=total_sales,
            )

            return {
                "nodes": nodes,
                "total_sales": round(total_sales, 2),
                "total_products": total_products,
                "l1_category": l1_category,
            }

        except Exception as exc:
            failure = str(exc)
            logger.error(
                "ventalibre.sales_by_l2.error",
                pharmacy_id=str(pharmacy_id),
                l1_category=l1_category,
                error=failure,
            )
            return {
                "nodes": [],
                "total_sales": 0,
                "total_products": 0,
                "l1_category": l1_category,
                "error": failure,
            }

    def get_l2_value_quadrant(
        self,
        db: Session,
        pharmacy_id: UUID,
        l1_category: str,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
    ) -> Dict:
        """
        Scatter-plot data: margin (%) vs. sales volume per L2 subcategory.

        Each L2 subcategory of ``l1_category`` is placed in one of four
        quadrants by comparing its total sales and average margin with
        dynamic thresholds. The thresholds are the element at index ``n // 2``
        of the sorted values, i.e. the upper median when ``n`` is even.

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            l1_category: L1 category matched against the catalog's
                ``ml_category`` (e.g. dermocosmetica, suplementos, higiene_bucal).
            date_from: Optional inclusive lower bound on the sale date.
            date_to: Optional inclusive upper bound on the sale date.

        Returns:
            Dict of the form::

                {
                    "subcategories": [
                        {"l2": "solar_facial", "display_name": "Solar Facial",
                         "archetype": "...", "sales": 5000, "margin_pct": 35.5,
                         "transactions": 120, "quadrant": "star",
                         "quadrant_label": "Estrellas"},
                        ...
                    ],
                    "thresholds": {"median_sales": 3000, "median_margin": 30.0},
                    "l1_category": "dermocosmetica"
                }

            With no data, ``subcategories`` is empty, ``thresholds`` is ``{}``
            and a ``message`` is included. On any exception the error is
            logged and an ``error`` message is returned instead.
        """
        from app.schemas.symptom_taxonomy import (
            L2_ARCHETYPES,
            L2_DISPLAY_NAMES,
        )

        # (sales >= median, margin >= median) -> (quadrant, label)
        quadrant_by_position = {
            (True, True): ("star", "Estrellas"),
            (True, False): ("traffic", "Generadores de Tráfico"),
            (False, True): ("opportunity", "Oportunidades"),
            (False, False): ("review", "Revisar"),
        }

        try:
            filters = [
                SalesData.pharmacy_id == pharmacy_id,
                SalesEnrichment.product_type == "venta_libre",
                ProductCatalogVentaLibre.ml_category == l1_category,
                ProductCatalogVentaLibre.ml_subcategory_l2.isnot(None),
            ]

            if date_from:
                filters.append(SalesData.sale_date >= date_from)
            if date_to:
                filters.append(SalesData.sale_date <= date_to)

            # NOTE(review): margin here is the plain average of
            # SalesData.margin_percentage; other methods in this class fall
            # back to an inventory-derived margin when it is NULL. Confirm
            # whether the L2 quadrant should do the same.
            query = (
                db.query(
                    ProductCatalogVentaLibre.ml_subcategory_l2.label("l2"),
                    func.sum(SalesData.total_amount).label("sales"),
                    func.avg(SalesData.margin_percentage).label("margin_pct"),
                    func.count(func.distinct(SalesData.id)).label("transactions"),
                )
                .join(SalesEnrichment, SalesEnrichment.venta_libre_product_id == ProductCatalogVentaLibre.id)
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .filter(and_(*filters))
                .group_by(ProductCatalogVentaLibre.ml_subcategory_l2)
            )

            results = query.all()

            if not results:
                return {
                    "subcategories": [],
                    "thresholds": {},
                    "l1_category": l1_category,
                    "message": "No hay datos L2 para esta categoría",
                }

            # Keep the unrounded values alongside the rounded payloads; the
            # three lists stay index-aligned.
            subcategories_data = []
            raw_sales = []
            raw_margins = []

            for r in results:
                if not r.l2:
                    continue

                sales = float(r.sales or 0)
                margin = float(r.margin_pct or 0)
                raw_sales.append(sales)
                raw_margins.append(margin)

                subcategories_data.append({
                    "l2": r.l2,
                    "display_name": L2_DISPLAY_NAMES.get(r.l2, r.l2.replace("_", " ").title()),
                    "archetype": L2_ARCHETYPES.get(r.l2, ""),
                    "sales": round(sales, 2),
                    "margin_pct": round(margin, 1),
                    "transactions": r.transactions or 0,
                })

            if not subcategories_data:
                return {
                    "subcategories": [],
                    "thresholds": {},
                    "l1_category": l1_category,
                    "message": "No hay subcategorías L2 con datos",
                }

            # Dynamic thresholds (upper median). The list is non-empty here.
            n = len(raw_sales)
            median_sales = sorted(raw_sales)[n // 2]
            median_margin = sorted(raw_margins)[n // 2]

            # Classify on the UNROUNDED values. Comparing the rounded payload
            # against the raw median could push the median subcategory itself
            # below its own threshold (e.g. 30.04 rounds to 30.0 < 30.04).
            for subcat, sales, margin in zip(subcategories_data, raw_sales, raw_margins):
                position = (sales >= median_sales, margin >= median_margin)
                subcat["quadrant"], subcat["quadrant_label"] = quadrant_by_position[position]

            logger.info(
                "ventalibre.l2_value_quadrant.success",
                pharmacy_id=str(pharmacy_id),
                l1_category=l1_category,
                subcategories=len(subcategories_data),
            )

            return {
                "subcategories": subcategories_data,
                "thresholds": {
                    "median_sales": round(median_sales, 2),
                    "median_margin": round(median_margin, 1),
                },
                "l1_category": l1_category,
            }

        except Exception as e:
            logger.error(
                "ventalibre.l2_value_quadrant.error",
                pharmacy_id=str(pharmacy_id),
                l1_category=l1_category,
                error=str(e),
            )
            return {
                "subcategories": [],
                "thresholds": {},
                "l1_category": l1_category,
                "error": str(e),
            }

    def get_l2_coverage(self, db: Session) -> Dict:
        """
        Return L2 classification coverage per L1 category.

        NOTE: this is coverage of the system-wide CATALOG, not of a specific
        pharmacy. It measures what percentage of active products in
        ProductCatalogVentaLibre have an L2 subcategory assigned.

        Args:
            db: Database session.

        Returns:
            Dict with ``categories`` (one entry per item of ``L1_WITH_L2``,
            in that order, each with ``l1_category``, ``total_products``,
            ``with_l2`` and ``coverage_percent``) and ``overall`` totals.
            On any exception the error is logged and zeroed totals with an
            ``error`` message are returned.
        """
        from app.schemas.symptom_taxonomy import L1_WITH_L2

        try:
            l1_categories = list(L1_WITH_L2)

            # One grouped query replaces two COUNT queries per L1 category.
            # COUNT(ml_subcategory_l2) only counts rows where the column is
            # not NULL, i.e. products that already have an L2.
            counts_by_l1 = {}
            if l1_categories:
                rows = (
                    db.query(
                        ProductCatalogVentaLibre.ml_category,
                        func.count(ProductCatalogVentaLibre.id),
                        func.count(ProductCatalogVentaLibre.ml_subcategory_l2),
                    )
                    .filter(
                        ProductCatalogVentaLibre.ml_category.in_(l1_categories),
                        # SQL comparison, not a Python identity test.
                        ProductCatalogVentaLibre.is_active == True,  # noqa: E712
                    )
                    .group_by(ProductCatalogVentaLibre.ml_category)
                    .all()
                )
                counts_by_l1 = {
                    l1: (total or 0, with_l2 or 0)
                    for l1, total, with_l2 in rows
                }

            categories = []
            total_products = 0
            total_with_l2 = 0

            for l1_cat in l1_categories:
                # L1 categories with no active products are absent from the
                # grouped result and are reported with zero counts.
                cat_total, cat_with_l2 = counts_by_l1.get(l1_cat, (0, 0))

                total_products += cat_total
                total_with_l2 += cat_with_l2

                coverage_pct = (cat_with_l2 / cat_total * 100) if cat_total > 0 else 0

                categories.append({
                    "l1_category": l1_cat,
                    "total_products": cat_total,
                    "with_l2": cat_with_l2,
                    "coverage_percent": round(coverage_pct, 1),
                })

            overall_pct = (total_with_l2 / total_products * 100) if total_products > 0 else 0

            logger.info(
                "ventalibre.l2_coverage.success",
                total_products=total_products,
                total_with_l2=total_with_l2,
                overall_coverage=round(overall_pct, 1),
            )

            return {
                "categories": categories,
                "overall": {
                    "total_products": total_products,
                    "with_l2": total_with_l2,
                    "coverage_percent": round(overall_pct, 1),
                },
            }

        except Exception as e:
            logger.error(
                "ventalibre.l2_coverage.error",
                error=str(e),
            )
            return {
                "categories": [],
                "overall": {
                    "total_products": 0,
                    "with_l2": 0,
                    "coverage_percent": 0,
                },
                "error": str(e),
            }

    # ========================================================================
    # Issue #539: HHI Matrix - Scatter Plot HHI × Margen por Categoría
    # ========================================================================

    def get_hhi_matrix(
        self,
        db: Session,
        pharmacy_id: UUID,
        date_from: Optional[date] = None,
        date_to: Optional[date] = None,
    ) -> Dict:
        """
        HHI x margin matrix per category for a scatter plot (Issue #539).

        Computes the HHI (brand concentration) and the sales-weighted average
        margin for every category with sales and a detected brand, so the
        pharmacy's assortment strategy can be visualised.

        Diagnoses (produced by ``_diagnose_hhi_margin``):
        - 🎯 Successful specialist: high HHI + high margin
        - ⚠️ Dependency risk: high HHI + low margin
        - ⭐ Premium generalist: low HHI + high margin
        - 📊 Volume generalist: low HHI + low margin
        - ⚖️ Balanced assortment: intermediate zone

        Args:
            db: Database session.
            pharmacy_id: Pharmacy whose sales are analysed.
            date_from: Start date (optional, default: 365 days before ``date_to``).
            date_to: End date (optional, default: today).

        Returns:
            Dict with:
            - categories: list of categories with hhi, avg_margin,
              total_sales, brand_count, top_brands and diagnosis, sorted by
              sales descending
            - thresholds: quadrant thresholds (hhi: 1500/2500, margin: 15/25)
            - summary: total_categories, avg_hhi, avg_margin, total_sales
            With no data a ``message`` is added; on any exception the error
            is logged and a generic ``error`` message is returned.
        """
        # Fixed quadrant thresholds, identical on every return path.
        thresholds = {
            "hhi_low": 1500,
            "hhi_high": 2500,
            "margin_low": 15.0,
            "margin_high": 25.0,
        }
        # Same keys as the success summary so consumers see one shape.
        empty_summary = {
            "total_categories": 0,
            "avg_hhi": 0,
            "avg_margin": 0,
            "total_sales": 0,
        }

        try:
            # Date defaults
            if not date_to:
                date_to = date.today()
            if not date_from:
                date_from = date_to - timedelta(days=365)

            # Inventory costs provide a margin fallback for rows without one
            inventory_costs = self._get_margin_from_inventory(db, pharmacy_id)
            inventory_margin = self._calculate_margin_expression(inventory_costs)

            # Sales and average margin per (category, brand)
            brand_query = (
                db.query(
                    SalesEnrichment.ml_category,
                    SalesEnrichment.detected_brand,
                    func.sum(SalesData.total_amount).label("brand_sales"),
                    func.avg(
                        func.coalesce(SalesData.margin_percentage, inventory_margin)
                    ).label("brand_margin"),
                )
                .join(SalesData, SalesEnrichment.sales_data_id == SalesData.id)
                .outerjoin(
                    inventory_costs,
                    SalesData.codigo_nacional == inventory_costs.c.product_code
                )
                .filter(
                    SalesData.pharmacy_id == pharmacy_id,
                    SalesEnrichment.product_type == "venta_libre",
                    SalesEnrichment.ml_category.isnot(None),
                    SalesEnrichment.ml_category != "",
                    SalesEnrichment.detected_brand.isnot(None),
                    SalesEnrichment.detected_brand != "",
                    SalesData.sale_date >= date_from,
                    SalesData.sale_date <= date_to,
                )
                .group_by(SalesEnrichment.ml_category, SalesEnrichment.detected_brand)
            )

            brand_results = brand_query.all()

            if not brand_results:
                return {
                    "categories": [],
                    "thresholds": thresholds,
                    "summary": empty_summary,
                    "message": "No hay datos de ventas con marca detectada",
                }

            # Group brand rows by category (insertion order = query order)
            brands_by_category: Dict[str, List[Dict]] = {}
            for r in brand_results:
                brands_by_category.setdefault(r.ml_category, []).append({
                    "brand": r.detected_brand,
                    "sales": float(r.brand_sales or 0),
                    # NOTE(review): a brand with no margin data (NULL) counts
                    # as 0% in the weighted average below, pulling it down.
                    # Confirm whether such brands should be excluded instead.
                    "margin": float(r.brand_margin or 0),
                })

            categories = []
            for cat, brands in brands_by_category.items():
                total_sales = sum(b["sales"] for b in brands)
                if total_sales <= 0:
                    continue

                # Brand shares as fractions of category sales, fed to the HHI
                brand_shares = [b["sales"] / total_sales for b in brands]
                hhi = self._calculate_hhi(brand_shares)

                # Sales-weighted average margin (total_sales > 0 here)
                weighted_margin = sum(
                    b["margin"] * b["sales"] for b in brands
                ) / total_sales

                # Top 3 brands by sales
                top_brands = [
                    {
                        "brand": b["brand"],
                        "share": round(b["sales"] / total_sales * 100, 1),
                    }
                    for b in sorted(
                        brands, key=lambda x: x["sales"], reverse=True
                    )[:3]
                ]

                categories.append({
                    "category": cat,
                    "hhi": round(hhi, 1),
                    "avg_margin": round(weighted_margin, 1),
                    "total_sales": round(total_sales, 2),
                    "brand_count": len(brands),
                    "top_brands": top_brands,
                    "diagnosis": self._diagnose_hhi_margin(hhi, weighted_margin),
                })

            # Sort by sales (for the audit table)
            categories.sort(key=lambda x: x["total_sales"], reverse=True)

            # Global statistics: unweighted means over the rounded values
            total_categories = len(categories)
            avg_hhi = sum(c["hhi"] for c in categories) / total_categories if total_categories > 0 else 0
            avg_margin = sum(c["avg_margin"] for c in categories) / total_categories if total_categories > 0 else 0

            logger.info(
                "ventalibre.hhi_matrix.success",
                pharmacy_id=str(pharmacy_id),
                categories_count=total_categories,
                avg_hhi=round(avg_hhi, 1),
                avg_margin=round(avg_margin, 1),
            )

            return {
                "categories": categories,
                "thresholds": thresholds,
                "summary": {
                    "total_categories": total_categories,
                    "avg_hhi": round(avg_hhi, 1),
                    "avg_margin": round(avg_margin, 1),
                    "total_sales": round(sum(c["total_sales"] for c in categories), 2),
                },
            }

        except Exception as e:
            logger.error(
                "ventalibre.hhi_matrix.error",
                pharmacy_id=str(pharmacy_id),
                error=str(e),
                exc_info=True,
            )
            return {
                "categories": [],
                "thresholds": thresholds,
                "summary": empty_summary,
                "error": "Error al calcular matriz HHI. Inténtelo de nuevo.",
            }

    def _diagnose_hhi_margin(self, hhi: float, margin: float) -> Dict:
        """
        Classify a category on the HHI x margin matrix (Issue #539).

        Each input is placed in a band: "high" when strictly above the upper
        threshold, "low" when strictly below the lower threshold, and "mid"
        otherwise (values exactly on a threshold are "mid"). Only the four
        high/low corner combinations have a dedicated diagnosis; any
        combination involving a "mid" band falls back to "balanced".

        Args:
            hhi: HHI index of the category (0-10000).
            margin: Average margin, in percent.

        Returns:
            A new dict with the keys ``emoji``, ``title``, ``color`` and
            ``quadrant``.
        """
        # Thresholds: (lower bound, upper bound) for each axis.
        hhi_floor, hhi_ceiling = 1500, 2500
        margin_floor, margin_ceiling = 15.0, 25.0

        if hhi > hhi_ceiling:
            concentration = "high"
        elif hhi < hhi_floor:
            concentration = "low"
        else:
            concentration = "mid"

        if margin > margin_ceiling:
            profitability = "high"
        elif margin < margin_floor:
            profitability = "low"
        else:
            profitability = "mid"

        # (concentration band, margin band) -> (emoji, title, color, quadrant)
        corner_diagnoses = {
            ("high", "high"): (
                "🎯", "Especialista Exitoso", "success", "specialist_success",
            ),
            ("high", "low"): (
                "⚠️", "Riesgo de Dependencia", "danger", "dependency_risk",
            ),
            ("low", "high"): (
                "⭐", "Generalista Premium", "primary", "generalist_premium",
            ),
            ("low", "low"): (
                "📊", "Generalista de Volumen", "info", "generalist_volume",
            ),
        }
        # Anything that is not one of the four corners is a balanced assortment.
        balanced = ("⚖️", "Surtido Balanceado", "secondary", "balanced")

        emoji, title, color, quadrant = corner_diagnoses.get(
            (concentration, profitability), balanced
        )
        return {
            "emoji": emoji,
            "title": title,
            "color": color,
            "quadrant": quadrant,
        }


# Module-level shared instance of VentaLibreService, created once when this
# module is imported (constructed with no arguments).
ventalibre_service = VentaLibreService()
