"""
Optimal Partners Service - greedy algorithm for selecting optimal partners
Issue #415: Improvements to the generics analysis

This service implements a greedy algorithm that maximizes the coverage of
substitutable homogeneous groups using as few partners as possible.

COVERAGE LOGIC (based on real data):
- A partner "covers" a homogeneous group if it has products in that group
- Coverage is computed by querying ProductCatalog to see which laboratories
  have products in each nomen_codigo_homogeneo
- Substitutable sales are sales of products whose homogeneous group contains
  at least one active generic product in ProductCatalog
  (nomen_tipo_farmaco = 'GENERICO' and nomen_estado = 'ALTA')
"""

from typing import List, Optional, Dict, Any, Set
from datetime import date, timedelta
from uuid import UUID
from sqlalchemy.orm import Session
from sqlalchemy import func, and_, distinct
from pydantic import BaseModel, Field
import logging

from app.models.pharmacy_partners import PharmacyPartner
from app.models.sales_data import SalesData
from app.models.sales_enrichment import SalesEnrichment
from app.models.product_catalog import ProductCatalog
from app.models.user import User
from app.utils.subscription_helpers import get_data_date_limit_for_user
from app.services.laboratory_cache_service import laboratory_cache_service

logger = logging.getLogger(__name__)


# Pydantic Models for Response
class OptimalPartner(BaseModel):
    """A ranked partner laboratory together with its coverage metrics.

    Ratio fields are decimal fractions of the substitutable sales total,
    not values on a 0-100 scale.
    """

    # --- Identity -----------------------------------------------------
    partner_id: UUID
    partner_name: str

    # --- Coverage ratios ----------------------------------------------
    coverage_percentage: float = Field(
        ...,
        description="% de ventas sustituibles que cubre este partner (decimal)",
    )
    incremental_coverage: float = Field(
        ...,
        description="% de cobertura adicional que añade (decimal)",
    )
    cumulative_coverage: float = Field(
        ...,
        description="% de cobertura acumulada hasta este partner (decimal)",
    )

    # --- Ranking ------------------------------------------------------
    position: int = Field(
        ...,
        description="Posición en ranking (1 = primario)",
    )

    # --- Monetary figures and group counts ----------------------------
    substitutable_sales: float = Field(
        ...,
        description="Valor en € de ventas sustituibles",
    )
    potential_savings: float = Field(
        ...,
        description="Ahorro potencial estimado (17.5% descuento)",
    )
    homogeneous_groups_covered: int = Field(
        ...,
        description="Número de conjuntos homogéneos cubiertos",
    )

    # Defaults to False, so callers must opt in to flag the primary partner.
    is_primary: bool = Field(
        default=False,
        description="True si es el partner primario",
    )


class AnalysisMetadata(BaseModel):
    """Summary information describing one optimal-partners analysis run."""

    # Total amount of the substitutable sales universe used as denominator.
    total_substitutable_sales: float

    # Number of partners that were considered for ranking.
    total_partners_analyzed: int

    # Analysis window as a mapping of string keys to ISO date strings (or None).
    analysis_period: Dict[str, Optional[str]]

    # Employee filter applied to the sales, if any.
    employees_included: Optional[List[str]] = None

    coverage_achieved: float = Field(
        ...,
        description="Cobertura total alcanzada (decimal)",
    )
    homogeneous_groups_analyzed: int = Field(
        ...,
        description="Total conjuntos homogéneos analizados",
    )


class OptimalPartnersResponse(BaseModel):
    """Complete result of the optimal-partners calculation."""
    # Selected partners, in the order they were appended by the calculation.
    optimal_partners: List[OptimalPartner]
    # Summary of the analysis (period, totals, coverage achieved).
    analysis_metadata: AnalysisMetadata


def _normalize_laboratory_name(name: str) -> str:
    """
    Normaliza el nombre de laboratorio para comparación.
    Extrae la parte principal del nombre (antes de comas, S.A., S.L., etc.)
    """
    if not name:
        return ""

    # Convertir a mayúsculas
    name = name.upper().strip()

    # Eliminar sufijos corporativos comunes
    suffixes = [
        ", S.A.", " S.A.", ",S.A.", "S.A.",
        ", S.L.", " S.L.", ",S.L.", "S.L.",
        ", S.L.U.", " S.L.U.", ",S.L.U.", "S.L.U.",
        ", S.A.U.", " S.A.U.", ",S.A.U.", "S.A.U.",
        " FARMACEUTICA", " PHARMA", " LABORATORIES", " LABORATORIOS",
        " GENERICOS", " GENERIC", " EFG"
    ]

    for suffix in suffixes:
        if name.endswith(suffix):
            name = name[:-len(suffix)].strip()

    # Tomar solo la primera parte si hay coma
    if "," in name:
        name = name.split(",")[0].strip()

    return name


def _build_analysis_period(
    start_date: Optional[date],
    end_date: Optional[date],
) -> Dict[str, Optional[str]]:
    """Serialize the analysis window as ISO date strings (None when unset)."""
    return {
        "start": start_date.isoformat() if start_date else None,
        "end": end_date.isoformat() if end_date else None
    }


def _build_empty_response(
    start_date: Optional[date],
    end_date: Optional[date],
    employee_names: Optional[List[str]],
    total_partners_analyzed: int,
) -> OptimalPartnersResponse:
    """Build the response returned when there is nothing to rank."""
    return OptimalPartnersResponse(
        optimal_partners=[],
        analysis_metadata=AnalysisMetadata(
            total_substitutable_sales=0.0,
            total_partners_analyzed=total_partners_analyzed,
            analysis_period=_build_analysis_period(start_date, end_date),
            employees_included=employee_names,
            coverage_achieved=0.0,
            homogeneous_groups_analyzed=0
        )
    )


async def calculate_optimal_partners(
    db: Session,
    pharmacy_id: UUID,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    employee_names: Optional[List[str]] = None,
    max_partners: int = 8,
    current_user: Optional[User] = None
) -> OptimalPartnersResponse:
    """
    Rank the pharmacy's selected partners with a greedy coverage algorithm.

    Steps:
    1. Load the pharmacy's selected partners and resolve each laboratory name
       to a laboratory code through ``laboratory_cache_service``.
    2. Aggregate the pharmacy's sales per homogeneous group, restricted to
       groups that contain at least one active generic product. This total is
       the denominator (100%) for every coverage ratio.
    3. For every such group, find which laboratory codes have active products
       in it. A partner "covers" a group when its code is among them.
    4. Repeatedly pick the partner that adds the largest share of still
       uncovered sales, until ``max_partners`` are chosen, no partner adds
       anything, or cumulative coverage reaches 99%.

    In each returned ``OptimalPartner``, ``coverage_percentage`` is the
    partner's standalone coverage, whereas ``substitutable_sales``,
    ``potential_savings`` and ``homogeneous_groups_covered`` describe only the
    incremental contribution at the moment the partner was picked.

    Args:
        db: Database session.
        pharmacy_id: ID of the pharmacy.
        start_date: Start of the analysis window (default: 730 days before
            ``end_date``).
        end_date: End of the analysis window (default: today).
        employee_names: Optional employee filter (PRO feature #402).
        max_partners: Maximum number of partners to return.
        current_user: Current user, used to apply the free-tier date limit.

    Returns:
        OptimalPartnersResponse with the ranked partners and analysis metadata.
        The partner list is empty when there are no selected partners, no
        partner resolves to a laboratory code, or the substitutable sales
        total is not positive.
    """

    # Default window: the 730 days ending today.
    if not end_date:
        end_date = date.today()
    if not start_date:
        start_date = end_date - timedelta(days=730)

    # Free-tier restriction (REGLA #18): never look further back than the limit.
    # NOTE(review): assumes the helper returns a value comparable with
    # `date` (or a falsy value when there is no limit) - confirm.
    if current_user:
        date_limit = get_data_date_limit_for_user(current_user, db)
        if date_limit:
            start_date = max(start_date, date_limit)
            logger.info(f"Free tier restriction applied: start_date adjusted to {start_date}")

    logger.info(
        f"[OPTIMAL_PARTNERS] Calculating for pharmacy {pharmacy_id} "
        f"from {start_date} to {end_date}"
    )

    # ========================================================================
    # STEP 1: Selected partners and their laboratory codes
    # ========================================================================
    selected_partners = db.query(PharmacyPartner).filter(
        and_(
            PharmacyPartner.pharmacy_id == pharmacy_id,
            PharmacyPartner.is_selected == True  # noqa: E712 (SQL expression)
        )
    ).all()

    if not selected_partners:
        logger.warning(f"[OPTIMAL_PARTNERS] No selected partners found for pharmacy {pharmacy_id}")
        return _build_empty_response(
            start_date, end_date, employee_names, len(selected_partners)
        )

    # Resolve names to codes the same way partner_analysis_service does, so
    # "Analizable" and "Cobertura Partners Óptimos" stay consistent.
    partner_names = [p.laboratory_name for p in selected_partners]
    name_to_code_mapping, _ = laboratory_cache_service.get_names_to_codes_cached(
        db, partner_names, page=None, per_page=None, return_total=False
    )

    # partner id -> normalized (upper-cased, trimmed) laboratory code
    partner_codes: Dict[UUID, str] = {}
    for partner in selected_partners:
        code = name_to_code_mapping.get(partner.laboratory_name)
        if code:
            partner_codes[partner.id] = code.upper().strip()
            logger.debug(f"[OPTIMAL_PARTNERS] Partner {partner.laboratory_name} -> code: {code}")
        else:
            logger.warning(f"[OPTIMAL_PARTNERS] No code found for partner: {partner.laboratory_name}")

    selected_partner_codes = list(partner_codes.values())

    if not selected_partner_codes:
        logger.warning("[OPTIMAL_PARTNERS] No valid partner codes found")
        return _build_empty_response(
            start_date, end_date, employee_names, len(selected_partners)
        )

    logger.info(f"[OPTIMAL_PARTNERS] Selected partner codes: {selected_partner_codes}")

    # ========================================================================
    # STEP 2: Substitutable universe - sales in homogeneous groups that have
    # at least one active generic (nomen_tipo_farmaco = 'GENERICO'),
    # consistent with partner_analysis_service. NOT filtered by partner:
    # this is the 100% denominator for coverage.
    # ========================================================================
    generic_homogeneous_subq = db.query(
        ProductCatalog.nomen_codigo_homogeneo
    ).filter(
        and_(
            ProductCatalog.nomen_codigo_homogeneo.isnot(None),
            ProductCatalog.nomen_codigo_homogeneo != '',
            ProductCatalog.nomen_tipo_farmaco == 'GENERICO',
            ProductCatalog.nomen_estado == 'ALTA'
        )
    ).distinct().subquery()

    sales_query = db.query(
        ProductCatalog.nomen_codigo_homogeneo.label("homogeneous_code"),
        func.sum(SalesData.total_amount).label("total_sales"),
        func.count(distinct(SalesData.id)).label("sales_count")
    ).join(
        SalesEnrichment,
        SalesData.id == SalesEnrichment.sales_data_id
    ).join(
        ProductCatalog,
        SalesEnrichment.product_catalog_id == ProductCatalog.id
    ).filter(
        and_(
            SalesData.pharmacy_id == pharmacy_id,
            SalesData.sale_date >= start_date,
            SalesData.sale_date <= end_date,
            ProductCatalog.nomen_codigo_homogeneo.isnot(None),
            ProductCatalog.nomen_codigo_homogeneo != '',
            # Restrict to groups with generics (same as partner_analysis_service)
            ProductCatalog.nomen_codigo_homogeneo.in_(
                db.query(generic_homogeneous_subq.c.nomen_codigo_homogeneo)
            )
        )
    )

    # Optional employee filter (PRO feature)
    if employee_names:
        sales_query = sales_query.filter(SalesData.employee_name.in_(employee_names))
        logger.info(f"[OPTIMAL_PARTNERS] Filtering by employees: {employee_names}")

    sales_by_homogeneous = sales_query.group_by(
        ProductCatalog.nomen_codigo_homogeneo
    ).all()

    # homogeneous code -> sales amount, plus the overall denominator
    total_substitutable_sales = float(sum(row.total_sales or 0 for row in sales_by_homogeneous))
    sales_by_group: Dict[str, float] = {
        row.homogeneous_code: float(row.total_sales or 0)
        for row in sales_by_homogeneous
        if row.homogeneous_code
    }
    homogeneous_codes = set(sales_by_group)

    logger.info(
        f"[OPTIMAL_PARTNERS] Substitutable universe (groups with generics): {len(homogeneous_codes)} groups "
        f"with {total_substitutable_sales:.2f}€ sales"
    )

    # A non-positive total cannot serve as a denominator: zero would divide by
    # zero and a negative net total would flip the sign of every coverage ratio.
    if total_substitutable_sales <= 0 or not homogeneous_codes:
        logger.warning("[OPTIMAL_PARTNERS] No substitutable sales found in the period")
        return _build_empty_response(
            start_date, end_date, employee_names, len(selected_partners)
        )

    # ========================================================================
    # STEP 3: Which laboratory codes have active products in each group
    # (exact nomen_codigo_laboratorio match, as in partner_analysis_service)
    # ========================================================================
    labs_by_group_rows = db.query(
        ProductCatalog.nomen_codigo_homogeneo,
        ProductCatalog.nomen_codigo_laboratorio
    ).filter(
        and_(
            ProductCatalog.nomen_codigo_homogeneo.in_(list(homogeneous_codes)),
            ProductCatalog.nomen_codigo_laboratorio.isnot(None),
            ProductCatalog.nomen_estado == 'ALTA'
        )
    ).distinct().all()

    # normalized laboratory code -> set of homogeneous codes it has products in
    groups_by_lab_code: Dict[str, Set[str]] = {}
    for row in labs_by_group_rows:
        if row.nomen_codigo_homogeneo and row.nomen_codigo_laboratorio:
            lab_code = row.nomen_codigo_laboratorio.upper().strip()
            groups_by_lab_code.setdefault(lab_code, set()).add(row.nomen_codigo_homogeneo)

    logger.info(f"[OPTIMAL_PARTNERS] Found {len(groups_by_lab_code)} distinct laboratory codes in catalog")

    # ========================================================================
    # STEP 4: Standalone coverage of every selected partner
    # ========================================================================
    partner_coverage: Dict[UUID, Dict[str, Any]] = {}

    for partner in selected_partners:
        partner_code = partner_codes.get(partner.id)

        # Partners without a resolved code cover nothing.
        covered_groups: Set[str] = (
            groups_by_lab_code.get(partner_code, set()) if partner_code else set()
        )

        covered_sales = sum(sales_by_group.get(group, 0) for group in covered_groups)
        coverage_pct = covered_sales / total_substitutable_sales

        partner_coverage[partner.id] = {
            "partner": partner,
            "covered_groups": covered_groups,
            "covered_sales": covered_sales,
            "coverage_percentage": coverage_pct
        }

        logger.info(
            f"[OPTIMAL_PARTNERS] {partner.laboratory_name} (code={partner_code}): "
            f"{len(covered_groups)} groups, {coverage_pct*100:.1f}% coverage, {covered_sales:.2f}€"
        )

    # ========================================================================
    # STEP 5: Greedy selection by largest incremental coverage
    # ========================================================================
    savings_rate = 0.175  # estimated discount applied to the incremental sales

    selected_partners_list: List[OptimalPartner] = []
    covered_groups_global: Set[str] = set()
    cumulative_coverage = 0.0

    remaining_partners = list(partner_coverage.keys())

    while remaining_partners and len(selected_partners_list) < max_partners:
        best_partner_id: Optional[UUID] = None
        best_incremental_coverage = 0.0
        best_incremental_groups: Set[str] = set()
        best_incremental_sales = 0.0

        # Find the remaining partner that adds the most uncovered sales.
        # Strict ">" keeps the earliest candidate on ties.
        for partner_id in remaining_partners:
            new_groups = partner_coverage[partner_id]["covered_groups"] - covered_groups_global
            if not new_groups:
                continue

            incremental_sales = sum(sales_by_group.get(group, 0) for group in new_groups)
            incremental_coverage = incremental_sales / total_substitutable_sales

            if incremental_coverage > best_incremental_coverage:
                best_partner_id = partner_id
                best_incremental_coverage = incremental_coverage
                best_incremental_groups = new_groups
                best_incremental_sales = incremental_sales

        # A candidate is only recorded when it adds strictly positive coverage,
        # so None means nobody left contributes anything.
        if best_partner_id is None:
            logger.info("[OPTIMAL_PARTNERS] No more partners add incremental value")
            break

        partner_data = partner_coverage[best_partner_id]
        partner = partner_data["partner"]
        cumulative_coverage += best_incremental_coverage
        position = len(selected_partners_list) + 1

        selected_partners_list.append(OptimalPartner(
            partner_id=partner.id,
            partner_name=partner.laboratory_name,
            coverage_percentage=partner_data["coverage_percentage"],
            incremental_coverage=best_incremental_coverage,
            cumulative_coverage=cumulative_coverage,
            position=position,
            substitutable_sales=best_incremental_sales,
            potential_savings=best_incremental_sales * savings_rate,
            homogeneous_groups_covered=len(best_incremental_groups),
            is_primary=(position == 1)
        ))

        logger.info(
            f"[OPTIMAL_PARTNERS] #{position} {partner.laboratory_name}: "
            f"+{best_incremental_coverage*100:.1f}% (cumulative: {cumulative_coverage*100:.1f}%), "
            f"{len(best_incremental_groups)} groups, {best_incremental_sales:.2f}€"
        )

        covered_groups_global.update(best_incremental_groups)
        remaining_partners.remove(best_partner_id)

        # Stop early once 99% of the substitutable sales are covered.
        if cumulative_coverage >= 0.99:
            logger.info(
                f"[OPTIMAL_PARTNERS] Reached {cumulative_coverage*100:.1f}% coverage "
                f"with {len(selected_partners_list)} partners"
            )
            break

    metadata = AnalysisMetadata(
        total_substitutable_sales=total_substitutable_sales,
        total_partners_analyzed=len(selected_partners),
        analysis_period=_build_analysis_period(start_date, end_date),
        employees_included=employee_names,
        coverage_achieved=cumulative_coverage,
        homogeneous_groups_analyzed=len(homogeneous_codes)
    )

    logger.info(
        f"[OPTIMAL_PARTNERS] Completed: {len(selected_partners_list)} partners selected, "
        f"{cumulative_coverage*100:.1f}% coverage achieved over {len(homogeneous_codes)} groups"
    )

    return OptimalPartnersResponse(
        optimal_partners=selected_partners_list,
        analysis_metadata=metadata
    )
