"""
Clustering Visualization API (Issue #458)

.. deprecated:: ADR-004
    These endpoints have been deprecated since ADR-004 (Dec 2024).
    Dynamic clustering has been replaced by manually curated groups
    using IntercambiableGroup with is_curated=True.
    The endpoints will be kept until the migration is complete, but
    they must not be used for new development.

Endpoints for UMAP 2D visualization and cluster management.
"""

from typing import List, Optional
from uuid import UUID

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session

from app.api.deps import get_current_admin_user, get_current_user, get_db
from app.models.user import User
from app.schemas.clustering import (
    TaxonomyLabelRequest,
    TaxonomyLabelStatusResponse,
    UMAPDataRequest,
    UMAPDataResponse,
    UMAPMetadata,
    UMAPPoint,
)

# LAZY LOADING: importing umap_model_service triggers numba JIT compilation (~45s).
# It is imported only when needed, not at app startup.
# Issue: smoke tests were taking 19+ minutes because of this compilation.

router = APIRouter(prefix="/clustering", tags=["clustering"])


def _get_umap_service():
    """Return the module-level ``umap_model_service`` object, importing it on demand.

    The import happens at call time rather than at module load, so that
    importing this router does not import ``app.services.umap_model_service``.
    Importing that module is reported to trigger a lengthy numba JIT
    compilation (~45s), which is therefore kept out of application startup.
    """
    from app.services.umap_model_service import umap_model_service as service

    return service


@router.get("/umap-data", response_model=UMAPDataResponse, deprecated=True)
async def get_umap_data(
    necesidad: Optional[str] = Query(None, description="Filter by NECESIDAD category"),
    verified_only: bool = Query(False, description="Only return human-verified products"),
    # ge=0 rejects negative limits at validation time instead of forwarding
    # them to the service; the upper bound alone does not cap a negative value.
    limit: int = Query(5000, ge=0, le=10000, description="Maximum number of points"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
) -> UMAPDataResponse:
    """
    Get UMAP 2D coordinates for visualization.

    Returns products with their 2D coordinates for the scatter plot,
    along with metadata about the model and dataset.

    The centroid_distance_zscore field helps detect "wolves in sheep's clothing" -
    products with high textual similarity but physically distant in embedding space.

    Args:
        necesidad: Optional NECESIDAD category to filter by.
        verified_only: When true, only human-verified products are requested.
        limit: Maximum number of points to request (0 to 10000, default 5000).
        db: Database session provided by the ``get_db`` dependency.
        current_user: Result of the ``get_current_user`` dependency; not read
            in the body.

    Returns:
        UMAPDataResponse with the points, the model metadata, and an empty
        ``ghost_points`` list.
    """
    # Resolve the service lazily so the heavy import is not paid at startup.
    # NOTE(review): the service calls below are plain synchronous calls made
    # from an ``async def`` handler - confirm whether they should be offloaded
    # to a worker thread.
    umap_service = _get_umap_service()
    products = umap_service.get_products_with_coordinates(
        db=db,
        limit=limit,
        necesidad=necesidad,
        verified_only=verified_only,
    )

    # Calculate Z-scores for outlier detection
    products = umap_service.calculate_centroid_distances(products)

    # "version", "total_products" and "products_with_coords" are required keys
    # of the metadata dict; the remaining ones fall back to None / 0.
    metadata_dict = umap_service.get_metadata(db)
    metadata = UMAPMetadata(
        version=metadata_dict["version"],
        total_products=metadata_dict["total_products"],
        products_with_coords=metadata_dict["products_with_coords"],
        last_trained=metadata_dict.get("last_trained"),
        total_clusters=metadata_dict.get("total_clusters", 0),
        locked_clusters=metadata_dict.get("locked_clusters", 0),
    )

    # Convert each product dict to the response schema; id, name and both
    # coordinates are required, every other field is optional.
    points = [
        UMAPPoint(
            id=p["id"],
            product_name=p["product_name"],
            umap_x=p["umap_x"],
            umap_y=p["umap_y"],
            necesidad=p.get("necesidad"),
            detected_brand=p.get("detected_brand"),
            ml_confidence=p.get("ml_confidence"),
            human_verified=p.get("human_verified", False),
            centroid_distance_zscore=p.get("centroid_distance_zscore"),
        )
        for p in products
    ]

    return UMAPDataResponse(
        points=points,
        metadata=metadata,
        ghost_points=[],  # Ghost points are managed by frontend
    )


@router.get("/umap-status", deprecated=True)
async def get_umap_status(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Report whether the UMAP model should be retrained.

    Returns:
        A dict with the keys ``needs_retrain``, ``reason`` (read from the
        stats, None when absent), ``stats`` and ``current_version``.
    """
    service = _get_umap_service()
    retrain_flag, retrain_stats = service.needs_retrain(db)

    payload = {
        "needs_retrain": retrain_flag,
        "reason": retrain_stats.get("reason"),
        "stats": retrain_stats,
    }
    # Added last so the key order of the response stays the same.
    payload["current_version"] = service.get_current_version()
    return payload


@router.get("/necesidades", deprecated=True)
async def get_available_necesidades(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
) -> List[str]:
    """
    Get list of available NECESIDAD categories for filtering.

    Only categories of active catalog products that have a non-null
    ``ml_category`` and a non-null ``umap_x`` are considered.

    Returns:
        The distinct, non-empty category values sorted in ascending order.
    """
    # Function-scope import: the model is only needed by this handler.
    from app.models.product_catalog_venta_libre import ProductCatalogVentaLibre

    rows = (
        db.query(ProductCatalogVentaLibre.ml_category)
        .filter(
            # ``== True`` builds a SQL expression here, not a Python comparison.
            ProductCatalogVentaLibre.is_active == True,  # noqa: E712
            ProductCatalogVentaLibre.ml_category.isnot(None),
            ProductCatalogVentaLibre.umap_x.isnot(None),
        )
        .distinct()
        .all()
    )

    # Each row holds a single column; falsy values (e.g. "") are dropped.
    return sorted(row[0] for row in rows if row[0])


# === TAXONOMY LABELING (Issue #462) ===

from app.services.taxonomy_labeler_service import get_taxonomy_labeler_service


@router.post("/label-taxonomy", deprecated=True)
async def label_clusters_taxonomy(
    request: TaxonomyLabelRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_admin_user),
):
    """
    Label clusters with a hierarchical taxonomy (Issue #462).

    Hybrid model:
    - Tier 1 (Macro): deterministic static mapping
    - Tier 2 (Sub): sales-weighted vote
    - Tier 3 (Name): generated by an LLM

    Requires the admin role to run.

    Args:
        request: Request body carrying ``force`` (force re-labeling,
            ignoring the 10% threshold) and ``cluster_ids`` (optional
            specific list of clusters).

    Returns:
        Result of the batch labeling, as produced by ``model_dump()`` on
        the service result.
    """
    labeler = get_taxonomy_labeler_service(db)

    target_clusters = request.cluster_ids
    force_relabel = request.force
    outcome = labeler.batch_label_all(
        cluster_ids=target_clusters,
        force=force_relabel,
    )

    return outcome.model_dump()


@router.get("/label-taxonomy/status", response_model=TaxonomyLabelStatusResponse, deprecated=True)
async def get_taxonomy_labeling_status(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Get the current cluster labeling status.

    Returns:
        Coverage statistics and the distribution by tier1, as returned by
        the taxonomy labeler service.
    """
    return get_taxonomy_labeler_service(db).get_labeling_status()
