﻿# backend/app/api/admin_catalog.py
"""
Admin endpoints for catalog management operations.
Includes catalog cleaning, reindexing, duplicate detection, and orphan identification.
Requires admin authentication for all operations.
"""

import logging
from datetime import timedelta
from difflib import SequenceMatcher
from typing import Any, Dict, List, Literal, Optional

import structlog
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import StreamingResponse
from uuid import UUID
from pydantic import BaseModel, Field
from sqlalchemy import and_, distinct, func, or_, text
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.utils.datetime_utils import utc_now

from ..api.deps import require_permission
from ..core.rate_limiting import admin_general_limit
from ..core.subscription_limits import Permission
from ..database import get_db
from ..models.product_catalog import ProductCatalog
from ..models.sales_data import SalesData
from ..models.user import User
from ..services.catalog_maintenance_service import CatalogMaintenanceService
from ..services.health_monitoring_service import log_to_developer_mode
from ..services.manual_review_service import ManualReviewService
from ..schemas.manual_review import (
    ManualReviewListResponse,
    ManualReviewStats,
    ManualReviewExportResponse,
)

# Standard-library logger used for this module's plain-text diagnostics.
logger = logging.getLogger(__name__)
# structlog logger used for structured (key=value) events emitted by the endpoints below.
struct_logger = structlog.get_logger(__name__)

# Every route declared on this router is served under the "/admin" prefix.
router = APIRouter(prefix="/admin", tags=["admin-catalog"])


# Pydantic models for requests/responses
class CleanCatalogResponse(BaseModel):
    """Response for catalog cleaning operation."""

    # Number of products reported as deleted; the endpoint reports 0 for dry runs.
    deleted: int
    # Number of candidate products per reason key
    # ("no_national_code", "discontinued_old", "no_recent_sales").
    reason_breakdown: Dict[str, int]
    # Duration of the request handler in milliseconds.
    execution_time_ms: float


class ReindexResponse(BaseModel):
    """Response for table reindexing operation."""

    # Number of tables that were reindexed (the length of `tables`).
    tables_reindexed: int
    # Duration of the whole operation in milliseconds.
    time_ms: float
    # Names of the tables that were reindexed.
    tables: List[str]


class DuplicateGroup(BaseModel):
    """Group of duplicate products."""

    # National code shared by the group. For name-similarity groups this is the
    # first product's code, or "SIMILAR_NAME" when that product has no code.
    code: str
    # One summary dict per product in the group.
    products: List[Dict[str, Any]]
    # 1.0 for an exact national-code match, otherwise the rounded name-similarity ratio.
    confidence: float
    # Number of products in the group.
    count: int


class DuplicatesResponse(BaseModel):
    """Response for duplicate detection."""

    # Sum of `count` over the detected groups.
    total_duplicates: int
    # Detected duplicate groups, sorted by (confidence, count) in descending order.
    groups: List[DuplicateGroup]


class OrphansResponse(BaseModel):
    """Response for orphan detection."""

    # Number of catalog products whose national code has no sales.
    orphan_products: int
    # Number of distinct sales codes that have no matching catalog product.
    orphan_sales: int
    # Samples and totals: "orphan_products_sample", "orphan_sales_sample",
    # "total_orphan_sales_records" and "sample_limit".
    details: Dict


class CatalogStatsResponse(BaseModel):
    """Response for catalog statistics (Issue #200)."""

    # Section 1: External catalogs (always visible)
    external_catalogs: Dict[str, Dict[str, Any]]  # CIMA and Nomenclator with counts and dates

    # Section 2: Sales enrichment (only when there are sales)
    sales_enrichment: Optional[Dict[str, Any]] = None  # Enrichment stats

    # Section 3: Latest activity
    last_activity: Dict[str, Any]  # Timestamps of the last sync, upload, etc.

    # Legacy fields (kept for backward compatibility)
    total_products: int
    data_sources: Dict[str, int]
    sync_status: Dict[str, int]
    enrichment_rate: Optional[float] = None
    last_update: Optional[str]
    nomenclator_local: Dict[str, Any]
    homogeneous_groups_master: Dict[str, int]
    system_sync_dates: Dict[str, Optional[str]]


@router.post("/clean-catalog", response_model=CleanCatalogResponse)
@admin_general_limit
async def clean_catalog(
    request: Request,
    dry_run: bool = Query(default=False, description="If true, only simulate the operation without deleting"),
    current_user: User = Depends(require_permission(Permission.MANAGE_DATABASE)),
    db: Session = Depends(get_db),
):
    """
    Clean obsolete products from the catalog.

    Removes:
    - Products without valid national code (NULL, empty or shorter than 6 characters)
    - Products marked as BAJA (discontinued) whose last update is >180 days old
    - Products whose national code has no sales in the last 730 days

    Each product is attributed to the first rule it matches, so the values in
    ``reason_breakdown`` add up to the number of distinct candidates.

    Args:
        request: Incoming request (consumed by the rate-limit decorator)
        dry_run: If true, only simulate without actually deleting
        current_user: Authenticated admin user
        db: Database session

    Returns:
        CleanCatalogResponse with deletion statistics. ``deleted`` is the number
        of rows actually removed (0 for dry runs); ``reason_breakdown`` always
        describes the candidates found.

    Raises:
        HTTPException 500: If any database or unexpected error occurs (the
            session is rolled back first)
    """
    start_time = utc_now()
    # Upper bound on ids per DELETE so the IN (...) list stays a manageable size.
    delete_batch_size = 1000

    try:
        reason_breakdown = {"no_national_code": 0, "discontinued_old": 0, "no_recent_sales": 0}
        # A set gives O(1) de-duplication between the three rules.
        ids_to_delete = set()

        def collect(reason, id_query):
            """Add ids not yet selected and count them under *reason*."""
            for (product_id,) in id_query:
                if product_id not in ids_to_delete:
                    ids_to_delete.add(product_id)
                    reason_breakdown[reason] += 1

        # Only the primary key is needed, so select ids instead of full ORM rows.

        # 1. Products without valid national code
        collect(
            "no_national_code",
            db.query(ProductCatalog.id).filter(
                or_(
                    ProductCatalog.national_code.is_(None),
                    ProductCatalog.national_code == "",
                    func.length(ProductCatalog.national_code) < 6,
                )
            ),
        )

        # 2. Products marked as BAJA (discontinued) for >6 months
        six_months_ago = utc_now() - timedelta(days=180)
        collect(
            "discontinued_old",
            db.query(ProductCatalog.id).filter(
                and_(ProductCatalog.cima_estado_registro == "BAJA", ProductCatalog.updated_at < six_months_ago)
            ),
        )

        # 3. Products without sales in >2 years
        two_years_ago = utc_now() - timedelta(days=730)

        # NULL codes must be excluded from the subquery: in SQL, "x NOT IN (..., NULL)"
        # is never true, so a single sale without code would make this rule match nothing.
        recent_sales_codes = (
            db.query(distinct(SalesData.codigo_nacional))
            .filter(and_(SalesData.sale_date >= two_years_ago, SalesData.codigo_nacional.isnot(None)))
            .subquery()
        )
        collect(
            "no_recent_sales",
            db.query(ProductCatalog.id).filter(
                and_(
                    ProductCatalog.national_code.isnot(None),
                    ~ProductCatalog.national_code.in_(recent_sales_codes),
                )
            ),
        )

        deleted_count = 0

        # Perform deletion if not dry run
        if not dry_run and ids_to_delete:
            pending_ids = list(ids_to_delete)
            for offset in range(0, len(pending_ids), delete_batch_size):
                batch = pending_ids[offset : offset + delete_batch_size]
                deleted_count += (
                    db.query(ProductCatalog)
                    .filter(ProductCatalog.id.in_(batch))
                    .delete(synchronize_session=False)
                )

            # All batches are committed together: either every candidate goes or none does.
            db.commit()

            # Log the operation
            log_to_developer_mode(
                db=db,
                level="INFO",
                logger_name="admin_catalog",
                message=f"Catalog cleaned by {current_user.username}",
                context={
                    "deleted_count": deleted_count,
                    "reason_breakdown": reason_breakdown,
                    "user_id": str(current_user.id),
                },
            )

            struct_logger.info(
                "Limpieza de catálogo completada", deleted=deleted_count, dry_run=dry_run, user=current_user.username
            )

        execution_time = (utc_now() - start_time).total_seconds() * 1000

        return CleanCatalogResponse(
            deleted=deleted_count,
            reason_breakdown=reason_breakdown,
            execution_time_ms=execution_time,
        )

    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error during catalog cleaning: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Database error during catalog cleaning: {str(e)}") from e
    except Exception as e:
        db.rollback()
        logger.error(f"Error during catalog cleaning: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error cleaning catalog: {str(e)}") from e


@router.post("/reindex-tables", response_model=ReindexResponse)
@admin_general_limit
async def reindex_tables(
    request: Request,
    current_user: User = Depends(require_permission(Permission.MANAGE_DATABASE)),
    db: Session = Depends(get_db),
):
    """
    Rebuild PostgreSQL indexes and update statistics.

    Operations:
    - REINDEX TABLE for each table in a fixed allow-list
    - ANALYZE to update table statistics

    Reindexing is best-effort per table: every REINDEX runs inside its own
    SAVEPOINT, so a failure on one table is logged and skipped without
    aborting the surrounding transaction (which would otherwise make all
    later statements, including ANALYZE, fail as well).

    Args:
        request: Incoming request (consumed by the rate-limit decorator)
        current_user: Authenticated admin user
        db: Database session

    Returns:
        ReindexResponse with operation statistics; ``tables`` lists only the
        tables that were reindexed successfully.

    Raises:
        HTTPException 500: If ANALYZE, the commit, or the audit logging fails
    """
    start_time = utc_now()

    # Fixed allow-list. Table names are interpolated into the SQL text (identifiers
    # cannot be bound as parameters), so they must never come from user input.
    # A tuple keeps the processing order and the response deterministic.
    allowed_tables = ("product_catalog", "sales_data", "sales_enrichment", "pharmacies", "file_uploads")

    try:
        tables_reindexed = []

        for table in allowed_tables:
            try:
                # SAVEPOINT: on error only this statement is rolled back and the
                # outer transaction stays usable for the remaining tables.
                with db.begin_nested():
                    db.execute(text(f"REINDEX TABLE {table}"))
            except Exception as e:
                logger.warning(f"No se pudo reindexar la tabla {table}: {str(e)}")
                continue

            tables_reindexed.append(table)
            logger.info(f"Tabla reindexada: {table}")

        # Run ANALYZE to update statistics
        db.execute(text("ANALYZE"))
        db.commit()

        execution_time = (utc_now() - start_time).total_seconds() * 1000

        # Log the operation
        log_to_developer_mode(
            db=db,
            level="INFO",
            logger_name="admin_catalog",
            message=f"Tables reindexed by {current_user.username}",
            context={"tables": tables_reindexed, "execution_time_ms": execution_time, "user_id": str(current_user.id)},
            execution_time_ms=execution_time,
        )

        struct_logger.info(
            "Reindexación de tablas completada",
            tables_count=len(tables_reindexed),
            time_ms=execution_time,
            user=current_user.username,
        )

        return ReindexResponse(tables_reindexed=len(tables_reindexed), time_ms=execution_time, tables=tables_reindexed)

    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error during reindexing: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Database error during reindexing: {str(e)}") from e
    except Exception as e:
        db.rollback()
        logger.error(f"Error during reindexing: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error reindexing tables: {str(e)}") from e


def _duplicate_product_summary(product: Any, name: Optional[str] = None) -> Dict[str, Any]:
    """Build the dict describing one product inside a duplicate group.

    When *name* is not given, the nomenclator name is used, then the CIMA
    commercial name, then "Unknown".
    """
    if name is None:
        name = product.nomen_nombre or product.cima_nombre_comercial or "Unknown"
    return {
        "id": str(product.id),
        "national_code": product.national_code,
        "name": name,
        "laboratory": product.nomen_laboratorio or product.cima_laboratorio_titular,
        "status": product.cima_estado_registro,
        "last_updated": product.updated_at.isoformat() if product.updated_at else None,
    }


def _similar_name_groups(batch: List[Any], min_confidence: float, remaining: int) -> List[DuplicateGroup]:
    """Compare every pair of products in *batch* by name and return at most *remaining* groups.

    Pairs that share a national code are skipped (exact-code duplicates are
    reported separately), as are products without any name.
    """
    groups: List[DuplicateGroup] = []
    if remaining <= 0:
        return groups

    # Resolve and lowercase each name once instead of once per comparison.
    names = [product.nomen_nombre or product.cima_nombre_comercial or "" for product in batch]
    lowered = [name.lower() for name in names]

    for i, first in enumerate(batch):
        if not names[i]:
            continue
        for j in range(i + 1, len(batch)):
            second = batch[j]
            if not names[j] or first.national_code == second.national_code:
                continue

            similarity = SequenceMatcher(None, lowered[i], lowered[j]).ratio()
            if similarity < min_confidence:
                continue

            groups.append(
                DuplicateGroup(
                    code=first.national_code or "SIMILAR_NAME",
                    products=[
                        _duplicate_product_summary(first, names[i]),
                        _duplicate_product_summary(second, names[j]),
                    ],
                    confidence=round(similarity, 2),
                    count=2,
                )
            )
            if len(groups) >= remaining:
                return groups

    return groups


@router.get("/catalog/duplicates", response_model=DuplicatesResponse)
async def detect_duplicates(
    limit: int = Query(default=50, ge=1, le=200, description="Número máximo de grupos de duplicados a devolver"),
    min_confidence: float = Query(default=0.85, ge=0.5, le=1.0, description="Confianza mínima de similitud"),
    sample_size: int = Query(
        default=500, ge=1, le=2000, description="Tamaño de muestra para comparación de nombres"
    ),
    current_user: User = Depends(require_permission(Permission.VIEW_SYSTEM_STATS)),
    db: Session = Depends(get_db),
):
    """
    Detect duplicate products by national code and name similarity.

    Two passes are made:
    1. Products sharing the same national code (confidence 1.0).
    2. If the limit has not been reached, a sample of products is compared
       pairwise by name using SequenceMatcher. To bound memory, the sample is
       processed in chunks of 100 rows and names are only compared within the
       same chunk, so this pass is a heuristic rather than an exhaustive search.

    Args:
        limit: Maximum number of duplicate groups to return (1 to 200)
        min_confidence: Minimum similarity score (0.5 to 1.0)
        sample_size: Number of products sampled for the name comparison (1 to 2000)
        current_user: Authenticated admin user
        db: Database session

    Returns:
        DuplicatesResponse with duplicate groups sorted by confidence and size

    Raises:
        HTTPException 500: If an unexpected error occurs
    """
    try:
        duplicate_groups = []

        # 1. Find products with duplicate national codes
        duplicate_codes = (
            db.query(ProductCatalog.national_code, func.count(ProductCatalog.id).label("count"))
            .filter(ProductCatalog.national_code.isnot(None))
            .group_by(ProductCatalog.national_code)
            .having(func.count(ProductCatalog.id) > 1)
            .limit(limit)
            .all()
        )

        for code, count in duplicate_codes:
            products = db.query(ProductCatalog).filter(ProductCatalog.national_code == code).all()
            duplicate_groups.append(
                DuplicateGroup(
                    code=code,
                    products=[_duplicate_product_summary(product) for product in products],
                    confidence=1.0,  # Exact code match
                    count=count,
                )
            )

        # 2. Find products with similar names (if we haven't reached limit)
        if len(duplicate_groups) < limit:
            # Chunked processing keeps memory bounded on small instances
            # (Render, 512MB limit): only one chunk of ORM rows is held at a time.
            products_sample_query = (
                db.query(ProductCatalog).filter(ProductCatalog.nomen_nombre.isnot(None)).limit(sample_size)
            )

            chunk_size = 100
            batch = []

            for product in products_sample_query.yield_per(chunk_size):
                batch.append(product)
                if len(batch) < chunk_size:
                    continue

                duplicate_groups.extend(
                    _similar_name_groups(batch, min_confidence, limit - len(duplicate_groups))
                )
                # Start a fresh list so the processed rows can be garbage-collected.
                batch = []

                if len(duplicate_groups) >= limit:
                    break

            # The last chunk is usually smaller than chunk_size (or the whole
            # sample is); it must be compared too instead of being dropped.
            if batch and len(duplicate_groups) < limit:
                duplicate_groups.extend(
                    _similar_name_groups(batch, min_confidence, limit - len(duplicate_groups))
                )

        # Sort by confidence and count
        duplicate_groups.sort(key=lambda group: (group.confidence, group.count), reverse=True)

        total_duplicates = sum(group.count for group in duplicate_groups)

        # Log the operation
        log_to_developer_mode(
            db=db,
            level="INFO",
            logger_name="admin_catalog",
            message=f"Duplicate detection run by {current_user.username}",
            context={
                "groups_found": len(duplicate_groups),
                "total_duplicates": total_duplicates,
                "min_confidence": min_confidence,
                "user_id": str(current_user.id),
            },
        )

        return DuplicatesResponse(total_duplicates=total_duplicates, groups=duplicate_groups[:limit])

    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except Exception as e:
        logger.error(f"Error detecting duplicates: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error detecting duplicates: {str(e)}") from e


@router.get("/catalog/orphans", response_model=OrphansResponse)
async def find_orphans(
    limit: int = Query(default=100, ge=1, le=1000, description="Maximum orphan records to return details for"),
    current_user: User = Depends(require_permission(Permission.VIEW_SYSTEM_STATS)),
    db: Session = Depends(get_db),
):
    """
    Find orphan products and sales records.

    Identifies:
    - Products in catalog whose national code never appears in sales
    - Sales records whose code has no corresponding product in catalog

    Rows with a NULL code are ignored on both sides.

    Args:
        limit: Maximum number of orphan records to return details for (1 to 1000)
        current_user: Authenticated admin user
        db: Database session

    Returns:
        OrphansResponse where ``orphan_products`` is the number of orphan
        products, ``orphan_sales`` the number of distinct orphan sales codes,
        and ``details`` holds the samples plus the total orphan sales rows.

    Raises:
        HTTPException 500: If an unexpected error occurs
    """
    try:
        # 1. Products without any sales
        codes_with_sales = (
            db.query(distinct(SalesData.codigo_nacional)).filter(SalesData.codigo_nacional.isnot(None)).subquery()
        )

        orphan_products_query = db.query(ProductCatalog).filter(
            and_(ProductCatalog.national_code.isnot(None), ~ProductCatalog.national_code.in_(codes_with_sales))
        )

        orphan_products_count = orphan_products_query.count()

        orphan_product_details = [
            {
                "id": str(product.id),
                "national_code": product.national_code,
                "name": product.nomen_nombre or product.cima_nombre_comercial or "Unknown",
                "laboratory": product.nomen_laboratorio or product.cima_laboratorio_titular,
                "status": product.cima_estado_registro,
                "created_at": product.created_at.isoformat() if product.created_at else None,
                "last_updated": product.updated_at.isoformat() if product.updated_at else None,
            }
            for product in orphan_products_query.limit(limit).all()
        ]

        # 2. Sales without products in catalog
        catalog_codes = (
            db.query(ProductCatalog.national_code).filter(ProductCatalog.national_code.isnot(None)).subquery()
        )

        # Shared by the per-code aggregation and by the total row count below.
        orphan_sale_filter = and_(
            SalesData.codigo_nacional.isnot(None), ~SalesData.codigo_nacional.in_(catalog_codes)
        )

        # One row per orphan code with its number of sales, revenue and latest sale date.
        orphan_sales_query = (
            db.query(
                SalesData.codigo_nacional,
                func.count(SalesData.id).label("count"),
                func.sum(SalesData.total_amount).label("total_amount"),
                func.max(SalesData.sale_date).label("last_sale_date"),
            )
            .filter(orphan_sale_filter)
            .group_by(SalesData.codigo_nacional)
        )

        orphan_sales_groups = orphan_sales_query.limit(limit).all()
        # Counting the grouped query yields the number of distinct orphan codes.
        orphan_sales_count = orphan_sales_query.count()

        orphan_sales_details = [
            {
                "codigo_nacional": code,  # Field name as defined in DATA_CATALOG.md
                "sales_count": count,
                "total_amount": float(total_amount) if total_amount else 0,
                "last_sale_date": last_sale.isoformat() if last_sale else None,
            }
            for code, count, total_amount, last_sale in orphan_sales_groups
        ]

        # Total number of individual sales rows (not codes) without catalog product
        total_orphan_sales_records = db.query(SalesData).filter(orphan_sale_filter).count()

        # Log the operation
        log_to_developer_mode(
            db=db,
            level="INFO",
            logger_name="admin_catalog",
            message=f"Orphan detection run by {current_user.username}",
            context={
                "orphan_products": orphan_products_count,
                "orphan_sales_codes": orphan_sales_count,
                "orphan_sales_records": total_orphan_sales_records,
                "user_id": str(current_user.id),
            },
        )

        return OrphansResponse(
            orphan_products=orphan_products_count,
            orphan_sales=orphan_sales_count,
            details={
                "orphan_products_sample": orphan_product_details,
                "orphan_sales_sample": orphan_sales_details,
                "total_orphan_sales_records": total_orphan_sales_records,
                "sample_limit": limit,
            },
        )

    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except Exception as e:
        logger.error(f"Error finding orphans: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error finding orphans: {str(e)}") from e


def _validate_catalog_stats(stats: Dict[str, Any]) -> None:
    """Raise ValueError if *stats* lacks a section or field the endpoint reads (Issue #193 review)."""
    required_keys = {
        "catalog": [
            "total_products",
            "distribution",
            "sync_status",
            "last_update",
            "with_nomenclator_data",
            "with_cima_data",
        ],
        "nomenclator_local": ["total_products", "last_update"],
        "homogeneous_groups": ["total_groups"],
        "system_status": [],
    }

    for section, fields in required_keys.items():
        if section not in stats:
            raise ValueError(f"Missing required section '{section}' in catalog stats")
        for field in fields:
            if field not in stats[section]:
                raise ValueError(f"Missing required field '{field}' in stats['{section}']")

    for field in ("only_nomenclator", "only_cima", "both_sources", "no_sources"):
        if field not in stats["catalog"]["distribution"]:
            raise ValueError(f"Missing required field '{field}' in stats['catalog']['distribution']")


def _last_successful_sync(db: Session, sync_types: List[str]) -> Optional[str]:
    """Return the ISO timestamp of the latest successful sync of the given types, or None.

    Reads catalog_sync_history; used as a fallback when system_status carries no date.
    """
    # Local import, as in the rest of this module's lazily imported models.
    from app.models.catalog_sync_history import CatalogSyncHistory

    record = (
        db.query(CatalogSyncHistory.sync_date)
        .filter(
            CatalogSyncHistory.sync_type.in_(sync_types),
            CatalogSyncHistory.status.in_(["success", "completed", "COMPLETED"]),
            # PostgreSQL sorts NULLs first on DESC; exclude them so a row without
            # date can neither hide real dates nor crash on .isoformat().
            CatalogSyncHistory.sync_date.isnot(None),
        )
        .order_by(CatalogSyncHistory.sync_date.desc())
        .first()
    )
    if record and record[0] is not None:
        return record[0].isoformat()
    return None


def _external_catalog_entry(
    component_status: Dict[str, Any], products_count: int, last_sync: Optional[str]
) -> Dict[str, Any]:
    """Build one entry of the ``external_catalogs`` section.

    The status reported by system_status is overridden with READY when there
    are products and a known sync date.
    """
    state = component_status.get("status", "NEVER_RUN")
    message = component_status.get("message", "No sincronizado")
    if products_count > 0 and last_sync:
        state = "READY"
        message = "Sincronizado"
    return {
        "products_count": products_count,
        "last_sync": last_sync,
        "status": state,
        "message": message,
    }


@router.get("/catalog/stats", response_model=CatalogStatsResponse)
async def get_catalog_stats(
    current_user: User = Depends(require_permission(Permission.VIEW_SYSTEM_STATS)),
    db: Session = Depends(get_db),
):
    """
    Get comprehensive statistics about the catalog (Issue #185).

    Returns statistics about:
    - Total products in catalog
    - Data source distribution (nomenclator, CIMA, both)
    - Synchronization status
    - Last update timestamps
    - Nomenclator local counts
    - Homogeneous groups master counts
    - System sync dates for each component

    **Cache**: Redis with 5-minute TTL (Issue #194). Invalidated automatically after:
    - CIMA synchronization
    - Re-enrichment operations
    - Nomenclator updates

    Requires admin authentication.

    Args:
        current_user: Authenticated admin user
        db: Database session

    Returns:
        CatalogStatsResponse with comprehensive catalog statistics

    Raises:
        HTTPException 403: If user is not admin
        HTTPException 422: If the computed statistics lack an expected section or field
        HTTPException 500: If there's an error retrieving statistics
    """
    try:
        # Issue #194: Try Redis cache first (5 min TTL)
        import os

        from app.services.enrichment_cache import enrichment_cache

        # Cache key strategy (Issue #194):
        # - Single global key: "catalog:stats:global" (no user/pharmacy segmentation)
        # - Rationale: Stats are identical for all admins (same DB queries)
        # - Invalidation: Automatic after CIMA sync, nomenclator updates, re-enrichment
        # - TTL: 5 minutes (balance between freshness and DB load reduction)
        cache_key = "catalog:stats:global"

        # Try cache (returns None if Redis unavailable or cache miss)
        cached_stats = await enrichment_cache.get(cache_key)
        if cached_stats:
            logger.debug(f"[CACHE HIT] Returning cached catalog stats for {current_user.username}")
            return CatalogStatsResponse(**cached_stats)

        # Cache miss or Redis unavailable - compute stats from DB
        logger.debug(f"[CACHE MISS] Computing catalog stats from DB for {current_user.username}")

        # Issue #293: Use optimized query if feature flag enabled
        use_optimized = os.getenv("USE_OPTIMIZED_CATALOG_STATS", "true").lower() == "true"

        catalog_service = CatalogMaintenanceService()
        if use_optimized:
            logger.info("Using optimized catalog stats query (Issue #293)")
            stats = catalog_service.get_catalog_status_optimized(db)
        else:
            logger.info("Using legacy catalog stats query")
            stats = catalog_service.get_catalog_status(db)

        # Defensive validation: fail early with a clear message instead of a KeyError below
        _validate_catalog_stats(stats)

        catalog = stats["catalog"]
        distribution = catalog["distribution"]
        system_status = stats["system_status"]

        # Issue #200: New three-section structure
        # Section 1: EXTERNAL CATALOGS (always visible)
        # Issue #210: Count only products with CIMA-specific data.
        # cima_nombre_comercial is used as the canonical indicator of CIMA data
        # (per Issue #210 it is populated for every product synced from the CIMA API).
        cima_count = (
            db.query(func.count(ProductCatalog.id))
            .filter(ProductCatalog.cima_nombre_comercial.isnot(None))
            .scalar()
            or 0
        )

        # Prefer the date from system_status; fall back to catalog_sync_history.
        # sync_type is matched in both cases because the stored values are case-sensitive.
        cima_state = system_status.get("cima", {})
        cima_last_sync = cima_state.get("last_success_at")
        if not cima_last_sync:
            cima_last_sync = _last_successful_sync(db, ["CIMA", "cima"])

        nomenclator_state = system_status.get("nomenclator", {})
        nomenclator_last_sync = nomenclator_state.get("last_success_at")
        if not nomenclator_last_sync:
            nomenclator_last_sync = _last_successful_sync(db, ["NOMENCLATOR", "nomenclator"])

        nomenclator_products = stats["nomenclator_local"]["total_products"]

        external_catalogs = {
            "cima": _external_catalog_entry(cima_state, cima_count, cima_last_sync),
            "nomenclator": _external_catalog_entry(nomenclator_state, nomenclator_products, nomenclator_last_sync),
        }

        # Section 2: SALES ENRICHMENT (only when there are sales)
        from app.models.sales_enrichment import SalesEnrichment

        sales_count = db.query(func.count(SalesData.id)).scalar() or 0
        enriched_count = db.query(func.count(SalesEnrichment.id)).scalar() or 0

        sales_enrichment_section = None
        if sales_count > 0:
            sales_enrichment_section = {
                "total_sales": sales_count,
                "enriched_sales": enriched_count,
                # Percentage (0-100), unlike the legacy top-level enrichment_rate below.
                "enrichment_rate": round(enriched_count / sales_count * 100, 2),
                "not_enriched": sales_count - enriched_count,
            }

        # Section 3: LATEST ACTIVITY
        last_activity = {
            "cima_sync": external_catalogs["cima"]["last_sync"],
            "nomenclator_sync": external_catalogs["nomenclator"]["last_sync"],
            "catalog_update": catalog["last_update"],
        }

        # Legacy structure (kept for compatibility with the current frontend)
        stats_transformed: Dict[str, Any] = {
            # New sections (Issue #200)
            "external_catalogs": external_catalogs,
            "sales_enrichment": sales_enrichment_section,
            "last_activity": last_activity,
            # Legacy fields (compatibility)
            "total_products": catalog["total_products"],
            "data_sources": {
                "only_nomenclator": distribution["only_nomenclator"],
                "only_cima": distribution["only_cima"],
                "both_sources": distribution["both_sources"],
                "no_sources": distribution["no_sources"],
                "with_nomenclator": catalog["with_nomenclator_data"],
                "with_cima": catalog["with_cima_data"],
            },
            "sync_status": catalog["sync_status"],
            "last_update": catalog["last_update"],
            "nomenclator_local": {
                "count": stats["nomenclator_local"]["total_products"],
                "last_update": stats["nomenclator_local"]["last_update"],
            },
            "homogeneous_groups_master": {"count": stats["homogeneous_groups"]["total_groups"]},
            "system_sync_dates": {
                component: component_status["last_success_at"]
                for component, component_status in system_status.items()
                if isinstance(component_status, dict) and "last_success_at" in component_status
            },
            # Fraction (0-1) of products having both sources; None for an empty catalog.
            "enrichment_rate": (
                distribution["both_sources"] / catalog["total_products"] if catalog["total_products"] > 0 else None
            ),
        }

        # Log the operation
        struct_logger.info(
            "Estadísticas de catálogo consultadas",
            total_products=stats_transformed["total_products"],
            enrichment_rate=stats_transformed["enrichment_rate"],
            user=current_user.username,
            user_id=str(current_user.id),
        )

        # Issue #194: Cache transformed stats with 5 min TTL
        await enrichment_cache.set(cache_key, stats_transformed, ttl=300)
        logger.debug("[CACHE SET] Cached catalog stats for 5 minutes")

        return CatalogStatsResponse(**stats_transformed)

    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except ValueError as e:
        logger.error(f"Invalid catalog data: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=f"Invalid catalog data: {str(e)}"
        ) from e
    except Exception as e:
        logger.error(f"Error retrieving catalog stats: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving catalog statistics: {str(e)}") from e


@router.get(
    "/system/catalog/sync-history",
    response_model=Dict[str, Any],
    dependencies=[Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))]
)
@admin_general_limit
async def get_sync_history(
    request: Request,
    limit: int = Query(default=50, ge=1, le=100, description="Number of records to return"),
    offset: int = Query(default=0, ge=0, description="Number of records to skip"),
    sync_type: Optional[str] = Query(None, description="Filter by sync type (cima or nomenclator)"),
    status: Optional[str] = Query(None, description="Filter by status (success, failure, partial)"),
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
) -> Dict[str, Any]:
    """
    Return one page of the catalog synchronization history.

    The heavy lifting is delegated to ``CatalogSyncHistoryService``; this
    endpoint only forwards the pagination/filter arguments, writes an audit
    log entry and hands the service result back to the client unchanged.

    **Required permissions**: ADMIN_CATALOG_MANAGE

    **Query parameters:**
    - **limit**: Maximum number of records to return (1-100, default: 50)
    - **offset**: Number of records to skip for pagination (default: 0)
    - **sync_type**: Filter by sync type ("cima" or "nomenclator")
    - **status**: Filter by sync status ("success", "failure", or "partial")

    **Response** (keys read from the service result for logging):
    - **history**: List of sync events with details
    - **total_count**: Total number of records matching filters

    **Note**: Results are expected to be ordered by sync_date descending
    (most recent first); the ordering is applied by the service.

    Issue #349: Provides sync history for admin panel database tab

    Raises:
        HTTPException: Re-raised untouched when raised downstream; any other
            failure is converted into a 500 response.
    """
    # NOTE: the ``status`` query parameter shadows the ``fastapi.status`` module
    # inside this function, which is why HTTP codes below are plain integers.
    # ``request`` is not read here; presumably the rate-limit decorator needs it.
    try:
        # Deferred import to avoid a circular import at module load time.
        from app.services.catalog_sync_history_service import CatalogSyncHistoryService

        # Defensive clamp for callers that bypass the Query validation.
        page_limit = min(limit, 100)

        history_page = CatalogSyncHistoryService(db).get_sync_history(
            limit=page_limit,
            offset=offset,
            sync_type=sync_type,
            status=status,
        )

        # Audit trail: who asked for what, and how much came back.
        struct_logger.info(
            "Catalog sync history retrieved",
            limit=page_limit,
            offset=offset,
            sync_type=sync_type,
            status=status,
            records_returned=len(history_page.get("history", [])),
            total_count=history_page.get("total_count", 0),
            user=current_user.username,
            user_id=str(current_user.id)
        )

        return history_page

    except HTTPException:
        # Keep the original status code of deliberate HTTP errors.
        raise
    except Exception as exc:
        error_text = str(exc)
        logger.error(f"Error retrieving sync history: {error_text}")
        raise HTTPException(
            status_code=500,
            detail=f"Error retrieving synchronization history: {error_text}"
        )


@router.post("/catalog/clear-checkpoint/{component}")
async def clear_sync_checkpoint(
    component: str,
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE)),
    db: Session = Depends(get_db),
) -> Dict[str, Any]:
    """
    Reset the stored sync checkpoint of a catalog component.

    Intended for syncs that are stuck on a checkpoint although the data is
    actually complete (e.g. a false-positive STALL detection at end of data).
    The component's status row gets its checkpoint fields nulled, its status
    set to READY and its progress set to 100.

    Args:
        component: Component name, case-insensitive (cima, nomenclator, catalog)
        current_user: Admin user; the username is written into the status message
        db: Database session

    Returns:
        A dict with ``status`` "success" (plus the previous state) or
        "not_found" when the component has no status row.

    Raises:
        HTTPException: 400 for an unknown component, 500 when the update fails
            (the session is rolled back first).
    """
    from app.models.system_status import SystemComponent, SystemStatus, SystemStatusEnum

    # Reject anything that is not one of the supported components.
    target = component.upper()
    allowed = ["CIMA", "NOMENCLATOR", "CATALOG"]
    if target not in allowed:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid component. Valid options: {allowed}"
        )

    try:
        # Resolve the enum member by name, then fetch its status row.
        component_member = getattr(SystemComponent, target)
        record = db.query(SystemStatus).filter_by(component=component_member).first()

        if not record:
            return {
                "status": "not_found",
                "message": f"No status record found for {target}",
                "component": target
            }

        # Snapshot of what is about to be overwritten; checkpoint data is
        # truncated to its first 100 items/characters to keep logs small.
        prior_status = record.status
        prior_data = record.checkpoint_data
        snapshot = {
            "status": prior_status.value if prior_status else None,
            "checkpoint_page": record.checkpoint_page,
            "checkpoint_data": prior_data[:100] if prior_data else None
        }

        # Apply the reset field by field, then persist it.
        resets = {
            "checkpoint_page": None,
            "checkpoint_data": None,
            "status": SystemStatusEnum.READY,
            "message": f"Checkpoint cleared manually by {current_user.username}",
            "progress": 100,
            "updated_at": utc_now(),
        }
        for field_name, new_value in resets.items():
            setattr(record, field_name, new_value)

        db.commit()

        struct_logger.info(
            "Checkpoint cleared manually",
            component=target,
            previous_state=snapshot,
            user=current_user.username,
            user_id=str(current_user.id)
        )

        return {
            "status": "success",
            "message": f"Checkpoint cleared for {target}",
            "component": target,
            "previous_state": snapshot,
            "new_status": "READY"
        }

    except Exception as exc:
        error_text = str(exc)
        logger.error(f"Error clearing checkpoint for {component}: {error_text}")
        # Drop any partially applied changes before reporting the failure.
        db.rollback()
        raise HTTPException(
            status_code=500,
            detail=f"Error clearing checkpoint: {error_text}"
        )


@router.post("/catalog/enrich-nomenclator-only")
async def enrich_nomenclator_only_products(
    max_products: int = Query(default=500, ge=1, le=5000, description="Maximum products to enrich per run"),
    batch_size: int = Query(default=10, ge=1, le=50, description="Products per commit batch"),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE)),
    db: Session = Depends(get_db),
) -> Dict[str, Any]:
    """
    Enrich products that ONLY have nomenclator data (no CIMA).

    These products (~6,750) don't appear in CIMA's /presentaciones endpoint
    but DO have data available via /medicamento?cn=X endpoint.

    The work itself is delegated to
    ``cima_integration_service.enrich_nomenclator_only_products``; according to
    the original endpoint documentation that process:
    1. Finds products with data_sources='nomenclator' (no CIMA data)
    2. For each product, calls CIMA's /medicamento?cn=X endpoint
    3. Updates CIMA fields (receta, laboratorio, principios_activos, etc.)
    4. Marks product as enriched with data_sources='nomenclator,cima'

    **Performance**: ~0.4s per product (100ms pause + API response time)
    - 500 products ≈ 3-4 minutes
    - 5000 products ≈ 35-40 minutes

    **Use cases**:
    - Products classified as "Venta Libre" incorrectly (missing cima_requiere_receta)
    - Products like TOUJEO, TRAJENTA, TRESIBA that need prescription data

    Args:
        max_products: Maximum products to process (default 500)
        batch_size: Products per database commit (default 10)
        current_user: Admin user with ADMIN_CATALOG_MANAGE permission
        db: Database session

    Returns:
        Statistics of the enrichment process, exactly as returned by the service

    Raises:
        HTTPException: Re-raised unchanged when the service raises one;
            any other failure is reported as a 500. The session is rolled
            back in both cases.
    """
    from app.external_data.cima_integration import cima_integration_service

    try:
        struct_logger.info(
            "Starting nomenclator-only enrichment",
            max_products=max_products,
            batch_size=batch_size,
            user=current_user.username,
            user_id=str(current_user.id)
        )

        # Run the async enrichment process
        stats = await cima_integration_service.enrich_nomenclator_only_products(
            db=db,
            max_products=max_products,
            batch_size=batch_size
        )

        # Log completion
        struct_logger.info(
            "Nomenclator-only enrichment completed",
            stats=stats,
            user=current_user.username
        )

        return stats

    except HTTPException:
        # Preserve deliberate HTTP errors (as the sibling endpoints do), but
        # still discard whatever the interrupted batch left pending.
        db.rollback()
        raise
    except Exception as e:
        logger.error(f"Error in nomenclator-only enrichment: {str(e)}")
        db.rollback()
        raise HTTPException(
            status_code=500,
            detail=f"Error enriching nomenclator-only products: {str(e)}"
        ) from e


# ============================================================================
# MANUAL REVIEW ENDPOINTS (Issue #447)
# ============================================================================


@router.get("/manual-review/stats", response_model=ManualReviewStats)
async def get_manual_review_stats(
    pharmacy_id: Optional[UUID] = Query(None, description="Filtrar por farmacia específica"),
    current_user: User = Depends(require_permission(Permission.VIEW_SYSTEM_STATS)),
    db: Session = Depends(get_db),
) -> ManualReviewStats:
    """
    Return aggregate statistics for products in manual review (not enriched).

    Issue #447: lets an admin see how many products are pending analysis.

    **Statistics included:**
    - Total unique products without enrichment
    - Total sales lines without enrichment
    - Total amount not yet analysed
    - Breakdown by code type (CN, EAN, INTERNAL)

    Args:
        pharmacy_id: Restrict the statistics to one pharmacy (optional)
        current_user: Authenticated admin user
        db: Database session

    Returns:
        ManualReviewStats with the aggregated statistics

    Raises:
        HTTPException: Re-raised unchanged when the service raises one;
            any other failure is reported as a 500.
    """
    try:
        service = ManualReviewService(db)
        stats = service.get_stats(pharmacy_id=pharmacy_id)

        struct_logger.info(
            "manual_review.stats.fetched",
            total_products=stats.total_products,
            # Cast for logging; total_amount is presumably a Decimal - confirm.
            total_amount=float(stats.total_amount),
            user=current_user.username,
        )

        return stats

    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except Exception as e:
        logger.error(f"Error fetching manual review stats: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error fetching manual review statistics: {str(e)}"
        ) from e


@router.get("/manual-review/products", response_model=ManualReviewListResponse)
async def list_manual_review_products(
    pharmacy_id: Optional[UUID] = Query(None, description="Filtrar por farmacia"),
    min_sales: Optional[int] = Query(None, ge=1, description="Mínimo de ventas para incluir"),
    page: int = Query(1, ge=1, description="Número de página"),
    page_size: int = Query(100, ge=10, le=500, description="Tamano de pagina"),
    code_type: Optional[Literal["CN", "EAN", "INTERNAL"]] = Query(
        None,
        description="Filtrar por tipo de codigo: CN, EAN, INTERNAL",
    ),
    search: Optional[str] = Query(
        None,
        description="Buscar por nombre o codigo de producto",
        min_length=1,
        max_length=100
    ),
    order_by: Optional[str] = Query(
        None,
        description="Ordenamiento: amount_desc (default), sales_desc, name_asc",
    ),
    current_user: User = Depends(require_permission(Permission.VIEW_SYSTEM_STATS)),
    db: Session = Depends(get_db),
) -> ManualReviewListResponse:
    """
    List products in manual review with pagination and advanced filters.

    Issue #447: lets an admin see products pending enrichment.
    Issue #448: support for advanced filters (code_type, search, order_by).

    **Ordering:**
    - amount_desc (default): by total amount, descending
    - sales_desc: by number of sales, descending
    - name_asc: by name, ascending

    **Filters:**
    - code_type: filter by code type (CN, EAN, INTERNAL)
    - search: search by name or code (case-insensitive)

    Args:
        pharmacy_id: Restrict to one pharmacy
        min_sales: Only include products with at least N sales
        page: Page number (1-indexed)
        page_size: Products per page (10-500)
        code_type: Filter by code type
        search: Search term
        order_by: Ordering key; forwarded to the service without validation here
        current_user: Authenticated admin user
        db: Database session

    Returns:
        ManualReviewListResponse with the requested page, the total match
        count and the global statistics for the same pharmacy filter

    Raises:
        HTTPException: Re-raised unchanged when the service raises one;
            any other failure is reported as a 500.
    """
    try:
        service = ManualReviewService(db)

        # Translate the 1-indexed page into a row offset
        offset = (page - 1) * page_size

        # Fetch the page using the Issue #448 filters
        products, total_count = service.get_manual_review_products(
            pharmacy_id=pharmacy_id,
            min_sales=min_sales,
            limit=page_size,
            offset=offset,
            code_type=code_type,
            search=search,
            order_by=order_by,
        )

        # Global stats: only the pharmacy filter applies, not the page filters
        stats = service.get_stats(pharmacy_id=pharmacy_id)

        struct_logger.info(
            "manual_review.products.listed",
            page=page,
            page_size=page_size,
            returned=len(products),
            total=total_count,
            user=current_user.username,
            code_type=code_type,
            search=search,
            order_by=order_by,
        )

        return ManualReviewListResponse(
            products=products,
            total_count=total_count,
            page=page,
            page_size=page_size,
            stats=stats,
        )

    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except Exception as e:
        logger.error(f"Error listing manual review products: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error listing manual review products: {str(e)}"
        ) from e


@router.get("/manual-review/export")
async def export_manual_review(
    format: Literal["csv", "json"] = Query("csv", description="Formato de exportación"),
    pharmacy_id: Optional[UUID] = Query(None, description="Filtrar por farmacia"),
    min_sales: Optional[int] = Query(None, ge=1, description="Mínimo de ventas para incluir"),
    current_user: User = Depends(require_permission(Permission.VIEW_SYSTEM_STATS)),
    db: Session = Depends(get_db),
) -> StreamingResponse:
    """
    Export products in manual review as a CSV or JSON attachment.

    Issue #447: lets an admin download non-enriched products for offline analysis.

    **CSV columns:**
    - product_code: Product code (CN, EAN or internal)
    - product_name: Product name
    - code_type: Code type (CN, EAN, INTERNAL)
    - pharmacy_count: Number of pharmacies with this product
    - sale_count: Total sales lines
    - total_units: Units sold
    - total_amount: Total amount in euros
    - avg_price: Average unit price
    - first_sale: Date of first sale
    - last_sale: Date of last sale

    Args:
        format: Export format (csv or json)
        pharmacy_id: Restrict to one pharmacy
        min_sales: Only include products with at least N sales
        current_user: Authenticated admin user
        db: Database session

    Returns:
        StreamingResponse carrying the file, with ``Content-Disposition`` and
        ``X-Row-Count`` headers. The whole payload is built in memory and sent
        as a single chunk.

    Raises:
        HTTPException: Re-raised unchanged when the service raises one;
            any other failure is reported as a 500.
    """
    try:
        service = ManualReviewService(db)

        if format == "csv":
            # CSV export: the service returns a buffer plus the row count
            csv_output, row_count = service.export_to_csv(
                pharmacy_id=pharmacy_id,
                min_sales=min_sales,
            )

            filename = f"manual_review_{utc_now().strftime('%Y%m%d_%H%M%S')}.csv"

            struct_logger.info(
                "manual_review.export.csv",
                row_count=row_count,
                filename=filename,
                user=current_user.username,
            )

            return StreamingResponse(
                iter([csv_output.getvalue()]),
                media_type="text/csv",
                headers={
                    "Content-Disposition": f'attachment; filename="{filename}"',
                    "X-Row-Count": str(row_count),
                },
            )

        else:
            # JSON export: limit=None asks the service for every matching product
            products, total_count = service.get_manual_review_products(
                pharmacy_id=pharmacy_id,
                min_sales=min_sales,
                limit=None,
                offset=0,
            )

            import json

            # Build a JSON-serializable payload
            json_data = {
                "exported_at": utc_now().isoformat(),
                "total_products": total_count,
                "products": [p.model_dump(mode="json") for p in products],
            }

            # ensure_ascii=False keeps accented product names readable
            json_output = json.dumps(json_data, indent=2, ensure_ascii=False)
            filename = f"manual_review_{utc_now().strftime('%Y%m%d_%H%M%S')}.json"

            struct_logger.info(
                "manual_review.export.json",
                row_count=total_count,
                filename=filename,
                user=current_user.username,
            )

            return StreamingResponse(
                iter([json_output]),
                media_type="application/json",
                headers={
                    "Content-Disposition": f'attachment; filename="{filename}"',
                    "X-Row-Count": str(total_count),
                },
            )

    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except Exception as e:
        logger.error(f"Error exporting manual review: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error exporting manual review data: {str(e)}"
        ) from e


# ============================================================================
# VENTA LIBRE CLASSIFICATION ENDPOINT (Issue #476)
# ============================================================================


class VentaLibreClassifyResponse(BaseModel):
    """Response for VentaLibre classification operation.

    Summarises one run of the ``/venta-libre/classify`` endpoint. In dry-run
    mode the counters describe what would have been classified; nothing is
    persisted.
    """

    # Products for which keyword matching produced a category other than "unknown".
    classified: int
    # Products examined in this run that stayed "unknown".
    skipped: int
    # Rows whose ml_category was "unknown" or NULL when the run started
    # (counted without applying the request's limit).
    total_unclassified: int
    # Wall-clock duration of the run, in milliseconds.
    execution_time_ms: float
    # Number of classified products per assigned category, highest count first.
    by_category: Dict[str, int]


@router.post("/venta-libre/classify", response_model=VentaLibreClassifyResponse)
@admin_general_limit
async def classify_venta_libre_products(
    request: Request,
    dry_run: bool = Query(default=False, description="If true, only simulate without updating"),
    limit: int = Query(default=5000, ge=1, le=50000, description="Maximum products to classify"),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE)),
    db: Session = Depends(get_db),
):
    """
    Classify VentaLibre products that have ml_category='unknown' or NULL.

    Issue #476: Automatic classification using keyword rules.

    This endpoint:
    1. Finds VL products with ml_category='unknown' or NULL (at most ``limit``)
    2. Applies keyword-based classification rules to ``product_name_display``
    3. Updates ml_category, ml_confidence and updated_at (skipped on dry run)
    4. Returns statistics of the operation

    **Performance**: ~5000 products/second (pure keyword matching)

    **Classification rules**: Based on product name keywords
    - FRENADOL → gripe_resfriado
    - VOLTAREN → dolor_muscular
    - STREPSILS → dolor_garganta
    - etc. (200+ keyword rules)

    Args:
        request: Incoming request; not read here (presumably required by the
            rate-limit decorator)
        dry_run: If true, only count matches without updating
        limit: Maximum products to process (default 5000)
        current_user: Admin user with ADMIN_CATALOG_MANAGE permission
        db: Database session

    Returns:
        VentaLibreClassifyResponse with classification statistics

    Raises:
        HTTPException: Re-raised unchanged when raised downstream; any other
            failure is reported as a 500. The session is rolled back in both
            cases.
    """
    from app.models import ProductCatalogVentaLibre
    from app.services.necesidad_classifier_service import classify_product_by_keywords

    start_time = utc_now()

    try:
        # Single definition of "needs classification", shared by both queries
        # so the processed batch and the reported total cannot drift apart.
        needs_classification = or_(
            ProductCatalogVentaLibre.ml_category == "unknown",
            ProductCatalogVentaLibre.ml_category.is_(None),
        )

        # NOTE(review): there is no ORDER BY and products that stay "unknown"
        # remain eligible, so consecutive runs may pick the same rows again -
        # confirm this is acceptable for large backlogs.
        products = (
            db.query(ProductCatalogVentaLibre)
            .filter(needs_classification)
            .limit(limit)
            .all()
        )

        # Size of the whole backlog, ignoring the limit
        total_unclassified = (
            db.query(func.count(ProductCatalogVentaLibre.id))
            .filter(needs_classification)
            .scalar()
            or 0
        )

        struct_logger.info(
            "venta_libre.classify.started",
            products_to_process=len(products),
            total_unclassified=total_unclassified,
            dry_run=dry_run,
            user=current_user.username,
        )

        # Classify products
        classified = 0
        skipped = 0
        by_category: Dict[str, int] = {}

        for product in products:
            ml_category, ml_confidence = classify_product_by_keywords(
                product.product_name_display,
                default_category="unknown",
                default_confidence=0.0,
            )

            # "unknown" means no keyword rule matched: leave the row untouched
            if ml_category == "unknown":
                skipped += 1
                continue

            by_category[ml_category] = by_category.get(ml_category, 0) + 1

            if not dry_run:
                product.ml_category = ml_category
                product.ml_confidence = ml_confidence
                product.updated_at = utc_now()

            classified += 1

        # One commit for the whole batch
        if not dry_run:
            db.commit()

        execution_time = (utc_now() - start_time).total_seconds() * 1000

        struct_logger.info(
            "venta_libre.classify.completed",
            classified=classified,
            skipped=skipped,
            execution_time_ms=execution_time,
            dry_run=dry_run,
            user=current_user.username,
        )

        # Sort categories by count
        sorted_categories = dict(
            sorted(by_category.items(), key=lambda x: x[1], reverse=True)
        )

        return VentaLibreClassifyResponse(
            classified=classified,
            skipped=skipped,
            total_unclassified=total_unclassified,
            execution_time_ms=execution_time,
            by_category=sorted_categories,
        )

    except HTTPException:
        # Preserve deliberate HTTP errors, but never leave pending changes behind
        db.rollback()
        raise
    except Exception as e:
        db.rollback()
        logger.error(f"Error classifying VentaLibre products: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error classifying VentaLibre products: {str(e)}"
        ) from e


# ============================================================================
# Issue #517: ATC Backfill Endpoints
# ============================================================================


class ATCBackfillRequest(BaseModel):
    """Request for ATC backfill operation.

    Body of the ``/atc-backfill`` endpoint; every field is forwarded as-is to
    ``atc_backfill_service.backfill_atc_codes``.
    """
    # Products per batch; accepted range 10-500.
    batch_size: int = Field(default=100, ge=10, le=500, description="Products per batch")
    # Number of concurrent API requests; accepted range 1-20.
    concurrent_requests: int = Field(default=5, ge=1, le=20, description="Concurrent API requests")
    # When true the service is asked to process only recent products
    # (the 7-day window is applied by the service, not here).
    incremental: bool = Field(default=False, description="Only process recent products (last 7 days)")
    # Upper bound on products handled in one run; accepted range 1-50000.
    # An explicit null is also accepted - presumably meaning "no cap"; confirm in the service.
    max_products: Optional[int] = Field(default=5000, ge=1, le=50000, description="Maximum products to process")


class ATCBackfillResponse(BaseModel):
    """Response for ATC backfill operation.

    Built by the ``/atc-backfill`` endpoint from the result dict returned by
    the backfill service.
    """
    # Outcome label from the service; the endpoint falls back to "completed".
    status: str
    # Counters reported by the service; the endpoint falls back to 0 for each.
    # Presumably successful + failed + skipped add up to processed - confirm in the service.
    processed: int
    successful: int
    failed: int
    skipped: int
    # Run duration in seconds; None when the service does not report it.
    duration_seconds: Optional[float]
    # Coverage summary from the service; the endpoint reads its
    # "coverage_percentage" key for logging.
    coverage: Dict[str, Any]


class ATCCoverageResponse(BaseModel):
    """Response for ATC coverage stats.

    Returned by the ``/atc-coverage`` endpoint.
    """
    # Catalog rows that have CIMA data (non-null cima_nombre_comercial).
    total_cima_products: int
    # Rows with a usable ATC code: non-null, non-empty and not the
    # "NO_ATC_DISPONIBLE" placeholder.
    products_with_atc: int
    # total_cima_products minus products_with_atc.
    products_without_atc: int
    # products_with_atc / total_cima_products * 100, rounded to 2 decimals
    # (0 when there are no CIMA products).
    coverage_percentage: float
    # Coverage goal in percent.
    target_percentage: float = 60.0
    # True when the coverage is at or above the target.
    target_reached: bool


@router.get(
    "/atc-coverage",
    response_model=ATCCoverageResponse,
    summary="Get ATC coverage statistics",
    description="Returns the current ATC code coverage in the product catalog.",
)
async def get_atc_coverage(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.VIEW_SYSTEM_STATS)),
):
    """
    Get ATC coverage statistics.

    Coverage is the share of catalog products with CIMA data (non-null
    ``cima_nombre_comercial``) that also carry a usable ATC code, i.e. one
    that is non-null, non-empty and not the "NO_ATC_DISPONIBLE" placeholder.
    The result is compared against a fixed 60% target.

    Args:
        db: Database session
        current_user: Authenticated user with VIEW_SYSTEM_STATS permission

    Returns:
        ATCCoverageResponse with coverage stats

    Raises:
        HTTPException: Re-raised unchanged when raised downstream; any other
            failure is reported as a 500.
    """
    try:
        # Denominator population: products with CIMA data
        cima_products = db.query(ProductCatalog).filter(
            ProductCatalog.cima_nombre_comercial.isnot(None)
        )
        total_cima = cima_products.count()

        # Numerator: counted within the same population, so it can never
        # exceed the denominator (coverage stays within 0-100% and
        # products_without_atc can never go negative).
        with_atc = cima_products.filter(
            ProductCatalog.cima_atc_code.isnot(None),
            ProductCatalog.cima_atc_code != "",
            ProductCatalog.cima_atc_code != "NO_ATC_DISPONIBLE"
        ).count()

        # Avoid dividing by zero on an empty catalog
        coverage = (with_atc / total_cima * 100) if total_cima > 0 else 0.0
        target = 60.0

        return ATCCoverageResponse(
            total_cima_products=total_cima,
            products_with_atc=with_atc,
            products_without_atc=total_cima - with_atc,
            coverage_percentage=round(coverage, 2),
            target_percentage=target,
            # Compared before rounding, so 59.996% does not count as reached
            target_reached=coverage >= target
        )
    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except Exception as e:
        logger.error(f"Error getting ATC coverage: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error getting ATC coverage: {str(e)}"
        ) from e


@router.post(
    "/atc-backfill",
    response_model=ATCBackfillResponse,
    summary="Trigger ATC backfill",
    description="Backfill ATC codes from CIMA API for products missing them.",
)
async def trigger_atc_backfill(
    request: ATCBackfillRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE)),
):
    """
    Trigger ATC backfill operation.

    This fetches ATC codes from CIMA API for products that don't have them.
    The run is awaited inside the request, so the response is only sent once
    the service has finished.

    Args:
        request: ATCBackfillRequest with configuration (JSON body)
        db: Database session; not referenced in this function body
        current_user: Admin user with ADMIN_CATALOG_MANAGE permission

    Returns:
        ATCBackfillResponse with backfill results; counters missing from the
        service result default to 0 and a missing status to "completed"

    Raises:
        HTTPException: Re-raised unchanged when the service raises one;
            any other failure is reported as a 500.
    """
    from app.services.atc_backfill_service import atc_backfill_service

    struct_logger.info(
        "atc_backfill.triggered",
        user=current_user.username,
        batch_size=request.batch_size,
        concurrent_requests=request.concurrent_requests,
        incremental=request.incremental,
        max_products=request.max_products
    )

    try:
        result = await atc_backfill_service.backfill_atc_codes(
            batch_size=request.batch_size,
            concurrent_requests=request.concurrent_requests,
            incremental=request.incremental,
            max_products=request.max_products
        )

        struct_logger.info(
            "atc_backfill.completed",
            user=current_user.username,
            processed=result.get("processed", 0),
            successful=result.get("successful", 0),
            coverage=result.get("coverage", {}).get("coverage_percentage", 0)
        )

        # Map the service's result dict onto the response schema
        return ATCBackfillResponse(
            status=result.get("status", "completed"),
            processed=result.get("processed", 0),
            successful=result.get("successful", 0),
            failed=result.get("failed", 0),
            skipped=result.get("skipped", 0),
            duration_seconds=result.get("duration_seconds"),
            coverage=result.get("coverage", {})
        )
    except HTTPException:
        raise  # Re-raise HTTPExceptions preserving their status codes
    except Exception as e:
        logger.error(f"Error during ATC backfill: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error during ATC backfill: {str(e)}"
        ) from e
