"""
Endpoint de métricas Prometheus para observabilidad del sistema.
Issue #114 - Fase 3: Observabilidad

Expone métricas para monitoreo externo con Prometheus/Grafana.
"""

import os
import time
from typing import Dict, List

import structlog
from fastapi import APIRouter, Depends, HTTPException, Request, Response

logger = structlog.get_logger(__name__)

# Prometheus client es opcional - no disponible en modo local (Windows desktop)
try:
    from prometheus_client import CONTENT_TYPE_LATEST, CollectorRegistry, Counter, Gauge, Histogram, generate_latest
    PROMETHEUS_AVAILABLE = True
except ImportError:
    PROMETHEUS_AVAILABLE = False
    logger.info("[METRICS] prometheus_client not available - metrics disabled (local mode)")

router = APIRouter()

# Si prometheus no está disponible, crear stubs y endpoint simple
if not PROMETHEUS_AVAILABLE:
    @router.get("/metrics", include_in_schema=False)
    async def prometheus_metrics_stub() -> Response:
        """Serve a fixed plain-text notice in place of real metrics.

        This handler is only registered when ``prometheus_client`` could not
        be imported.

        Returns:
            A ``text/plain`` response whose body is a single comment line.
        """
        notice = b"# Prometheus metrics not available in local mode\n"
        return Response(content=notice, media_type="text/plain")

    # Funciones stub para que los imports no fallen
    def record_stall_detected() -> None:
        """No-op stand-in used while Prometheus metrics are unavailable."""
        return None

    def record_recovery_attempt(success: bool, duration_seconds: float = 0) -> None:
        """No-op stand-in used while Prometheus metrics are unavailable.

        Args:
            success: Accepted and ignored.
            duration_seconds: Accepted and ignored.
        """
        return None

    def record_sync_success() -> None:
        """No-op stand-in used while Prometheus metrics are unavailable."""
        return None

    def record_sync_failure() -> None:
        """No-op stand-in used while Prometheus metrics are unavailable."""
        return None

# Prometheus disponible - implementación completa
if PROMETHEUS_AVAILABLE:
    from app.services.cima_sync_monitor import cima_sync_monitor

    # Importación con fallback para rate limiter
    try:
        from app.core.rate_limiter import RateLimitStore, get_client_ip
        RATE_LIMITER_AVAILABLE = True
    except ImportError as e:
        logger.warning(
            "rate_limiter.not_available", message="Rate limiter module not available, rate limiting disabled", error=str(e)
        )
        RATE_LIMITER_AVAILABLE = False

        class RateLimitStore:
            """Inert replacement defined when the rate limiter module fails to import."""

            def get_request_counts(self, client_ip: str):
                """Report that no requests have been seen for any client.

                Args:
                    client_ip: Accepted and ignored.

                Returns:
                    The pair ``(0, 0)``.
                """
                no_requests = (0, 0)
                return no_requests

            def add_request(self, client_ip: str):
                """Accept the call and record nothing."""
                return None

        def get_client_ip(request: Request) -> str:
            """Fallback client-IP lookup.

            Returns:
                ``request.client.host`` when ``request.client`` is truthy,
                otherwise the string ``"unknown"``.
            """
            peer = request.client
            if peer:
                return peer.host
            return "unknown"

    # Dedicated registry so these collectors do not conflict with the default registry
    registry = CollectorRegistry()

    # Deployment context, exposed as label values on the metrics
    ENVIRONMENT = os.environ.get("ENVIRONMENT", "development")
    IS_RENDER = os.environ.get("RENDER") == "true"

    # ============================================================================
    # MÉTRICAS PROMETHEUS
    # ============================================================================

    # Every metric below is registered on the module's dedicated registry and
    # carries the "environment" and "is_render" labels.

    # Incremented by record_stall_detected().
    cima_stalls_total = Counter(
        name="cima_stalls_total",
        documentation="Total number of CIMA sync stalls detected",
        labelnames=("environment", "is_render"),
        registry=registry,
    )

    # Incremented by record_recovery_attempt(); the extra "success" label
    # holds the lower-cased outcome.
    cima_recovery_attempts_total = Counter(
        name="cima_recovery_attempts_total",
        documentation="Total number of CIMA sync recovery attempts",
        labelnames=("environment", "is_render", "success"),
        registry=registry,
    )

    # Set by update_metrics_from_monitor().
    cima_heartbeat_age_seconds = Gauge(
        name="cima_heartbeat_age_seconds",
        documentation="Age of the last CIMA sync heartbeat in seconds",
        labelnames=("environment", "is_render"),
        registry=registry,
    )

    # Observed by record_recovery_attempt() for positive durations only.
    cima_recovery_duration_seconds = Histogram(
        name="cima_recovery_duration_seconds",
        documentation="Duration of CIMA sync recovery operations in seconds",
        labelnames=("environment", "is_render"),
        buckets=(1, 5, 10, 30, 60, 120, 300, 600),
        registry=registry,
    )

    # Set by update_metrics_from_monitor().
    cima_circuit_breaker_state = Gauge(
        name="cima_circuit_breaker_state",
        documentation="State of CIMA sync circuit breaker (0=CLOSED, 1=OPEN, 2=HALF_OPEN)",
        labelnames=("environment", "is_render"),
        registry=registry,
    )

    # Set by update_metrics_from_monitor().
    cima_circuit_breaker_failures = Gauge(
        name="cima_circuit_breaker_failures",
        documentation="Current failure count in CIMA sync circuit breaker",
        labelnames=("environment", "is_render"),
        registry=registry,
    )

    # Incremented by record_sync_success().
    cima_sync_success_total = Counter(
        name="cima_sync_success_total",
        documentation="Total number of successful CIMA sync operations",
        labelnames=("environment", "is_render"),
        registry=registry,
    )

    # Incremented by record_sync_failure().
    cima_sync_failure_total = Counter(
        name="cima_sync_failure_total",
        documentation="Total number of failed CIMA sync operations",
        labelnames=("environment", "is_render"),
        registry=registry,
    )

    # ============================================================================
    # FUNCIONES AUXILIARES
    # ============================================================================

    def get_environment_labels() -> Dict[str, str]:
        """Build the environment label values shared by the metrics.

        Returns:
            A dict with ``environment`` set to ``ENVIRONMENT`` and
            ``is_render`` set to the lower-cased string form of ``IS_RENDER``.
        """
        render_flag = str(IS_RENDER).lower()
        return dict(environment=ENVIRONMENT, is_render=render_flag)

    def update_metrics_from_monitor() -> None:
        """Refresh the heartbeat and circuit-breaker gauges from the CIMA sync monitor.

        Reads ``cima_sync_monitor.get_monitor_status()`` and, when the
        corresponding data is present, sets:

        * ``cima_heartbeat_age_seconds`` to the current time minus the
          heartbeat's ``timestamp``;
        * ``cima_circuit_breaker_state`` to 0/1/2 for CLOSED/OPEN/HALF_OPEN
          (any other state value is reported as 0);
        * ``cima_circuit_breaker_failures`` to the breaker's ``failure_count``.

        This is best effort: any exception other than ``HTTPException`` is
        logged as ``metrics.update_failed`` and swallowed.

        Raises:
            HTTPException: Re-raised unchanged if one is raised inside the block.
        """
        try:
            monitor_status = cima_sync_monitor.get_monitor_status()
            labels = get_environment_labels()

            heartbeat_data = monitor_status.get("sync_heartbeat")
            if heartbeat_data:
                # NOTE(review): a heartbeat without "timestamp" falls back to 0,
                # so the reported age becomes the current epoch time — confirm
                # that is intended. Presumably the timestamp is in epoch
                # seconds; verify against the monitor.
                heartbeat_timestamp = heartbeat_data.get("timestamp", 0)
                current_timestamp = int(time.time())
                age_seconds = current_timestamp - heartbeat_timestamp
                cima_heartbeat_age_seconds.labels(**labels).set(age_seconds)

            # dict.get only uses its default when the key is absent. A key that
            # is present with a None value would make the chained .get raise
            # AttributeError, so normalise None to an empty dict instead.
            recovery_status = monitor_status.get("recovery_status") or {}
            circuit_breaker_info = recovery_status.get("circuit_breaker") or {}

            if circuit_breaker_info:
                state_value_map = {"CLOSED": 0, "OPEN": 1, "HALF_OPEN": 2}
                state = circuit_breaker_info.get("state", "CLOSED")
                # Unrecognised state values are reported as 0 (CLOSED).
                state_value = state_value_map.get(state, 0)
                cima_circuit_breaker_state.labels(**labels).set(state_value)
                failure_count = circuit_breaker_info.get("failure_count", 0)
                cima_circuit_breaker_failures.labels(**labels).set(failure_count)

        except HTTPException:
            raise
        except Exception as e:
            logger.error("metrics.update_failed", error=str(e), exc_info=True)

    # ============================================================================
    # FUNCIONES PÚBLICAS PARA INCREMENTAR MÉTRICAS
    # ============================================================================

    def record_stall_detected() -> None:
        """Count one detected CIMA sync stall and log the metric update."""
        stall_counter = cima_stalls_total.labels(**get_environment_labels())
        stall_counter.inc()
        logger.info("metrics.stall_detected", event_type="metric_update")

    def record_recovery_attempt(success: bool, duration_seconds: float = 0) -> None:
        """Count one CIMA sync recovery attempt and, optionally, its duration.

        Args:
            success: Outcome of the attempt; stored lower-cased in the
                ``success`` label of ``cima_recovery_attempts_total``.
            duration_seconds: Duration of the attempt. It is observed in
                ``cima_recovery_duration_seconds`` only when greater than 0.
        """
        env_labels = get_environment_labels()
        outcome = str(success).lower()
        cima_recovery_attempts_total.labels(success=outcome, **env_labels).inc()

        if duration_seconds > 0:
            duration_histogram = cima_recovery_duration_seconds.labels(**env_labels)
            duration_histogram.observe(duration_seconds)

        logger.info(
            "metrics.recovery_attempt_recorded",
            event_type="metric_update",
            success=success,
            duration_seconds=duration_seconds,
        )

    def record_sync_success() -> None:
        """Count one successful CIMA sync and log the metric update."""
        success_counter = cima_sync_success_total.labels(**get_environment_labels())
        success_counter.inc()
        logger.info("metrics.sync_success_recorded", event_type="metric_update")

    def record_sync_failure() -> None:
        """Count one failed CIMA sync and log the metric update."""
        failure_counter = cima_sync_failure_total.labels(**get_environment_labels())
        failure_counter.inc()
        logger.info("metrics.sync_failure_recorded", event_type="metric_update")

    # ============================================================================
    # RATE LIMITING
    # ============================================================================

    # Per-client-IP threshold for /metrics: check_metrics_rate_limit answers
    # HTTP 429 once the client's per-minute request count reaches this value.
    METRICS_LIMIT_PER_MINUTE = 180

    def get_prometheus_allowed_ips() -> List[str]:
        """Read the IPs allowed to bypass the /metrics rate limit.

        The value comes from the ``PROMETHEUS_ALLOWED_IPS`` environment
        variable, a comma-separated list that is re-read on every call.

        Returns:
            The entries with surrounding whitespace stripped and empty entries
            dropped; an empty list when the variable is unset or empty.
        """
        raw_value = os.getenv("PROMETHEUS_ALLOWED_IPS", "")
        stripped_entries = (entry.strip() for entry in raw_value.split(","))
        return [entry for entry in stripped_entries if entry]

    # Store backing the /metrics rate limit; None when the rate limiter is unavailable
    metrics_rate_limit_store = RateLimitStore() if RATE_LIMITER_AVAILABLE else None

    async def check_metrics_rate_limit(request: Request):
        """Apply the per-IP rate limit to the /metrics endpoint.

        The check is skipped when the rate limiter is unavailable or when the
        client IP is listed by ``get_prometheus_allowed_ips()``. Otherwise the
        request is recorded in ``metrics_rate_limit_store`` unless the client
        has already reached ``METRICS_LIMIT_PER_MINUTE`` in the current minute.

        Args:
            request: Incoming request, used to resolve the client IP.

        Raises:
            HTTPException: Status 429 with a ``Retry-After: 60`` header when
                the per-minute count has reached the limit.
        """
        if not RATE_LIMITER_AVAILABLE:
            return None

        requester_ip = get_client_ip(request)
        bypass_ips = get_prometheus_allowed_ips()
        if requester_ip in bypass_ips:
            return None

        requests_this_minute, _ = metrics_rate_limit_store.get_request_counts(requester_ip)
        if requests_this_minute < METRICS_LIMIT_PER_MINUTE:
            # Still under the limit: record this request and let it through.
            metrics_rate_limit_store.add_request(requester_ip)
            return None

        logger.warning(
            "metrics.rate_limit_exceeded",
            ip=requester_ip,
            count=requests_this_minute,
            limit=METRICS_LIMIT_PER_MINUTE,
        )
        rejection_detail = {
            "error": "Too many requests to /metrics endpoint",
            "retry_after": 60,
            "limit": f"{METRICS_LIMIT_PER_MINUTE} requests per minute",
        }
        raise HTTPException(
            status_code=429,
            detail=rejection_detail,
            headers={"Retry-After": "60"},
        )

    # ============================================================================
    # ENDPOINT PROMETHEUS
    # ============================================================================

    @router.get(
        "/metrics",
        include_in_schema=False,
        dependencies=[Depends(check_metrics_rate_limit)],
    )
    async def prometheus_metrics() -> Response:
        """Scrape endpoint returning the metrics of the module's registry.

        Calls ``update_metrics_from_monitor()`` first, then renders the
        registry with ``generate_latest``.

        Returns:
            The rendered metrics with media type ``CONTENT_TYPE_LATEST``. If
            rendering fails, an empty body with status 500 is returned.

        Raises:
            HTTPException: Re-raised unchanged if one is raised while building
                the response.
        """
        try:
            update_metrics_from_monitor()
            exposition = generate_latest(registry)
            logger.debug("metrics.scraped", content_length=len(exposition))
            return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)
        except HTTPException:
            raise
        except Exception as exc:
            logger.error("metrics.generation_failed", error=str(exc), exc_info=True)
            return Response(
                content=b"",
                media_type=CONTENT_TYPE_LATEST,
                status_code=500,
            )
