# backend/app/schemas/laboratory_validation.py
"""
Schemas Pydantic para validación de laboratory mapping endpoints
Implementa validaciones de seguridad y formato según Issue #23 Fase 1.2
"""

import re
from typing import Dict, Generic, List, Optional, TypeVar

from pydantic import BaseModel, ConfigDict, Field, field_validator

# Type variable used to parameterize the generic paginated response model
T = TypeVar("T")


class LaboratoryCodeRequest(BaseModel):
    """Request body carrying a batch of laboratory codes.

    ``codes`` must contain between 1 and 20 entries (enforced by the field
    constraints). Each entry is stripped of surrounding whitespace and must
    then consist of 1 to 4 ASCII digits.
    """

    codes: List[str] = Field(
        ..., min_length=1, max_length=20, description="Lista de códigos de laboratorio (máximo 20)"
    )

    @field_validator("codes")
    @classmethod
    def validate_code_format(cls, v: List[str]) -> List[str]:
        """Strip every code and check that it is 1-4 ASCII digits.

        Returns:
            A new list with the stripped codes, in the original order.

        Raises:
            ValueError: if an entry is not a string or does not match the
                expected format.
        """
        if not isinstance(v, list):
            return v

        validated_codes = []
        for code in v:
            if not isinstance(code, str):
                raise ValueError("Código debe ser string")

            # Surrounding whitespace is tolerated and removed before matching.
            code = code.strip()

            # re.ASCII restricts \d to 0-9. Without it, \d on a str pattern
            # also matches other Unicode decimal digits (e.g. Arabic-Indic),
            # which the documented /^\d{1,4}$/ format does not intend.
            if not re.match(r"^\d{1,4}$", code, re.ASCII):
                raise ValueError(
                    f"Código '{code}' debe ser numérico de 1-4 dígitos. " f"Formato válido: /^\\d{{1,4}}$/"
                )

            validated_codes.append(code)

        return validated_codes

    # NOTE(review): `openapi_examples` does not appear among the keys documented
    # for pydantic's ConfigDict — confirm that something actually consumes it.
    model_config = ConfigDict(
        json_schema_extra={"example": {"codes": ["111", "426", "863", "1079"]}},
        openapi_examples={
            "generic_leaders": {
                "summary": "Top Spanish generic laboratories",
                "description": "Most requested generic pharmaceutical laboratories in Spain",
                "value": {"codes": ["111", "426", "863", "644", "1079"]},
            },
            "specialty_pharma": {
                "summary": "Specialty pharmaceutical companies",
                "description": "Specialized laboratories for specific therapeutic areas",
                "value": {"codes": ["756", "892", "1245", "633"]},
            },
            "single_lab": {
                "summary": "Single laboratory lookup",
                "description": "Query specific laboratory information",
                "value": {"codes": ["111"]},
            },
        },
    )


class LaboratoryNameRequest(BaseModel):
    """Request body carrying a batch of laboratory names.

    ``names`` must contain between 1 and 20 entries (enforced by the field
    constraints). Each entry is stripped, must be 1-200 characters long and
    must not contain any of the forbidden tokens listed in the validator.
    """

    names: List[str] = Field(
        ..., min_length=1, max_length=20, description="Lista de nombres de laboratorio (máximo 20)"
    )

    @field_validator("names")
    @classmethod
    def validate_name_format(cls, v: List[str]) -> List[str]:
        """Strip every name and reject empty, over-long or unsafe values.

        Returns:
            A new list with the stripped names, in the original order.

        Raises:
            ValueError: if an entry is not a string, is empty after
                stripping, exceeds 200 characters, or contains a forbidden
                token (matched case-insensitively as a substring).
        """
        if not isinstance(v, list):
            return v

        # Tokens are rejected, not removed. All of them are lowercase, so they
        # are compared against the lowercased name.
        # NOTE(review): LaboratoryValidationConstants.DANGEROUS_CHARS in this
        # module no longer lists "&" and the quote characters, but this
        # validator still rejects them — confirm which behaviour is intended.
        dangerous_chars = ["<", ">", "'", '"', "&", "javascript:", "script"]

        validated_names = []
        for name in v:
            if not isinstance(name, str):
                raise ValueError("Nombre debe ser string")

            name = name.strip()

            if len(name) > 200:
                raise ValueError("Nombre de laboratorio no puede exceder 200 caracteres")

            if not name:
                raise ValueError("Nombre de laboratorio no puede estar vacío")

            # Lowercase once per name instead of once per token.
            lowered = name.lower()
            for char in dangerous_chars:
                if char in lowered:
                    raise ValueError(f"Nombre contiene caracteres no permitidos: {char}")

            validated_names.append(name)

        return validated_names

    model_config = ConfigDict(
        json_schema_extra={"example": {"names": ["CINFA S.A.", "NORMON S.A.", "TEVA PHARMA, S.L.U"]}},
        openapi_examples={
            "spanish_generics": {
                "summary": "Spanish generic manufacturers",
                "description": "Leading generic pharmaceutical companies in Spain",
                "value": {"names": ["CINFA S.A.", "NORMON S.A.", "KERN PHARMA, S.L.", "SANDOZ FARMACEUTICA, S.A"]},
            },
            "international_brands": {
                "summary": "International pharmaceutical brands",
                "description": "Global pharmaceutical companies with Spanish presence",
                "value": {
                    "names": [
                        "TEVA PHARMA, S.L.U",
                        "PFIZER, S.L.U.",
                        "NOVARTIS FARMACEUTICA, S.A.",
                        "BAYER HISPANIA, S.L.",
                    ]
                },
            },
            "partial_search": {
                "summary": "Partial name search",
                "description": "Search using partial laboratory names",
                "value": {"names": ["CINFA", "NORMON", "TEVA"]},
            },
        },
    )


class GenericLaboratoriesRequest(BaseModel):
    """Paginated request for generic laboratories with an optional search term.

    ``page`` defaults to 1 (allowed range 1-1000), ``per_page`` defaults to 50
    (allowed range 1-100) and ``search`` is an optional string of at most 100
    characters that is stripped and screened for forbidden tokens.
    """

    # NOTE(review): LaboratoryValidationConstants in this module declares
    # MAX_PAGE = 10000 and MAX_PER_PAGE = 200, which differ from the caps
    # below — confirm which limits should apply.
    page: Optional[int] = Field(default=1, ge=1, le=1000, description="Número de página (1-1000)")

    per_page: Optional[int] = Field(default=50, ge=1, le=100, description="Elementos por página (1-100)")

    search: Optional[str] = Field(
        default=None, max_length=100, description="Término de búsqueda opcional (máximo 100 chars)"
    )

    @field_validator("search")
    @classmethod
    def validate_search_term(cls, v: Optional[str]) -> Optional[str]:
        """Normalize the search term and reject unsafe values.

        Returns:
            ``None`` when the term is missing or blank, otherwise the
            stripped term.

        Raises:
            ValueError: if the term contains a forbidden token (matched
                case-insensitively as a substring).
        """
        if v is None:
            return v

        v = v.strip()

        # A whitespace-only term is treated the same as "no search".
        if not v:
            return None

        # Tokens are rejected, not removed. All of them are lowercase, so they
        # are compared against the lowercased term, computed once.
        dangerous_chars = ["<", ">", "'", '"', "&", "javascript:", "script"]
        lowered = v.lower()
        for char in dangerous_chars:
            if char in lowered:
                raise ValueError(f"Término de búsqueda contiene caracteres no permitidos: {char}")

        return v

    model_config = ConfigDict(
        json_schema_extra={"example": {"page": 1, "per_page": 50, "search": "CINFA"}},
        openapi_examples={
            "first_page": {
                "summary": "First page of generic laboratories",
                "description": "Get the first 20 generic laboratories for pagination display",
                "value": {"page": 1, "per_page": 20, "search": None},
            },
            "search_cinfa": {
                "summary": "Search for CINFA laboratories",
                "description": "Find all laboratories containing 'CINFA' in their name",
                "value": {"page": 1, "per_page": 10, "search": "CINFA"},
            },
            "high_volume": {
                "summary": "High volume pagination",
                "description": "Request large page size for bulk operations",
                "value": {"page": 2, "per_page": 100, "search": None},
            },
            "specific_search": {
                "summary": "Search for specific laboratory type",
                "description": "Find laboratories by therapeutic specialization",
                "value": {"page": 1, "per_page": 25, "search": "PHARMA"},
            },
        },
    )


class PaginationMetadata(BaseModel):
    """Pagination metadata attached to paginated responses.

    Carries the total element count, the current page, the page size, the
    page count and flags telling whether a next/previous page exists.
    """

    # Only `page`, `per_page` and `total_pages` declare range constraints;
    # `total` is accepted as any integer.
    total: int = Field(..., description="Número total de elementos")
    page: int = Field(..., ge=1, description="Página actual")
    per_page: int = Field(..., ge=1, le=200, description="Elementos por página")
    total_pages: int = Field(..., ge=1, description="Total de páginas")
    has_next: bool = Field(..., description="¿Existe página siguiente?")
    has_previous: bool = Field(..., description="¿Existe página anterior?")

    # Example payloads only; they do not affect validation of the fields above.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "total": 1247,
                "page": 2,
                "per_page": 50,
                "total_pages": 25,
                "has_next": True,
                "has_previous": True,
            }
        },
        openapi_examples={
            "spanish_market": {
                "summary": "Spanish pharmaceutical market pagination",
                "description": "Realistic pagination for Spanish laboratories dataset",
                "value": {
                    "total": 1247,
                    "page": 3,
                    "per_page": 50,
                    "total_pages": 25,
                    "has_next": True,
                    "has_previous": True,
                },
            },
            "generic_subset": {
                "summary": "Generic laboratories only",
                "description": "Pagination metadata for generic laboratories subset",
                "value": {
                    "total": 156,
                    "page": 1,
                    "per_page": 20,
                    "total_pages": 8,
                    "has_next": True,
                    "has_previous": False,
                },
            },
            "search_results": {
                "summary": "Search result pagination",
                "description": "Pagination for filtered search results",
                "value": {
                    "total": 23,
                    "page": 1,
                    "per_page": 10,
                    "total_pages": 3,
                    "has_next": True,
                    "has_previous": False,
                },
            },
        },
    )


class PaginatedResponse(BaseModel, Generic[T]):
    """Generic paginated response wrapper.

    ``items`` is typed by the type variable ``T`` so the same wrapper can be
    reused for different payload types; ``pagination`` holds the
    PaginationMetadata describing the current page.
    """

    # The examples below all use a dict mapping for `items`.
    items: T = Field(..., description="Elementos de la página actual")
    pagination: PaginationMetadata = Field(..., description="Metadata de paginación")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "items": {"111": "CINFA S.A.", "426": "NORMON S.A.", "863": "KERN PHARMA, S.L."},
                "pagination": {
                    "total": 1247,
                    "page": 2,
                    "per_page": 50,
                    "total_pages": 25,
                    "has_next": True,
                    "has_previous": True,
                },
            }
        },
        openapi_examples={
            "codes_to_names_paginated": {
                "summary": "Laboratory codes to names with pagination",
                "description": "Standard mapping response with pagination metadata",
                "value": {
                    "items": {
                        "111": "CINFA S.A.",
                        "426": "NORMON S.A.",
                        "863": "KERN PHARMA, S.L.",
                        "644": "SANDOZ FARMACEUTICA, S.A",
                        "1079": "TEVA PHARMA, S.L.U",
                    },
                    "pagination": {
                        "total": 1247,
                        "page": 1,
                        "per_page": 5,
                        "total_pages": 250,
                        "has_next": True,
                        "has_previous": False,
                    },
                },
            },
            "names_to_codes_paginated": {
                "summary": "Laboratory names to codes with pagination",
                "description": "Reverse mapping response with pagination metadata",
                "value": {
                    "items": {
                        "CINFA S.A.": "111",
                        "NORMON S.A.": "426",
                        "KERN PHARMA, S.L.": "863",
                        "SANDOZ FARMACEUTICA, S.A": "644",
                    },
                    "pagination": {
                        "total": 1247,
                        "page": 2,
                        "per_page": 4,
                        "total_pages": 312,
                        "has_next": True,
                        "has_previous": True,
                    },
                },
            },
            "generic_labs_search": {
                "summary": "Generic laboratories search results",
                "description": "Filtered generic laboratories with search term",
                "value": {
                    "items": {"111": "CINFA S.A.", "426": "NORMON S.A.", "863": "KERN PHARMA, S.L."},
                    "pagination": {
                        "total": 156,
                        "page": 1,
                        "per_page": 3,
                        "total_pages": 52,
                        "has_next": True,
                        "has_previous": False,
                    },
                },
            },
        },
    )


class LaboratoryMappingResponse(BaseModel):
    """Laboratory mapping response kept for backward compatibility.

    ``mapping`` is a string-to-string dict; the examples show it used in both
    directions (code -> name and name -> code). ``total_count`` and ``page``
    are optional and flagged as DEPRECATED in their own descriptions.
    """

    mapping: Dict[str, str] = Field(..., description="Mapeo código ↔ nombre de laboratorio")

    # Legacy pagination fields; both default to None when omitted.
    total_count: Optional[int] = Field(
        default=None, description="Total de registros (para paginación) - DEPRECATED: usar pagination"
    )

    page: Optional[int] = Field(
        default=None, description="Página actual (para paginación) - DEPRECATED: usar pagination"
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "mapping": {"111": "CINFA S.A.", "426": "NORMON S.A.", "863": "KERN PHARMA, S.L."},
                "total_count": 150,
                "page": 1,
            }
        },
        openapi_examples={
            "legacy_codes_mapping": {
                "summary": "Legacy format - codes to names",
                "description": "Backward compatible format for laboratory code mapping",
                "value": {
                    "mapping": {
                        "111": "CINFA S.A.",
                        "426": "NORMON S.A.",
                        "863": "KERN PHARMA, S.L.",
                        "644": "SANDOZ FARMACEUTICA, S.A",
                        "1079": "TEVA PHARMA, S.L.U",
                    },
                    "total_count": 1247,
                    "page": 1,
                },
            },
            "legacy_names_mapping": {
                "summary": "Legacy format - names to codes",
                "description": "Backward compatible format for laboratory name mapping",
                "value": {
                    "mapping": {
                        "CINFA S.A.": "111",
                        "NORMON S.A.": "426",
                        "KERN PHARMA, S.L.": "863",
                        "SANDOZ FARMACEUTICA, S.A": "644",
                    },
                    "total_count": 1247,
                    "page": 1,
                },
            },
        },
    )


class LaboratoryValidationError(BaseModel):
    """Structured validation error payload.

    ``error_type`` and ``message`` are required; ``details`` (free-form dict)
    and ``invalid_values`` (list of strings) are optional and default to None.
    """

    error_type: str = Field(..., description="Tipo de error")
    message: str = Field(..., description="Mensaje de error")
    details: Optional[Dict] = Field(default=None, description="Detalles adicionales")
    invalid_values: Optional[List[str]] = Field(default=None, description="Valores que causaron el error")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error_type": "validation_error",
                "message": "Códigos de laboratorio con formato inválido",
                "details": {"expected_format": "1-4 dígitos numéricos", "regex": "^\\d{1,4}$"},
                "invalid_values": ["12a", "55555", "abc"],
            }
        },
        openapi_examples={
            "invalid_codes": {
                "summary": "Invalid laboratory codes format",
                "description": "Common validation errors for laboratory codes",
                "value": {
                    "error_type": "format_error",
                    "message": "Códigos de laboratorio con formato inválido",
                    "details": {
                        "expected_format": "1-4 dígitos numéricos",
                        "pattern": "^\\d{1,4}$",
                        "examples_valid": ["111", "426", "1079", "12"],
                    },
                    # NOTE(review): "0000" matches the ^\d{1,4}$ pattern quoted
                    # above — confirm it really belongs in the invalid list.
                    "invalid_values": ["CIN", "12a", "55555", "NORMON", "0000"],
                },
            },
            "invalid_names": {
                "summary": "Invalid laboratory names",
                "description": "Validation errors for laboratory names with dangerous characters",
                "value": {
                    "error_type": "format_error",
                    "message": "Nombres de laboratorio con formato inválido o caracteres no permitidos",
                    "details": {
                        "max_length": 200,
                        # NOTE(review): this list matches the validator in
                        # LaboratoryNameRequest but not
                        # LaboratoryValidationConstants.DANGEROUS_CHARS, which
                        # omits "&" and the quote characters — confirm.
                        "forbidden_chars": ["<", ">", "'", '"', "&", "javascript:", "script"],
                        "examples_valid": ["CINFA S.A.", "NORMON S.A.", "KERN PHARMA, S.L."],
                    },
                    "invalid_values": ["<script>CINFA</script>", "NORMON'S LAB", "LAB&CO"],
                },
            },
            "pagination_error": {
                "summary": "Pagination parameter errors",
                "description": "Common pagination validation errors",
                "value": {
                    "error_type": "pagination_error",
                    "message": "Parámetros de paginación inválidos",
                    "details": {
                        # NOTE(review): these ranges match
                        # LaboratoryValidationConstants (10000 / 200), while
                        # GenericLaboratoriesRequest caps page at 1000 and
                        # per_page at 100 — confirm which limits apply.
                        "page_range": "1-10000",
                        "per_page_range": "1-200",
                        "current_values": {"page": 0, "per_page": 300},
                    },
                    "invalid_values": ["page: 0", "per_page: 300"],
                },
            },
            "too_many_codes": {
                "summary": "Too many codes in request",
                "description": "Request exceeds maximum allowed codes per request",
                "value": {
                    "error_type": "validation_error",
                    "message": "Demasiados códigos. Máximo permitido: 20",
                    "details": {
                        "max_codes": 20,
                        "received": 25,
                        "recommendation": "Split request into multiple batches",
                    },
                    "invalid_values": [],
                },
            },
        },
    )


class RateLimitExceededError(BaseModel):
    """Rate-limiting error payload.

    ``error_type`` defaults to "rate_limit_exceeded"; ``message``,
    ``retry_after`` (seconds) and ``limit_type`` are required.
    """

    error_type: str = Field(default="rate_limit_exceeded")
    message: str = Field(..., description="Mensaje de rate limit")
    retry_after: int = Field(..., description="Segundos hasta poder reintentar")
    limit_type: str = Field(..., description="Tipo de límite excedido")

    # NOTE(review): several examples below include keys ("current_count",
    # "limit", "recommendation") that are not declared as fields on this
    # model — confirm whether they should be added or removed.
    # NOTE(review): `openapi_examples` does not appear among the keys documented
    # for pydantic's ConfigDict — confirm that something actually consumes it.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error_type": "rate_limit_exceeded",
                "message": "Demasiadas requests. Intente nuevamente en 60 segundos",
                "retry_after": 60,
                "limit_type": "per_minute",
            }
        },
        openapi_examples={
            "minute_limit": {
                "summary": "Per-minute rate limit exceeded",
                "description": "Standard per-minute rate limiting for laboratory mapping endpoints",
                "value": {
                    "error_type": "rate_limit_exceeded",
                    "message": "Rate limit excedido: 60 requests por minuto. Intente nuevamente en 45 segundos",
                    "retry_after": 45,
                    "limit_type": "per_minute",
                    "current_count": 62,
                    "limit": 60,
                },
            },
            "hourly_limit": {
                "summary": "Per-hour rate limit exceeded",
                "description": "Hourly rate limit exceeded for heavy usage",
                "value": {
                    "error_type": "rate_limit_exceeded",
                    "message": "Rate limit excedido: 500 requests por hora. Intente nuevamente en 1800 segundos",
                    "retry_after": 1800,
                    "limit_type": "per_hour",
                    "current_count": 502,
                    "limit": 500,
                },
            },
            "burst_protection": {
                "summary": "Burst protection triggered",
                "description": "Anti-abuse protection for rapid successive requests",
                "value": {
                    "error_type": "rate_limit_exceeded",
                    "message": "Protección anti-abuso activada. Demasiadas requests rápidas. Intente nuevamente en 120 segundos",
                    "retry_after": 120,
                    "limit_type": "burst_protection",
                    "recommendation": "Implement exponential backoff in your client",
                },
            },
        },
    )


# Constantes para validación
class LaboratoryValidationConstants:
    """Shared limits, patterns and token lists for laboratory validation."""

    # --- Batch sizes and string lengths ---
    MAX_CODES_PER_REQUEST = 20
    MAX_NAMES_PER_REQUEST = 20
    MAX_NAME_LENGTH = 200
    MAX_SEARCH_LENGTH = 100

    # --- Pagination (phase 2.2) ---
    DEFAULT_PER_PAGE = 50
    MIN_PER_PAGE = 1
    MAX_PER_PAGE = 200  # up to 200 items per page are allowed
    MAX_PAGE = 10_000  # raised for cursor-based pagination
    CURSOR_BASED_THRESHOLD = 100  # switch to cursor-based paging after page 100

    # --- Format patterns ---
    LABORATORY_CODE_PATTERN = r"^\d{1,4}$"

    # --- Tokens treated as XSS-dangerous ---
    # "&", "'" and '"' were removed from this list on purpose: they occur in
    # legitimate laboratory names (e.g. "JOHNSON & JOHNSON", "L'ORÉAL") and
    # are encoded correctly by urllib.parse.quote().
    DANGEROUS_CHARS = ["<", ">", "javascript:", "script"]

    # --- Rate limiting ---
    REQUESTS_PER_MINUTE = 100
    REQUESTS_PER_HOUR = 1_000
    MAX_REQUEST_SIZE_KB = 50 * 1024  # 50 MB expressed in KB


# Utilidades de validación
def validate_laboratory_code(code: str) -> str:
    """Validate a single laboratory code and return it stripped.

    Args:
        code: Candidate code; surrounding whitespace is ignored.

    Returns:
        The stripped code.

    Raises:
        ValueError: if ``code`` is not a string or is not 1-4 ASCII digits.
    """
    if not isinstance(code, str):
        raise ValueError("Código debe ser string")

    code = code.strip()

    # re.ASCII restricts \d to 0-9. Without it, \d on a str pattern also
    # matches other Unicode decimal digits (e.g. Arabic-Indic), so such
    # codes would be accepted as "numeric".
    if not re.match(LaboratoryValidationConstants.LABORATORY_CODE_PATTERN, code, re.ASCII):
        raise ValueError(f"Código '{code}' debe ser numérico de 1-4 dígitos")

    return code


def sanitize_laboratory_name(name: str) -> str:
    """Strip a laboratory name and reject over-long or unsafe values.

    Args:
        name: Candidate laboratory name.

    Returns:
        The name with surrounding whitespace removed.

    Raises:
        ValueError: if ``name`` is not a string, exceeds
            LaboratoryValidationConstants.MAX_NAME_LENGTH after stripping, or
            contains one of LaboratoryValidationConstants.DANGEROUS_CHARS
            (case-insensitive substring match).
    """
    if not isinstance(name, str):
        raise ValueError("Nombre debe ser string")

    cleaned = name.strip()

    length_cap = LaboratoryValidationConstants.MAX_NAME_LENGTH
    if len(cleaned) > length_cap:
        raise ValueError(f"Nombre excede {length_cap} caracteres")

    # Report the first forbidden token, in list order, found in the name.
    lowered = cleaned.lower()
    offending = next(
        (token for token in LaboratoryValidationConstants.DANGEROUS_CHARS if token.lower() in lowered),
        None,
    )
    if offending is not None:
        raise ValueError(f"Nombre contiene caracteres no permitidos: {offending}")

    return cleaned


def validate_pagination_params(page: int, per_page: int) -> Dict[str, str]:
    """Check pagination parameters against the configured limits.

    Args:
        page: Requested page number.
        per_page: Requested page size.

    Returns:
        A dict mapping each offending parameter name ("page", "per_page") to
        an error message; empty when both parameters are within range.
    """
    limits = LaboratoryValidationConstants
    problems: Dict[str, str] = {}

    # Page must lie in [1, MAX_PAGE].
    if page < 1:
        problems["page"] = "Página debe ser mayor a 0"
    elif page > limits.MAX_PAGE:
        problems["page"] = f"Página no puede exceder {limits.MAX_PAGE}"

    # Page size must lie in [MIN_PER_PAGE, MAX_PER_PAGE].
    if per_page < limits.MIN_PER_PAGE:
        problems["per_page"] = f"Elementos por página debe ser al menos {limits.MIN_PER_PAGE}"
    elif per_page > limits.MAX_PER_PAGE:
        problems["per_page"] = f"Elementos por página no puede exceder {limits.MAX_PER_PAGE}"

    return problems


def calculate_pagination_metadata(total: int, page: int, per_page: int) -> PaginationMetadata:
    """Build the PaginationMetadata for one page of a result set.

    Args:
        total: Total number of elements in the result set.
        page: Current page number.
        per_page: Page size.

    Returns:
        A PaginationMetadata whose ``total_pages`` is ``ceil(total / per_page)``
        (1 when ``total`` is not positive), with ``has_next`` and
        ``has_previous`` derived from ``page``.

    Raises:
        ValueError: if ``per_page`` is 0 (previously surfaced as a
            ZeroDivisionError when ``total`` was positive). Other out-of-range
            values are left to PaginationMetadata's own field constraints.
    """
    if per_page == 0:
        raise ValueError("per_page no puede ser 0")

    # Integer ceiling division: -(-a // b) == ceil(a / b), without going
    # through floating point.
    total_pages = -(-total // per_page) if total > 0 else 1
    has_next = page < total_pages
    has_previous = page > 1

    return PaginationMetadata(
        total=total, page=page, per_page=per_page, total_pages=total_pages, has_next=has_next, has_previous=has_previous
    )
