#!/usr/bin/env python3
"""
Script para poblar la tabla dim_date con datos de fecha.

Issue #503: Implementar tabla dim_date para Time Intelligence.

Genera fechas desde 2020-01-01 hasta 2030-12-31 (~4000 registros).
Incluye nombres españoles, festivos nacionales, y temporadas farmacéuticas.

Uso:
    # Desde el directorio raíz del proyecto
    DATABASE_URL="postgresql://..." python backend/scripts/populate_dim_date.py

    # O con docker-compose
    docker-compose exec backend python scripts/populate_dim_date.py
"""

import logging
import os
import sys
from datetime import date, timedelta

# Añadir path para imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

# Reutilizar módulo existente (evitar duplicación - Code Review)
from app.utils.spanish_holidays import get_holiday_name

# Configure the root logger: INFO level, each line prefixed with timestamp and level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)


# ===== Constantes de nombres españoles =====

# Spanish month names keyed by month number (1 = January .. 12 = December).
# Each value is a (full name, three-letter abbreviation) pair.
MONTH_NAMES = dict(
    enumerate(
        [
            ("Enero", "Ene"),
            ("Febrero", "Feb"),
            ("Marzo", "Mar"),
            ("Abril", "Abr"),
            ("Mayo", "May"),
            ("Junio", "Jun"),
            ("Julio", "Jul"),
            ("Agosto", "Ago"),
            ("Septiembre", "Sep"),
            ("Octubre", "Oct"),
            ("Noviembre", "Nov"),
            ("Diciembre", "Dic"),
        ],
        start=1,
    )
)

# Spanish weekday names keyed by ISO weekday number (1 = Monday .. 7 = Sunday).
# Each value is a (full name, three-letter abbreviation) pair.
WEEKDAY_NAMES = dict(
    enumerate(
        [
            ("Lunes", "Lun"),
            ("Martes", "Mar"),
            ("Miércoles", "Mié"),
            ("Jueves", "Jue"),
            ("Viernes", "Vie"),
            ("Sábado", "Sáb"),
            ("Domingo", "Dom"),
        ],
        start=1,
    )
)

# NOTA: get_holiday_name se importa de app.utils.spanish_holidays
# Eliminada duplicación de código (Code Review Issue #503)


# ===== Funciones de generación de datos =====


def generate_dim_date_record(d: date) -> dict:
    """
    Build the dim_date row for a single calendar date.

    Args:
        d: The date to describe.

    Returns:
        A dict holding every dim_date column for ``d``: calendar parts,
        Spanish month/weekday names, weekend/holiday flags, pharmaceutical
        season flags and the ``year_month`` / ``year_quarter`` grouping keys.

    Note:
        ``year`` is the calendar year while ``week_of_year`` is the ISO-8601
        week number, so dates close to a year boundary can carry a week that
        belongs to the adjacent ISO year (e.g. 2021-01-01 is ISO week 53).
    """
    # isocalendar() yields (ISO year, ISO week, ISO weekday); the weekday runs
    # 1=Monday .. 7=Sunday. The original note says this matches
    # sales_data.weekday -- not verifiable from this file.
    _, week_number, weekday_number = d.isocalendar()
    month_number = d.month
    quarter_number = (month_number - 1) // 3 + 1

    # Spanish display names.
    long_month, short_month = MONTH_NAMES[month_number]
    long_weekday, short_weekday = WEEKDAY_NAMES[weekday_number]

    # A date is a holiday exactly when the holiday lookup returns a name.
    holiday = get_holiday_name(d)

    return {
        "date_key": d,
        "year": d.year,
        "quarter": quarter_number,
        "month": month_number,
        "week_of_year": week_number,
        "day_of_month": d.day,
        "day_of_year": d.timetuple().tm_yday,
        "weekday": weekday_number,
        "month_name": long_month,
        "month_name_short": short_month,
        "weekday_name": long_weekday,
        "weekday_name_short": short_weekday,
        # Saturday (6) or Sunday (7).
        "is_weekend": weekday_number in (6, 7),
        "is_holiday": holiday is not None,
        "holiday_name": holiday,
        # Pharmaceutical seasons: flu Oct-Mar, allergy Mar-Jun, summer Jul-Aug.
        # March intentionally belongs to both flu and allergy seasons.
        "is_flu_season": not 4 <= month_number <= 9,
        "is_allergy_season": 3 <= month_number <= 6,
        "is_summer_season": 7 <= month_number <= 8,
        "year_month": f"{d.year}-{month_number:02d}",
        "year_quarter": f"{d.year}-Q{quarter_number}",
    }


def populate_dim_date(
    engine,
    start_year: int = 2020,
    end_year: int = 2030,
    batch_size: int = 365,
) -> int:
    """
    Populate the dim_date table with one row per day of the given year range.

    The table is only filled when it is empty: if it already holds any row,
    that is logged and nothing is inserted. All batches are written through
    one session and committed once at the end; any error before the commit
    is rolled back, logged and re-raised.

    After a successful commit a few sanity figures are logged. That step is
    diagnostic only: if it fails, a warning is logged and the insert count is
    still returned, because the data is already committed.

    Args:
        engine: SQLAlchemy engine bound to the target database.
        start_year: First year to generate (inclusive, from January 1st).
        end_year: Last year to generate (inclusive, through December 31st).
        batch_size: Number of records accumulated before each bulk insert.
            Values below 1 behave like 1.

    Returns:
        Number of records inserted (0 when the table already had data).

    Raises:
        ValueError: If ``start_year`` is greater than ``end_year``.
        Exception: Any error raised while counting, generating or inserting
            rows is re-raised after the rollback.
    """
    # Reject an inverted range up front. Otherwise nothing would be generated
    # and the 0 result would be indistinguishable from "already populated".
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not be greater than end_year ({end_year})"
        )

    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Only seed an empty table; never touch existing data.
        existing_count = session.execute(
            text("SELECT COUNT(*) FROM dim_date")
        ).scalar()

        if existing_count > 0:
            logger.info(f"dim_date already has {existing_count} records. Skipping.")
            return 0

        start_date = date(start_year, 1, 1)
        end_date = date(end_year, 12, 31)
        total_days = (end_date - start_date).days + 1

        logger.info(f"Generating {total_days} date records ({start_year}-{end_year})...")

        records = []
        inserted = 0

        for offset in range(total_days):
            records.append(
                generate_dim_date_record(start_date + timedelta(days=offset))
            )

            # Flush a full batch and report progress.
            if len(records) >= batch_size:
                _insert_batch(session, records)
                inserted += len(records)
                logger.info(f"  Inserted {inserted}/{total_days} records...")
                records = []

        # Insert whatever is left after the last full batch.
        if records:
            _insert_batch(session, records)
            inserted += len(records)

        session.commit()

    except Exception as e:
        session.rollback()
        logger.error(f"Error populating dim_date: {e}")
        raise
    else:
        logger.info(f"Successfully populated dim_date with {inserted} records")

        # The data is committed at this point, so a failing sanity query must
        # not be reported as a population failure.
        try:
            _verify_population(session)
        except Exception as e:
            logger.warning(f"dim_date was populated but verification failed: {e}")

        return inserted
    finally:
        session.close()


def _insert_batch(session, records: list) -> None:
    """Bulk-insert a batch of dim_date records through the given session.

    The rows are handed to ``Session.bulk_insert_mappings`` in one call; the
    original note (Code Review Issue #503) chose this for performance over
    row-by-row inserts. Nothing is committed here.

    Args:
        session: SQLAlchemy session used for the insert.
        records: Dicts as produced by ``generate_dim_date_record``.
    """
    # Imported at call time rather than at module import.
    from app.models.dim_date import DimDate as dim_date_model

    session.bulk_insert_mappings(dim_date_model, records)


def _verify_population(session) -> None:
    """Log a few sanity figures about the contents of dim_date.

    Reports the total row count, the number of holiday rows, the stored
    values for 2025-12-25 (only if that date exists) and the number of
    flu-season days in 2025. Nothing is asserted or returned; the output is
    purely informational.

    Args:
        session: SQLAlchemy session used to run the read-only queries.
    """

    def fetch_scalar(sql: str):
        # Run a single-value query and return that value.
        return session.execute(text(sql)).scalar()

    # Overall row count.
    row_count = fetch_scalar("SELECT COUNT(*) FROM dim_date")
    logger.info(f"  Total records: {row_count}")

    # Rows flagged as holidays.
    holiday_count = fetch_scalar(
        "SELECT COUNT(*) FROM dim_date WHERE is_holiday = true"
    )
    logger.info(f"  Holiday records: {holiday_count}")

    # Spot check: Christmas Day 2025.
    christmas = session.execute(
        text(
            "SELECT weekday_name, is_holiday, holiday_name FROM dim_date WHERE date_key = '2025-12-25'"
        )
    ).fetchone()
    if christmas:
        logger.info(
            f"  Navidad 2025: {christmas.weekday_name}, holiday={christmas.is_holiday}, name={christmas.holiday_name}"
        )

    # Flu-season days within 2025.
    flu_day_count = fetch_scalar(
        "SELECT COUNT(*) FROM dim_date WHERE is_flu_season = true AND year = 2025"
    )
    logger.info(f"  Flu season days (2025): {flu_day_count}")


def main():
    """Script entry point: populate dim_date in the configured database.

    Reads the connection string from the DATABASE_URL environment variable,
    creates an engine and runs ``populate_dim_date`` with its defaults.
    Exits with status 1 when the variable is missing or population fails.
    """
    database_url = os.getenv("DATABASE_URL")
    if not database_url:
        logger.error("DATABASE_URL environment variable not set")
        sys.exit(1)

    engine = create_engine(database_url)

    try:
        inserted = populate_dim_date(engine)
        # A zero count means the table already held data and was left alone.
        outcome = (
            f"Done! Inserted {inserted} records into dim_date."
            if inserted > 0
            else "Table already populated. No changes made."
        )
        logger.info(outcome)
    except Exception as exc:
        logger.error(f"Failed to populate dim_date: {exc}")
        sys.exit(1)


# Run the population only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
