"""
Subcategory L2 Classifier Service - Issue #505

This service classifies products into L2 subcategories using a two-tier strategy:

1. TIER 1: Keyword-based classification (high confidence: 0.90)
   - Solar facial vs corporal: "facial", "fusion water", "color" vs "spray", "200ml"
   - Tratamiento avanzado: "retinol", "vitamina c", "serum"
   - etc.

2. TIER 2: Groq LLM for semantic fallback (moderate confidence: 0.70)
   - Used when Tier 1 doesn't match
   - Handles complex/ambiguous products

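Exclusions (all of these are returned with l2_category=None):
- Accessories (envases, estuches, neceseres) -> source "excluded"
  (if the LLM answers "otros_no_clasificados", that is also treated as no L2)
- Empty containers without product content -> source "excluded"
- Products whose L1 category (ml_category) has no L2 subcategories
  (i.e. NOT in L1_WITH_L2) -> source "not_applicable"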

Philosophy:
"No clasificamos por lo que el producto ES, sino por CÓMO se vende y qué margen deja."
"""

import logging
import os
import re
import time
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional, Tuple

from ..schemas.symptom_taxonomy import (
    L1_TO_L2_CATEGORIES,
    L1_WITH_L2,
    get_l2_display_name,
    has_l2_subcategories,
)

logger = logging.getLogger(__name__)


class L2ClassificationSource(str, Enum):
    """Origin of an L2 classification result.

    Inherits from ``str`` so each member compares equal to its string value;
    ``.value`` is what gets serialized in API responses.
    """
    # Tier 1 keyword rule matched (high confidence). The classifier also
    # reports this source, with a null category, when nothing matched.
    TIER1_KEYWORDS = "tier1_keywords"
    # Answer came from the Groq LLM fallback (Tier 2).
    GROQ = "groq"
    # Human validation. Not produced anywhere in this module; presumably set
    # by an external review flow (verify against callers).
    HUMAN = "human"
    # Result was served from the in-memory cache.
    CACHE = "cache"
    # Product matched an exclusion pattern (accessory, container, pack, ...).
    EXCLUDED = "excluded"
    # The L1 category has no L2 subcategories at all.
    NOT_APPLICABLE = "not_applicable"


@dataclass
class L2ClassificationResult:
    """Outcome of classifying a single product into an L2 subcategory."""

    # Assigned L2 code, or None when nothing was assigned.
    l2_category: Optional[str]
    # Confidence score attached by the stage that produced the result.
    confidence: float
    # Which stage produced the result.
    source: L2ClassificationSource
    # Text (or exclusion pattern) that triggered the decision, if any.
    matched_term: Optional[str] = None
    # Human-readable explanation of the decision.
    arbiter_reason: Optional[str] = None
    product_name: str = ""
    l1_category: str = ""
    processing_time_ms: int = 0

    def to_dict(self) -> Dict:
        """Serialize the result into a plain dictionary for API responses."""
        # A display name is only looked up when a subcategory was assigned.
        display_name = None
        if self.l2_category:
            display_name = get_l2_display_name(self.l2_category)

        return dict(
            l2_category=self.l2_category,
            l2_display_name=display_name,
            confidence=self.confidence,
            source=self.source.value,
            matched_term=self.matched_term,
            arbiter_reason=self.arbiter_reason,
            product_name=self.product_name,
            l1_category=self.l1_category,
            processing_time_ms=self.processing_time_ms,
        )


class SubcategoryClassifierService:
    """
    Two-tier L2 subcategory classifier.

    Tier 1: ordered keyword (regex) rules per L1 category (0.90 confidence).
    Tier 2: Groq LLM fallback, used only when Tier 1 finds nothing
            (confidence capped at 0.70).

    Products matching an exclusion pattern are returned without an L2
    category, and results are memoized in an in-memory, per-instance cache.
    """

    # Confidence values attached to results, by decision stage.
    # Assigned to every Tier 1 keyword match.
    TIER1_CONFIDENCE = 0.90
    # Upper bound applied to the confidence of Groq (Tier 2) answers.
    TIER2_CONFIDENCE = 0.70
    # Assigned when a product is rejected by an exclusion pattern.
    EXCLUDED_CONFIDENCE = 0.95

    # ==========================================================================
    # TIER 1: Keyword Rules by L1 Category
    # ==========================================================================
    # Pattern: List of (pattern, l2_category) tuples
    # Patterns are regex (case-insensitive)

    # Maps each L1 category to an ORDERED list of (regex, l2_category) rules.
    # Rules are compiled case-insensitively and the first rule that matches
    # wins, so specific rules must be listed before generic ones.
    # Conventions:
    #   - keywords are written without accents; where the only difference is
    #     n/ñ the pattern accepts both spellings ("ba[nñ]o"),
    #   - short tokens and brand names that are substrings of common words
    #     ("prime" in "primer", "mam" in "mama") are wrapped in \b.
    L2_KEYWORD_RULES: Dict[str, List[Tuple[str, str]]] = {
        # ======================================================================
        # DERMOCOSMETICS (7 L2)
        # ======================================================================
        "dermocosmetica": [
            # SOLAR_FACIAL - Facial sun care (premium, high margin)
            (r"(solar|spf|sun).*(facial|cara|face|rostro)", "solar_facial"),
            (r"fusion\s*water", "solar_facial"),
            (r"fluid[oe]?\s*(solar|spf)", "solar_facial"),
            (r"(color|pigment).*(spf|solar)", "solar_facial"),
            (r"(antiox|age\s*repair).*(spf|solar)", "solar_facial"),
            (r"(melascreen|pigmentclar|photoderm).*(spf|spot)", "solar_facial"),
            (r"spf\s*50.*(?:fluido|crema)(?!.*corporal)", "solar_facial"),

            # SOLAR_CORPORAL - Sprays, large formats, body application
            (r"(spray|bruma).*(solar|spf|sun)", "solar_corporal"),
            (r"(solar|spf|sun).*(spray|bruma)", "solar_corporal"),
            (r"(solar|spf).*(?:200|300|400)\s*ml", "solar_corporal"),
            (r"(leche|locion).*(solar|spf)", "solar_corporal"),
            (r"(solar|spf).*(corporal|body|cuerpo)", "solar_corporal"),
            (r"after\s*sun", "solar_corporal"),
            (r"broncea", "solar_corporal"),

            # TRATAMIENTO_AVANZADO - Premium actives, serums, ampoules
            (r"retinol", "tratamiento_avanzado"),
            (r"vitamina\s*c", "tratamiento_avanzado"),
            (r"(serum|suero)", "tratamiento_avanzado"),
            (r"ampolla", "tratamiento_avanzado"),
            (r"(acido\s*hialur|hyaluron)", "tratamiento_avanzado"),
            (r"(niacin|peptid|bakuchiol)", "tratamiento_avanzado"),
            (r"(antimanchas|despigment)", "tratamiento_avanzado"),
            (r"(concentrado|booster)", "tratamiento_avanzado"),
            (r"(retin|niacina|peptido)", "tratamiento_avanzado"),
            (r"(renovador|peeling|exfoli).*noche", "tratamiento_avanzado"),

            # HIDRATACION_BASICA - Everyday moisturizers
            (r"(crema|emulsion).*hidrat", "hidratacion_basica"),
            (r"hidrat.*(crema|emulsion|gel)", "hidratacion_basica"),
            (r"(moisturiz|hidratante)", "hidratacion_basica"),
            (r"(balsamo|manteca).*corporal", "hidratacion_basica"),
            (r"(leche|locion).*corporal(?!.*solar)", "hidratacion_basica"),

            # HIGIENE_LIMPIEZA - Cleansers, soaps, toners
            (r"(limpia|clean|gel\s*limpia)", "higiene_limpieza"),
            (r"(jabon|soap)", "higiene_limpieza"),
            (r"(tonico|toner)", "higiene_limpieza"),
            (r"(desmaqui|micelar)", "higiene_limpieza"),
            (r"(exfoliant|scrub)", "higiene_limpieza"),
            (r"(gel|espuma).*limpi", "higiene_limpieza"),
            (r"(ducha|shower|ba[nñ]o).*gel", "higiene_limpieza"),

            # CAPILAR_TRATANTE - Hair treatments (not basic shampoos)
            (r"(minoxidil|triphasic|quinina)", "capilar_tratante"),
            (r"(anticaida|anti.?caida)", "capilar_tratante"),
            (r"(champu|shampoo).*(tratante|medicado|anticaida)", "capilar_tratante"),
            (r"(serum|locion|ampolla).*cabello", "capilar_tratante"),
            (r"(mascarilla|tratamiento).*capilar", "capilar_tratante"),

            # ACNE_PIEL_GRASA - Acne and oily-skin products
            (r"(acne|antiacne|anti.?acne)", "acne_piel_grasa"),
            (r"(sebo|seborr|grasa)", "acne_piel_grasa"),
            (r"(effaclar|cleanance|keracnyl)", "acne_piel_grasa"),
            (r"(piel\s*grasa|oily\s*skin)", "acne_piel_grasa"),
            (r"(purificante|matificante)", "acne_piel_grasa"),
            (r"(espinillas|puntos\s*negros|comedones)", "acne_piel_grasa"),
        ],

        # ======================================================================
        # SUPPLEMENTS (6 L2)
        # ======================================================================
        "suplementos": [
            # DESCANSO_ESTRES - Sleep, relaxation, stress
            (r"(melaton|melamil|zzzquil)", "descanso_estres"),
            (r"(dormi|sue[nñ]o|sleep)", "descanso_estres"),
            (r"(relax|tranquil|calm)", "descanso_estres"),
            (r"(estres|stress|ansiedad)", "descanso_estres"),
            # \btila\b: avoid matching inside words such as "metilado"
            (r"(valerian|pasiflora|\btila\b)", "descanso_estres"),
            (r"(gaba|triptofano|5.?htp)", "descanso_estres"),

            # ENERGIA_VITALIDAD - Stimulants, energy vitamins
            (r"(energia|energy|vitalidad)", "energia_vitalidad"),
            (r"(jalea\s*real|royal\s*jelly)", "energia_vitalidad"),
            # \bmaca\b: avoid matching inside "macadamia"
            (r"(ginseng|guarana|\bmaca\b)", "energia_vitalidad"),
            (r"(revital|supradyn|berocca)", "energia_vitalidad"),
            (r"(multivitamin|vitaminas?\s*(?:y|&)?\s*minerales)", "energia_vitalidad"),
            (r"(cansancio|fatiga).*(?:reductor|combat)", "energia_vitalidad"),

            # ARTICULAR_OSEA - Bones, joints, collagen
            (r"(colageno|collagen)", "articular_osea"),
            (r"(articul|articulacion)", "articular_osea"),
            (r"(hueso|oseo|bone)", "articular_osea"),
            (r"(condroit|glucosam)", "articular_osea"),
            (r"(calcio\s*(?:y|&|\+)?\s*(?:vitamina\s*)?d)", "articular_osea"),
            (r"(epaplus|mobiflor|flexadin)", "articular_osea"),
            (r"(magnesio.*(?:muscular|articular))", "articular_osea"),

            # DEFENSAS_INMUNIDAD - Immune system, propolis, echinacea
            (r"(defensa|inmun|immune)", "defensas_inmunidad"),
            (r"(propoleo|propolis)", "defensas_inmunidad"),
            (r"(echinacea|equinacea)", "defensas_inmunidad"),
            (r"(vitamina\s*c(?:\s|\+|$))", "defensas_inmunidad"),
            (r"(zinc(?:\s|\+|$))", "defensas_inmunidad"),
            (r"(resfriado|gripe|catarro).*(?:prevenir|combat)", "defensas_inmunidad"),

            # DIGESTIVO_PROBIOTICOS - Gut flora, digestion
            (r"(probio|lactobac|bifidobac)", "digestivo_probioticos"),
            (r"(flora\s*intestin|microbiota)", "digestivo_probioticos"),
            (r"(digest|digestion)", "digestivo_probioticos"),
            (r"(enzyme|enzima).*digest", "digestivo_probioticos"),
            (r"(hinchaz|flatulen|gases)", "digestivo_probioticos"),
            (r"(colon|intestin).*(?:salud|equilibrio)", "digestivo_probioticos"),

            # CONTROL_PESO_L2 - Diet, satiety aids, fat burners
            (r"(adelgaz|dieta|diet)", "control_peso_l2"),
            (r"(saciant|saciedad)", "control_peso_l2"),
            (r"(quema.?gras|fat\s*burn)", "control_peso_l2"),
            (r"(detox|depurat|drenante)", "control_peso_l2"),
            (r"(metabol|termogen)", "control_peso_l2"),
            (r"(xls|reductor|redustat)", "control_peso_l2"),
        ],

        # ======================================================================
        # ORAL HYGIENE (5 L2)
        # ======================================================================
        "higiene_bucal": [
            # HIGIENE_DIARIA_BASICA - Brushes, basic toothpastes, mouthwashes
            (r"(cepillo|brush)", "higiene_diaria_basica"),
            (r"(dentifric|pasta\s*(?:de\s*)?dient)", "higiene_diaria_basica"),
            (r"(enjuague|colutorio)(?!.*sensib|.*encia)", "higiene_diaria_basica"),
            (r"(hilo|seda)\s*dental", "higiene_diaria_basica"),
            (r"(fluor|anticaries)", "higiene_diaria_basica"),

            # SENSIBILIDAD_ENCIAS - Sensodyne, sensitive gums
            (r"(sensib.*dent|sensodyne)", "sensibilidad_encias"),
            (r"(encia|gum).*(?:sensib|sangr|inflam)", "sensibilidad_encias"),
            (r"(gingivitis|periodont)", "sensibilidad_encias"),
            (r"(parodont|kin\s*forte)", "sensibilidad_encias"),
            (r"(lacer\s*(?:oros|encias))", "sensibilidad_encias"),

            # ESTETICA_BLANQUEAMIENTO - Whiteners, strips, kits
            (r"(blanque|whiten)", "estetica_blanqueamiento"),
            (r"(white|blanco).*(?:dient|teeth)", "estetica_blanqueamiento"),
            (r"(strips|tiras).*(?:blanque|white)", "estetica_blanqueamiento"),
            (r"(kit|tratamiento).*blanque", "estetica_blanqueamiento"),

            # TRATAMIENTO_BUCAL - Mouth ulcers, cold sores, halitosis
            (r"(afta|ulcera\s*bucal)", "tratamiento_bucal"),
            (r"(herpes|labial).*(?:tratami|parche)", "tratamiento_bucal"),
            (r"(halitosis|mal\s*aliento)", "tratamiento_bucal"),
            (r"(protesis|adhesivo).*dental", "tratamiento_bucal"),
            (r"(boquera|queilitis)", "tratamiento_bucal"),

            # ORTODONCIA_INTERDENTALES - Interprox, orthodontic wax, irrigators
            (r"(interprox|interdental|tepe)", "ortodoncia_interdentales"),
            (r"(ortodoncia|brackets)", "ortodoncia_interdentales"),
            (r"(cera|protector).*ortod", "ortodoncia_interdentales"),
            (r"(irrigador|waterpik)", "ortodoncia_interdentales"),
            (r"(superfloss|ultra\s*floss)", "ortodoncia_interdentales"),
        ],

        # ======================================================================
        # INFANT (4 L2) - ADR-004: Added Q1 2026
        # ======================================================================
        "infantil": [
            # NUTRICION_INFANTIL - Milks, baby food jars, porridges, cereals
            (r"leche.*(continuacion|inicio|crecimiento|etapa|maternizada|infantil)", "nutricion_infantil"),
            # Brands are word-bounded so "nan" does not match inside "banana"
            (r"\b(blemil|almiron|nan|nutriben|nestle|enfamil|aptamil|hero\s*baby|puleva\s*bebe)\b.*leche", "nutricion_infantil"),
            (r"(leche|formula).*(bebe|infantil|lactante|prematuro)", "nutricion_infantil"),
            (r"(potito|tarrito|papilla).*(fruta|pollo|ternera|verdura|pescado)", "nutricion_infantil"),
            (r"(cereales|papilla).*(bebe|infantil|sin\s*gluten)", "nutricion_infantil"),
            (r"(hero\s*baby|nestle|nutriben|blevit|almiron).*(?:potito|papilla|cereales)", "nutricion_infantil"),
            (r"(mi\s*primer|primeros\s*pasos).*(?:potito|papilla)", "nutricion_infantil"),

            # HIGIENE_BEBE - Diapers, wipes, gels, shampoos
            (r"(toallita|wipe).*(bebe|infantil|baby|sensitive)", "higiene_bebe"),
            (r"(pa[nñ]al|diaper).*(bebe|infantil|dodot|huggies|chelino)", "higiene_bebe"),
            (r"(dodot|huggies|chelino|pampers|moltex).*(?:pa[nñ]al|toallita)", "higiene_bebe"),
            (r"gel.*(bebe|infantil|baby|pediatrico|recien\s*nacido)", "higiene_bebe"),
            (r"(champu|shampoo).*(bebe|infantil|baby|suave)", "higiene_bebe"),
            (r"(mustela|weleda|isdin\s*baby|suavinex|chicco).*(?:gel|champu|ba[nñ]o|limpiador)", "higiene_bebe"),
            (r"(ba[nñ]o|bath).*(bebe|infantil|baby)", "higiene_bebe"),
            (r"(colonia|agua\s*de\s*colonia).*(bebe|infantil)", "higiene_bebe"),

            # CUIDADOS_ESPECIFICOS - Diaper creams, atopic skin, irritations
            (r"(crema|pomada|balsamo).*(pa[nñ]al|culito|irritacion)", "cuidados_especificos"),
            (r"(eryplast|mitosyl|nutraisdin|bepanthol|weleda\s*calendula)", "cuidados_especificos"),
            (r"(atopia|atopica|dermatitis).*(bebe|infantil|baby)", "cuidados_especificos"),
            (r"(costra\s*lactea|craddle\s*cap)", "cuidados_especificos"),
            (r"(pasta\s*al\s*agua|oxido\s*de\s*zinc).*bebe", "cuidados_especificos"),
            (r"(piel\s*sensible|piel\s*reactiva).*(bebe|infantil)", "cuidados_especificos"),
            (r"(mustela\s*stelatopia|a-derma\s*exomega|bioderma\s*atoderm).*bebe", "cuidados_especificos"),

            # BIBERONERIA_ACCESORIOS - Bottles, pacifiers, breast pumps
            (r"(biberon|bottle).*(bebe|infantil|baby|anticolico)", "biberoneria_accesorios"),
            (r"(chupete|pacifier|chupo).*(bebe|infantil|silicona|latex)", "biberoneria_accesorios"),
            (r"(tetina|nipple).*(biberon|silicona|latex)", "biberoneria_accesorios"),
            # Brands are word-bounded so "mam" does not match inside "mama"
            (r"\b(suavinex|avent|chicco|nuk|medela|dr\s*brown|mam|tommee\s*tippee)\b", "biberoneria_accesorios"),
            (r"(extractor|sacaleche|breast\s*pump)", "biberoneria_accesorios"),
            (r"(esterilizador|sterilizer)", "biberoneria_accesorios"),
            (r"(mordedor|teether)", "biberoneria_accesorios"),
            (r"(calienta\s*biberon|calientabiberones)", "biberoneria_accesorios"),
            (r"(limpia\s*biberon|escobilla).*biberon", "biberoneria_accesorios"),
        ],

        # ======================================================================
        # SEXUAL (3 L2) - ADR-004: Added Q1 2026
        # ======================================================================
        "sexual": [
            # SALUD_INTIMA_FEMENINA - Intimate hygiene, vaginal probiotics, menopause
            # NOTE: Must be BEFORE preservativos_lubricantes to avoid false positives
            (r"(vaginesil|lactacyd|saforelle|chilly)", "salud_intima_femenina"),
            (r"(gel|jabon|limpiador).*(higiene|intim).*(femeni|mujer|vaginal)", "salud_intima_femenina"),
            (r"(flora|probiotico).*(vaginal|intimo)", "salud_intima_femenina"),
            (r"(isdin\s*woman|cumlaude|ginecanesten)", "salud_intima_femenina"),
            (r"(sequedad|hidratante).*(vaginal|intima)", "salud_intima_femenina"),
            (r"(menopausia|climaterio).*(gel|crema|lubricante)", "salud_intima_femenina"),
            (r"(ovulo|comprimido).*(vaginal)", "salud_intima_femenina"),
            (r"(candidiasis|picor|irritacion).*(vaginal|intimo)", "salud_intima_femenina"),
            # "ph" must be a standalone token ("pH", "pH5.5"), not part of "pharma"
            (r"(\bph(?![a-z])|equilibrio).*(intimo|vaginal)", "salud_intima_femenina"),
            (r"(toallita|wipe).*(intima|vaginal)", "salud_intima_femenina"),
            (r"higiene\s*intima", "salud_intima_femenina"),

            # PRESERVATIVOS_LUBRICANTES - Condoms, lubricants, intimate gels
            (r"(preservativo|condon|profilactico)", "preservativos_lubricantes"),
            # Brands are word-bounded: "prime" must not match "primer",
            # otherwise pregnancy tests are captured before their own rules
            (r"\b(durex|control|prime|pasante|trojan)\b", "preservativos_lubricantes"),
            (r"lubricante(?!.*vaginal)", "preservativos_lubricantes"),
            (r"(gel|crema).*(erotico|placer)", "preservativos_lubricantes"),  # Removed "intimo" to avoid false positives
            (r"\b(play|sensilube|ky\s*jelly)\b", "preservativos_lubricantes"),
            (r"(retardante|prolongador)", "preservativos_lubricantes"),
            (r"(anillo|vibrador|estimul)", "preservativos_lubricantes"),
            (r"(masaje|massage).*(intimo|sensual|erotico)", "preservativos_lubricantes"),
            (r"(latex|ultrafino|sensitivo).*(preservativo|condon)", "preservativos_lubricantes"),

            # TEST_FERTILIDAD_EMBARAZO - Pregnancy, ovulation and fertility tests
            (r"test.*(embarazo|gestacion|pregnancy)", "test_fertilidad_embarazo"),
            (r"(clearblue|predictor|first\s*response)", "test_fertilidad_embarazo"),
            (r"test.*(ovulacion|fertilidad|ovulation)", "test_fertilidad_embarazo"),
            (r"(monitor|lector).*(fertilidad|ovulacion)", "test_fertilidad_embarazo"),
            (r"(prueba|autotest).*(embarazo|ovulacion)", "test_fertilidad_embarazo"),
            (r"(tira|strip).*(ovulacion|lh|hcg)", "test_fertilidad_embarazo"),
            (r"(fsh|lh|beta.?hcg).*test", "test_fertilidad_embarazo"),
        ],

        # ======================================================================
        # WEIGHT CONTROL (3 L2) - ADR-004: Added Q1 2026
        # ======================================================================
        "control_peso": [
            # SUSTITUTIVOS_COMIDA - Bars, meal-replacement shakes
            (r"(bimanan|siken|optifast|modifast)", "sustitutivos_comida"),
            (r"(barrita|snack).*(sustitut|diet|proteic)", "sustitutivos_comida"),
            (r"(batido|shake).*(sustitut|diet|meal)", "sustitutivos_comida"),
            (r"(sustitut|reempla).*(comida|meal|cena|desayuno)", "sustitutivos_comida"),
            (r"(diet).*(barrita|batido|natilla|crema)", "sustitutivos_comida"),
            (r"(proteina|protein).*(diet|adelgaz)", "sustitutivos_comida"),
            (r"(natilla|crema|pudin).*(diet|hipocalorica)", "sustitutivos_comida"),
            (r"(comida|meal).*(completa|equilibrada).*(?:baja|light)", "sustitutivos_comida"),
            (r"(herbalife|exante|slim.?fast)", "sustitutivos_comida"),

            # QUEMAGRASAS_DRENANTES - XLS, L-carnitine, draining, thermogenics
            (r"(xls\s*medical|redustat|\balli\b)", "quemagrasas_drenantes"),
            (r"(quema.?gras|fat\s*burn|liporeductor)", "quemagrasas_drenantes"),
            (r"(l.?carnitina|carnitine)", "quemagrasas_drenantes"),
            (r"(drenante|drena|depurat|detox)", "quemagrasas_drenantes"),
            (r"(termogen|metabol|acelera)", "quemagrasas_drenantes"),
            (r"(retencion|elimina).*(liquid|agua)", "quemagrasas_drenantes"),
            (r"(pi[nñ]a|cola\s*de\s*caballo|hinojo|te\s*verde).*(?:dren|adelg)", "quemagrasas_drenantes"),
            # \bcla\b: the acronym only, not "clasico" / "mezcla"
            (r"(cetona|\bcla\b|acido\s*linoleico)", "quemagrasas_drenantes"),
            (r"(arkofluido|draineur|turbodrain)", "quemagrasas_drenantes"),
            (r"(cafe\s*verde|green\s*coffee).*(?:extract|capsul)", "quemagrasas_drenantes"),

            # CONTROL_APETITO - Satiety aids, blockers, slimming fibre
            (r"(saciant|saciedad|hambre)", "control_apetito"),
            (r"(glucomanano|konjac)", "control_apetito"),
            (r"(chitosan|chitosano|capta.?gras)", "control_apetito"),
            (r"(bloqueador|bloquea).*(gras|carbohidrato|caloria)", "control_apetito"),
            (r"(fibra|psyllium|ispagula).*(?:adelgaz|saciant)", "control_apetito"),
            (r"(reductor|reduce).*(apetito|hambre|ansiedad)", "control_apetito"),
            (r"(ansiedad|antojos|picoteo)", "control_apetito"),
            (r"(nopal|fucus|espirulina).*(?:control|adelg|peso)", "control_apetito"),
            (r"(garcinia|hoodia|caralluma)", "control_apetito"),
        ],
    }

    # ==========================================================================
    # EXCLUSION PATTERNS - Accessories, containers, services
    # ==========================================================================
    # Ordered (regex, reason) pairs; a product whose name matches any of them
    # is excluded from L2 classification. Compiled case-insensitively.
    EXCLUSION_PATTERNS: List[Tuple[str, str]] = [
        # Anchored rules tolerate leading whitespace in raw product names.
        (r"^\s*(estuche|neceser|bolsa)\b", "accessory"),
        (r"^\s*envase\s*(vac[ií]o|recarga)", "container"),
        # Refills are excluded EXCEPT toothbrush heads: "cepillo" may appear
        # before or after the refill word, so the whole name is checked.
        (r"^(?!.*cepillo).*(recambio|repuesto)", "refill_only"),
        # Lote 3, Pack 2, etc. Word-bounded so "Corset 2" is not a pack.
        (r"\b(lote|pack|set)\s*\d", "pack"),
        (r"^\s*regalo\s", "gift"),
        (r"(muestrario|expositor|display)", "display"),
    ]

    def __init__(self, use_cache: bool = True):
        self._cache: Dict[str, L2ClassificationResult] = {}
        self._use_cache = use_cache
        self._groq_enabled = self._check_groq_enabled()

        # Compile regex patterns for performance
        self._compiled_rules: Dict[str, List[Tuple[re.Pattern, str]]] = {}
        for l1_cat, rules in self.L2_KEYWORD_RULES.items():
            self._compiled_rules[l1_cat] = [
                (re.compile(pattern, re.IGNORECASE), l2_cat)
                for pattern, l2_cat in rules
            ]

        self._compiled_exclusions = [
            (re.compile(pattern, re.IGNORECASE), reason)
            for pattern, reason in self.EXCLUSION_PATTERNS
        ]

    def _check_groq_enabled(self) -> bool:
        """Check if Groq API is available."""
        groq_key = os.environ.get("GROQ_API_KEY")
        if groq_key:
            return True
        logger.warning("GROQ_API_KEY not set - L2 classification will use Tier1 only")
        return False

    def classify(
        self,
        product_name: str,
        l1_category: str,
        brand: Optional[str] = None,
        skip_groq: bool = False,
        force_refresh: bool = False,
    ) -> L2ClassificationResult:
        """
        Classify a product into an L2 subcategory.

        Stages run in order and the first one that decides wins:
        cache lookup, L1 applicability check, exclusion patterns, Tier 1
        keyword rules, Tier 2 Groq fallback, and finally a "no match" result
        (l2_category=None, confidence 0.0).

        Args:
            product_name: Raw product name. None is treated as an empty name
                and surrounding whitespace is ignored for matching.
            l1_category: L1 category. Categories without L2 subcategories
                yield a NOT_APPLICABLE result.
            brand: Optional detected brand (used in the cache key and as
                context for the LLM).
            skip_groq: Skip the Groq LLM fallback for this call.
            force_refresh: Ignore any cached result and recompute.

        Returns:
            L2ClassificationResult describing the decision and its source.
        """
        start_time = time.time()

        # Guard against None and use one normalized name for both the cache
        # key and the matching stages, so results do not depend on padding.
        product_name = product_name or ""
        name = product_name.strip()

        def build(l2_category, confidence, source, matched_term=None, arbiter_reason=None):
            # Fills in the fields that are common to every outcome.
            return L2ClassificationResult(
                l2_category=l2_category,
                confidence=confidence,
                source=source,
                matched_term=matched_term,
                arbiter_reason=arbiter_reason,
                product_name=product_name,
                l1_category=l1_category,
                processing_time_ms=int((time.time() - start_time) * 1000),
            )

        cache_key = f"{l1_category}:{name.lower()}"
        if brand:
            cache_key = f"{cache_key}:{brand.lower().strip()}"

        # Check cache
        if self._use_cache and not force_refresh and cache_key in self._cache:
            cached = self._cache[cache_key]
            # Return a copy flagged as served from cache
            return build(
                cached.l2_category,
                cached.confidence,
                L2ClassificationSource.CACHE,
                matched_term=cached.matched_term,
                arbiter_reason=cached.arbiter_reason,
            )

        # L1 categories without L2 subcategories are answered immediately
        # (cheap to recompute, so this outcome is not cached).
        if not has_l2_subcategories(l1_category):
            return build(
                None,
                1.0,
                L2ClassificationSource.NOT_APPLICABLE,
                arbiter_reason=f"L1 category '{l1_category}' has no L2 subcategories",
            )

        # Check exclusions (accessories, containers, etc.)
        exclusion = self._check_exclusions(name)
        if exclusion:
            pattern, reason = exclusion
            result = build(
                None,
                self.EXCLUDED_CONFIDENCE,
                L2ClassificationSource.EXCLUDED,
                matched_term=pattern,
                arbiter_reason=f"EXCLUDED ({reason}): {product_name[:30]}",
            )
            if self._use_cache:
                self._cache[cache_key] = result
            return result

        # TIER 1: Keyword-based classification
        tier1_match = self._match_tier1(name, l1_category)
        if tier1_match:
            l2_category, matched_term = tier1_match
            result = build(
                l2_category,
                self.TIER1_CONFIDENCE,
                L2ClassificationSource.TIER1_KEYWORDS,
                matched_term=matched_term,
                arbiter_reason=f"TIER1: {matched_term} -> {l2_category}",
            )
            if self._use_cache:
                self._cache[cache_key] = result
            return result

        # TIER 2: Groq LLM fallback
        if not skip_groq and self._groq_enabled:
            groq_result = self._classify_with_groq(name, l1_category, brand)
            if groq_result:
                l2_category, confidence, reason = groq_result
                result = build(
                    l2_category,
                    min(confidence, self.TIER2_CONFIDENCE),  # Cap at Tier 2 ceiling
                    L2ClassificationSource.GROQ,
                    arbiter_reason=reason,
                )
                if self._use_cache:
                    self._cache[cache_key] = result
                return result

        # No match - return None L2 (will need manual review or default)
        result = build(
            None,
            0.0,
            L2ClassificationSource.TIER1_KEYWORDS,  # Attempted but failed
            arbiter_reason="No L2 match found",
        )
        # Do not memoize a miss when the caller skipped an available Groq
        # tier: caching it would make later calls with skip_groq=False return
        # this miss without ever consulting the LLM.
        # NOTE: a miss after a failed Groq call is still cached, because the
        # helper returns None for both errors and "unsure" answers; callers
        # can pass force_refresh=True to retry.
        groq_skipped_by_caller = skip_groq and self._groq_enabled
        if self._use_cache and not groq_skipped_by_caller:
            self._cache[cache_key] = result
        return result

    def _check_exclusions(self, product_name: str) -> Optional[Tuple[str, str]]:
        """Check if product matches exclusion patterns."""
        for pattern, reason in self._compiled_exclusions:
            if pattern.search(product_name):
                return (pattern.pattern, reason)
        return None

    def _match_tier1(
        self, product_name: str, l1_category: str
    ) -> Optional[Tuple[str, str]]:
        """
        Match product against Tier 1 keyword rules.

        Returns (l2_category, matched_term) or None.
        """
        rules = self._compiled_rules.get(l1_category, [])
        if not rules:
            return None

        for pattern, l2_category in rules:
            match = pattern.search(product_name)
            if match:
                return (l2_category, match.group(0))

        return None

    def _classify_with_groq(
        self,
        product_name: str,
        l1_category: str,
        brand: Optional[str] = None,
    ) -> Optional[Tuple[str, float, str]]:
        """
        Classify using the Groq LLM.

        The model is asked to answer with exactly one L2 code. The answer is
        accepted only if, after trimming whitespace and wrapping quotes,
        backticks or a trailing period, it is one of the valid L2 codes for
        ``l1_category``. Any failure (missing package, API error, unexpected
        answer) is logged and reported as None; this method never raises.

        Args:
            product_name: Product name inserted into the prompt.
            l1_category: L1 category that determines the valid L2 options.
            brand: Optional detected brand, added to the prompt when given.

        Returns:
            (l2_category, confidence, reason) or None when the model declined,
            answered with something invalid, or the call failed.
        """
        try:
            # Get valid L2 options first: without them there is nothing to
            # ask, so no client is created and no request is made.
            l2_options = L1_TO_L2_CATEGORIES.get(l1_category, [])
            if not l2_options:
                return None

            from groq import Groq

            client = Groq()

            l2_options_str = ", ".join(l2_options)

            prompt = f"""Clasifica el siguiente producto farmacéutico en una subcategoría L2.

Producto: {product_name}
Categoría L1: {l1_category}
{f"Marca detectada: {brand}" if brand else ""}

Opciones L2 válidas: {l2_options_str}

IMPORTANTE:
- Si es un accesorio, envase vacío o no es un producto real, responde: otros_no_clasificados
- Responde SOLO con el código L2 exacto (ej: solar_facial, descanso_estres)
- Si no estás seguro, responde: sin_clasificar

Respuesta (solo el código):"""

            response = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                max_tokens=50,
            )

            # The content may be missing, and models often wrap the code in
            # quotes/backticks or end it with a period; normalize before
            # validating against the whitelist.
            content = response.choices[0].message.content or ""
            l2_response = content.strip().strip("\"'`.").strip().lower()

            # Validate response
            if l2_response in l2_options:
                return (l2_response, self.TIER2_CONFIDENCE, f"GROQ: {l2_response}")
            if l2_response in ("otros_no_clasificados", "sin_clasificar"):
                # Explicit "not a product" / "unsure" answers mean no L2.
                return None

            logger.warning("Invalid Groq L2 response: %s", l2_response)
            return None

        except Exception as e:
            # Deliberate best-effort boundary: the LLM tier must never break
            # classification, so every failure degrades to "no answer".
            logger.error("Groq L2 classification error: %s", e)
            return None

    def classify_batch(
        self,
        products: List[Tuple[str, str, Optional[str]]],
        skip_groq: bool = False,
        progress_callback: Optional[callable] = None,
    ) -> List[L2ClassificationResult]:
        """
        Classify a batch of products.

        Args:
            products: List of (product_name, l1_category, brand) tuples
            skip_groq: Skip Groq for all products
            progress_callback: Called with (current, total) after each product

        Returns:
            List of L2ClassificationResult
        """
        results = []
        total = len(products)

        for i, (name, l1_cat, brand) in enumerate(products):
            result = self.classify(name, l1_cat, brand=brand, skip_groq=skip_groq)
            results.append(result)

            if progress_callback:
                progress_callback(i + 1, total)

        return results

    def get_stats(self) -> Dict:
        """Get classification statistics from cache."""
        if not self._cache:
            return {"cached_products": 0}

        source_counts: Dict[str, int] = {}
        l2_counts: Dict[str, int] = {}
        total_confidence = 0.0
        classified_count = 0

        for result in self._cache.values():
            source_counts[result.source.value] = (
                source_counts.get(result.source.value, 0) + 1
            )
            if result.l2_category:
                l2_counts[result.l2_category] = (
                    l2_counts.get(result.l2_category, 0) + 1
                )
                total_confidence += result.confidence
                classified_count += 1

        return {
            "cached_products": len(self._cache),
            "classified_count": classified_count,
            "avg_confidence": (
                total_confidence / classified_count if classified_count > 0 else 0
            ),
            "source_distribution": source_counts,
            "top_l2_categories": sorted(
                l2_counts.items(), key=lambda x: -x[1]
            )[:10],
        }

    def clear_cache(self) -> None:
        """Remove every entry from the in-memory classification cache."""
        # Cleared in place, so the same dict object keeps being used.
        self._cache.clear()
        logger.info("L2 classification cache cleared")

    def health_check(self) -> Dict:
        """Build a status report for the service.

        Returns:
            Dict with a fixed "healthy" status, whether Groq is enabled, the
            current cache size, the L1 categories that have L2 subcategories
            and the total number of L2 categories across all of them.
        """
        l2_total = 0
        for subcategories in L1_TO_L2_CATEGORIES.values():
            l2_total += len(subcategories)

        report: Dict = {"status": "healthy"}
        report["groq_enabled"] = self._groq_enabled
        report["cached_products"] = len(self._cache)
        report["l1_categories_supported"] = list(L1_WITH_L2)
        report["total_l2_categories"] = l2_total
        return report


# Module-wide shared instance, created lazily on first request.
_service: Optional[SubcategoryClassifierService] = None


def get_subcategory_classifier() -> SubcategoryClassifierService:
    """Return the shared classifier, constructing it on the first call."""
    global _service
    if _service is not None:
        return _service
    _service = SubcategoryClassifierService()
    return _service
