# backend/app/api/admin_duplicates.py
"""
Admin endpoints for ProductCatalogVentaLibre duplicate management.

Issue #477: Admin UI para revisión de duplicados dudosos ProductCatalogVentaLibre

Endpoints:
- GET /duplicates/pending - Lista grupos de duplicados pendientes de revisión
- GET /duplicates/stats - Estadísticas de duplicados
- POST /duplicates/merge - Fusionar productos (seleccionar primario)
- POST /duplicates/reject - Marcar como productos diferentes
- POST /duplicates/skip - Posponer decisión
"""

import logging
import uuid
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID

import structlog
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi import status as http_status
from pydantic import BaseModel, ConfigDict, Field, field_validator
from sqlalchemy import and_, func, or_, update
from sqlalchemy.orm import Session


# === Enums for validated query parameters ===

class SortByOption(str, Enum):
    """Accepted values for the ``sort_by`` query parameter of the listing.

    In ``get_duplicate_groups`` each value selects the aggregate used to
    order duplicate groups.
    """
    # Order by the number of products sharing the same tokens.
    similarity = "similarity"
    # Order by the summed total_sales_count of the group.
    sales = "sales"
    # Order by the earliest first_seen_at in the group.
    date = "date"


class SortOrderOption(str, Enum):
    """Accepted values for the ``sort_order`` query parameter (direction)."""
    # Ascending order of the selected sort aggregate.
    asc = "asc"
    # Descending order of the selected sort aggregate.
    desc = "desc"

from ..api.deps import require_permission
from ..core.subscription_limits import Permission
from ..database import get_db
from ..models import ProductCatalogVentaLibre, SalesEnrichment, User
from ..services.product_matching_service import ProductMatchingService

# Standard-library logger: used in this module for warnings and for
# exception tracebacks inside the endpoints' except blocks.
logger = logging.getLogger(__name__)
# structlog logger: used in this module for structured events emitted after
# successful merge, reject and search operations.
struct_logger = structlog.get_logger(__name__)

# Every route declared below is registered under the /admin/duplicates prefix.
router = APIRouter(prefix="/admin/duplicates", tags=["admin-duplicates"])


# === Pydantic Schemas ===

class DuplicateProductInfo(BaseModel):
    """Catalog product as shown inside a duplicate group.

    Instances are built from ProductCatalogVentaLibre ORM rows with
    ``model_validate`` (attribute-based validation is enabled through
    ``from_attributes``).

    When validating from attributes, a field default is only used if the
    attribute is missing; an attribute that exists but holds ``None`` is
    validated as-is and would be rejected by the non-optional fields. Other
    code in this module guards these same attributes with ``or []`` /
    ``or 0`` / ``or ""``, so ``None`` is normalised here as well instead of
    failing the whole listing.
    """
    # Pydantic v2 configuration (replaces the deprecated class-based Config).
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    product_name_display: str
    product_name_normalized: str
    cn_codes: List[str] = []
    ean13: Optional[str] = None
    total_sales_count: int = 0
    pharmacies_count: int = 0
    ml_category: Optional[str] = None
    ml_confidence: Optional[float] = None
    human_verified: bool = False
    first_seen_at: Optional[datetime] = None

    @field_validator("product_name_display", "product_name_normalized", mode="before")
    @classmethod
    def _none_to_empty_string(cls, value: Any) -> Any:
        """Treat a missing name as an empty string."""
        return "" if value is None else value

    @field_validator("cn_codes", mode="before")
    @classmethod
    def _none_to_empty_list(cls, value: Any) -> Any:
        """Treat a missing code list as an empty list."""
        return [] if value is None else value

    @field_validator("total_sales_count", "pharmacies_count", mode="before")
    @classmethod
    def _none_to_zero(cls, value: Any) -> Any:
        """Treat a missing counter as zero."""
        return 0 if value is None else value

    @field_validator("human_verified", mode="before")
    @classmethod
    def _none_to_false(cls, value: Any) -> Any:
        """Treat a missing verification flag as not verified."""
        return False if value is None else value


class DuplicateGroup(BaseModel):
    """Set of catalog products suspected to be the same item.

    Carries the candidate products, the tokens they share, the similarity
    score (bounded to 0.0-1.0) and the product suggested as the one to keep.
    """
    group_id: UUID
    products: List[DuplicateProductInfo]
    similarity_score: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Similarity between products",
    )
    tokens: str = Field(..., description="Normalized tokens used for matching")
    suggested_primary_id: UUID = Field(..., description="Recommended product to keep")
    created_at: Optional[datetime] = None


class DuplicatesListResponse(BaseModel):
    """Paginated payload returned by the pending-duplicates listing."""
    # Duplicate groups on the requested page.
    groups: List[DuplicateGroup]
    # Group count reported by the listing endpoint.
    total_groups: int
    # Number of products contained in the groups of this page.
    total_products_affected: int
    # 1-based page number that was requested.
    page: int
    # Requested page size.
    page_size: int
    # True when at least one more group exists after this page.
    has_more: bool


class DuplicateStatsResponse(BaseModel):
    """Statistics about duplicates in the catalog.

    The stats endpoint in this module fills ``pending_review``,
    ``confirmed_different`` and ``merged`` with counts of product rows, so
    the published field descriptions speak of products rather than groups.
    """
    total_with_tokens: int = Field(description="Products with fuzzy matching tokens")
    total_without_tokens: int = Field(description="Products pending token generation")
    pending_review: int = Field(description="Products in duplicate groups pending admin review")
    confirmed_different: int = Field(description="Products marked as different (not duplicates)")
    merged: int = Field(description="Products merged into others")
    coverage_pct: float = Field(description="Percentage with tokens")


class MergeRequest(BaseModel):
    """Body of the merge operation: one product to keep plus the products
    that should be folded into it."""
    primary_id: UUID = Field(..., description="ID of the product to keep")
    secondary_ids: List[UUID] = Field(
        ...,
        description="IDs of products to merge into primary",
    )


class MergeResponse(BaseModel):
    """Result returned after a merge request has been processed."""
    # Whether the merge completed.
    success: bool
    # ID of the product that was kept.
    primary_id: UUID
    # Number of secondary products merged into the primary.
    merged_count: int
    # Number of SalesEnrichment rows re-pointed to the primary.
    sales_redirected: int
    # Human-readable summary of the operation.
    message: str


class RejectRequest(BaseModel):
    """Body of the reject operation: products that an admin has judged to be
    distinct items rather than duplicates."""
    product_ids: List[UUID] = Field(
        ...,
        description="IDs of products to mark as different",
    )
    group_id: Optional[UUID] = Field(
        default=None,
        description="Group ID if rejecting entire group",
    )


class RejectResponse(BaseModel):
    """Result returned after duplicate candidates have been rejected."""
    # Whether the update completed.
    success: bool
    # Number of products whose review status was changed.
    products_updated: int
    # Human-readable summary of the operation.
    message: str


class SkipRequest(BaseModel):
    """Body of the skip operation: identifies the duplicate group whose
    review is being postponed."""
    group_id: UUID = Field(..., description="ID of the group to skip")


class SkipResponse(BaseModel):
    """Result returned after a duplicate group has been skipped."""
    # Whether the skip completed.
    success: bool
    # Human-readable summary of the operation.
    message: str


class FilterOptionsResponse(BaseModel):
    """Values the duplicates UI can offer in its filter controls: the brand
    names to choose from and the known review statuses."""
    brands: List[str] = Field(..., description="Available brands with duplicates")
    statuses: List[str] = Field(
        ["pending_review", "confirmed_different", "merged"],
        description="Available status values",
    )


# === Search Schemas (Issue #475) ===

class SearchRequest(BaseModel):
    """Body of the manual similarity search.

    ``query`` must be 3-500 characters, ``threshold`` lies in 0.0-1.0
    (default 0.6) and ``limit`` lies in 1-50 (default 10). ``exclude_id`` is
    optional.
    """
    query: str = Field(
        ...,
        min_length=3,
        max_length=500,
        description="Product name to search for",
    )
    threshold: float = Field(
        0.6,
        ge=0.0,
        le=1.0,
        description="Minimum similarity score",
    )
    limit: int = Field(
        10,
        ge=1,
        le=50,
        description="Maximum results to return",
    )
    exclude_id: Optional[UUID] = Field(
        default=None,
        description="Product ID to exclude from results",
    )


class SearchResultItem(BaseModel):
    """One product returned by the similarity search, with its score.

    ``similarity_score`` is bounded to 0.0-1.0 and ``match_method`` records
    how the match was found.
    """
    # Pydantic v2 configuration (replaces the deprecated class-based Config).
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    product_name_display: str
    product_name_normalized: str
    similarity_score: float = Field(ge=0.0, le=1.0)
    match_method: str = Field(description="How the match was found: 'exact_tokens' or 'fuzzy'")
    cn_codes: List[str] = []
    ean13: Optional[str] = None
    ean_codes: List[str] = []
    ml_category: Optional[str] = None
    total_sales_count: int = 0
    pharmacies_count: int = 0


class SearchResponse(BaseModel):
    """Payload returned by the similarity search endpoint."""
    # The search text exactly as received.
    query: str
    # Matching products with their similarity information.
    results: List[SearchResultItem]
    # Number of items in ``results``.
    total_found: int
    # Similarity threshold that was applied to the search.
    threshold_used: float


# === Helper Functions ===

def select_primary_product(products: List["ProductCatalogVentaLibre"]) -> "ProductCatalogVentaLibre":
    """
    Select the "primary" product from a group (the one to keep).

    Priority criteria, compared in this order:
    1. human_verified is truthy
    2. Has an EAN13
    3. Has at least one CN code
    4. Higher total_sales_count (missing counts as 0)
    5. Has a first_seen_at date; among dated products the older one wins

    A product without ``first_seen_at`` is ranked below every dated product
    on criterion 5: an unknown date is not evidence of being the oldest.
    When several products tie on every criterion, the first one in
    ``products`` is returned.

    Args:
        products: Candidate products of one duplicate group.

    Returns:
        The product with the highest priority.

    Raises:
        ValueError: If ``products`` is empty.
    """
    if not products:
        raise ValueError("select_primary_product() requires at least one product")

    def priority(product: "ProductCatalogVentaLibre") -> tuple:
        seen_at = product.first_seen_at
        return (
            bool(product.human_verified),
            bool(product.ean13),
            bool(product.cn_codes),
            product.total_sales_count or 0,
            # Dated products outrank undated ones ...
            seen_at is not None,
            # ... and a smaller timestamp (older) yields a larger key.
            -seen_at.timestamp() if seen_at is not None else 0.0,
        )

    return max(products, key=priority)


def get_duplicate_groups(
    db: Session,
    status_filter: Optional[str] = "pending_review",
    limit: int = 20,
    offset: int = 0,
    brand_filter: Optional[str] = None,
    pharmacy_id_filter: Optional[UUID] = None,
    date_from_filter: Optional[str] = None,
    sort_by: SortByOption = SortByOption.similarity,
    sort_order: SortOrderOption = SortOrderOption.desc
) -> List[DuplicateGroup]:
    """
    Get groups of potential duplicates from the database.

    A group is the set of active, non-merged products that share exactly the
    same ``product_name_tokens`` value; only token values shared by more
    than one matching product are returned.

    Filters:
    - status_filter: "pending_review" matches a NULL or 'pending_review'
      review status; any other non-empty value is matched exactly; an empty
      value disables status filtering. The same condition is applied to the
      members of each group, so products in another review state (for
      example 'confirmed_different') are not listed in a pending group.
    - brand_filter: Filter by detected_brand (case-insensitive partial match)
    - pharmacy_id_filter: Filter by pharmacy_id in pharmacy_ids_seen
    - date_from_filter: Filter by first_seen_at >= date (YYYY-MM-DD); a value
      in any other format is logged and ignored

    Sorting:
    - similarity: By number of duplicates in group (default)
    - sales: By total_sales_count
    - date: By first_seen_at

    Side effects:
        Groups whose first member has no ``duplicate_group_id`` get a newly
        generated one written to every member (and a NULL review status
        becomes 'pending_review'). These writes are committed once, after
        all groups have been built.

    Returns:
        At most ``limit`` groups, starting at ``offset``, each with
        ``similarity_score`` 1.0 (exact token match).
    """
    catalog = ProductCatalogVentaLibre

    # One status condition, reused for the grouped query and for the
    # per-group member query so both select the same products.
    if status_filter == "pending_review":
        status_condition = or_(
            catalog.duplicate_review_status.is_(None),
            catalog.duplicate_review_status == 'pending_review'
        )
    elif status_filter:
        status_condition = catalog.duplicate_review_status == status_filter
    else:
        status_condition = None

    token_query = (
        db.query(
            catalog.product_name_tokens,
            func.count(catalog.id).label('count'),
            func.sum(catalog.total_sales_count).label('total_sales'),
            func.min(catalog.first_seen_at).label('oldest_date')
        )
        .filter(catalog.is_active.is_(True))
        .filter(catalog.product_name_tokens.isnot(None))
        .filter(catalog.merged_into_id.is_(None))
    )

    if status_condition is not None:
        token_query = token_query.filter(status_condition)

    # Brand: case-insensitive partial match
    if brand_filter:
        token_query = token_query.filter(
            func.lower(catalog.detected_brand).contains(brand_filter.lower())
        )

    # Pharmacy: the id must be present in the pharmacy_ids_seen array
    if pharmacy_id_filter:
        token_query = token_query.filter(
            catalog.pharmacy_ids_seen.any(str(pharmacy_id_filter))
        )

    # Date: best effort, an unparsable value is reported and skipped
    if date_from_filter:
        try:
            date_obj = datetime.strptime(date_from_filter, "%Y-%m-%d")
        except ValueError:
            logger.warning(
                "Invalid date format received: %s, expected YYYY-MM-DD",
                date_from_filter,
            )
        else:
            token_query = token_query.filter(catalog.first_seen_at >= date_obj)

    token_query = (
        token_query
        .group_by(catalog.product_name_tokens)
        .having(func.count(catalog.id) > 1)
    )

    if sort_by == SortByOption.sales:
        order_col = func.sum(catalog.total_sales_count)
    elif sort_by == SortByOption.date:
        order_col = func.min(catalog.first_seen_at)
    else:  # similarity (default) - more duplicates = higher priority
        order_col = func.count(catalog.id)

    if sort_order == SortOrderOption.asc:
        token_query = token_query.order_by(order_col.asc())
    else:
        token_query = token_query.order_by(order_col.desc())

    token_rows = token_query.offset(offset).limit(limit).all()

    groups: List[DuplicateGroup] = []
    needs_commit = False

    # Each row is (tokens, count, total_sales, oldest_date); only the tokens
    # are needed here, the aggregates exist for ordering.
    for tokens, _count, _total_sales, _oldest_date in token_rows:
        member_query = (
            db.query(catalog)
            .filter(catalog.product_name_tokens == tokens)
            .filter(catalog.is_active.is_(True))
            .filter(catalog.merged_into_id.is_(None))
        )
        if status_condition is not None:
            member_query = member_query.filter(status_condition)
        products = member_query.order_by(catalog.total_sales_count.desc()).all()

        if len(products) < 2:
            continue

        primary = select_primary_product(products)

        # Generate or use existing group_id
        group_id = products[0].duplicate_group_id
        if not group_id:
            group_id = uuid.uuid4()
            for p in products:
                p.duplicate_group_id = group_id
                if p.duplicate_review_status is None:
                    p.duplicate_review_status = 'pending_review'
            needs_commit = True

        groups.append(DuplicateGroup(
            group_id=group_id,
            products=[DuplicateProductInfo.model_validate(p) for p in products],
            similarity_score=1.0,  # Exact token match
            tokens=tokens,
            suggested_primary_id=primary.id,
            created_at=min(
                (p.first_seen_at for p in products if p.first_seen_at),
                default=None,
            )
        ))

    # Single commit after serialization: with SQLAlchemy's default
    # expire_on_commit behaviour a commit expires loaded instances, so
    # committing inside the loop would force every product to be reloaded
    # before it is converted to a response model.
    if needs_commit:
        db.commit()

    return groups


# === Endpoints ===

def _count_duplicate_groups(
    db: Session,
    status: Optional[str],
    brand: Optional[str],
    pharmacy_id: Optional[UUID],
    date_from: Optional[str],
) -> int:
    """Count the token values shared by more than one matching product.

    Applies the same base conditions and filters as the grouped query in
    ``get_duplicate_groups`` so the total agrees with the listed pages.
    """
    catalog = ProductCatalogVentaLibre
    query = (
        db.query(catalog.product_name_tokens)
        .filter(catalog.is_active.is_(True))
        .filter(catalog.product_name_tokens.isnot(None))
        .filter(catalog.merged_into_id.is_(None))
    )

    if status == "pending_review":
        query = query.filter(
            or_(
                catalog.duplicate_review_status.is_(None),
                catalog.duplicate_review_status == 'pending_review'
            )
        )
    elif status:
        query = query.filter(catalog.duplicate_review_status == status)

    if brand:
        query = query.filter(
            func.lower(catalog.detected_brand).contains(brand.lower())
        )
    if pharmacy_id:
        query = query.filter(catalog.pharmacy_ids_seen.any(str(pharmacy_id)))
    if date_from:
        try:
            query = query.filter(
                catalog.first_seen_at >= datetime.strptime(date_from, "%Y-%m-%d")
            )
        except ValueError:
            # Invalid dates are ignored here exactly as in the listing
            # helper, which is the place that logs the warning.
            pass

    # Query.count() wraps the grouped statement in a subquery, so the result
    # is the number of groups rather than the number of products.
    return (
        query
        .group_by(catalog.product_name_tokens)
        .having(func.count(catalog.id) > 1)
        .count()
    )


@router.get("/pending", response_model=DuplicatesListResponse)
async def list_pending_duplicates(
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(20, ge=1, le=100, description="Items per page"),
    status: str = Query("pending_review", description="Filter by status"),
    brand: Optional[str] = Query(None, description="Filter by detected brand"),
    pharmacy_id: Optional[UUID] = Query(None, description="Filter by pharmacy ID"),
    date_from: Optional[str] = Query(None, description="Filter by detection date (YYYY-MM-DD)"),
    sort_by: SortByOption = Query(SortByOption.similarity, description="Sort by: similarity, sales, date"),
    sort_order: SortOrderOption = Query(SortOrderOption.desc, description="Sort order: asc, desc"),
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
):
    """
    List groups of potential duplicates pending admin review.

    Returns paginated list of duplicate groups with:
    - Product details for comparison
    - Suggested primary product
    - Similarity score

    Filters:
    - status: Review status to list (defaults to "pending_review")
    - brand: Filter by detected brand (case-insensitive partial match)
    - pharmacy_id: Filter by pharmacy ID present in pharmacy_ids_seen
    - date_from: Filter by first seen date (YYYY-MM-DD)

    Sorting:
    - similarity: By number of duplicates in group (default)
    - sales: By total sales count
    - date: By first seen date

    Response notes:
    - total_groups is the number of duplicate groups matching the filters,
      across all pages.
    - total_products_affected counts the products in the groups of the
      returned page only.

    Raises:
        HTTPException: 500 when the groups cannot be retrieved.
    """
    offset = (page - 1) * page_size

    try:
        groups = get_duplicate_groups(
            db,
            status_filter=status,
            limit=page_size + 1,  # +1 to check if there are more
            offset=offset,
            brand_filter=brand,
            pharmacy_id_filter=pharmacy_id,
            date_from_filter=date_from,
            sort_by=sort_by,
            sort_order=sort_order
        )

        has_more = len(groups) > page_size
        if has_more:
            groups = groups[:page_size]

        total_products = sum(len(g.products) for g in groups)

        total_groups = _count_duplicate_groups(db, status, brand, pharmacy_id, date_from)

        return DuplicatesListResponse(
            groups=groups,
            total_groups=total_groups,
            total_products_affected=total_products,
            page=page,
            page_size=page_size,
            has_more=has_more
        )

    except Exception:
        logger.exception("Error listing pending duplicates")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error retrieving duplicates. Please try again or contact support."
        )


@router.get("/stats", response_model=DuplicateStatsResponse)
async def get_duplicate_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
):
    """
    Report duplicate-related figures for the VentaLibre catalog.

    Token coverage figures come from
    ``ProductMatchingService.get_duplicate_statistics``; the three status
    figures are counts of catalog product rows:
    - pending_review: active products that belong to a duplicate group and
      whose review status is NULL or 'pending_review'
    - confirmed_different: products with status 'confirmed_different'
    - merged: products that have a ``merged_into_id``

    Raises:
        HTTPException: 500 when any of the figures cannot be computed.
    """
    catalog = ProductCatalogVentaLibre

    def product_count(*criteria) -> int:
        """Count catalog rows matching every criterion (no result -> 0)."""
        return db.query(func.count(catalog.id)).filter(*criteria).scalar() or 0

    try:
        token_stats = ProductMatchingService(db).get_duplicate_statistics()

        pending_total = product_count(
            catalog.is_active.is_(True),
            or_(
                catalog.duplicate_review_status.is_(None),
                catalog.duplicate_review_status == 'pending_review',
            ),
            catalog.duplicate_group_id.isnot(None),
        )
        different_total = product_count(
            catalog.duplicate_review_status == 'confirmed_different'
        )
        merged_total = product_count(catalog.merged_into_id.isnot(None))

        return DuplicateStatsResponse(
            total_with_tokens=token_stats["total_with_tokens"],
            total_without_tokens=token_stats["total_without_tokens"],
            pending_review=pending_total,
            confirmed_different=different_total,
            merged=merged_total,
            coverage_pct=token_stats["coverage_pct"],
        )

    except Exception:
        logger.exception("Error getting duplicate stats")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error retrieving statistics. Please try again or contact support."
        )


@router.get("/filter-options", response_model=FilterOptionsResponse)
async def get_filter_options(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
):
    """
    Provide the values the duplicates UI offers in its filters.

    Brands are the distinct, non-empty ``detected_brand`` values of active,
    non-merged products that have matching tokens, in brand order. The query
    does not check that a brand's products actually share tokens with
    another product. Statuses are a fixed list.

    Raises:
        HTTPException: 500 when the brands cannot be read.
    """
    catalog = ProductCatalogVentaLibre

    try:
        brand_rows = (
            db.query(func.distinct(catalog.detected_brand))
            .filter(catalog.is_active.is_(True))
            .filter(catalog.product_name_tokens.isnot(None))
            .filter(catalog.detected_brand.isnot(None))
            .filter(catalog.merged_into_id.is_(None))
            .order_by(catalog.detected_brand)
            .all()
        )

        # Each row holds a single column; empty brand names are dropped.
        brand_names = []
        for row in brand_rows:
            name = row[0]
            if name:
                brand_names.append(name)

        return FilterOptionsResponse(
            brands=brand_names,
            statuses=["pending_review", "confirmed_different", "merged"],
        )

    except Exception:
        logger.exception("Error getting filter options")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error retrieving filter options. Please try again or contact support."
        )


def _append_missing(target: list, candidates) -> None:
    """Append each candidate not already in ``target``, keeping first-seen order."""
    for candidate in candidates or []:
        if candidate not in target:
            target.append(candidate)


@router.post("/merge", response_model=MergeResponse)
async def merge_products(
    request: MergeRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
):
    """
    Merge secondary products into a primary product.

    This operation:
    1. Combines product_name_variants from all products
    2. Combines cn_codes from all products
    3. Combines pharmacy_ids_seen (and refreshes pharmacies_count)
    4. Copies a secondary's EAN13 when the primary has none
    5. Adds the secondaries' total_sales_count to the primary
    6. Redirects all SalesEnrichment references to primary
    7. Marks secondary products as merged (is_active=False)

    Afterwards the primary leaves the review queue (its review status and
    group id are cleared). Everything is committed in one transaction.

    Request validation:
    - secondary_ids must contain at least one id (400 otherwise)
    - primary_id must not appear in secondary_ids (400 otherwise); merging a
      product into itself would deactivate the product that is meant to be
      kept
    - repeated ids in secondary_ids are processed once

    Raises:
        HTTPException: 400 for an invalid request, 404 when the primary or a
            secondary product is missing or inactive, 500 on any other
            failure (the transaction is rolled back).
    """
    try:
        # Drop repeated ids but keep the caller's order; otherwise the
        # found-versus-requested length check below reports a false 404.
        secondary_ids = list(dict.fromkeys(request.secondary_ids))

        if not secondary_ids:
            raise HTTPException(
                status_code=http_status.HTTP_400_BAD_REQUEST,
                detail="At least one secondary product is required"
            )

        if request.primary_id in secondary_ids:
            raise HTTPException(
                status_code=http_status.HTTP_400_BAD_REQUEST,
                detail="Primary product cannot also be listed as a secondary product"
            )

        # Validate primary exists and is active
        primary = db.query(ProductCatalogVentaLibre).filter(
            ProductCatalogVentaLibre.id == request.primary_id,
            ProductCatalogVentaLibre.is_active.is_(True)
        ).first()

        if not primary:
            raise HTTPException(
                status_code=http_status.HTTP_404_NOT_FOUND,
                detail=f"Primary product {request.primary_id} not found or inactive"
            )

        # Validate all secondary products exist
        secondaries = db.query(ProductCatalogVentaLibre).filter(
            ProductCatalogVentaLibre.id.in_(secondary_ids),
            ProductCatalogVentaLibre.is_active.is_(True)
        ).all()

        if len(secondaries) != len(secondary_ids):
            found_ids = {s.id for s in secondaries}
            missing = [str(sid) for sid in secondary_ids if sid not in found_ids]
            raise HTTPException(
                status_code=http_status.HTTP_404_NOT_FOUND,
                detail=f"Secondary products not found or inactive: {', '.join(missing)}"
            )

        total_sales_redirected = 0

        # NOTE(review): products merged earlier into one of these
        # secondaries keep pointing at that secondary through merged_into_id
        # - confirm whether they should be re-pointed to the primary.
        for secondary in secondaries:
            # 1. Combine variants. The secondary's display name is always
            # recorded; its own variants are added only while the list has
            # fewer than 20 entries.
            current_variants = list(primary.product_name_variants or [])
            if secondary.product_name_display not in current_variants:
                current_variants.append(secondary.product_name_display)
            for variant in (secondary.product_name_variants or []):
                if variant not in current_variants and len(current_variants) < 20:
                    current_variants.append(variant)
            primary.product_name_variants = current_variants

            # 2. Combine CN codes
            current_cns = list(primary.cn_codes or [])
            _append_missing(current_cns, secondary.cn_codes)
            primary.cn_codes = current_cns

            # 3. Combine pharmacy_ids_seen
            current_pharmacies = list(primary.pharmacy_ids_seen or [])
            _append_missing(current_pharmacies, secondary.pharmacy_ids_seen)
            primary.pharmacy_ids_seen = current_pharmacies
            primary.pharmacies_count = len(current_pharmacies)

            # 4. Use EAN if primary doesn't have one
            if not primary.ean13 and secondary.ean13:
                primary.ean13 = secondary.ean13

            # 5. Sum sales count
            primary.total_sales_count = (primary.total_sales_count or 0) + (secondary.total_sales_count or 0)

            # 6. Redirect SalesEnrichment
            sales_count = (
                db.query(func.count(SalesEnrichment.id))
                .filter(SalesEnrichment.venta_libre_product_id == secondary.id)
                .scalar()
            ) or 0

            if sales_count > 0:
                db.execute(
                    update(SalesEnrichment)
                    .where(SalesEnrichment.venta_libre_product_id == secondary.id)
                    .values(venta_libre_product_id=primary.id)
                )
                total_sales_redirected += sales_count

            # 7. Mark secondary as merged
            secondary.is_active = False
            secondary.merged_into_id = primary.id
            secondary.merged_at = datetime.now(timezone.utc)
            secondary.merged_by = current_user.id
            secondary.duplicate_review_status = 'merged'

        # Update primary status
        primary.duplicate_review_status = None  # No longer in review
        primary.duplicate_group_id = None

        db.commit()

        struct_logger.info(
            "products_merged",
            primary_id=str(primary.id),
            merged_count=len(secondaries),
            sales_redirected=total_sales_redirected,
            user_id=str(current_user.id)
        )

        return MergeResponse(
            success=True,
            primary_id=primary.id,
            merged_count=len(secondaries),
            sales_redirected=total_sales_redirected,
            message=f"Successfully merged {len(secondaries)} products into primary. {total_sales_redirected} sales redirected."
        )

    except HTTPException:
        raise
    except Exception:
        db.rollback()
        logger.exception("Error merging products")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error merging products. Please try again or contact support."
        )


@router.post("/reject", response_model=RejectResponse)
async def reject_as_different(
    request: RejectRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
):
    """
    Mark products as different (not duplicates).

    Each selected active product gets the review status
    'confirmed_different' and is removed from its duplicate group.

    Selection:
    - When ``product_ids`` is non-empty, exactly those products are updated.
    - When ``product_ids`` is empty and ``group_id`` is given, every active
      product of that group is updated (rejecting the entire group).

    Raises:
        HTTPException: 404 when no active product matches the request, 500
            on any other failure (the transaction is rolled back).
    """
    try:
        # group_id is only a fallback: a request that names products keeps
        # acting on exactly those products.
        if not request.product_ids and request.group_id is not None:
            selection = ProductCatalogVentaLibre.duplicate_group_id == request.group_id
        else:
            selection = ProductCatalogVentaLibre.id.in_(request.product_ids)

        products = db.query(ProductCatalogVentaLibre).filter(
            selection,
            ProductCatalogVentaLibre.is_active.is_(True)
        ).all()

        if not products:
            raise HTTPException(
                status_code=http_status.HTTP_404_NOT_FOUND,
                detail="No active products found with provided IDs"
            )

        for product in products:
            product.duplicate_review_status = 'confirmed_different'
            product.duplicate_group_id = None  # Remove from group

        db.commit()

        struct_logger.info(
            "duplicates_rejected",
            product_ids=[str(p.id) for p in products],
            user_id=str(current_user.id)
        )

        return RejectResponse(
            success=True,
            products_updated=len(products),
            message=f"Marked {len(products)} products as different (not duplicates)"
        )

    except HTTPException:
        raise
    except Exception:
        db.rollback()
        logger.exception("Error rejecting duplicates")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error rejecting duplicates. Please try again or contact support."
        )


@router.post("/skip", response_model=SkipResponse)
async def skip_group(
    request: SkipRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
):
    """
    Postpone the decision on a duplicate group.

    Every active product of the group receives one newly generated
    ``duplicate_group_id``; review statuses are left untouched, so the group
    stays in the queue. The previous group id no longer matches any product
    afterwards.

    NOTE(review): the listing helper in this module orders groups by product
    count, sales or first-seen date rather than by group id, so it is not
    evident that re-keying the group changes its position - confirm, or
    track skips with a dedicated field such as a "skipped_at" timestamp.

    Raises:
        HTTPException: 404 when the group has no active products, 500 on any
            other failure (the transaction is rolled back).
    """
    try:
        members = (
            db.query(ProductCatalogVentaLibre)
            .filter(
                ProductCatalogVentaLibre.duplicate_group_id == request.group_id,
                ProductCatalogVentaLibre.is_active.is_(True),
            )
            .all()
        )

        if not members:
            raise HTTPException(
                status_code=http_status.HTTP_404_NOT_FOUND,
                detail=f"No products found with group ID {request.group_id}"
            )

        # One fresh id shared by the whole group.
        replacement_id = uuid.uuid4()
        for member in members:
            member.duplicate_group_id = replacement_id

        db.commit()

        member_total = len(members)
        return SkipResponse(
            success=True,
            message=f"Skipped group with {member_total} products",
        )

    except HTTPException:
        raise
    except Exception:
        db.rollback()
        logger.exception("Error skipping group")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error skipping group. Please try again or contact support."
        )


@router.post("/search", response_model=SearchResponse)
async def search_similar_products(
    request: SearchRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_permission(Permission.ADMIN_CATALOG_MANAGE))
):
    """
    Find catalog products whose name is similar to the given text.

    Issue #475: Manual duplicate search functionality.

    The lookup is delegated to
    ``ProductMatchingService.find_similar_products`` with the request's
    query, threshold, limit and optional excluded id. Matches without a
    product, or whose product is inactive, are left out of the response, so
    ``total_found`` is the number of items actually returned.

    Typical uses:
    - Finding potential duplicates of a specific product
    - Validating before manual merge
    - Investigating reported duplicates

    Raises:
        HTTPException: 500 when the search fails.
    """
    try:
        service = ProductMatchingService(db)
        candidates = service.find_similar_products(
            product_name=request.query,
            threshold=request.threshold,
            limit=request.limit,
            exclude_id=request.exclude_id,
        )

        # Each match exposes .product (catalog row), .similarity (float) and
        # .match_type (str).
        items = []
        for candidate in candidates:
            found = candidate.product
            if not (found and found.is_active):
                continue
            items.append(SearchResultItem(
                id=found.id,
                product_name_display=found.product_name_display or "",
                product_name_normalized=found.product_name_normalized or "",
                similarity_score=candidate.similarity,
                match_method=candidate.match_type,
                cn_codes=found.cn_codes or [],
                ean13=found.ean13,
                ean_codes=found.ean_codes or [],
                ml_category=found.ml_category,
                total_sales_count=found.total_sales_count or 0,
                pharmacies_count=found.pharmacies_count or 0,
            ))

        item_total = len(items)

        # Only the first 50 characters of the query are logged.
        struct_logger.info(
            "duplicate_search_completed",
            query=request.query[:50],
            threshold=request.threshold,
            results_found=item_total,
            user_id=str(current_user.id),
        )

        return SearchResponse(
            query=request.query,
            results=items,
            total_found=item_total,
            threshold_used=request.threshold,
        )

    except Exception:
        logger.exception("Error searching for similar products")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error searching for similar products. Please try again."
        )
