"""
Update Engine Service

This module handles selective field updates for duplicate records.
Users can choose which fields to update in existing MongoDB records.

Key features:
- Field-level update selection
- Preview updates before applying
- Support for nested field updates
- Preserve unchanged fields
"""

import pandas as pd
from typing import Dict, Any, List, Optional
from services.json_flattener import JSONFlattener


class UpdateEngine:
    """
    Engine for handling selective field updates on duplicate records.
    
    Allows users to choose which fields to update in MongoDB
    when JSON contains updated information for existing records.
    """
    
    # Schema of the preview DataFrame; kept explicit so that an empty
    # duplicates list still produces a frame with the expected columns.
    PREVIEW_COLUMNS = [
        '_record_index', '_db_id', 'field', 'status',
        'json_value', 'db_value', '_update_decision'
    ]
    
    def __init__(self):
        """Initialize update engine."""
        self.flattener = JSONFlattener()
    
    def _normalize_value(self, value):
        """
        Normalize a value for comparison (same rules as DiffEngine).
        
        Booleans are kept as-is, the strings 'True'/'False' become real
        booleans, and ints/floats are unified to float so 1 == 1.0.
        
        Args:
            value: Value to normalize
            
        Returns:
            Normalized value
        """
        if value is None:
            return None
        
        # bool is a subclass of int, so this check must come before the
        # numeric normalization below
        if isinstance(value, bool):
            return value
        
        # String boolean to actual boolean
        if isinstance(value, str) and value in ('True', 'False'):
            return value == 'True'
        
        # Unify int/float so 1 and 1.0 compare equal
        if isinstance(value, (int, float)):
            return float(value)
        
        return value
    
    def build_update_preview_dataframe(
        self,
        duplicates: List[Dict[str, Any]]
    ) -> pd.DataFrame:
        """
        Build a DataFrame for update preview with field selection.
        
        Args:
            duplicates: List of duplicate records with json_record and db_record
            
        Returns:
            DataFrame with columns:
            - _record_index: Index of the duplicate record
            - _db_id: Stringified MongoDB _id (display only; the real _id
              object is taken from `duplicates` when operations are built)
            - field: Field name
            - status: 'new', 'different', 'missing_in_json', 'match'
            - json_value: Value from JSON (display string)
            - db_value: Value from MongoDB (display string)
            - _update_decision: 'skip', 'update' (user choice)
        """
        rows = []
        
        for idx, dup in enumerate(duplicates):
            json_record = dup['json_record']
            db_record = dup['db_record']
            db_id = db_record.get('_id')
            
            # All non-metadata fields from both records
            json_fields = {k: v for k, v in json_record.items() if not k.startswith('_')}
            db_fields = {k: v for k, v in db_record.items() if not k.startswith('_')}
            
            for field in sorted(set(json_fields) | set(db_fields)):
                json_value = json_fields.get(field)
                db_value = db_fields.get(field)
                
                # Classify the field and pick a sensible default decision
                if field in json_fields and field not in db_fields:
                    # Default: add new fields
                    status, default_decision = 'new', 'update'
                elif field not in json_fields:
                    # Default: keep existing DB-only fields
                    status, default_decision = 'missing_in_json', 'skip'
                elif self._normalize_value(json_value) != self._normalize_value(db_value):
                    # Default: don't overwrite — user must opt in
                    status, default_decision = 'different', 'skip'
                else:
                    # Matching fields never need an update
                    status, default_decision = 'match', 'skip'
                
                rows.append({
                    '_record_index': idx,
                    '_db_id': str(db_id) if db_id is not None else None,
                    'field': field,
                    'status': status,
                    'json_value': str(json_value) if json_value is not None else '',
                    'db_value': str(db_value) if db_value is not None else '',
                    '_update_decision': default_decision
                })
        
        # Explicit columns keep downstream code working on an empty preview
        return pd.DataFrame(rows, columns=self.PREVIEW_COLUMNS)
    
    def extract_update_decisions(
        self,
        preview_df: pd.DataFrame
    ) -> Dict[str, List[int]]:
        """
        Extract update decisions from preview DataFrame.
        
        Args:
            preview_df: Preview DataFrame with update decisions
            
        Returns:
            Dictionary with 'update' and 'skip' lists of row indices
        """
        decisions: Dict[str, List[int]] = {
            'update': [],
            'skip': []
        }
        
        for idx, row in preview_df.iterrows():
            # Anything other than an explicit 'update' is treated as skip
            decision = row.get('_update_decision', 'skip')
            decisions['update' if decision == 'update' else 'skip'].append(idx)
        
        return decisions
    
    def prepare_update_operations(
        self,
        preview_df: pd.DataFrame,
        duplicates: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Prepare MongoDB update operations based on user selections.
        
        Values are read from the original JSON records in `duplicates`
        (not from the stringified preview cells) so native types are
        preserved, and the filter uses the original `_id` object so the
        update actually matches the document in MongoDB.
        
        Args:
            preview_df: Preview DataFrame with update decisions
            duplicates: Original duplicate records
            
        Returns:
            List of update operations for MongoDB bulk_write
            Each operation: {
                'filter': {'_id': ObjectId},
                'update': {'$set': {field: value, ...}},
                'record_index': int
            }
        """
        # Group selected fields by duplicate record
        fields_by_record: Dict[int, Dict[str, Any]] = {}
        
        for _, row in preview_df.iterrows():
            if row['_update_decision'] != 'update':
                continue
            
            record_idx = int(row['_record_index'])
            field = row['field']
            
            # Never write metadata fields
            if field.startswith('_'):
                continue
            
            # Read the value from the original JSON record so its native
            # type (int, bool, None, nested structure, ...) is preserved;
            # the preview DataFrame only holds display strings.
            value = duplicates[record_idx]['json_record'].get(field)
            fields_by_record.setdefault(record_idx, {})[field] = value
        
        # Build one operation per affected record
        operations = []
        
        for record_idx, fields_to_update in fields_by_record.items():
            if not fields_to_update:
                continue
            
            # Use the original _id object (e.g. ObjectId) from the DB
            # record: the stringified copy in the preview would not match.
            db_id = duplicates[record_idx]['db_record'].get('_id')
            
            # Rebuild the nested structure from the flattened field names
            unflattened_updates = self.flattener.unflatten(fields_to_update)
            
            operations.append({
                'filter': {'_id': db_id},
                'update': {'$set': unflattened_updates},
                'record_index': record_idx
            })
        
        return operations
    
    def mark_all_for_update(
        self,
        df: pd.DataFrame,
        status_filter: Optional[List[str]] = None
    ) -> pd.DataFrame:
        """
        Mark all fields (or filtered fields) for update.
        
        Args:
            df: Preview DataFrame
            status_filter: Optional list of statuses to update (e.g., ['new', 'different'])
            
        Returns:
            Updated DataFrame (the input is not modified)
        """
        df = df.copy()
        
        if status_filter:
            # Only flip rows whose status is in the requested set
            mask = df['status'].isin(status_filter)
            df.loc[mask, '_update_decision'] = 'update'
        else:
            df['_update_decision'] = 'update'
        
        return df
    
    def mark_all_for_skip(
        self,
        df: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Mark all fields to skip (no updates).
        
        Args:
            df: Preview DataFrame
            
        Returns:
            Updated DataFrame (the input is not modified)
        """
        df = df.copy()
        df['_update_decision'] = 'skip'
        return df
    
    def get_update_summary(
        self,
        preview_df: pd.DataFrame
    ) -> Dict[str, Any]:
        """
        Get summary of pending updates.
        
        Args:
            preview_df: Preview DataFrame with update decisions
            
        Returns:
            Dictionary with update statistics
        """
        # Counts by field status and by user decision
        status_counts = preview_df['status'].value_counts().to_dict()
        decision_counts = preview_df['_update_decision'].value_counts().to_dict()
        
        # Rows the user chose to apply
        to_update = preview_df[preview_df['_update_decision'] == 'update']
        
        return {
            'total_fields': len(preview_df),
            'fields_to_update': len(to_update),
            'fields_to_skip': decision_counts.get('skip', 0),
            'records_to_update': to_update['_record_index'].nunique(),
            'new_fields': status_counts.get('new', 0),
            'different_fields': status_counts.get('different', 0),
            'missing_fields': status_counts.get('missing_in_json', 0),
            'matching_fields': status_counts.get('match', 0)
        }
