"""
Test Update Preview DataFrame

Test that update preview correctly identifies fields to update.
"""

from services.mongo import MongoService
from services.json_loader import JSONLoader
from services.diff_engine import DiffEngine
from services.update_engine import UpdateEngine
from services.settings_service import SettingsService

def test_update_preview():
    """Run the update-preview pipeline end to end and print a report.

    Steps, in order: connect to MongoDB, load the 'people' collection
    from the 'ba_glf_2018' JSON source, diff it against the database
    records, build the update-preview DataFrame for the duplicates, mark
    'new' and 'different' fields for update, and prepare the resulting
    update operations. The prepared operations are only counted here;
    this function does not execute them.

    Nothing is asserted and nothing is returned -- the output is meant to
    be read by a person. The function returns early when the MongoDB
    connection fails or when the comparison yields no duplicates.
    """
    rule = "=" * 80

    def report_counts(heading, counts):
        # Print a heading followed by one "  label: total" line per entry.
        print(heading)
        for label, total in counts.items():
            print(f"  {label}: {total}")

    print(rule)
    print("Test Update Preview DataFrame")
    print(rule)

    # Services are created in the same order as before; the Mongo
    # connection is checked before anything else is built.
    settings_service = SettingsService()
    config = settings_service.config

    mongo_service = MongoService()
    if not mongo_service.connect():
        print("❌ Failed to connect")
        return

    base_path = config['json_sources']['base_path']
    collections_cfg = config['collections']
    loader = JSONLoader(base_path, collections_cfg)

    diff_engine = DiffEngine(collections_cfg)
    update_engine = UpdateEngine()

    # Fixed inputs for this scenario: the people collection from one source.
    collection_name = 'people'
    source_name = 'ba_glf_2018'

    print(f"\nLoading {collection_name} from {source_name}...")
    json_records = loader.load_collection_from_source(collection_name, source_name)
    print(f"Loaded {len(json_records)} JSON records")

    db_records = list(mongo_service.get_collection(collection_name).find())
    print(f"Loaded {len(db_records)} MongoDB records")

    # Compare the JSON records against what is stored in MongoDB.
    print("\nRunning comparison...")
    results = diff_engine.compare_collections(collection_name, json_records, db_records)

    summary = results['summary']
    print("\nComparison Results:")
    for label, key in (
        ("Exact matches", 'exact_match_count'),
        ("Duplicates", 'duplicate_count'),
        ("New records", 'new_record_count'),
    ):
        print(f"  {label}: {summary[key]}")

    duplicates = results['duplicates']
    if not duplicates:
        print("\n⚠️  No duplicates found")
        return

    dup_total = len(duplicates)

    # One preview row per field of each duplicate record.
    print(f"\nBuilding update preview for {dup_total} duplicates...")
    preview_df = update_engine.build_update_preview_dataframe(duplicates)

    print("\nPreview DataFrame:")
    print(f"  Total rows (fields): {len(preview_df)}")
    print(f"  Unique records: {preview_df['_record_index'].nunique()}")

    report_counts("\nField status breakdown:", preview_df['status'].value_counts())
    report_counts(
        "\nDefault update decisions:",
        preview_df['_update_decision'].value_counts(),
    )

    # Mimic the UI's "UPDATE ALL FIELDS" action.
    print("\n" + rule)
    print("Simulating 'UPDATE ALL FIELDS' button...")
    print(rule)

    updated_df = update_engine.mark_all_for_update(
        preview_df,
        status_filter=['new', 'different'],
    )

    # Rows flagged 'update' drive both the per-record and per-field totals.
    flagged = updated_df['_update_decision'] == 'update'
    records_with_updates = updated_df[flagged]['_record_index'].nunique()
    total_fields_to_update = flagged.sum()

    print("\nAfter marking all for update:")
    print(f"  Records with fields to update: {records_with_updates}")
    print(f"  Total fields to update: {total_fields_to_update}")

    print("\nPreparing update operations...")
    operations = update_engine.prepare_update_operations(updated_df, duplicates)
    op_total = len(operations)

    print("\nUpdate operations prepared:")
    print(f"  Total operations (records to update): {op_total}")

    # Each operation carries its field changes under update -> $set.
    total_fields = sum(len(op['update']['$set']) for op in operations)
    print(f"  Total fields to update: {total_fields}")

    print("\n" + rule)
    print("ANALYSIS")
    print(rule)

    print(f"\nExpected: {dup_total} records")
    print(f"Got:      {op_total} operations")

    if op_total >= dup_total:
        print(f"\n✅ All {dup_total} records have fields to update!")
    else:
        records_without_updates = dup_total - op_total
        print(f"\n⚠️  {records_without_updates} records have NO fields to update")
        print("   This means all their fields are either:")
        print("   - Status 'match' (identical values)")
        print("   - Status 'missing_in_json' (only in DB)")

        # Duplicate positions that did not produce an operation.
        covered = {op['record_index'] for op in operations}
        uncovered = [pos for pos in range(dup_total) if pos not in covered]

        if uncovered:
            print("\n   Sample records without updates (first 5):")
            for pos in uncovered[:5]:
                comparison = duplicates[pos].get('comparison', {})
                differences = comparison.get('differences', {})
                new_fields = comparison.get('new_fields', [])

                print(f"\n   Record #{pos}:")
                print(f"     New fields: {len(new_fields)}")
                different_total = sum(
                    1 for diff in differences.values()
                    if diff['status'] == 'different'
                )
                print(f"     Different fields: {different_total}")
                print(f"     Total differences: {len(differences)}")

    print("\n" + rule)


if __name__ == "__main__":
    # Script entry point: run the scenario and report any failure.
    import sys
    import traceback

    try:
        test_update_preview()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        traceback.print_exc()
        # Exit non-zero so a shell or CI job can tell the run failed;
        # previously the error was printed but the exit status stayed 0.
        sys.exit(1)
