"""
Consolidate Athletes Script

This script consolidates duplicate athletes in the people collection
by merging records with the same name and birth date, storing all their
competition codes in a codes array.

Strategy:
- Group athletes by (given_name, family_name, birth_date)
- For each group, merge all codes into a single record
- Keep the most complete record as the base
"""

from services.mongo import MongoService
from collections import defaultdict
from datetime import datetime

def _identity_key(person):
    """Return the (given_name, family_name, birth_date) grouping key, or None.

    Names are stripped and lower-cased so case/whitespace variants of the same
    name land in the same group. ``None`` is returned when either name is
    missing or blank, which signals the caller to skip the record.
    """
    # `or ''` covers fields that are present but null; `.get(key, '')` alone
    # would hand back None and crash on `.strip()`.
    given_name = str(person.get('given_name') or '').strip().lower()
    family_name = str(person.get('family_name') or '').strip().lower()
    if not given_name or not family_name:
        return None
    # NOTE(review): records without a birth_date are grouped on name alone,
    # so distinct people sharing a name could be merged - confirm intended.
    return (given_name, family_name, person.get('birth_date', ''))


def _normalize_code(code):
    """Return *code* as a string; numeric codes are truncated to an integer first.

    This makes ``12``, ``12.0`` and ``'12'`` compare equal once collected.
    """
    if isinstance(code, (int, float)):
        return str(int(code))
    return str(code)


def _collect_codes(records):
    """Return the sorted, de-duplicated string codes found across *records*.

    Both the scalar ``code`` field and any existing ``codes`` array are read,
    so codes accumulated by an earlier consolidation run are preserved when
    the same athlete is merged again. Falsy codes (None, '', 0) are ignored.
    """
    all_codes = set()
    for record in records:
        candidates = [record.get('code')]
        existing = record.get('codes')
        if isinstance(existing, (list, tuple)):
            candidates.extend(existing)
        for code in candidates:
            if code:
                all_codes.add(_normalize_code(code))
    return sorted(all_codes)


def _filled_field_count(record):
    """Return how many fields of *record* hold something other than None, '' or []."""
    return sum(1 for value in record.values() if value not in [None, '', []])


def consolidate_athletes():
    """Consolidate duplicate athletes and merge their codes.

    Loads every document of the ``people`` collection, groups them by
    (given_name, family_name, birth_date), and - after interactive
    confirmation - collapses each group with more than one record into the
    record with the most filled fields. That record receives the merged
    ``codes`` array, a primary ``code``, ``consolidated_at`` and
    ``original_record_count``; the other records of the group are deleted.

    Returns:
        bool: False when the MongoDB connection fails or the user declines;
        True otherwise (including when no duplicates exist). Failures of
        individual groups are reported on stdout and do not change the
        return value.
    """
    print("=" * 80)
    print("Consolidate Athletes - Merge Multiple Competition Codes")
    print("=" * 80)

    # Initialize MongoDB service
    print("\n1. Connecting to MongoDB...")
    mongo_service = MongoService()

    if not mongo_service.connect():
        print("❌ Failed to connect to MongoDB")
        return False

    print("✅ Connected to MongoDB")

    # Load all people records
    print("\n2. Loading people records...")
    people_collection = mongo_service.db['people']
    all_people = list(people_collection.find())

    print(f"✅ Found {len(all_people)} people records")

    # Group by identity (name + birth_date)
    print("\n3. Grouping athletes by identity (name + birth_date)...")

    identity_groups = defaultdict(list)

    for person in all_people:
        identity_key = _identity_key(person)
        # Skip records without proper identity
        if identity_key is None:
            continue
        identity_groups[identity_key].append(person)

    # Find duplicates
    duplicates = {k: v for k, v in identity_groups.items() if len(v) > 1}

    print(f"✅ Found {len(identity_groups)} unique identities")
    print(f"⚠️  Found {len(duplicates)} identities with multiple records")

    if not duplicates:
        print("\n✅ No duplicates found. All athletes are unique.")
        return True

    # Show examples
    print("\n📋 Examples of duplicate athletes:")
    for i, (identity, records) in enumerate(list(duplicates.items())[:5], 1):
        given, family, birth = identity
        codes = [r.get('code', 'N/A') for r in records]
        print(f"   {i}. {given.title()} {family.title()} ({birth})")
        print(f"      Codes: {', '.join(str(c) for c in codes)}")
        print(f"      Total records: {len(records)}")

    # Ask for confirmation
    print("\n" + "=" * 80)
    print("This script will:")
    print("1. Merge duplicate records for each athlete")
    print("2. Store all competition codes in a 'codes' array")
    print("3. Keep the most complete record as the base")
    print("4. Delete duplicate records")
    print("=" * 80)

    response = input("\nDo you want to proceed with consolidation? (yes/no): ")

    if response.lower() not in ['yes', 'y']:
        print("❌ Operation cancelled")
        return False

    # Consolidate duplicates
    print("\n4. Consolidating duplicate athletes...")

    consolidated_count = 0
    deleted_count = 0
    failed_count = 0

    for identity, records in duplicates.items():
        # Each group is handled best-effort: one failing group must not stop
        # the remaining groups from being consolidated.
        try:
            codes_list = _collect_codes(records)

            # Find the most complete record (most fields filled)
            base_record = max(records, key=_filled_field_count)

            update_data = {
                'codes': codes_list,  # Array of all competition codes
                'code': codes_list[0] if codes_list else None,  # Primary code (first one)
                # NOTE: utcnow() is deprecated since Python 3.12; it is kept
                # so the stored string stays a naive-UTC ISO timestamp.
                'consolidated_at': datetime.utcnow().isoformat(),
                'original_record_count': len(records)
            }

            # Update the base record first so the merged codes are persisted
            # before any of the duplicates are removed.
            people_collection.update_one(
                {'_id': base_record['_id']},
                {'$set': update_data}
            )

            # Delete other records
            other_ids = [r['_id'] for r in records if r['_id'] != base_record['_id']]
            if other_ids:
                people_collection.delete_many({'_id': {'$in': other_ids}})
                deleted_count += len(other_ids)

            consolidated_count += 1

            if consolidated_count % 10 == 0:
                print(f"   Processed {consolidated_count}/{len(duplicates)} identities...")

        except Exception as e:
            failed_count += 1
            print(f"   ⚠️  Error consolidating {identity}: {e}")
            continue

    print(f"\n✅ Consolidated {consolidated_count} athletes")
    print(f"✅ Deleted {deleted_count} duplicate records")
    if failed_count:
        print(f"⚠️  Failed to consolidate {failed_count} identities (see errors above)")

    # Verify
    final_count = people_collection.count_documents({})
    print(f"\n📊 Final people count: {final_count}")

    # Show sample of consolidated records
    print("\n5. Sample of consolidated records:")
    sample = people_collection.find({'codes': {'$exists': True}}).limit(5)

    for i, person in enumerate(sample, 1):
        # `$exists` also matches a null field, and stored codes are not
        # guaranteed to be strings, so normalise before joining.
        codes = person.get('codes') or []
        print(f"   {i}. {person.get('given_name', '')} {person.get('family_name', '')}")
        print(f"      Codes: {', '.join(str(c) for c in codes)}")
        print(f"      Birth date: {person.get('birth_date', 'N/A')}")

    print("\n" + "=" * 80)
    if failed_count:
        print("Consolidation finished with errors. Some athletes were not consolidated.")
    else:
        print("SUCCESS! Athletes have been consolidated.")
    print("=" * 80)
    print("\nEach athlete now has:")
    print("- 'code': Primary competition code (first one)")
    print("- 'codes': Array of ALL competition codes they used")
    print("\nThis allows tracking the same person across different competitions!")

    return True

if __name__ == "__main__":
    import sys
    import traceback

    # Report the outcome through the process exit status so shells and
    # schedulers can detect a failed or cancelled run: 0 only when
    # consolidate_athletes() returns True, 1 on a False result or on error.
    try:
        succeeded = consolidate_athletes()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        traceback.print_exc()
        sys.exit(1)

    sys.exit(0 if succeeded else 1)
