"""
Test Consolidation Logic

This script tests the new consolidation logic by:
1. Checking current state of people collection
2. Showing examples of records with codes array
3. Verifying no duplicates exist
"""

from services.mongo import MongoService
import json

def _print_collection_statistics(people_collection):
    """Print the total document count and how many documents have 'codes' / 'code'."""
    print("\n2. Collection Statistics:")
    total = people_collection.count_documents({})
    with_codes = people_collection.count_documents({'codes': {'$exists': True}})
    with_code = people_collection.count_documents({'code': {'$exists': True}})

    print(f"   Total people: {total}")
    print(f"   With 'codes' array: {with_codes}")
    print(f"   With 'code' field: {with_code}")


def _report_duplicates(people_collection):
    """Print groups of records sharing a case-insensitive name and a birth date."""
    print("\n3. Checking for duplicates (same name + birthdate):")

    pipeline = [
        # Only records that carry both name fields are considered.
        {
            '$match': {
                'given_name': {'$exists': True},
                'family_name': {'$exists': True},
            }
        },
        # Group on lower-cased names so differences in letter case do not
        # hide a duplicate.
        {
            '$group': {
                '_id': {
                    'given_name': {'$toLower': '$given_name'},
                    'family_name': {'$toLower': '$family_name'},
                    'birth_date': '$birth_date',
                },
                'count': {'$sum': 1},
                'ids': {'$push': '$_id'},
                'codes': {'$push': '$code'},
            }
        },
        {'$match': {'count': {'$gt': 1}}},
        # The report is a sample: at most 10 groups are fetched.
        {'$limit': 10},
    ]

    duplicates = list(people_collection.aggregate(pipeline))

    if not duplicates:
        print("   ✅ No duplicates found!")
        return

    print(f"   ⚠️  Found {len(duplicates)} potential duplicates:")
    for dup in duplicates[:5]:
        identity = dup['_id']
        # Use .get(): a key may be absent from the group key when the grouped
        # records have no such field (notably 'birth_date', which the $match
        # stage does not require), and indexing would raise KeyError.
        given = identity.get('given_name', '').title()
        family = identity.get('family_name', '').title()
        birth_date = identity.get('birth_date', 'N/A')
        print(f"\n   - {given} {family} ({birth_date})")
        print(f"     Records: {dup['count']}")
        print(f"     Codes: {dup['codes']}")


def _show_codes_examples(people_collection):
    """Print up to five records that have a 'codes' field."""
    print("\n4. Examples of people with codes array:")

    people_with_codes = people_collection.find({'codes': {'$exists': True}}).limit(5)

    shown = 0
    for shown, person in enumerate(people_with_codes, start=1):
        print(f"\n   {shown}. {person.get('given_name', 'N/A')} {person.get('family_name', 'N/A')}")
        print(f"      Birth date: {person.get('birth_date', 'N/A')}")
        print(f"      Code: {person.get('code', 'N/A')}")
        print(f"      Codes array: {person.get('codes', [])}")
        print(f"      Organisation: {person.get('organisation', 'N/A')}")

    if shown == 0:
        print("   ℹ️  No people with codes array found yet")
        print("   💡 Import will create codes array on next import")


def _print_field_subset(heading, fields, limit=10):
    """Print *heading* with the field count, then the first *limit* names alphabetically."""
    if not fields:
        return
    print(f"\n   {heading} ({len(fields)}):")
    # Sort BEFORE truncating: slicing the unordered set first would show an
    # arbitrary subset that can differ from run to run.
    for field in sorted(fields)[:limit]:
        print(f"      - {field}")


def _report_field_coverage(people_collection):
    """Compare the keys of one record with 'given_name' against one without it."""
    print("\n5. Field Coverage Analysis:")

    sample_athlete = people_collection.find_one({
        'given_name': {'$exists': True},
        'family_name': {'$exists': True},
    })
    if not sample_athlete:
        return

    athlete_fields = set(sample_athlete.keys())
    print(f"   Sample athlete has {len(athlete_fields)} fields")

    # Records without 'given_name' are treated as the "old" format here.
    sample_old = people_collection.find_one({
        'given_name': {'$exists': False}
    })
    if not sample_old:
        return

    old_fields = set(sample_old.keys())
    print(f"   Sample old record has {len(old_fields)} fields")

    _print_field_subset(
        "Fields in athletes but not in old records",
        athlete_fields - old_fields,
    )
    _print_field_subset(
        "Fields in old records but not in athletes",
        old_fields - athlete_fields,
    )


def test_consolidation() -> bool:
    """Print a consolidation report for the 'people' collection.

    The report has five sections: connection status, collection statistics,
    a sample of potential duplicates (same name and birth date), examples of
    records with a 'codes' field, and a field-coverage comparison between a
    record that has 'given_name' and one that does not.

    Returns:
        False if the MongoDB connection could not be established, True once
        the report has been printed. Finding duplicates is reported in the
        output but does not change the return value.
    """
    print("=" * 80)
    print("Consolidation Test")
    print("=" * 80)

    # Initialize MongoDB service
    print("\n1. Connecting to MongoDB...")
    mongo_service = MongoService()

    if not mongo_service.connect():
        print("❌ Failed to connect to MongoDB")
        return False

    print("✅ Connected to MongoDB")

    # NOTE(review): the connection is never explicitly released; MongoService's
    # API for closing it is not visible from this file - confirm and add if needed.
    people_collection = mongo_service.db['people']

    _print_collection_statistics(people_collection)
    _report_duplicates(people_collection)
    _show_codes_examples(people_collection)
    _report_field_coverage(people_collection)

    print("\n" + "=" * 80)
    print("Test Complete")
    print("=" * 80)

    return True

if __name__ == "__main__":
    import sys
    import traceback

    # Script entry point: run the report and signal failure through the exit
    # status so shells and CI jobs can detect it.
    try:
        succeeded = test_consolidation()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        traceback.print_exc()
        succeeded = False

    # A failed connection (False return) or an unexpected exception must not
    # exit with status 0.
    if not succeeded:
        sys.exit(1)
