"""
Check Business Keys for ODF Collections

This script samples the odf_cumulative_results collection and checks which
fields can serve as business keys for reliable duplicate detection.
"""

from services.mongo import MongoService
import json

def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    # dict.get(key, {}) only falls back when the key is absent; a stored
    # null (None) or scalar would otherwise break the chained .get() calls.
    return value if isinstance(value, dict) else {}


def _print_sample_document(index, doc):
    """Print the candidate business-key fields of one sample document."""
    print(f"\n   Document {index}:")

    competitor = _as_dict(doc.get('competitor'))
    print(f"     competitor.code: {competitor.get('code')}")

    odf_body = _as_dict(doc.get('odf_body'))
    print(f"     odf_body.competition_code: {odf_body.get('competition_code')}")
    print(f"     odf_body.document_code: {odf_body.get('document_code')}")

    if 'athlete_code' in odf_body:
        print(f"     odf_body.athlete_code: {odf_body.get('athlete_code')}")
    else:
        print("     odf_body.athlete_code: NOT FOUND")

    composition = _as_dict(competitor.get('composition'))
    if 'athlete' not in composition:
        return

    # 'athlete' is handled both as a list of dicts and as a single dict.
    athletes = composition['athlete']
    if isinstance(athletes, list):
        print(f"     competitor.composition.athlete: LIST with {len(athletes)} athletes")
        for j, athlete in enumerate(athletes):
            if isinstance(athlete, dict):
                print(f"       [{j}] code: {athlete.get('code')}")
    elif isinstance(athletes, dict):
        print("     competitor.composition.athlete: DICT")
        print(f"       code: {athletes.get('code')}")


def _find_duplicates(collection, group_id, limit):
    """Return up to *limit* groups of *group_id* that occur more than once."""
    pipeline = [
        {'$group': {'_id': group_id, 'count': {'$sum': 1}}},
        {'$match': {'count': {'$gt': 1}}},
        {'$limit': limit},
    ]
    return list(collection.aggregate(pipeline))


def _count_label(found, limit):
    """Describe a result count that may have been truncated by ``$limit``."""
    # When the result fills the limit there may be more duplicates than shown.
    return f"at least {found}" if found >= limit else str(found)


def check_business_keys():
    """Check business keys for odf_cumulative_results.

    Prints the candidate key fields of up to five sample documents, then
    tests two candidate keys for uniqueness across the whole collection:
    ``competitor.code`` + ``odf_body.competition_code`` and
    ``odf_body.document_code``. Finally prints a recommendation.

    Returns:
        False if the MongoDB connection could not be established,
        True otherwise (including when the collection is empty).
    """
    sample_size = 5
    duplicate_limit = 5
    banner = "=" * 80

    print(banner)
    print("Business Keys Check for ODF Cumulative Results")
    print(banner)

    # Initialize MongoDB service
    print("\n1. Connecting to MongoDB...")
    mongo_service = MongoService()

    if not mongo_service.connect():
        print("❌ Failed to connect to MongoDB")
        return False

    print("✅ Connected to MongoDB")
    # NOTE(review): the connection is never released; MongoService's teardown
    # API is not visible in this file - confirm it and close the connection.

    collection = mongo_service.db['odf_cumulative_results']

    # Get sample documents
    print("\n2. Analyzing sample documents...")
    docs = list(collection.find().limit(sample_size))

    print(f"\n   Found {len(docs)} sample documents")

    if not docs:
        # An unfiltered find() returning nothing means the collection is
        # empty; "no duplicates" would then be vacuous, so do not recommend.
        print("\n⚠️  Collection is empty - cannot evaluate business keys")
        return True

    for i, doc in enumerate(docs, 1):
        _print_sample_document(i, doc)

    # Check for unique combinations
    print("\n3. Checking uniqueness of different key combinations...")

    # Test 1: competitor.code + odf_body.competition_code
    duplicates1 = _find_duplicates(
        collection,
        {
            'comp_code': '$competitor.code',
            'competition': '$odf_body.competition_code',
        },
        duplicate_limit,
    )
    print("\n   competitor.code + odf_body.competition_code:")
    if duplicates1:
        label = _count_label(len(duplicates1), duplicate_limit)
        print(f"     ❌ Found {label} duplicate combinations")
        for dup in duplicates1[:3]:
            print(f"       {dup['_id']} appears {dup['count']} times")
    else:
        print("     ✅ UNIQUE - No duplicates found!")

    # Test 2: odf_body.document_code
    duplicates2 = _find_duplicates(
        collection, '$odf_body.document_code', duplicate_limit
    )
    print("\n   odf_body.document_code:")
    if duplicates2:
        label = _count_label(len(duplicates2), duplicate_limit)
        print(f"     ❌ Found {label} duplicate values")
    else:
        print("     ✅ UNIQUE - No duplicates found!")

    # Recommendation: prefer the composite key, fall back to document_code.
    print("\n" + banner)
    print("RECOMMENDATION")
    print(banner)

    if not duplicates1:
        print("\n✅ Use these business keys:")
        print("   - competitor.code")
        print("   - odf_body.competition_code")
        print("\nThis combination uniquely identifies each record.")
    elif not duplicates2:
        print("\n✅ Use this business key:")
        print("   - odf_body.document_code")
        print("\nThis field uniquely identifies each record.")
    else:
        print("\n⚠️  Need to investigate further - no unique combination found")

    return True

if __name__ == "__main__":
    import sys
    import traceback

    # Propagate the outcome as the process exit status so that shells and
    # schedulers can tell a failed run from a successful one.
    try:
        succeeded = check_business_keys()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        traceback.print_exc()
        sys.exit(1)
    sys.exit(0 if succeeded else 1)
