"""
Check ODF Rankings Structure

This script inspects the structure of the odf_rankings collection
to understand the actual field names, and tests which candidate field
combinations are unique across documents.
"""

from services.mongo import MongoService
from services.json_flattener import JSONFlattener
import json

def _find_duplicate_groups(collection, group_id, limit=5):
    """Return up to *limit* ``$group`` buckets that contain more than one document.

    Args:
        collection: Object exposing a pymongo-style ``aggregate(pipeline)``.
        group_id: Expression used verbatim as the ``_id`` of the ``$group``
            stage (a ``'$field.path'`` string or a dict of such paths).
        limit: Maximum number of duplicate buckets to fetch.

    Returns:
        A list of aggregation result documents (``_id`` and ``count``);
        empty when every bucket holds exactly one document.
    """
    pipeline = [
        {'$group': {'_id': group_id, 'count': {'$sum': 1}}},
        {'$match': {'count': {'$gt': 1}}},
        {'$limit': limit},
    ]
    return list(collection.aggregate(pipeline))


def _print_uniqueness_result(label, duplicates, noun, limit=5):
    """Print the outcome of one uniqueness test.

    Because the aggregation is capped at *limit* buckets, a result of exactly
    *limit* entries only proves a lower bound, so it is reported as "at least".
    """
    print(f"\n   {label}:")
    if duplicates:
        qualifier = "at least " if len(duplicates) >= limit else ""
        print(f"     ❌ Found {qualifier}{len(duplicates)} duplicate {noun}")
    else:
        print("     ✅ UNIQUE - No duplicates found!")


def check_rankings_structure() -> bool:
    """Inspect one ``odf_rankings`` document and test candidate business keys.

    Prints the sample document's top-level fields, the shape of its
    ``competitor`` and ``odf_body`` sub-documents, every flattened field whose
    name contains ``code``, and whether two candidate key combinations are
    unique across the collection, followed by a recommendation.

    Returns:
        True when the inspection ran to completion; False when the MongoDB
        connection failed or the collection holds no documents.
    """
    print("=" * 80)
    print("ODF Rankings Structure Check")
    print("=" * 80)

    # Initialize MongoDB service
    print("\n1. Connecting to MongoDB...")
    mongo_service = MongoService()
    # NOTE(review): MongoService's shutdown API is not visible from this file,
    # so the connection is left to be released at process exit — confirm.

    if not mongo_service.connect():
        print("❌ Failed to connect to MongoDB")
        return False

    print("✅ Connected to MongoDB")

    # Get collection
    collection = mongo_service.db['odf_rankings']

    # Count documents
    count = collection.count_documents({})
    print(f"\n2. Total documents in odf_rankings: {count}")

    if count == 0:
        print("❌ No documents found in collection")
        return False

    # Get sample document
    print("\n3. Fetching sample document...")
    doc = collection.find_one()
    if doc is None:
        # The collection can be emptied between the count and this fetch.
        print("❌ No documents found in collection")
        return False

    # Show top-level fields
    print("\n4. Top-level fields:")
    for field in sorted(doc.keys()):
        if field != '_id':
            print(f"   - {field}: {type(doc[field]).__name__}")

    # Show competitor structure
    print("\n5. Competitor structure:")
    if 'competitor' in doc:
        competitor = doc['competitor']
        print(f"   Type: {type(competitor).__name__}")
        if isinstance(competitor, dict):
            for key, value in competitor.items():
                print(f"   - competitor.{key}: {type(value).__name__}")
    else:
        # Say so explicitly: an empty section is ambiguous in a discovery script.
        print("   (no 'competitor' field in sample document)")

    # Show odf_body structure
    print("\n6. ODF Body structure:")
    if 'odf_body' in doc:
        odf_body = doc['odf_body']
        print(f"   Type: {type(odf_body).__name__}")
        if isinstance(odf_body, dict):
            for key in sorted(odf_body.keys()):
                print(f"   - odf_body.{key}")
    else:
        print("   (no 'odf_body' field in sample document)")

    # Flatten and show all fields with 'code'
    print("\n7. All flattened fields containing 'code':")
    flattener = JSONFlattener()
    # The flattened result is used as a mapping of field path -> value.
    flat = flattener.flatten(doc)

    for field in sorted(flat.keys()):
        if 'code' in field.lower():
            print(f"   - {field}: {flat[field]}")

    # Check for unique combinations
    print("\n8. Testing uniqueness of different key combinations...")

    # Test 1: competitor.code + odf_body.competition_code + rank
    duplicates1 = _find_duplicate_groups(
        collection,
        {
            'comp_code': '$competitor.code',
            'competition': '$odf_body.competition_code',
            'rank': '$rank',
        },
    )
    _print_uniqueness_result(
        "competitor.code + odf_body.competition_code + rank",
        duplicates1,
        "combinations",
    )

    # Test 2: odf_body.document_code
    duplicates2 = _find_duplicate_groups(collection, '$odf_body.document_code')
    _print_uniqueness_result("odf_body.document_code", duplicates2, "values")

    # Show sample document
    print("\n9. Sample document (first 800 chars):")
    # Build a copy without '_id' instead of mutating the fetched document.
    printable = {key: value for key, value in doc.items() if key != '_id'}
    # default=str stringifies any value json cannot encode natively.
    doc_str = json.dumps(printable, indent=2, default=str)
    print(doc_str[:800])

    # Recommendation
    print("\n" + "=" * 80)
    print("RECOMMENDATION")
    print("=" * 80)

    if not duplicates1:
        print("\n✅ Use these business keys:")
        print("   - competitor.code")
        print("   - odf_body.competition_code")
        print("   - rank")
    elif not duplicates2:
        print("\n✅ Use this business key:")
        print("   - odf_body.document_code")
    else:
        print("\n⚠️  Need to investigate further")

    return True

if __name__ == "__main__":
    import sys
    import traceback

    # Script entry point: run the check and report the outcome through the
    # process exit status (0 = completed, 1 = failed) so shell/CI callers can
    # detect a failed connection, an empty collection, or a crash.
    try:
        succeeded = check_rankings_structure()
    except Exception as e:
        # Top-level boundary: show the error and full traceback, then fail.
        print(f"\n❌ Error: {e}")
        traceback.print_exc()
        sys.exit(1)

    sys.exit(0 if succeeded else 1)
