"""
Test Business Key Matching for Nested Fields

This script tests if the DiffEngine correctly matches records
using nested field names like competitor.code
"""

from services.mongo import MongoService
from services.json_loader import JSONLoader
from services.diff_engine import DiffEngine
from services.settings_service import SettingsService
import json

def test_matching():
    """Test business key matching with nested fields."""
    print("=" * 80)
    print("Business Key Matching Test")
    print("=" * 80)
    
    # Initialize services
    print("\n1. Initializing services...")
    settings_service = SettingsService()
    config = settings_service.config
    
    mongo_service = MongoService()
    if not mongo_service.connect():
        print("❌ Failed to connect to MongoDB")
        return False
    
    json_loader = JSONLoader(
        config['json_sources']['base_path'],
        config['collections']
    )
    
    diff_engine = DiffEngine(config['collections'])
    
    print("✅ Services initialized")
    
    # Load JSON data
    print("\n2. Loading JSON data for odf_cumulative_results...")
    collection_name = 'odf_cumulative_results'
    
    # Get available sources
    sources = json_loader.get_available_sources()
    print(f"   Available sources: {sources}")
    
    if not sources:
        print("❌ No JSON sources found")
        return False
    
    # Use first source
    source_name = sources[0]
    print(f"   Using source: {source_name}")
    
    json_records = json_loader.load_collection_from_source(
        collection_name,
        source_name
    )
    
    print(f"   Loaded {len(json_records)} JSON records")
    
    if not json_records:
        print("❌ No JSON records loaded")
        return False
    
    # Show sample JSON record keys
    print("\n3. Sample JSON record structure:")
    sample = json_records[0]
    print(f"   Keys containing 'competitor': {[k for k in sample.keys() if 'competitor' in k][:5]}")
    print(f"   Keys containing 'odf_body': {[k for k in sample.keys() if 'odf_body' in k][:5]}")
    
    # Check business keys
    print(f"\n   competitor.code: {sample.get('competitor.code', 'NOT FOUND')}")
    print(f"   odf_body.competition_code: {sample.get('odf_body.competition_code', 'NOT FOUND')}")
    
    # Load MongoDB data
    print("\n4. Loading MongoDB data...")
    collection = mongo_service.get_collection(collection_name)
    db_records = list(collection.find().limit(100))
    print(f"   Loaded {len(db_records)} MongoDB records")
    
    # Run comparison
    print("\n5. Running comparison...")
    results = diff_engine.compare_collections(
        collection_name,
        json_records,
        db_records
    )
    
    # Show results
    print("\n6. Comparison Results:")
    print(f"   JSON records: {results['json_count']}")
    print(f"   MongoDB records: {results['db_count']}")
    print(f"   Exact matches: {results['summary']['exact_match_count']}")
    print(f"   Duplicates (JSON vs DB): {results['summary']['duplicate_count']}")
    print(f"   New records: {results['summary']['new_record_count']}")
    
    # Show business keys used
    print(f"\n   Business keys used: {results['matching_keys']}")
    print(f"   Matching strategy: {results['matching_strategy']}")
    
    # Check if matching worked
    print("\n" + "=" * 80)
    if results['summary']['duplicate_count'] > 0:
        print("✅ SUCCESS! Business keys are working correctly!")
        print(f"   Found {results['summary']['duplicate_count']} duplicates (records in both JSON and DB)")
        
        # Show sample duplicate
        if results['duplicates']:
            dup = results['duplicates'][0]
            print(f"\n   Sample duplicate:")
            print(f"     Match method: {dup.get('match_method')}")
            print(f"     competitor.code: {dup['json_record'].get('competitor.code')}")
            print(f"     odf_body.competition_code: {dup['json_record'].get('odf_body.competition_code')}")
    elif results['summary']['exact_match_count'] > 0:
        print("✅ SUCCESS! Found exact matches!")
        print(f"   {results['summary']['exact_match_count']} records are identical in JSON and DB")
    elif results['summary']['new_record_count'] == results['json_count']:
        print("⚠️  WARNING! All records shown as NEW")
        print("   This means business keys are NOT matching correctly")
        print("\n   Debugging info:")
        print(f"   - Business keys: {results['matching_keys']}")
        
        # Check if keys exist in records
        sample_json = json_records[0]
        sample_db = db_records[0] if db_records else {}
        
        print(f"\n   JSON record has these keys:")
        for key in results['matching_keys']:
            value = sample_json.get(key)
            print(f"     {key}: {value if value is not None else 'NOT FOUND'}")
        
        print(f"\n   DB record has these keys:")
        for key in results['matching_keys']:
            value = sample_db.get(key)
            print(f"     {key}: {value if value is not None else 'NOT FOUND'}")
    
    print("=" * 80)
    
    return True

if __name__ == "__main__":
    # Script entry point: run the check and report the outcome through the
    # process exit status (0 = test_matching() returned a truthy value,
    # 1 = it returned a falsy value or raised), so shells and CI jobs can
    # detect a failed run.
    import sys
    import traceback

    try:
        succeeded = test_matching()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        traceback.print_exc()
        sys.exit(1)

    sys.exit(0 if succeeded else 1)
