"""
Test Boolean Fix with Real Data

Test with odf_rankings and people data that contain boolean fields.
"""

from services.mongo import MongoService
from services.json_loader import JSONLoader
from services.diff_engine import DiffEngine
from services.settings_service import SettingsService

def _report_comparison(loader, mongo_service, diff_engine, collection_name, source_name):
    """Load one collection from JSON and MongoDB, diff them, and print a summary.

    Args:
        loader: Object providing ``load_collection_from_source(collection, source)``.
        mongo_service: Connected service providing ``get_collection(name)``.
        diff_engine: Object providing ``compare_collections(name, json, db)``.
        collection_name: Name of the collection to load and compare.
        source_name: Name of the JSON source to load the collection from.

    Returns:
        A ``(json_records, results)`` tuple: the records loaded from JSON and
        the value returned by ``diff_engine.compare_collections``.
    """
    print(f"\nLoading {collection_name} from {source_name}...")
    json_records = loader.load_collection_from_source(collection_name, source_name)
    print(f"Loaded {len(json_records)} JSON records")

    collection = mongo_service.get_collection(collection_name)
    db_records = list(collection.find())
    print(f"Loaded {len(db_records)} MongoDB records")

    print("\nRunning comparison...")
    results = diff_engine.compare_collections(
        collection_name,
        json_records,
        db_records,
    )

    summary = results['summary']
    print("\nResults:")
    print(f"  Exact matches: {summary['exact_match_count']}")
    print(f"  Duplicates: {summary['duplicate_count']}")
    print(f"  New records: {summary['new_record_count']}")

    return json_records, results


def test_boolean_fix() -> None:
    """Test boolean comparison with real data.

    Runs three printed checks:

    1. ``DiffEngine._normalize_value`` maps equivalent bool/string and
       int/float pairs to equal values.
    2. The ``odf_rankings`` collection from source ``ba_glf_2018`` is compared
       against MongoDB; a warning is printed unless every JSON record is an
       exact match.
    3. The ``people`` collection is compared, and the first reported duplicate
       is inspected for boolean fields flagged as different.

    The final banner reports success only if the normalization cases all
    matched AND no boolean field was flagged as different in step 3.
    Returns early (printing an error) if the MongoDB connection fails.
    """
    print("=" * 80)
    print("Test Boolean Comparison Fix")
    print("=" * 80)

    settings_service = SettingsService()
    config = settings_service.config

    mongo_service = MongoService()
    if not mongo_service.connect():
        print("❌ Failed to connect")
        return

    loader = JSONLoader(
        config['json_sources']['base_path'],
        config['collections'],
    )

    diff_engine = DiffEngine(config['collections'])

    # Test normalization directly
    print("\n1. Testing normalization function:")
    print("=" * 80)

    # Each case is (left value, right value, description); both values are
    # expected to normalize to the same thing.
    test_cases = [
        (True, True, "bool True vs bool True"),
        (False, False, "bool False vs bool False"),
        (True, "True", "bool True vs string 'True'"),
        ("True", True, "string 'True' vs bool True"),
        (False, "False", "bool False vs string 'False'"),
        ("False", False, "string 'False' vs bool False"),
        (1, 1.0, "int 1 vs float 1.0"),
        (180, 180.0, "int 180 vs float 180.0"),
        (1002236, 1002236, "int vs int (same)"),
        (1002236, 1002236.0, "int vs float (same value)"),
    ]

    all_passed = True

    for val1, val2, desc in test_cases:
        norm1 = diff_engine._normalize_value(val1)
        norm2 = diff_engine._normalize_value(val2)
        match = norm1 == norm2

        icon = "✅" if match else "❌"
        print(f"{icon} {desc}")
        print(f"   {val1} ({type(val1).__name__}) → {norm1}")
        print(f"   {val2} ({type(val2).__name__}) → {norm2}")
        if not match:
            all_passed = False

    print("\n2. Testing with real data:")
    print("=" * 80)

    # Load odf_rankings data
    source_name = 'ba_glf_2018'
    json_records, results = _report_comparison(
        loader, mongo_service, diff_engine, 'odf_rankings', source_name
    )

    # A shortfall here is reported as a warning only; it does not affect the
    # final pass/fail banner.
    if results['summary']['exact_match_count'] == len(json_records):
        print(f"\n✅ All {len(json_records)} records matched exactly!")
        print("   Boolean and numeric normalization is working correctly!")
    else:
        print("\n⚠️  Not all records matched")

    # Test with people data
    print("\n3. Testing with people data:")
    print("=" * 80)

    json_records, results = _report_comparison(
        loader, mongo_service, diff_engine, 'people', source_name
    )

    # Check for boolean fields in duplicates
    if results['duplicates']:
        print("\n  Analyzing duplicates for boolean fields...")

        # Only the first duplicate is inspected.
        dup = results['duplicates'][0]
        json_rec = dup['json_record']
        differences = dup.get('comparison', {}).get('differences', {})

        # A field counts as boolean if its JSON value is a real bool or one of
        # the strings 'True' / 'False'.
        bool_fields = [
            field
            for field, value in json_rec.items()
            if isinstance(value, bool)
            or (isinstance(value, str) and value in ('True', 'False'))
        ]

        print(f"  Boolean fields in record: {bool_fields}")

        # Check if any are marked as different
        bool_diffs = [f for f in bool_fields if f in differences]

        if bool_diffs:
            # This is a genuine failure of the fix, so it must be reflected in
            # the final summary rather than being followed by "ALL TESTS PASSED".
            all_passed = False
            print("\n  ❌ Boolean fields incorrectly marked as different:")
            for field in bool_diffs:
                diff = differences[field]
                print(f"    {field}:")
                print(f"      JSON: {diff['json_value']} (type: {type(diff['json_value']).__name__})")
                print(f"      DB:   {diff['db_value']} (type: {type(diff['db_value']).__name__})")
        else:
            print("\n  ✅ No boolean fields marked as different!")

    print("\n" + "=" * 80)

    if all_passed:
        print("\n🎉 ALL TESTS PASSED!")
    else:
        print("\n⚠️  Some tests failed")

    print("=" * 80)


if __name__ == "__main__":
    # Script entry point: run the check and report any unexpected error.
    try:
        test_boolean_fix()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        # Exit with a non-zero status so shells and CI can detect the failure;
        # previously the process exited with 0 even after an unhandled error.
        raise SystemExit(1) from None
