"""
Debug New Records Issue

Check why records are showing as new instead of exact matches.
"""

from services.mongo import MongoService
from services.json_loader import JSONLoader
from services.diff_engine import DiffEngine
from services.settings_service import SettingsService

def _strip_private_fields(record):
    """Return a copy of *record* without keys that start with an underscore."""
    return {key: value for key, value in record.items() if not key.startswith('_')}


def _record_hash(record):
    """Return the SHA-256 hex digest of *record* dumped as key-sorted JSON.

    Values that are not JSON-serializable are stringified (``default=str``).
    """
    import hashlib
    import json as json_lib

    payload = json_lib.dumps(record, sort_keys=True, default=str)
    return hashlib.sha256(payload.encode()).hexdigest()


def _print_field_sample(title, keys, record, limit=10):
    """Print up to *limit* of *keys* (in sorted order) with their values in *record*.

    Prints nothing when *keys* is empty. The keys are sorted BEFORE the
    sample is cut, so the same fields are shown on every run.
    """
    if not keys:
        return
    print(f"\n   {title} ({len(keys)}):")
    for key in sorted(keys)[:limit]:
        print(f"     - {key}: {record[key]}")


def _split_differences(json_fields, db_fields):
    """Compare two flat records field by field.

    Returns:
        A ``(value_diffs, type_only_diffs)`` pair of lists, both in sorted
        key order. ``value_diffs`` holds ``(key, json_val, db_val)`` tuples;
        ``type_only_diffs`` holds
        ``(key, json_val, db_val, json_type_name, db_type_name)`` tuples for
        fields whose string forms are equal but whose types differ.
    """
    value_diffs = []
    type_only_diffs = []

    for key in sorted(json_fields.keys() | db_fields.keys()):
        # A field absent on one side reads as None here.
        json_val = json_fields.get(key)
        db_val = db_fields.get(key)

        if json_val != db_val:
            # Same text, different type (e.g. 1 vs "1") is reported separately.
            if type(json_val) is not type(db_val) and str(json_val) == str(db_val):
                type_only_diffs.append(
                    (key, json_val, db_val, type(json_val).__name__, type(db_val).__name__)
                )
            else:
                value_diffs.append((key, json_val, db_val))

    return value_diffs, type_only_diffs


def _report_differences(json_fields, db_fields, limit=10):
    """Print up to *limit* type-only and value differences between two records."""
    value_diffs, type_only_diffs = _split_differences(json_fields, db_fields)

    if type_only_diffs:
        print(f"\n   ⚠️  Type-only differences ({len(type_only_diffs)} fields):")
        print("   (Values are same but types differ - this causes hash mismatch)")
        for key, json_val, db_val, json_type, db_type in type_only_diffs[:limit]:
            print(f"     {key}: {json_val} ({json_type}) vs {db_val} ({db_type})")

    if value_diffs:
        print(f"\n   Value differences ({len(value_diffs)} fields):")
        for key, json_val, db_val in value_diffs[:limit]:
            print(f"     {key}:")
            print(f"       JSON: {json_val} (type: {type(json_val).__name__})")
            print(f"       DB:   {db_val} (type: {type(db_val).__name__})")


def _compare_with_db_record(new_rec, db_match, matching_keys):
    """Print a field-level and hash-level comparison of a JSON record and its DB match."""
    print("   ✅ Found matching DB record!")
    print("   DB record business key values:")
    # Imported lazily: only needed once a DB match has been found.
    from services.json_flattener import JSONFlattener
    flat_db = JSONFlattener().flatten(db_match)

    for key in matching_keys:
        print(f"     {key}: {flat_db.get(key)}")

    # Underscore-prefixed fields are excluded from the comparison.
    json_clean = _strip_private_fields(new_rec)
    db_clean = _strip_private_fields(flat_db)

    print("\n   Field counts:")
    print(f"     JSON fields: {len(json_clean)}")
    print(f"     DB fields:   {len(db_clean)}")

    # Fields present on one side only
    _print_field_sample("Fields only in JSON", json_clean.keys() - db_clean.keys(), json_clean)
    _print_field_sample("Fields only in DB", db_clean.keys() - json_clean.keys(), db_clean)

    json_hash = _record_hash(json_clean)
    db_hash = _record_hash(db_clean)

    print("\n   Hash comparison:")
    print(f"     JSON hash: {json_hash[:16]}...")
    print(f"     DB hash:   {db_hash[:16]}...")
    print(f"     Match: {json_hash == db_hash}")

    if json_hash != db_hash:
        print("\n   ⚠️  Hashes don't match - records have different data")
        print("   This is why it shows as 'new' instead of 'exact match'")
        _report_differences(json_clean, db_clean)


def _inspect_new_record(new_rec_raw, matching_keys, collection):
    """Look up the DB counterpart of one 'new' record and print a comparison."""
    print("\n6. Sample 'new' record (should be a match):")

    # Results may wrap the record as {'json_record': {...}}; unwrap if so.
    if isinstance(new_rec_raw, dict) and 'json_record' in new_rec_raw:
        new_rec = new_rec_raw['json_record']
        print("   (Record is wrapped in json_record)")
    else:
        new_rec = new_rec_raw
        print("   (Record is plain)")

    print("\n   JSON record business key values:")
    for key in matching_keys:
        print(f"     {key}: {new_rec.get(key)}")

    # Find corresponding DB record.
    # NOTE(review): the lookup fields are hard-coded and are not derived from
    # matching_keys - confirm they are the intended keys for this collection.
    # NOTE(review): if `collection` is a pymongo Collection, a None value in
    # the filter also matches documents where that field is missing - verify.
    print("\n   Looking for matching DB record...")
    lookup_fields = ('competitor.code', 'odf_body.competition_code', 'rank')
    db_match = collection.find_one({field: new_rec.get(field) for field in lookup_fields})

    if not db_match:
        print("   ❌ No matching DB record found!")
        print("   This means business keys are not matching correctly")
        return

    _compare_with_db_record(new_rec, db_match, matching_keys)


def debug_new_records():
    """Debug why records show as new.

    Loads the 'odf_rankings' collection from the 'ba_glf_2018' JSON source
    and from MongoDB, runs the diff engine over both, and prints the summary
    counts and the business keys used. If any record is classified as new,
    the first one is looked up directly in MongoDB and compared field by
    field (and by hash) against the stored document.

    Returns None; all results are printed. Returns early, after printing an
    error, when the MongoDB connection cannot be established.
    """
    banner = "=" * 80
    print(banner)
    print("Debug New Records Issue")
    print(banner)

    # Initialize services
    settings_service = SettingsService()
    config = settings_service.config

    mongo_service = MongoService()
    if not mongo_service.connect():
        print("❌ Failed to connect")
        return

    loader = JSONLoader(
        config['json_sources']['base_path'],
        config['collections']
    )

    diff_engine = DiffEngine(config['collections'])

    # Load data
    collection_name = 'odf_rankings'
    source_name = 'ba_glf_2018'

    print(f"\n1. Loading JSON from {source_name}...")
    json_records = loader.load_collection_from_source(collection_name, source_name)
    print(f"   Loaded {len(json_records)} JSON records")

    print("\n2. Loading MongoDB data...")
    collection = mongo_service.get_collection(collection_name)
    db_records = list(collection.find())
    print(f"   Loaded {len(db_records)} MongoDB records")

    # Run comparison
    print("\n3. Running comparison...")
    results = diff_engine.compare_collections(
        collection_name,
        json_records,
        db_records
    )

    summary = results['summary']
    print("\n4. Results:")
    print(f"   Exact matches: {summary['exact_match_count']}")
    print(f"   Duplicates: {summary['duplicate_count']}")
    print(f"   New records: {summary['new_record_count']}")

    # Check business keys
    print(f"\n5. Business keys used: {results['matching_keys']}")

    # Sample comparison: inspect only the first record reported as new.
    if results['new_records']:
        _inspect_new_record(results['new_records'][0], results['matching_keys'], collection)

    print("\n" + banner)

if __name__ == "__main__":
    import traceback

    # Top-level boundary for a debug script: report any failure with a short
    # message followed by the full traceback.
    try:
        debug_new_records()
    except Exception as exc:
        print(f"\n❌ Error: {exc}")
        traceback.print_exc()
