"""Check people collection and test comparison."""
from services.diff_engine import DiffEngine
from services.json_flattener import JSONFlattener
from services.json_loader import JSONLoader
from services.mongo import MongoService
from services.settings_service import SettingsService

# Resolve the application configuration shared by the loader and diff engine.
settings = SettingsService()
config = settings.config

# Open the MongoDB connection and grab the 'people' collection handle.
mongo = MongoService()
mongo.connect()
people = mongo.get_collection('people')

# Report how many documents exist and how many carry a non-null 'code'.
total_count = people.count_documents({})
print(f"Total records: {total_count}")
coded_filter = {'code': {'$exists': True, '$ne': None}}
coded_count = people.count_documents(coded_filter)
print(f"With 'code': {coded_count}")

# Build the JSON loader and the diff engine from the same collection config.
json_base_path = config['json_sources']['base_path']
collection_config = config['collections']
loader = JSONLoader(json_base_path, collection_config)
engine = DiffEngine(collection_config)

print("\nLoading JSON from tokyo_2020...")
json_records = loader.load_collection_from_source('people', 'tokyo_2020')
print(f"Loaded {len(json_records)} JSON records")

# Pull only the documents that have a 'code' field. This filter does not
# exclude null values, unlike the count printed above.
print("\nLoading MongoDB records with code...")
db_cursor = people.find({'code': {'$exists': True}})
db_records = list(db_cursor)
print(f"Loaded {len(db_records)} MongoDB records with code")

print("\nComparing...")
results = engine.compare_collections('people', json_records, db_records)

# Print the business keys followed by the three summary counters.
print("\nResults:")
print(f"  Business keys: {results['matching_keys']}")
summary = results['summary']
for label, key in (
    ("Exact matches", 'exact_match_count'),
    ("Duplicates", 'duplicate_count'),
    ("New records", 'new_record_count'),
):
    print(f"  {label}: {summary[key]}")

# Check why no matches - compare the 'code' values seen on each side.
#
# A single flattener is created up front (using the file-level JSONFlattener
# import) so it exists even when db_records is empty; previously the import
# lived inside the sample loop and an empty result set caused a NameError
# further down.
flattener = JSONFlattener()

print("\nSample JSON codes:")
for record in json_records[:10]:
    print(f"  {record.get('code')}")

print("\nSample DB codes:")
# Flatten each DB record exactly once and reuse the extracted codes for both
# the sample listing and the overlap computation below.
db_code_values = [flattener.flatten(record).get('code') for record in db_records]
for code in db_code_values[:10]:
    print(f"  {code}")

# Check overlap, ignoring records whose code is missing or otherwise falsy.
all_json_codes = {
    code for code in (record.get('code') for record in json_records) if code
}
all_db_codes = {code for code in db_code_values if code}

overlap = all_json_codes & all_db_codes
print(f"\nJSON codes count: {len(all_json_codes)}")
print(f"DB codes count: {len(all_db_codes)}")
print(f"Overlapping codes: {len(overlap)}")

if overlap:
    # Set iteration order is arbitrary, so this is just an illustrative sample.
    print(f"Sample overlapping: {list(overlap)[:5]}")
