"""
JSON Loader Service

This module loads JSON records from the ODF directory structure.
It respects folder hierarchy and extracts records relevant to selected collections.

Key principles:
- NO normalization - preserve all fields as-is
- NO field removal - keep all keys
- Support multiple JSON files per collection
- Respect ODF folder structure
"""

import json
import os
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from services.json_flattener import JSONFlattener


class JSONLoader:
    """
    Service for loading JSON records from ODF directory structure.

    Loads JSON files while preserving all original fields and structure.
    All mappings are read from configuration.

    Validation is folder-based, not document_type-based: a file is valid
    for a collection exactly when it lives in that collection's configured
    ``json_folder``. No hardcoded document_type mapping is maintained, so
    the loader stays independent of specific document_type values.
    """

    def __init__(self, base_path: str, collections_config: Dict[str, Any]):
        """
        Initialize JSON loader with base path and collections config.

        Args:
            base_path: Base path to JSON files (e.g., 'idf-json-sample')
            collections_config: Collections configuration from app_config.yaml
        """
        self.base_path = Path(base_path)
        self.collections_config = collections_config
        self.flattener = JSONFlattener()

    def get_available_sources(self) -> List[str]:
        """
        Get list of available JSON source directories.

        Returns:
            Sorted list of directory names (e.g., ['paris_2024', 'tokyo_2020']),
            or an empty list if the base path does not exist.
        """
        if not self.base_path.exists():
            return []
        return sorted(item.name for item in self.base_path.iterdir() if item.is_dir())

    def get_collection_folder(self, collection_name: str) -> Optional[str]:
        """
        Get the ODF folder name for a collection from config.

        Args:
            collection_name: Name of the collection

        Returns:
            Folder name or None if not configured
        """
        return self.collections_config.get(collection_name, {}).get('json_folder')

    def get_expected_document_types(self, collection_name: str) -> List[str]:
        """
        Get expected document types for a collection.

        NOTE: With folder-based validation, specific document types are no
        longer tracked. This method is kept for interface compatibility and
        always returns an empty list.

        Args:
            collection_name: Name of the collection

        Returns:
            Empty list (validation is folder-based, not type-based)
        """
        return []

    def load_json_file(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """
        Load and parse a single JSON file.

        Args:
            file_path: Path to JSON file

        Returns:
            Parsed JSON data or None if the file cannot be read or parsed
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: covers
            # json.JSONDecodeError and UnicodeDecodeError (both subclasses).
            # Deliberately narrow so genuine programming errors still surface.
            print(f"Error loading {file_path}: {e}")
            return None

    def validate_document_type(self, json_data: Dict[str, Any], collection_name: str, file_path: Path) -> Tuple[bool, str]:
        """
        Validate that a JSON file is in the correct folder for the target collection.

        Validation is based on folder structure, not on document_type values:
        files in the collection's configured folder are automatically valid.

        Args:
            json_data: Parsed JSON data (unused; kept for interface compatibility)
            collection_name: Target collection name
            file_path: Path to JSON file; its parent directory name is checked

        Returns:
            Tuple of (is_valid, error_message); error_message is '' when valid
        """
        expected_folder = self.get_collection_folder(collection_name)

        if not expected_folder:
            # No folder mapping configured for this collection - skip validation.
            return True, ""

        # File path structure: base_path/source/folder/file.json,
        # so the immediate parent directory is the folder to check.
        actual_folder = file_path.parent.name

        if actual_folder != expected_folder:
            error_msg = (
                f"VALIDATION ERROR: File '{file_path.name}' is in folder '{actual_folder}' "
                f"but collection '{collection_name}' expects files from folder '{expected_folder}'. "
                f"This file should NOT be loaded into this collection."
            )
            return False, error_msg

        # File is in the correct folder - it's valid.
        return True, ""

    def load_collection_from_source(
        self,
        collection_name: str,
        source_name: str
    ) -> List[Dict[str, Any]]:
        """
        Load all JSON records for a collection from a source directory.

        This method:
        1. Finds the appropriate ODF folder
        2. Loads all JSON files in that folder (in sorted order, for
           deterministic results across filesystems)
        3. Flattens ALL nested fields recursively
        4. Returns flattened records with ALL fields preserved

        CRITICAL: NO fields are lost. ALL nested data is flattened.

        Args:
            collection_name: Name of the collection
            source_name: Name of the source directory (e.g., 'tokyo_2020')

        Returns:
            List of flattened records with ALL fields from JSON; empty list
            when the collection has no folder mapping or the folder is absent
        """
        folder_name = self.get_collection_folder(collection_name)

        if folder_name is None:
            return []

        # Build path: base_path / source_name / folder_name
        folder_path = self.base_path / source_name / folder_name

        if not folder_path.exists() or not folder_path.is_dir():
            return []

        all_records: List[Dict[str, Any]] = []
        validation_errors: List[str] = []
        skipped_files: List[str] = []

        # Sorted so record order does not depend on filesystem enumeration order.
        for file_path in sorted(folder_path.glob('*.json')):
            data = self.load_json_file(file_path)

            # Only None signals a load failure; falsy-but-valid JSON payloads
            # (e.g. {} or []) must still be processed.
            if data is None:
                continue

            # CRITICAL: Validate file location matches the collection's folder.
            is_valid, error_msg = self.validate_document_type(data, collection_name, file_path)

            if not is_valid:
                # Folder mismatch - skip this file.
                validation_errors.append(error_msg)
                skipped_files.append(file_path.name)
                print(f"⚠️ SKIPPED: {file_path.name} - Failed folder validation for {collection_name}")
                continue

            # Flatten the JSON structure completely.
            # This extracts ALL fields at ANY depth.
            flattened_records = self.flattener.flatten_json_file(data, extract_arrays=True)

            # Attach provenance metadata to each record.
            for record in flattened_records:
                record['_source_file'] = file_path.name
                record['_source_path'] = str(file_path)
                record['_source_competition'] = source_name
                record['_target_collection'] = collection_name  # CRITICAL: Mark which collection this belongs to
                record['_validation_passed'] = True  # Mark as validated
                all_records.append(record)

        # Report validation results (consumed from logs / UI display).
        if validation_errors:
            print(f"\n⚠️ VALIDATION SUMMARY for {collection_name}:")
            print(f"   - Loaded: {len(all_records)} records")
            print(f"   - Skipped: {len(skipped_files)} files (failed folder validation)")
            for error in validation_errors:
                print(f"   - {error}")

        return all_records

    def count_files_in_source(
        self,
        collection_name: str,
        source_name: str
    ) -> int:
        """
        Count JSON files for a collection in a source directory.

        Args:
            collection_name: Name of the collection
            source_name: Name of the source directory

        Returns:
            Number of JSON files found (0 if unmapped or folder missing)
        """
        folder_name = self.get_collection_folder(collection_name)

        if folder_name is None:
            return 0

        folder_path = self.base_path / source_name / folder_name

        if not folder_path.exists() or not folder_path.is_dir():
            return 0

        # Count lazily instead of materializing the whole listing.
        return sum(1 for _ in folder_path.glob('*.json'))

    def get_validation_info(self, collection_name: str) -> Dict[str, Any]:
        """
        Get validation information for a collection.

        Args:
            collection_name: Name of the collection

        Returns:
            Dictionary with validation rules (expected folder, enabled flag,
            human-readable description)
        """
        folder_name = self.get_collection_folder(collection_name)

        return {
            'collection': collection_name,
            'expected_folder': folder_name,
            'validation_enabled': folder_name is not None,
            'description': f"Only files from folder '{folder_name}/' will be loaded" if folder_name else "No folder validation"
        }

    def get_collection_info(
        self,
        collection_name: str,
        source_name: str
    ) -> Dict[str, Any]:
        """
        Get information about a collection in a source.

        Args:
            collection_name: Name of the collection
            source_name: Name of the source directory

        Returns:
            Dictionary with collection info: support flag, folder, path,
            existence flag, and JSON file count
        """
        folder_name = self.get_collection_folder(collection_name)

        if folder_name is None:
            return {
                'supported': False,
                'folder': None,
                'file_count': 0
            }

        folder_path = self.base_path / source_name / folder_name

        return {
            'supported': True,
            'folder': folder_name,
            'path': str(folder_path),
            'exists': folder_path.exists(),
            'file_count': self.count_files_in_source(collection_name, source_name)
        }
