o
    ג[i^+                     @   sZ   d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 ddlmZ G dd dZdS )aZ  
JSON Loader Service

This module loads JSON records from the ODF directory structure.
It respects folder hierarchy and extracts records relevant to selected collections.

Key principles:
- NO normalization - preserve all fields as-is
- NO field removal - keep all keys
- Support multiple JSON files per collection
- Respect ODF folder structure
    N)Path)ListDictAnyOptionalTuple)JSONFlattenerc                
   @   s   e Zd ZdZdedeeef fddZdee fddZ	d	ede
e fd
dZd	edee fddZdede
eeef  fddZdeeef d	ededeeef fddZd	ededeeeef  fddZd	ededefddZd	edeeef fddZd	ededeeef fddZdS )
JSONLoaderz
    Service for loading JSON records from ODF directory structure.
    
    Loads JSON files while preserving all original fields and structure.
    All mappings are read from configuration.
    	base_pathcollections_configc                 C   s   t || _|| _t | _dS )z
        Initialize JSON loader with base path and collections config.
        
        Args:
            base_path: Base path to JSON files (e.g., 'idf-json-sample')
            collections_config: Collections configuration from app_config.yaml
        N)r   r
   r   r   	flattener)selfr
   r    r   0/var/www/html/IGF-ODF-V3/services/json_loader.py__init__#   s   
zJSONLoader.__init__returnc                 C   s>   | j  sg S g }| j  D ]}| r||j qt|S )z
        Get list of available JSON source directories.
        
        Returns:
            List of directory names (e.g., ['tokyo_2020', 'paris_2024'])
        )r
   existsiterdiris_dirappendnamesorted)r   sourcesitemr   r   r   get_available_sources/   s   
z JSONLoader.get_available_sourcescollection_namec                 C   s   | j |i }|dS )z
        Get the ODF folder name for a collection from config.
        
        Args:
            collection_name: Name of the collection
            
        Returns:
            Folder name or None if not configured
        json_folderr   get)r   r   collection_configr   r   r   get_collection_folder@   s   

z JSONLoader.get_collection_folderc                 C   s   g S )a  
        Get expected document types for a collection.
        
        NOTE: With folder-based validation, we no longer track specific document types.
        This method now returns an empty list, indicating validation is folder-based.
        
        Args:
            collection_name: Name of the collection
            
        Returns:
            Empty list (validation is now folder-based, not type-based)
        r   )r   r   r   r   r   get_expected_document_typesM   s   z&JSONLoader.get_expected_document_types	file_pathc              
   C   s|   z t |ddd}t|W  d   W S 1 sw   Y  W dS  ty= } ztd| d|  W Y d}~dS d}~ww )z
        Load and parse a single JSON file.
        
        Args:
            file_path: Path to JSON file
            
        Returns:
            Parsed JSON data or None if error
        rzutf-8)encodingNzError loading z: )openjsonload	Exceptionprint)r   r"   fer   r   r   load_json_file\   s   
(zJSONLoader.load_json_file	json_datac              	   C   s\   | j |i }|d}|sdS |jj}||kr,d|j d| d| d| d	}d|fS dS )	ab  
        Validate that JSON file is in the correct folder for the target collection.
        
        NEW APPROACH: Validation based on folder structure, not document_type.
        Files in the correct folder are automatically valid, regardless of document_type.
        This makes the system flexible and independent of specific document type values.
        
        Args:
            json_data: Parsed JSON data
            collection_name: Target collection name
            file_path: Path to JSON file (for validation)
            
        Returns:
            Tuple of (is_valid, error_message)
        r   )T zVALIDATION ERROR: File 'z' is in folder 'z' but collection 'z' expects files from folder 'z7'. This file should NOT be loaded into this collection.F)r   r   parentr   )r   r-   r   r"   r   expected_folderactual_folder	error_msgr   r   r   validate_document_typem   s   
z!JSONLoader.validate_document_typesource_namec                 C   sP  |  |}|du rg S | j| | }| r| sg S g }g }g }|dD ]T}| |}	|	r{| |	||\}
}|
sR|| ||j t	d|j d|  q'| j
j|	dd}|D ]}|j|d< t||d< ||d	< ||d
< d|d< || q\q'|rt	d| d t	dt| d t	dt| d |D ]	}t	d|  q|S )a|  
        Load all JSON records for a collection from a source directory.
        
        This method:
        1. Finds the appropriate ODF folder
        2. Loads all JSON files in that folder
        3. Flattens ALL nested fields recursively
        4. Returns flattened records with ALL fields preserved
        
        CRITICAL: NO fields are lost. ALL nested data is flattened.
        
        Args:
            collection_name: Name of the collection
            source_name: Name of the source directory (e.g., 'tokyo_2020')
            
        Returns:
            List of flattened records with ALL fields from JSON
        N*.jsonu   ⚠️ SKIPPED: z - Wrong document type for T)extract_arrays_source_file_source_path_source_competition_target_collection_validation_passedu   
⚠️ VALIDATION SUMMARY for :z   - Loaded: z recordsz   - Skipped: z files (wrong document type)z   - )r    r
   r   r   globr,   r3   r   r   r)   r   flatten_json_filestrlen)r   r   r4   folder_namefolder_pathall_recordsvalidation_errorsskipped_filesr"   datais_validr2   flattened_recordsrecorderrorr   r   r   load_collection_from_source   sD   



z&JSONLoader.load_collection_from_sourcec                 C   sJ   |  |}|du rdS | j| | }| r| sdS tt|dS )a  
        Count JSON files for a collection in a source directory.
        
        Args:
            collection_name: Name of the collection
            source_name: Name of the source directory
            
        Returns:
            Number of JSON files found
        Nr   r5   )r    r
   r   r   r@   listr=   r   r   r4   rA   rB   r   r   r   count_files_in_source   s   
z JSONLoader.count_files_in_sourcec                 C   s>   | j |i }|d}|||du|rd| ddS ddS )z
        Get validation information for a collection.
        
        Args:
            collection_name: Name of the collection
            
        Returns:
            Dictionary with validation rules
        r   NzOnly files from folder 'z/' will be loadedzNo folder validation)
collectionr0   validation_enableddescriptionr   )r   r   r   rA   r   r   r   get_validation_info   s   

zJSONLoader.get_validation_infoc                 C   sL   |  |}|du rddddS | j| | }d|t|| | ||dS )a  
        Get information about a collection in a source.
        
        Args:
            collection_name: Name of the collection
            source_name: Name of the source directory
            
        Returns:
            Dictionary with collection info
        NFr   )	supportedfolder
file_countT)rS   rT   pathr   rU   )r    r
   r?   r   rN   rM   r   r   r   get_collection_info  s   

zJSONLoader.get_collection_infoN)__name__
__module____qualname____doc__r?   r   r   r   r   r   r   r    r!   r   r,   tupleboolr3   rK   intrN   rR   rW   r   r   r   r   r	      s<    *)
L

r	   )r[   r&   ospathlibr   typingr   r   r   r   r   services.json_flattenerr   r	   r   r   r   r   <module>   s    