o
    ג[i6                  	   @   s   d Z ddlmZmZmZmZmZ ddlZG dd dZ	ddeee	ef ee f de	d	ee	ef fd
dZ
	ddee	ef de	d	ee	ef fddZdS )aQ  
Universal JSON Flattener

This module provides recursive JSON flattening functionality to convert
deeply nested JSON structures into flat key-value pairs suitable for
tabular display and editing.

Key principles:
- NO data loss - ALL fields at ANY depth are preserved
- NO hardcoded field names - works with ANY JSON structure
- Recursive parsing of nested objects and arrays
- Dot notation for nested paths (e.g., "parent.child.0.field")
- Type preservation for all values

CRITICAL: This is a data integrity critical component.
Every field from source JSON MUST appear in flattened output.
    )DictAnyListUnionOptionalNc                   @   s&  e Zd ZdZd defddZ		d!deeeef e	e f d	ed
e
deeef fddZde	e d	ed
e
deeef fddZdeeef deeef fddZde	eeef  de	e fddZ	d"de	eeef  de
de	eeef  fddZ	d"deeef de
de	eeef  fddZdS )#JSONFlattenera  
    Universal JSON flattener that converts nested structures to flat key-value pairs.
    
    Handles:
    - Nested dictionaries (unlimited depth)
    - Arrays of primitives
    - Arrays of objects
    - Mixed nested structures
    - Null values
    - All primitive types
    .	separatorc                 C   s
   || _ dS )z
        Initialize flattener.
        
        Args:
            separator: Character to use for path separation (default: ".")
        N)r	   )selfr	    r   3/var/www/html/IGF-ODF-V3/services/json_flattener.py__init__$   s   
zJSONFlattener.__init__ Fdata
parent_keypreserve_arraysreturnc                 C   s   i }t |trC| D ]5\}}|r| | j | n|}t |tr,|| ||| qt |tr<|| ||| q|||< q|S t |trT|| ||| |S |r\|||< |S ||d< |S )a  
        Recursively flatten a nested JSON structure.
        
        Args:
            data: Input data (dict or list)
            parent_key: Current path prefix
            preserve_arrays: If True, keep primitive arrays as-is instead of flattening
            
        Returns:
            Flattened dictionary with dot-notation keys
            
        Examples:
            Input: {"a": {"b": {"c": 1}}}
            Output: {"a.b.c": 1}
            
            Input: {"items": [{"id": 1}, {"id": 2}]}
            Output: {"items.0.id": 1, "items.1.id": 2}
            
            Input: {"tags": ["red", "blue"]}
            Output: {"tags.0": "red", "tags.1": "blue"}
            (or {"tags": ["red", "blue"]} if preserve_arrays=True)
        _value)
isinstancedictitemsr	   updateflattenlist_flatten_list)r
   r   r   r   	flattenedkeyvaluenew_keyr   r   r   r   -   s$   




	zJSONFlattener.flattenr   c                 C   s   i }|r|rt dd |D r|||< |S t|D ]1\}}| | j | }t|tr6|| ||| qt|trF|| ||| q|||< q|S )a  
        Flatten a list into indexed keys.
        
        Args:
            items: List to flatten
            parent_key: Current path prefix
            preserve_arrays: If True, keep primitive arrays as-is
            
        Returns:
            Flattened dictionary
        c                 s   s     | ]}t |ttf V  qd S N)r   r   r   ).0itemr   r   r   	<genexpr>{   s    z.JSONFlattener._flatten_list.<locals>.<genexpr>)	all	enumerater	   r   r   r   r   r   r   )r
   r   r   r   r   indexr!   r   r   r   r   r   g   s   


zJSONFlattener._flatten_list	flat_datac                 C   s  i }|  D ]\}}|dr|dv r|||< q|| j}|}t|dd D ]d\}}| re|dkr:||d  nd}	|	rDt|tsD	 t|}
t|tsOg }t	||
kr`|
i  t	||
ksU||
 }q(||vr|d t	|k rw||d  nd}|r| rg ||< ni ||< || }q(|d }| rt|}
t|tsg }t	||
kr|
d t	||
ks|||
< qt|tr|||< q|S )a6  
        Reconstruct nested structure from flattened data.
        
        Args:
            flat_data: Flattened dictionary with dot-notation keys
            
        Returns:
            Nested dictionary/list structure
            
        Note: This is used when writing back to MongoDB or JSON.
        _)_id_source_file_source_path_source_competitionNr      )r   
startswithsplitr	   r$   isdigitr   r   intlenappendr   )r
   r&   resultflat_keyr   partscurrentipartr   r%   	next_part	final_keyr   r   r   	unflatten   sL   


 





zJSONFlattener.unflattenrecordsc                 C   sF   t  }|D ]	}||  qdd |D }tdd |D }|| S )z
        Extract all unique keys from a list of flattened records.
        
        Args:
            records: List of flattened dictionaries
            
        Returns:
            Sorted list of all unique keys
        c                 S   s   g | ]	}| d r|qS r'   r.   r    kr   r   r   
<listcomp>       z.JSONFlattener.get_all_keys.<locals>.<listcomp>c                 S   s   g | ]	}| d s|qS r>   r?   r@   r   r   r   rB      rC   )setr   keyssorted)r
   r=   all_keysrecordmetadata_keys	data_keysr   r   r   get_all_keys   s   
zJSONFlattener.get_all_keysTfill_missingc                 C   sT   |sg S |  |}|s|S g }|D ]}i }|D ]
}||d||< q|| q|S )a,  
        Ensure all records have the same keys (columns).
        
        Args:
            records: List of flattened dictionaries
            fill_missing: If True, add missing keys with None values
            
        Returns:
            List of normalized records with consistent keys
        N)rK   getr3   )r
   r=   rL   rG   
normalizedrH   normalized_recordr   r   r   r   normalize_records   s   
zJSONFlattener.normalize_records	json_dataextract_arraysc                    s|  |s  |gS g }  |}i }| D ]f}| j}t|D ]Y\}}	|	 rz j|d| }
t|	}|d t|k rK j||d d nd}|
|vrUi ||
< |||
 vrai ||
 |< |rn|| ||
 | |< n
|| ||
 | d<  nq!q|rt	|  fddd}t
||  D ]$}|| |  }| D ]\}}|| j s|||< q|| q|S |g}|S )a  
        Flatten a JSON file that may contain arrays of records.
        
        This is specifically designed for ODF JSON files where the actual
        records are nested inside arrays within the structure.
        
        Args:
            json_data: Parsed JSON data
            extract_arrays: If True, extract array items as separate records
            
        Returns:
            List of flattened records
            
        Example:
            Input: {
                "odf_body": {
                    "competition": {
                        "participant": [
                            {"code": 1, "name": "A"},
                            {"code": 2, "name": "B"}
                        ]
                    }
                }
            }
            
            Output: [
                {
                    "odf_body.competition.participant.0.code": 1,
                    "odf_body.competition.participant.0.name": "A"
                },
                {
                    "odf_body.competition.participant.1.code": 2,
                    "odf_body.competition.participant.1.name": "B"
                }
            ]
        Nr-   r   r   c                    s   |   jS r   )countr	   )xr
   r   r   <lambda>d  s    z1JSONFlattener.flatten_json_file.<locals>.<lambda>)r   )r   rE   r/   r	   r$   r0   joinr1   r2   maxrF   copyr   r.   r3   )r
   rQ   rR   r=   flatarray_pathsr   r6   r8   r9   
array_basearray_indexarray_suffixdeepest_arrayr%   rH   r   r   rU   r   flatten_json_file  sD   )
,zJSONFlattener.flatten_json_fileNr   )r   F)T)__name__
__module____qualname____doc__strr   r   r   r   r   boolr   r   r<   rK   rP   r`   r   r   r   r   r      sR    

:

")"I
%
r   r   r   r	   r   c                 C      t |}|| S )z
    Convenience function to flatten JSON data.
    
    Args:
        data: Input JSON data
        separator: Path separator (default: ".")
        
    Returns:
        Flattened dictionary
    )r   r   )r   r	   	flattenerr   r   r   flatten_jsonx     
rj   r&   c                 C   rh   )z
    Convenience function to unflatten data.
    
    Args:
        flat_data: Flattened dictionary
        separator: Path separator (default: ".")
        
    Returns:
        Nested structure
    )r   r<   )r&   r	   ri   r   r   r   unflatten_json  rk   rl   ra   )re   typingr   r   r   r   r   jsonr   rf   rj   rl   r   r   r   r   <module>   s,      e



