o
    ג[i+                     @   sN   d Z ddlmZmZmZmZ ddlZddlmZ ddl	m
Z
 G dd dZdS )a5  
Import Preview Engine

This module builds editable preview tables for import decisions.
It tracks user decisions per row (import/skip) without performing any writes.

Key principles:
- NO database writes
- User decisions tracked in memory
- Field-level editing supported
- All changes reversible until apply
    )ListDictAnyOptionalN)datetime)JSONFlattenerc                   @   s2  e Zd ZdZdd Z	d deeeef  dede	j
fdd	Zd
eeeef  de	j
fddZde	j
deeee f fddZ	d!de	j
deeeef  dee dedeeeef  f
ddZdeeee f dedeeef fddZde	j
de	j
fddZde	j
de	j
fddZde	j
deeef fddZdS )"ImportPreviewEnginez
    Service for building and managing import preview data.
    
    Allows users to review, edit, and make decisions about records
    before importing them to MongoDB.
    c                 C   s   t  | _dS )z!Initialize import preview engine.N)r   	flattener)self r   :/var/www/html/IGF-ODF-V3/services/import_preview_engine.py__init__   s   zImportPreviewEngine.__init__newrecordsrecord_typereturnc                 C   s  |st  S g }t|D ]l\}}|dkr|di  }nN|dkrP|di }|di }|di }	| }|dd|d< |d	d
|d< |	dd|d< n|dkri|di  }|di d	d
|d< ni }||d< d|d< || qt |}
|
t |
d}
|
S )a  
        Build a DataFrame for preview and editing.
        
        Args:
            records: List of records to preview
            record_type: Type of records ('new', 'duplicate', 'exact')
            
        Returns:
            DataFrame with preview data
        r   json_record	duplicate	db_record
comparison
match_typeunknown_match_type_id _db_iddifference_countr   _difference_countexact
_row_indexpending_import_decisionN)pd	DataFrame	enumerategetcopyappendwherenotnull)r
   r   r   preview_datairecordrow_datajson_recdb_recr   dfr   r   r   build_preview_dataframe    s0   
z+ImportPreviewEngine.build_preview_dataframe
duplicatesc                 C   s$  |st  S g }t|D ]u\}}|di }|di }|di }|di }t| t| B }	|	D ]J}
|
dkr=q6||
}||
}|
|v rR||
 d }nd}|dur\t|nd}|durft|nd}|||d	d
t|dd|
|||dd q6qt |}|t 	|d}|S )z
        Build a side-by-side comparison DataFrame for duplicates.
        
        Args:
            duplicates: List of duplicate records with comparison data
            
        Returns:
            DataFrame with JSON vs DB comparison
        r   r   r   differencesr   statusmatchNr   r   r   r    )r   r   r   field
json_valuedb_valuer4   r!   )
r"   r#   r$   r%   setkeysstrr'   r(   r)   )r
   r2   comparison_datar+   dupr.   r/   r   r3   
all_fieldsr6   r7   r8   r4   json_value_strdb_value_strr0   r   r   r   $build_duplicate_comparison_dataframe]   sB   




z8ImportPreviewEngine.build_duplicate_comparison_dataframe
preview_dfc                 C   s   |j sd|jvrg g g dS g g g d}| D ]'\}}|dd}|d|}||v r6|| t| q|d t| q|S )z
        Extract user import decisions from preview DataFrame.
        
        Args:
            preview_df: DataFrame with user decisions
            
        Returns:
            Dictionary with lists of row indices by decision
        r!   )importskipr    r    r   )emptycolumnsiterrowsr%   r'   int)r
   rB   	decisionsidxrowdecision	row_indexr   r   r   extract_import_decisions   s    z,ImportPreviewEngine.extract_import_decisionsForiginal_recordsimport_indices	unflattenc                 C   s   g }|D ]@}|t |krq|j| }| }g d}	|	D ]}
||
d qdd | D }dd | D }|r?| j|}|| q|S )a  
        Prepare records for import based on user decisions and edits.
        
        Args:
            preview_df: DataFrame with user edits (flattened columns)
            original_records: Original record list
            import_indices: List of row indices to import
            unflatten: If True, convert flattened records back to nested structure
            
        Returns:
            List of records ready for import (flattened or nested)
        )r   r!   r   r   r   _source_file_source_path_source_competition_target_collection_validation_passed_document_type_expected_typesNc                 S   s    i | ]\}}| d s||qS )_)
startswith.0kvr   r   r   
<dictcomp>        zBImportPreviewEngine.prepare_records_for_import.<locals>.<dictcomp>c                 S   s    i | ]\}}t |r||qS r   )r"   notnar[   r   r   r   r_      r`   )lenilocto_dictpopitemsr	   rQ   r'   )r
   rB   rO   rP   rQ   records_to_importrJ   rK   r,   metadata_fieldsr6   r   r   r   prepare_records_for_import   s   
z.ImportPreviewEngine.prepare_records_for_importrI   c              	   C   sH   |t dd | D t|dg t|dg t|dg |dS )z
        Build a summary of import decisions.
        
        Args:
            decisions: Dictionary with decision lists
            record_type: Type of records
            
        Returns:
            Summary dictionary
        c                 s   s    | ]}t |V  qd S )N)rb   )r\   r^   r   r   r   	<genexpr>  s    z;ImportPreviewEngine.build_import_summary.<locals>.<genexpr>rC   rD   r    )r   total_records	to_importto_skipr    rI   )sumvaluesrb   r%   )r
   rI   r   r   r   r   build_import_summary   s   z(ImportPreviewEngine.build_import_summaryc                 C      d|j v r	d|d< |S )z
        Mark all records in preview for import.
        
        Args:
            preview_df: Preview DataFrame
            
        Returns:
            Updated DataFrame
        r!   rC   rF   r
   rB   r   r   r   mark_all_for_import     
z'ImportPreviewEngine.mark_all_for_importc                 C   rq   )z
        Mark all records in preview to skip.
        
        Args:
            preview_df: Preview DataFrame
            
        Returns:
            Updated DataFrame
        r!   rD   rr   rs   r   r   r   mark_all_for_skip(  ru   z%ImportPreviewEngine.mark_all_for_skipc                    s   |j ri S g d  fdd|jD }i }|D ]/}||   }||   }t|t|t|dkr@t|t| d dndd||< q|S )z
        Get statistics about fields in preview data.
        
        Args:
            preview_df: Preview DataFrame
            
        Returns:
            Dictionary with field statistics
        )r   r!   r   r   r   c                    s   g | ]}| vr|qS r   r   )r\   colrh   r   r   
<listcomp>S  s    z<ImportPreviewEngine.get_field_statistics.<locals>.<listcomp>r   d      )non_null_count
null_countcoverage_percentage)rE   rF   ra   rn   isnarH   rb   round)r
   rB   data_fieldsfield_statsr6   r|   r}   r   rx   r   get_field_statistics:  s   $z(ImportPreviewEngine.get_field_statisticsN)r   )F)__name__
__module____qualname____doc__r   r   r   r;   r   r"   r#   r1   rA   rH   rN   boolri   rp   rt   rv   r   r   r   r   r   r      sn    
=
<
*
@




r   )r   typingr   r   r   r   pandasr"   r   services.json_flattenerr   r   r   r   r   r   <module>   s    