o
    $&]iQ#                     @  s   d dl mZ d dlZd dlmZmZmZ d dlZd dl	m
Z d dlmZmZ d dlmZ d dlmZ d dlmZmZ erhd dlmZmZmZ d d	lmZ d d
lmZ d dlmZm Z m!Z! d dl"m#Z# d dl$m%Z% G dd ded Z&dS )    )annotationsN)TYPE_CHECKINGAnyClassVar)cast_to_comparable_string_typesextract_py_scalar)EagerGroupBy)!evaluate_output_names_and_aliases)generate_temporary_column_namerequires)IteratorMappingSequence)ArrowDataFrame)	ArrowExpr)AggregateOptionsAggregation
Incomplete)NarwhalsAggregation)UniqueKeepStrategyc                   @  s   e Zd ZU ddddddddd	dd
dddddZded< ddddZded< edZded< edZded< edZ	ded< edZ
ded< edZded< d=d'd(Zd>d.d/Zd?d4d5Zd@d7d8ZdAd:d;Zd<S )BArrowGroupBysummeanapproximate_medianmaxminstddevvariancecountcount_distinctallanyfirstlast)r   r   medianr   r   stdvarlenn_uniquer   r    r!   r"   r#   	any_valuez3ClassVar[Mapping[NarwhalsAggregation, Aggregation]]_REMAP_AGGS)r!   r"   r#   z2ClassVar[Mapping[UniqueKeepStrategy, Aggregation]]_REMAP_UNIQUE)r'   r(   z(ClassVar[frozenset[NarwhalsAggregation]]_OPTION_COUNT_ALL)r   _OPTION_COUNT_VALID)r"   r#   r)   _OPTION_ORDERED)r%   r&   _OPTION_VARIANCE)r!   r    r   _OPTION_SCALARdfr   keys#Sequence[ArrowExpr] | Sequence[str]drop_null_keysboolreturnNonec               C  sR   || _ | j||d\}| _| _|r|| jn|| _t| jj	| j| _
|| _d S )N)r2   )_df_parse_keys_keys_output_key_names
drop_nulls_compliant_framepaTableGroupBy	compliantnative_grouped_drop_null_keys)selfr1   r2   r4   frame rF   V/var/www/html/IGF-ODF-V3/venv/lib/python3.10/site-packages/narwhals/_arrow/group_by.py__init__?   s
   
zArrowGroupBy.__init__groupedpa.TableGroupByexprr   <tuple[pa.TableGroupBy, Aggregation, AggregateOptions | None]c                C  s   d }|  |}| |}|| jv r|d }tj|d}n9|| jv r(tjdd}n-|| jv r4tjdd}n!|| jv r@tj	dd}n|| j
v rU|dd	}| j|||d
\}}|| ||fS )Nddof)rM   r    mode
only_validr   )	min_countignore_nullsF)rR   )
_leaf_name_kwargsr/   pcVarianceOptionsr,   CountOptionsr-   r0   ScalarAggregateOptionsr.   get_ordered_agg_remap_expr_name)rD   rI   rK   optionfunction_namekwargsrM   rR   rF   rF   rG   _configure_aggM   s$   







zArrowGroupBy._configure_aggnamer   rR   (tuple[pa.TableGroupBy, AggregateOptions]c               C  sj   | j j}|dkr|jr| j j}tj||jdd}n|dk r-d| dt|d}t	||t
j|dfS )a  The default behavior of `pyarrow` raises when `first` or `last` are used.

        You'd see an error like:

            ArrowNotImplementedError: Using ordered aggregator in multiple threaded execution is not supported

        We need to **disable** multi-threading to use them, but the ability to do so
        wasn't possible before `14.0.0` ([pyarrow-36709])

        [pyarrow-36709]: https://github.com/apache/arrow/issues/36709
        )   r   F)use_threadszUsing `z]()` in a `group_by().agg(...)` context is only available in 'pyarrow>=14.0.0', found version z3.

See https://github.com/apache/arrow/issues/36709)
skip_nulls)r@   _backend_version_use_threadsrA   r>   r?   r2   r   _unparse_versionNotImplementedErrorrU   rX   )rD   rI   r`   rR   backend_versionrA   msgrF   rF   rG   rZ   c   s   zArrowGroupBy._ordered_aggexprsc                   s  |  | g }| j }| j g | j| jR }| j}|D ]p}t|| j|\}}|j}	t|		 }
t
|
dkrh|
d jdkrFd}t||d  || jd  d || jd dtjddf q| ||\}| |fd	d
|D  |fdd
|D  q||}tt t|D ]\}} | | qt|jt|krt
|jt
|ksd| d|j d}t| fdd
|jD }fdd
|D |}| j|tt| j| jS )N   r   r'   z_Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues_countr   r    rN   c                   s   g | ]	}| d   qS )_rF   .0output_name)r]   rF   rG   
<listcomp>   s    z$ArrowGroupBy.agg.<locals>.<listcomp>c                   s   g | ]}| fqS rF   rF   ro   )r]   r\   rF   rG   rr      s    z"Safety assertion failed, expected z got zH, please report a bug at https://github.com/narwhals-dev/narwhals/issuesc                   s   g | ]	} |  d qS )r   )pop)rp   item)expected_old_names_indicesrF   rG   rr      s    c                   s   g | ]} | qS rF   rF   )rp   i)new_column_namesrF   rG   rr      s    )_ensure_all_simpler:   copyr;   rB   r	   r@   	_metadatalistop_nodes_reversedr'   r`   AssertionErrorappendrU   rW   r_   extend	aggregatecollectionsdefaultdict	enumeratesetcolumn_namesrename_columns_with_nativerenamedictzip)rD   rk   aggsexpected_pyarrow_column_namesexcluderI   rK   output_namesaliasesmdr|   rj   result_simpleidxrt   	index_maprF   )ru   r]   rw   r\   rG   agg   s^   







zArrowGroupBy.agg$Iterator[tuple[Any, ArrowDataFrame]]c           
   	   #  s    t d| jjd}d}| jj t fdd| jD ddi\}}tj}|g ||R d|d	} jd
||d t	|D ]/}| j
 t | ||g}|j| j d
}	tdd |	D |j| jj fV  q=d S )N   )n_bytescolumns__null_token_value__c                 3  s    | ]} | V  qd S NrF   )rp   keytablerF   rG   	<genexpr>       z(ArrowGroupBy.__iter__.<locals>.<genexpr>	separator replace)null_handlingnull_replacementr   )rv   field_columnc                 s  s    | ]}t |V  qd S r   )r   )rp   elrF   rF   rG   r      r   )r
   r@   r   rA   r   r:   rU   binary_join_element_wise
add_columnuniquer   filterequaldropsimple_selectrowtupler8   )
rD   	col_token
null_tokenitseparator_scalar
concat_str
key_valuesvtr   rF   r   rG   __iter__   s:   
zArrowGroupBy.__iter__N)r1   r   r2   r3   r4   r5   r6   r7   )rI   rJ   rK   r   r6   rL   )rI   rJ   r`   r   rR   r5   r6   ra   )rk   r   r6   r   )r6   r   )__name__
__module____qualname__r*   __annotations__r+   	frozensetr,   r-   r.   r/   r0   rH   r_   rZ   r   r   rF   rF   rF   rG   r      sJ   
 



 ;r   )r   r   r   )'
__future__r   r   typingr   r   r   pyarrowr>   pyarrow.computecomputerU   narwhals._arrow.utilsr   r   narwhals._compliantr   narwhals._expression_parsingr	   narwhals._utilsr
   r   collections.abcr   r   r   narwhals._arrow.dataframer   narwhals._arrow.exprr   narwhals._arrow.typingr   r   r   narwhals._compliant.typingr   narwhals.typingr   r   rF   rF   rF   rG   <module>   s"    