o
    $&]iM                     @  s  U d dl mZ d dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d d	lmZ erd d
lmZm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m  m*Z+ d dl,m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 d dlm:Z:m;Z; d dl<m=Z= d dl>m?Z? d dlm@Z@ d dlmAZAmBZBmCZC dZDdeEd< 	 G dd ded eZFdS )    )annotations)TYPE_CHECKINGAnyN)add_row_indexevaluate_exprs)native_to_narwhals_dtypeselect_columns_by_name)assert_never)	ImplementationValidateBackendVersion_remap_full_join_keyscheck_column_names_are_uniquecheck_columns_existgenerate_temporary_column_namenot_implementedparse_columns_to_drop
zip_strict)MultiOutputExpressionError)CompliantLazyFrame)IterableIteratorMappingSequence)BytesIO)Path)
ModuleType)Self	TypeAliasTypeIs)CompliantDataFrameAny)DaskExprDaskLazyGroupByDaskNamespace)_EagerAllowedImpl)Version_LimitedContext)	LazyFrame)DType)ColumnNotFoundError)AsofJoinStrategyJoinStrategyUniqueKeepStrategyr   r   
Incompletec                   @  s  e Zd ZejZdddddZedddZe	dddZ
dddZdddZddd Zdd!d"Zdd#d$Zdd'd(Zdd,d-Zdd/d0Zdd3d4Zdd6d7Zdd<d=Zedd?d@ZddBdCZddFdGZddHdIZddJdKZddMdNZeddPdQZddRdSZddVdWZddZd[Z dd^d_Z!ddbdcZ"ddfdgZ#ddldmZ$ddqdrZ%ddwdxZ&ddydzZ'dd{d|Z(dd}d~Z)dddZ*dddZ+dddZ,dddZ-dddZ.dddZ/dddZ0dddZ1dddZ2dddZ3e4 Z5dS )DaskLazyFrameF)validate_backend_versionnative_dataframedd.DataFrameversionr&   r0   boolreturnNonec                C  s,   || _ || _d | _d | _|r|   d S d S N)_native_frame_version_cached_schema_cached_columns_validate_backend_version)selfr1   r3   r0    r>   V/var/www/html/IGF-ODF-V3/venv/lib/python3.10/site-packages/narwhals/_dask/dataframe.py__init__:   s   zDaskLazyFrame.__init__objdd.DataFrame | AnyTypeIs[dd.DataFrame]c                 C  s   t | tjS r7   )
isinstancedd	DataFrame)rA   r>   r>   r?   
_is_nativeH   s   zDaskLazyFrame._is_nativedatacontextr'   r   c               C  s   | ||j dS Nr3   )r9   )clsrH   rI   r>   r>   r?   from_nativeL   s   zDaskLazyFrame.from_nativeLazyFrame[dd.DataFrame]c                 C  s   | j j| ddS )Nlazy)level)r9   	lazyframer=   r>   r>   r?   to_narwhalsP      zDaskLazyFrame.to_narwhalsr   c                 C  s.   | j tju r| j  S dt| j  }t|)NzExpected dask, got: )_implementationr
   DASKto_native_namespacetypeAssertionError)r=   msgr>   r>   r?   __native_namespace__S   s   
z"DaskLazyFrame.__native_namespace__r$   c                 C  s   ddl m} || jdS )Nr   r#   rK   )narwhals._dask.namespacer$   r9   )r=   r$   r>   r>   r?   __narwhals_namespace__Z   s   z$DaskLazyFrame.__narwhals_namespace__c                 C  s   | S r7   r>   rR   r>   r>   r?   __narwhals_lazyframe___   s   z$DaskLazyFrame.__narwhals_lazyframe__c                 C  s   | j | j|dS rJ   )	__class__native)r=   r3   r>   r>   r?   _with_versionb   rT   zDaskLazyFrame._with_versiondfr   c                 C  s   | j || jdS rJ   )r_   r9   )r=   rb   r>   r>   r?   _with_nativee   rT   zDaskLazyFrame._with_nativesubsetSequence[str]ColumnNotFoundError | Nonec                 C  s   t || jdS )N)	available)r   columns)r=   rd   r>   r>   r?   _check_columns_existh   s   z"DaskLazyFrame._check_columns_existIterator[dx.Series]c                 c  s     | j  D ]\}}|V  qd S r7   )r`   items)r=   _colserr>   r>   r?   _iter_columnsk   s   zDaskLazyFrame._iter_columnsr    	dx.Seriesc                 C  s*   | | }t|dkrd}t||d S )N   z4multi-output expressions not allowed in this contextr   )_calllenr   )r=   rA   resultsrZ   r>   r>   r?   _evaluate_single_output_expro   s
   
z*DaskLazyFrame._evaluate_single_output_exprexprsc                 G  s,   t | g|R  }| | jjdi t|S )Nr>   )r   rc   r`   assigndict)r=   ru   
new_seriesr>   r>   r?   with_columnsv   s   zDaskLazyFrame.with_columnsbackend_EagerAllowedImpl | Nonekwargsr   c           
      K  s   | j jd
i |}|d u s|tju r#ddlm} ||tjd| jddS |tju r=dd l}ddl	m
} |||d| jdS |tju rYdd l}ddlm} ||j|d| jddS d	| }	t|	)Nr   )PandasLikeDataFrameT)implementationr0   r3   validate_column_names)PolarsDataFrame)r0   r3   )ArrowDataFrame)r0   r3   r   zUnsupported `backend` value: r>   )r`   computer
   PANDASnarwhals._pandas_like.dataframer}   r9   POLARSpolarsnarwhals._polars.dataframer   from_pandasPYARROWpyarrownarwhals._arrow.dataframer   Table
ValueError)
r=   rz   r|   resultr}   plr   par   rZ   r>   r>   r?   collectz   s:   



zDaskLazyFrame.collect	list[str]c                 C  s2   | j d u r| jd urt| jn| jj | _ | j S r7   )r;   r:   listschemar`   rh   tolistrR   r>   r>   r?   rh      s   



zDaskLazyFrame.columns	predicatec                 C  s   || d }|  | jj| S )Nr   )rc   r`   loc)r=   r   maskr>   r>   r?   filter   s   zDaskLazyFrame.filtercolumn_namesstrc                 G  s"   | j }t|t|| j}| |S r7   )r`   r   r   rU   rc   )r=   r   rb   r`   r>   r>   r?   simple_select   s   
zDaskLazyFrame.simple_selectc                 G  s2   t | g|R  }tjdd |D dd}| |S )Nc                 S  s   g | ]	\}}| |qS r>   )rename).0namevalr>   r>   r?   
<listcomp>       z+DaskLazyFrame.aggregate.<locals>.<listcomp>rp   axis)r   rE   concatrc   r=   ru   rx   rb   r>   r>   r?   	aggregate   s   
zDaskLazyFrame.aggregatec                 G  sH   t | g|R  }| j}t|jdi t|dd |D | j}| |S )Nc                 S  s   g | ]}|d  qS )r   r>   )r   sr>   r>   r?   r      s    z(DaskLazyFrame.select.<locals>.<listcomp>r>   )r   r`   r   rv   rw   rU   rc   r   r>   r>   r?   select   s   
zDaskLazyFrame.selectSequence[str] | Nonec                 C  sD   |d u r|  | j S |  }|j|j|  dd }| |S )NT)ignore_nulls)rc   r`   dropnar]   any_horizontalcolis_nullr   )r=   rd   plxr   r>   r>   r?   
drop_nulls   s
   
zDaskLazyFrame.drop_nullsdict[str, DType]c                   s2   j d u rjj  fddjjD _ j S )Nc                   s"   i | ]}|t  | jjqS r>   )r   r9   rU   r   r   native_dtypesr=   r>   r?   
<dictcomp>   s    z(DaskLazyFrame.schema.<locals>.<dictcomp>)r:   r`   dtypesrh   rR   r>   r   r?   r      s   
zDaskLazyFrame.schemac                 C  s   | j S r7   r   rR   r>   r>   r?   collect_schema   s   zDaskLazyFrame.collect_schemarh   strictc                C  s"   t | ||d}| | jj|dS )Nr   rh   )r   rc   r`   drop)r=   rh   r   to_dropr>   r>   r?   r      s   zDaskLazyFrame.dropr   order_byc                 C  s   |d u r|  t| j|S |  }| j}|jdd d| }||j	ddj
g |d|jdd d  }| |||j| S )Nrp   dtypeF)reverse)partition_byr   )rc   r   r`   r]   rh   litalias	broadcastr   cum_sumoverry   r   )r=   r   r   r   rh   
const_exprrow_index_exprr>   r>   r?   with_row_index   s   zDaskLazyFrame.with_row_indexmappingMapping[str, str]c                 C  s   |  | jj|dS )Nr   )rc   r`   r   )r=   r   r>   r>   r?   r      s   zDaskLazyFrame.renamenintc                 C  s   |  | jj|dddS )NFr   r   npartitions)rc   r`   head)r=   r   r>   r>   r?   r      s   zDaskLazyFrame.headkeepr-   c                C  s   |r|  | }r||dkr>|p| j}td|dd}| j| |}||dk }| j|d}| jj	||dd}nd	d
i
||}	|rS| j|dddj}
n| j}
|
j||	d}| |S )Nnone   count_n_bytesrh   prefixrp   r   inner)onhowanyfirstF)
descending
nulls_last)rd   r   )ri   rh   r   r`   groupbysizer   reset_indexr   mergegetsortdrop_duplicatesrc   )r=   rd   r   r   errortokenrm   uniquer   mapped_keepr`   r>   r>   r?   r      s"   

zDaskLazyFrame.uniquebyr   bool | Sequence[bool]r   c                G  sH   t |tr	| }ndd |D }|rdnd}| | jjt|||dS )Nc                 S  s   g | ]}| qS r>   r>   )r   dr>   r>   r?   r     s    z&DaskLazyFrame.sort.<locals>.<listcomp>lastr   )	ascendingna_position)rD   r4   rc   r`   sort_valuesr   )r=   r   r   r   r   positionr>   r>   r?   r     s   
zDaskLazyFrame.sortkIterable[str]r   c                  s   | j }| j t|}t|tr.t fdd|D r.|r%| |||S | |||S t|tr:|gt	| }| |j
|t|dj|dddS )Nc                 3  s    | ]	} |   V  qd S r7   )
is_numeric)r   xr   r>   r?   	<genexpr>  s    z&DaskLazyFrame.top_k.<locals>.<genexpr>)r   Fr   r   )r`   r   r   rD   r4   allrc   	nsmallestnlargestrr   r   r   )r=   r   r   r   rb   r>   r   r?   top_k  s    
zDaskLazyFrame.top_kotherleft_onright_onsuffixc                C  s&   | j j|ddj|j ||dd|fdS )Nr   rd   r   r    r   r   r   suffixes)r`   r   r   )r=   r   r   r   r  r>   r>   r?   _join_inner+  s   zDaskLazyFrame._join_innerc                  sL    j j|j j|ddd||dfd} fddt||D }|j|dS )	Nr   r  leftr  r   r   r   r  c                   s2   g | ]\}}||kr| j vr|n|  qS r>   r   )r   left_key	right_keyr=   r  r>   r?   r   @  s
    z,DaskLazyFrame._join_left.<locals>.<listcomp>r   )r`   r   r   r   r   )r=   r   r   r   r  result_nativeextrar>   r  r?   
_join_left6  s   zDaskLazyFrame._join_leftc                  s   | j }t|| }|j j|d}t|j t| }|t|  jdd}	||  jdd}
t	|j
| j|}||
 j fdd|D d}||	  j||
  ||dd fd}tj|||	 |gd	dd
S )Nr   rp   r   c                   s   i | ]	}||   qS r>   r>   r   r  r>   r?   r   X  r   z,DaskLazyFrame._join_full.<locals>.<dictcomp>outerr  r  r   )r   join)r`   r   r   r   rh   r   valuesisnar   setintersection
differencer   rE   r   )r=   r   r   r   r  self_nativeright_on_mapperother_nativeright_suffixedleft_null_maskright_null_mask	to_renameright_null_rowsjoin_resultr>   r  r?   
_join_fullG  s.   

zDaskLazyFrame._join_fullc                C  s`   t dg | j|jR dd}| jjd	i |dij|jjd	i |did||d|fdj|dS )
Nr   cross_join_key_r   r   r   r  r  r   r>   )r   rh   r`   rv   r   r   )r=   r   r  	key_tokenr>   r>   r?   _join_crossh  s   zDaskLazyFrame._join_crossc                C  s<   | j |t|tt||d}| jj|ddj|d||dS )Nr   columns_to_selectcolumns_mappingr   r  r   )r   r   r   )_join_filter_renamer   rw   zipr`   r   r   )r=   r   r   r   r  r>   r>   r?   
_join_semix  s   zDaskLazyFrame._join_semic                C  st   t dg | j|jR dd}| j|t|tt||d}| jj|j|ddd|||d}||| d	k j	|gd
S )Nr   join_indicator_r   r$  r   r  r  )r   	indicatorr   r   	left_onlyr   )
r   rh   r'  r   rw   r(  r`   r   r   r   )r=   r   r   r   indicator_tokenr  rb   r>   r>   r?   
_join_anti  s    zDaskLazyFrame._join_antir%  r&  dict[str, str]c                 C  s    |j }t||| jj|d S )zHelper function to avoid creating extra columns and row duplication.

        Used in `"anti"` and `"semi`" join's.

        Notice that a native object is returned.
        r   )r`   r   rU   r   r   )r=   r   r%  r&  r  r>   r>   r?   r'    s   	z!DaskLazyFrame._join_filter_renamer   r,   c                C  s   |dkr| j ||d}nU|d u s|d u rt|||dkr'| j||||d}n:|dkr4| j|||d}n-|dkrA| j|||d}n |dkrO| j||||d}n|d	kr]| j||||d}nt| | |S )
Ncross)r   r  r   )r   r   r   r  anti)r   r   r   semir  full)	r#  r   r  r.  r)  r  r   r	   rc   )r=   r   r   r   r   r  r   r>   r>   r?   r    s,   	

zDaskLazyFrame.joinby_leftby_rightstrategyr+   c          	      C  s0   |   }| |j| j|j|||||d|fdS )Nr  )r   r   left_byright_by	directionr  )r[   rc   
merge_asofr`   )	r=   r   r   r   r4  r5  r6  r  r   r>   r>   r?   	join_asof  s   zDaskLazyFrame.join_asofkeys"Sequence[str] | Sequence[DaskExpr]drop_null_keysr"   c                C  s   ddl m} || ||dS )Nr   r!   )r>  )narwhals._dask.group_byr"   )r=   r<  r>  r"   r>   r>   r?   group_by  s   zDaskLazyFrame.group_byc                 C  s6   | j }|j}|dkr| | j j|ddS d}t|)Nrp   F)r   r   zL`LazyFrame.tail` is not supported for Dask backend with multiple partitions.)r`   r   rc   tailNotImplementedError)r=   r   native_framen_partitionsrZ   r>   r>   r?   rA    s   zDaskLazyFrame.tailoffsetc                 C  s   t d| jdd}|  }|j|d d }|j|d d }|jdd d }| j|d d|||k||| | |k@ j|gddS )	Nr   
row_index_r   r   r   )r   Fr   )	r   rh   r]   r   r   r   r   r   r   )r=   r   rE  row_index_tokenr   offset_exprn_expr	zero_exprr>   r>   r?   gather_every  s    zDaskLazyFrame.gather_everyr   indexvariable_name
value_namec                 C  s   |  | jj||||dS )N)id_vars
value_varsvar_namerN  )rc   r`   melt)r=   r   rL  rM  rN  r>   r>   r?   unpivot  s   zDaskLazyFrame.unpivotfilestr | Path | BytesIOc                 C  s   | j | d S r7   )r`   
to_parquet)r=   rT  r>   r>   r?   sink_parquet  rT   zDaskLazyFrame.sink_parquetN)r1   r2   r3   r&   r0   r4   r5   r6   )rA   rB   r5   rC   )rH   r2   rI   r'   r5   r   )r5   rN   )r5   r   )r5   r$   )r5   r   )r3   r&   r5   r   )rb   r   r5   r   )rd   re   r5   rf   )r5   rj   )rA   r    r5   ro   )ru   r    r5   r   )rz   r{   r|   r   r5   r   )r5   r   )r   r    r5   r   )r   r   r5   r   )rd   r   r5   r   )r5   r   )rh   re   r   r4   r5   r   )r   r   r   r   r5   r   )r   r   r5   r   )r   r   r5   r   )rd   r   r   r-   r   r   r5   r   )r   r   r   r   r   r4   r5   r   )r   r   r   r   r   r   r5   r   )
r   r   r   re   r   re   r  r   r5   r2   )r   r   r  r   r5   r2   )r   r   r   re   r   re   r5   r2   )r   r   r%  r   r&  r/  r5   r2   )r   r   r   r,   r   r   r   r   r  r   r5   r   )r   r   r   r   r   r   r4  r   r5  r   r6  r+   r  r   r5   r   )r<  r=  r>  r4   r5   r"   )r   r   rE  r   r5   r   )
r   r   rL  r   rM  r   rN  r   r5   r   )rT  rU  r5   r6   )6__name__
__module____qualname__r
   rV   rU   r@   staticmethodrG   classmethodrM   rS   r[   r]   r^   ra   rc   ri   rn   rt   ry   r   propertyrh   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r#  r)  r.  r'  r  r;  r@  rA  rK  rS  rW  r   exploder>   r>   r>   r?   r/   4   sf    










*
	

















!




#





r/   )r    r2   rN   )G
__future__r   typingr   r   dask.dataframe	dataframerE   narwhals._dask.utilsr   r   narwhals._pandas_like.utilsr   r   narwhals._typing_compatr	   narwhals._utilsr
   r   r   r   r   r   r   r   r   narwhals.exceptionsr   narwhals.typingr   collections.abcr   r   r   r   ior   pathlibr   typesr   dask.dataframe.dask_expr	dask_exprdxtyping_extensionsr   r   r   narwhals._compliant.typingr   narwhals._dask.exprr    r?  r"   r\   r$   narwhals._typingr%   r&   r'   narwhals.dataframer(   narwhals.dtypesr)   r*   r+   r,   r-   r.   __annotations__r/   r>   r>   r>   r?   <module>   s@    ,
