o
    $&]i [                     @  s  U d dl mZ d dlmZ d dlmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ d d	lmZmZmZmZmZmZmZmZ d d
l m!Z! erd dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-Z.d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlmAZA d dlBmCZC d dlmDZDmEZE d dlFmGZG d dlHmIZI d dlJmKZKmLZL e2eeeeef ZMeZNdeOd< 	 G d d! d!ed" eZPdS )#    )annotations)reduce)and_)TYPE_CHECKINGAny)issue_warningis_native_spark_like)catch_pyspark_connect_exceptioncatch_pyspark_sql_exceptionevaluate_exprsimport_functionsimport_native_dtypesimport_windownative_to_narwhals_dtype)SQLLazyFrame)ImplementationValidateBackendVersionextend_boolgenerate_temporary_column_namenot_implementedparse_columns_to_dropto_pyarrow_table
zip_strict)InvalidOperationError)IterableIteratorMappingSequence)BytesIO)Path)
ModuleTypeN)Column)BaseDataFrameWindow)Self	TypeAliasTypeIs)CompliantDataFrameAny)SparkLikeExprSparkLikeLazyGroupBySparkLikeNamespace)SparkSession)_EagerAllowedImpl)Version_LimitedContext)	LazyFrame)DType)JoinStrategyUniqueKeepStrategyr'   
Incompletec                   @  s  e Zd ZdddddZedddZedd Zedd ZedddZe	dddZ
edd"d#Zdd%d&Zdd(d)Zdd+d,Zdd-d.Zdd/d0Zdd2d3Zdd5d6Zdd8d9Zdd;d<Zedd>d?ZddEdFZddGdHZddKdLZddOdPZddQdRZddSdTZddVdWZeddYdZZdd[d\Zdd`daZ ddddeZ!ddjdkZ"ddpdqZ#ddudvZ$ddydzZ%dd}d~Z&dddZ'dddZ(dddZ)dddZ*dddZ+dddZ,edddZ-e. Z/dS )SparkLikeLazyFrameF)validate_backend_versionnative_dataframeSQLFrameDataFrameversionr1   implementationr   r9   boolreturnNonec                C  s2   || _ || _|| _d | _d | _|r|   d S d S N)_native_frame_implementation_version_cached_schema_cached_columns_validate_backend_version)selfr:   r<   r=   r9    rI   \/var/www/html/IGF-ODF-V3/venv/lib/python3.10/site-packages/narwhals/_spark_like/dataframe.py__init__@   s   zSparkLikeLazyFrame.__init__tuple[int, ...]c                 C  
   | j  S rA   )rC   _backend_versionrH   rI   rI   rJ   rN   P   s   
z#SparkLikeLazyFrame._backend_versionc                 C     t r
ddlm} |S t| jS )Nr   )	functions)r   sqlframe.baserQ   r   rC   )rH   rQ   rI   rI   rJ   _FT      
zSparkLikeLazyFrame._Fc                 C  rP   )Nr   )types)r   rR   rU   r   rC   )rH   rU   rI   rI   rJ   _native_dtypes\   rT   z!SparkLikeLazyFrame._native_dtypestype[Window]c                 C  rP   )Nr   r$   )r   sqlframe.base.windowr%   r   rC   )rH   r%   rI   rI   rJ   _Windowd   rT   zSparkLikeLazyFrame._WindowobjSQLFrameDataFrame | AnyTypeIs[SQLFrameDataFrame]c                 C  s   t | S rA   r   )rZ   rI   rI   rJ   
_is_nativel   s   zSparkLikeLazyFrame._is_nativedatacontextr2   r&   c               C  s   | ||j |jdS Nr<   r=   )rD   rC   )clsr^   r_   rI   rI   rJ   from_nativep   s   zSparkLikeLazyFrame.from_nativeLazyFrame[SQLFrameDataFrame]c                 C  s   | j j| ddS )Nlazy)level)rD   	lazyframerO   rI   rI   rJ   to_narwhalst   s   zSparkLikeLazyFrame.to_narwhalsr!   c                 C  rM   rA   )rC   to_native_namespacerO   rI   rI   rJ   __native_namespace__w   s   
z'SparkLikeLazyFrame.__native_namespace__r.   c                 C  s   ddl m} || j| jdS )Nr   r-   ra   )narwhals._spark_like.namespacer.   rD   rC   )rH   r.   rI   rI   rJ   __narwhals_namespace__z   s   z)SparkLikeLazyFrame.__narwhals_namespace__c                 C  s   | S rA   rI   rO   rI   rI   rJ   __narwhals_lazyframe__   s   z)SparkLikeLazyFrame.__narwhals_lazyframe__c                 C  s   | j | j|| jdS r`   )	__class__nativerC   )rH   r<   rI   rI   rJ   _with_version      
z SparkLikeLazyFrame._with_versiondfc                 C  s   | j || j| jdS r`   )rn   rD   rC   )rH   rr   rI   rI   rJ   _with_native   rq   zSparkLikeLazyFrame._with_native	pa.Schemac                 C  s   dd l }ddlm} g }|  }| jj}| D ]H\}}z||| j}W n4 tyX }	 z(|| j	}
| j
j}t|
|sEtd|
 d|	t ||| f W Y d }	~	qd }	~	ww |||f q||S )Nr   )narwhals_to_native_dtypezCould not convert dtype z to PyArrow dtype, )pyarrownarwhals._arrow.utilsru   collect_schemaro   schemaitemsrD   	ExceptiondataTyperV   NullType
isinstancer   UserWarningappendnull)rH   paru   ry   	nw_schemanative_schemakeyvaluenative_dtypeexcnative_spark_dtype	null_typerI   rI   rJ   _to_arrow_schema   s*   


z#SparkLikeLazyFrame._to_arrow_schemapa.Tablec              
   C  s   | j  rF| jdk rFdd l}z
|j| j W S  tyE } z!dt	|v r@dd | j
D }|  }|jj||dW  Y d }~S  d }~ww | j  rc| jdk rcdd l}|  }|jj| j |dS t| j S )N)   r   zat least one RecordBatchc                 S  s   i | ]}|g qS rI   rI   ).0krI   rI   rJ   
<dictcomp>   s    z8SparkLikeLazyFrame._collect_to_arrow.<locals>.<dictcomp>ry   )rC   
is_pysparkrN   rv   Tablefrom_batchesro   _collect_as_arrow
ValueErrorstrcolumnsr   from_pydictis_pyspark_connectfrom_pandastoPandasr   toArrow)rH   r   r   r^   	pa_schemarI   rI   rJ   _collect_to_arrow   s"   z$SparkLikeLazyFrame._collect_to_arrowIterator[Column]c                 c  s     | j D ]	}| j|V  qd S rA   )r   rS   col)rH   r   rI   rI   rJ   _iter_columns   s   
z SparkLikeLazyFrame._iter_columns	list[str]c                 C  s.   | j d u r| jd urt| jn| jj| _ | j S rA   )rF   rE   listry   ro   r   rO   rI   rI   rJ   r      s   


zSparkLikeLazyFrame.columnsbackend_EagerAllowedImpl | Nonekwargsr   r)   c                 K  s   |t ju rddlm} || j t jd| jddS |d u s"|t ju r3ddlm	} || 
 d| jddS |t ju rOdd l}ddlm} ||| 
 d| jdS d	| }t|)
Nr   )PandasLikeDataFrameT)r=   r9   r<   validate_column_names)ArrowDataFrame)r9   r<   r   )PolarsDataFrame)r9   r<   zUnsupported `backend` value: )r   PANDASnarwhals._pandas_like.dataframer   ro   r   rD   PYARROWnarwhals._arrow.dataframer   r   POLARSpolarsnarwhals._polars.dataframer   
from_arrowr   )rH   r   r   r   r   plr   msgrI   rI   rJ   _collect   s6   


zSparkLikeLazyFrame._collectc              
   K  sT   | j  r!z
| j|fi |W S  ty  } zt|d d }~ww | j|fi |S rA   )rC   r   r   r{   r
   )rH   r   r   erI   rI   rJ   collect   s   

zSparkLikeLazyFrame.collectcolumn_namesr   c                 G  s   |  | jj| S rA   )rs   ro   select)rH   r   rI   rI   rJ   simple_select      z SparkLikeLazyFrame.simple_selectexprsr*   c              
   G  t   t | g|R  }dd |D }| j r1z
| | jj| W S  ty0 } zt|| d d }~ww | | jj| S )Nc                 S     g | ]	\}}| |qS rI   aliasr   col_namer   rI   rI   rJ   
<listcomp>      z0SparkLikeLazyFrame.aggregate.<locals>.<listcomp>)r   rC   r   rs   ro   aggr{   r   rH   r   new_columnsnew_columns_listr   rI   rI   rJ   	aggregate   s   
zSparkLikeLazyFrame.aggregatec              
   G  r   )Nc                 S  r   rI   r   r   rI   rI   rJ   r     r   z-SparkLikeLazyFrame.select.<locals>.<listcomp>)r   rC   r   rs   ro   r   r{   r   r   rI   rI   rJ   r     s   
zSparkLikeLazyFrame.selectc              
   G  sn   t | g|R  }| j r,z| | jt|W S  ty+ } zt|| d d }~ww | | jt|S rA   )	r   rC   r   rs   ro   withColumnsdictr{   r   )rH   r   r   r   rI   rI   rJ   with_columns  s   
zSparkLikeLazyFrame.with_columns	predicatec              
   C  sd   | | d }| j r)z
| | j|W S  ty( } zt|| d d }~ww | | j|S Nr   )_callrC   r   rs   ro   wherer{   r   )rH   r   	conditionr   rI   rI   rJ   filter  s   
zSparkLikeLazyFrame.filterdict[str, DType]c                   s(    j d u r fdd jjD  _  j S )Nc                   s(   i | ]}|j t|j j j jjqS rI   )namer   r|   rD   rV   ro   sparkSession)r   fieldrO   rI   rJ   r   ,  s    z-SparkLikeLazyFrame.schema.<locals>.<dictcomp>)rE   ro   ry   rO   rI   rO   rJ   ry   )  s
   

	zSparkLikeLazyFrame.schemac                 C  s   | j S rA   r   rO   rI   rI   rJ   rx   7  s   z!SparkLikeLazyFrame.collect_schemar   Sequence[str]strictc                C  s    t | ||d}| | jj| S )N)r   )r   rs   ro   drop)rH   r   r   columns_to_droprI   rI   rJ   r   :  s   zSparkLikeLazyFrame.dropnintc                 C  s   |  | j|S rA   )rs   ro   limit)rH   r   rI   rI   rJ   head>  r   zSparkLikeLazyFrame.headkeys'Sequence[str] | Sequence[SparkLikeExpr]drop_null_keysr,   c                C  s   ddl m} || ||dS )Nr   r+   )r   )narwhals._spark_like.group_byr,   )rH   r   r   r,   rI   rI   rJ   group_byA  s   zSparkLikeLazyFrame.group_byby
descendingbool | Sequence[bool]
nulls_lastc                  s^   t |t|}|r fdd|D }n	 fdd|D }dd t||D }  jj| S )Nc                 3  $    | ]}|r
 j jn j jV  qd S rA   rS   desc_nulls_lastasc_nulls_lastr   drO   rI   rJ   	<genexpr>K  
    
z*SparkLikeLazyFrame.sort.<locals>.<genexpr>c                 3  r   rA   )rS   desc_nulls_firstasc_nulls_firstr   rO   rI   rJ   r   P  r   c                 S     g | ]\}}||qS rI   rI   r   r   sort_frI   rI   rJ   r   U      z+SparkLikeLazyFrame.sort.<locals>.<listcomp>)r   lenr   rs   ro   sort)rH   r   r   r   
sort_funcs	sort_colsrI   rO   rJ   r   H  s   

zSparkLikeLazyFrame.sortr   Iterable[str]reversec                  sT   t |}t|t|} fdd|D }dd t||D }  jj| |S )Nc                 3  s$    | ]}|s
 j jn j jV  qd S rA   r   r   rO   rI   rJ   r   [  s    
z+SparkLikeLazyFrame.top_k.<locals>.<genexpr>c                 S  r   rI   rI   r   rI   rI   rJ   r   ^  r   z,SparkLikeLazyFrame.top_k.<locals>.<listcomp>)tupler   r   r   rs   ro   r   r   )rH   r   r   r   r   r   rI   rO   rJ   top_kX  s   
zSparkLikeLazyFrame.top_ksubsetSequence[str] | Nonec                 C  s$   |rt |nd }| | jj|dS )N)r   )r   rs   ro   dropna)rH   r   rI   rI   rJ   
drop_nullsa  s   zSparkLikeLazyFrame.drop_nullsmappingMapping[str, str]c                   s8    fddj D }jfdd| D S )Nc                   s   i | ]	}|  ||qS rI   )getr   colname)r  rI   rJ   r   f  s    z-SparkLikeLazyFrame.rename.<locals>.<dictcomp>c                   "   g | ]\}} j ||qS rI   rS   r   r   r   oldnewrO   rI   rJ   r   k     " z-SparkLikeLazyFrame.rename.<locals>.<listcomp>)r   rs   ro   r   rz   )rH   r  rename_mappingrI   )r  rH   rJ   renamee  s   
zSparkLikeLazyFrame.renamekeepr6   order_byc          
        s   |p j } | }r|td j dd} j|}|r/|dkr/|j fdd|D  }n|r>|j fdd|D  }n	| jd}|d	krU jd
	|}n j
 	|} j|| j| jdk|}	 |	S )N   
row_index_)prefixlastc                      g | ]} j |qS rI   )rS   r   r   xrO   rI   rJ   r   |  r   z-SparkLikeLazyFrame.unique.<locals>.<listcomp>c                   r  rI   )rS   r   r  rO   rI   rJ   r   ~  r      none*)r   _check_columns_existr   rY   partitionByorderByrS   litcountover
row_numberro   
withColumnr   r   r   rs   )
rH   r   r  r  subset_errortmp_namewindowexprrr   rI   rO   rJ   uniqueo  s$   

zSparkLikeLazyFrame.uniqueotherhowr5   left_onright_onsuffixc                  sP  j  |j }|d urt|ng |d urt|ng }|dkr'fdd|D n|}i tt| fdd|D |jfdd D   }	|dv ra|	fdd	|D  n|dkrl|		  fd
dD }
|dkrt
tfdd	t||
D n|dkrd n|}|dkrdn|}jj||d|	S )Nfullc                   s   g | ]}| vr|qS rI   rI   r   c)	right_on_rI   rJ   r     r   z+SparkLikeLazyFrame.join.<locals>.<listcomp>c                   s&   i | ]}|| v r|  n|qS rI   rI   r  )left_columnsr.  rI   rJ   r     s    z+SparkLikeLazyFrame.join.<locals>.<dictcomp>c                   r  rI   r	  r
  rO   rI   rJ   r     r  >   leftcrossinnerc                 3  s     | ]}|vr | V  qd S rA   rI   r  )r  r2  rI   rJ   r     s    z*SparkLikeLazyFrame.join.<locals>.<genexpr>c                   s   g | ]} | qS rI   rI   r0  )r  rI   rJ   r     s    c                 3  s*    | ]\}}t j|t  |kV  qd S rA   )getattrro   )r   left_key	right_key)other_nativerH   rI   rJ   r     s
    
r5  
full_outer)onr+  )r   r   r   zipro   r   rz   copyextendvaluesr   r   r   rs   join)rH   r*  r+  r,  r-  r.  right_columnsleft_on_right_cols_to_rename	col_orderright_on_remappedon_
how_nativerI   )r3  r:  r  r2  rH   r.  rJ   rA    sR   	
		zSparkLikeLazyFrame.joinc           	   	     s  j j} } D ]}|| }||jkrd| d}t|q
j}t dkr/d}t|j	 s9j
 rJjj fdd|D  S j r}dfdd}jj fdd|D  j| d j fdd|D  S d}t|)Nz-`explode` operation not supported for dtype `z`, expected List typer  zExploding on multiple columns is not supported with SparkLike backend since we cannot guarantee that the exploded columns have matching element counts.c                   <   g | ]}| d  krj ||nj ||qS r   )rS   r   r   explode_outerr   r   r   rH   rI   rJ   r         z.SparkLikeLazyFrame.explode.<locals>.<listcomp>r   r   r?   r"   c                   s    j |  j | dkB S r   )rS   isnull
array_size)r   rO   rI   rJ   null_condition  s   z2SparkLikeLazyFrame.explode.<locals>.null_conditionc                   rI  rJ  )rS   r   r   exploderL  rM  rI   rJ   r     rN  r   c                   s<   g | ]}| d  krj ||nj d|qS )r   N)rS   r   r   r  rL  rM  rI   rJ   r     rN  z[Unreachable code, please report an issue at https://github.com/narwhals-dev/narwhals/issues)r   r   r?   r"   )rD   dtypesrx   Listr   r   r   NotImplementedErrorrC   r   r   rs   ro   r   is_sqlframeunionr   AssertionError)	rH   r   rS  ry   col_to_explodedtyper   r   rQ  rI   rM  rJ   rR    sP   



zSparkLikeLazyFrame.exploder<  indexvariable_name
value_namec           	      C  s   | j  r|dkrd}t||dkrd}t|n	 |r!t|nd}|d u r3tt| jt|nt|}| jj||||d}|d u rJ|j	| }| 
|S )N z<`variable_name` cannot be empty string for sqlframe backend.z9`value_name` cannot be empty string for sqlframe backend.rI   )idsr@  variableColumnNamevalueColumnName)rC   rV  rU  r   setr   
differencero   unpivotr   rs   )	rH   r<  r[  r\  r]  r   r_  r@  unpivoted_native_framerI   rI   rJ   rd    s*   
&

zSparkLikeLazyFrame.unpivotr   c                 C  s`   |d u r
d}t || j | j| jdj| d |}| 	| j
j|g| jR  S )Nz;Cannot pass `order_by` to `with_row_index` for PySpark-liker  )	TypeErrorrS   r"  r!  rY   r  r  r  r   rs   ro   r   r   )rH   r   r  r   row_index_exprrI   rI   rJ   with_row_index2  s   
z!SparkLikeLazyFrame.with_row_indexfilestr | Path | BytesIOc                 C  s   | j j| d S rA   )ro   writeparquet)rH   ri  rI   rI   rJ   sink_parquet>  r   zSparkLikeLazyFrame.sink_parquetframesessionr/   c         	      C  sn   ddl m} |}|  o| dk}|r| }n|dr#| }n	t|jddd}| ||||ddS )	Nr   )	find_spec)r   r   r   pandasTi   )namedbuffer_size)r<   r=   r9   )	importlib.utilrp  rV  rN   to_arrow	to_pandasr   	iter_rowscreateDataFrame)	rb   rn  ro  r=   r<   rp  implis_spark_v4r^   rI   rI   rJ   _from_compliant_dataframeA  s   


z,SparkLikeLazyFrame._from_compliant_dataframeN)
r:   r;   r<   r1   r=   r   r9   r>   r?   r@   )r?   rL   )r?   rW   )rZ   r[   r?   r\   )r^   r;   r_   r2   r?   r&   )r?   rd   )r?   r!   )r?   r.   )r?   r&   )r<   r1   r?   r&   )rr   r;   r?   r&   )r?   rt   )r?   r   )r?   r   )r?   r   )r   r   r   r   r?   r)   )r   r   r?   r&   )r   r*   r?   r&   )r   r*   r?   r&   )r?   r   )r   r   r   r>   r?   r&   )r   r   r?   r&   )r   r   r   r>   r?   r,   )r   r   r   r   r   r>   r?   r&   )r   r   r   r   r   r   r?   r&   )r   r   r?   r&   )r  r  r?   r&   )r   r   r  r6   r  r   r?   r&   )r*  r&   r+  r5   r,  r   r-  r   r.  r   r?   r&   )r   r   r?   r&   )
r<  r   r[  r   r\  r   r]  r   r?   r&   )r   r   r  r   r?   r&   )ri  rj  r?   r@   )
rn  r)   ro  r/   r=   r   r<   r1   r?   r8   )0__name__
__module____qualname__rK   propertyrN   rS   rV   rY   staticmethodr]   classmethodrc   rh   rj   rl   rm   rp   rs   r   r   r   r   r   r   r   r   r   r   r   ry   rx   r   r   r   r   r   r  r  r)  rA  rR  rd  rh  rm  r{  r   	join_asofrI   rI   rI   rJ   r8   <   sh    












	
&















	




D
B
 

r8   )r*   r;   rd   )Q
__future__r   	functoolsr   operatorr   typingr   r   narwhals._exceptionsr   narwhals._nativer	   narwhals._spark_like.utilsr
   r   r   r   r   r   r   narwhals._sql.dataframer   narwhals._utilsr   r   r   r   r   r   r   r   narwhals.exceptionsr   collections.abcr   r   r   r   ior   pathlibr    rU   r!   rv   r   sqlframe.base.columnr"   sqlframe.base.dataframer#   rX   r%   typing_extensionsr&   r'   r(   narwhals._compliant.typingr)   narwhals._spark_like.exprr*   r   r,   rk   r.   r/   narwhals._typingr0   r1   r2   narwhals.dataframer3   narwhals.dtypesr4   narwhals.typingr5   r6   r;   r7   __annotations__r8   rI   rI   rI   rJ   <module>   sJ    $	(

