o
    $&]iO                     @  s  d dl mZ d dlmZ d dlmZ d dlmZmZ d dl	Z	d dl	m
Z
 d dlmZmZmZmZmZmZmZmZmZ d dlmZ d d	lmZmZmZmZmZmZmZmZ d d
l m!Z! d dl"m#Z# erd dl$m%Z%m&Z&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/Z0d dl1Z2d dl	m3Z3 d dl4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlmAZA d dlBmCZC d dlmDZD d dlEmFZF d dlGmHZH d dlImJZK d dlLmMZMmNZNmOZO G dd ded  eZPdS )!    )annotations)reduce)and_)TYPE_CHECKINGAnyN)StarExpression)	DeferredTimeZoneFcatch_duckdb_exceptioncolevaluate_exprs_and_aliasesjoin_column_nameslitnative_to_narwhals_dtypewindow_expression)SQLLazyFrame)ImplementationValidateBackendVersionVersionextend_boolgenerate_temporary_column_nameparse_columns_to_droprequires
zip_strict
get_duckdb)InvalidOperationError)IterableIteratorMappingSequence)BytesIO)Path)
ModuleType)
Expression)SelfTypeIs)CompliantDataFrameAny)
DuckDBExprDuckDBGroupByDuckDBNamespaceDuckDBInterchangeSeries)duckdb_dtypes)_EagerAllowedImpl)_LimitedContext)	LazyFrame)DType	DataFrame)AsofJoinStrategyJoinStrategyUniqueKeepStrategyc                   @  s  e Zd ZejZdddddZedddZe	dddZ
edddZddd Zdd!d"Zdd#d$Zdd&d'Zdd)d*Zdd.d/Zdd1d2Zdd7d8Zdd;d<Zdd>d?ZddBdCZddDdEZddIdJZdddMdNZddOdPZddRdSZeddUdVZeddXdYZdd[d\Zdd^d_Z dd`daZ!ddbdcZ"ddhdiZ#ddldmZ$ddudvZ%dd{d|Z&dd}d~Z'dddZ(dddZ)dddZ*dddZ+dddZ,dddZ-e./ddddZ0dddZ1dKS )DuckDBLazyFrameF)validate_backend_versiondfduckdb.DuckDBPyRelationversionr   r:   boolreturnNonec                C  s,   || _ || _d | _d | _|r|   d S d S N)_native_frame_version_cached_native_schema_cached_columns_validate_backend_version)selfr;   r=   r:    rH   X/var/www/html/IGF-ODF-V3/venv/lib/python3.10/site-packages/narwhals/_duckdb/dataframe.py__init__F   s   zDuckDBLazyFrame.__init__tuple[int, ...]c                 C  
   | j  S rA   )_implementation_backend_versionrG   rH   rH   rI   rN   T      
z DuckDBLazyFrame._backend_versionobjduckdb.DuckDBPyRelation | AnyTypeIs[duckdb.DuckDBPyRelation]c                 C  s   t | tjS rA   )
isinstanceduckdbDuckDBPyRelation)rQ   rH   rH   rI   
_is_nativeX   s   zDuckDBLazyFrame._is_nativedatacontextr1   r%   c               C  s   | ||j dS Nr=   )rC   )clsrX   rY   rH   rH   rI   from_native\   s   zDuckDBLazyFrame.from_nativeargsr   kwdsILazyFrame[duckdb.DuckDBPyRelation] | DataFrameV1[duckdb.DuckDBPyRelation]c                 O  s4   | j tju rddlm} || ddS | j j| ddS )Nr   r4   interchange)levellazy)rC   r   V1narwhals.stable.v1r5   	lazyframe)rG   r^   r_   DataFrameV1rH   rH   rI   to_narwhalsb   s   zDuckDBLazyFrame.to_narwhalsc                 C  s   | j tjurd}t|| S )Nz=__narwhals_dataframe__ is not implemented for DuckDBLazyFrame)rC   r   rd   AttributeError)rG   msgrH   rH   rI   __narwhals_dataframe__k   s   z&DuckDBLazyFrame.__narwhals_dataframe__c                 C  s   | S rA   rH   rO   rH   rH   rI   __narwhals_lazyframe__r   s   z&DuckDBLazyFrame.__narwhals_lazyframe__r#   c                 C  s   t  S rA   r   rO   rH   rH   rI   __native_namespace__u      z$DuckDBLazyFrame.__native_namespace__r,   c                 C  s   ddl m} || jdS )Nr   r+   r[   )narwhals._duckdb.namespacer,   rC   )rG   r,   rH   rH   rI   __narwhals_namespace__x   s   z&DuckDBLazyFrame.__narwhals_namespace__namestrr.   c                 C  s"   ddl m} || j|| jdS )Nr   r-   r[   )narwhals._duckdb.seriesr.   nativeselectrC   )rG   rq   r.   rH   rH   rI   
get_column}   s   zDuckDBLazyFrame.get_columnIterator[Expression]c                 c  s    | j D ]}t|V  qd S rA   )columnsr   )rG   rq   rH   rH   rI   _iter_columns   s   
zDuckDBLazyFrame._iter_columnsbackend_EagerAllowedImpl | Nonekwargsr'   c                 K  s   |d u s	|t ju r(ddlm} | jdk r| j }n| j }||d| jddS |t j	u rAddl
m} || j t j	d| jddS |t ju rWddlm} || j d| jd	S d
| }t|)Nr   )ArrowDataFrame)      T)r:   r=   validate_column_names)PandasLikeDataFrame)implementationr:   r=   r   )PolarsDataFrame)r:   r=   zUnsupported `backend` value: )r   PYARROWnarwhals._arrow.dataframer}   rN   rt   arrowfetch_arrow_tablerC   PANDASnarwhals._pandas_like.dataframer   r;   POLARSnarwhals._polars.dataframer   pl
ValueError)rG   rz   r|   r}   retr   r   rj   rH   rH   rI   collect   s6   




zDuckDBLazyFrame.collectnintc                 C  s   |  | j|S rA   )_with_nativert   limit)rG   r   rH   rH   rI   head      zDuckDBLazyFrame.headcolumn_namesc                 G  s   |  | jj| S rA   )r   rt   ru   )rG   r   rH   rH   rI   simple_select   r   zDuckDBLazyFrame.simple_selectexprsr(   c              
   G  sT   dd t | g|R  D }z
| | j|W S  ty) } zt|| d d }~ww )Nc                 S  s   g | ]	\}}| |qS rH   alias.0rq   valrH   rH   rI   
<listcomp>   s    z-DuckDBLazyFrame.aggregate.<locals>.<listcomp>)r   r   rt   	aggregate	Exceptionr
   rG   r   	selectionerH   rH   rI   r         zDuckDBLazyFrame.aggregatec              
   G  sT   dd t | g|R  D }z
| | jj| W S  ty) } zt|| d d }~ww )Nc                 s      | ]
\}}| |V  qd S rA   r   r   rH   rH   rI   	<genexpr>   s    
z)DuckDBLazyFrame.select.<locals>.<genexpr>)r   r   rt   ru   r   r
   r   rH   rH   rI   ru      r   zDuckDBLazyFrame.selectrx   Sequence[str]strictc                  s4   t | ||d  fdd| jD }| | jj| S )Nr   c                   s   g | ]
}| vrt |qS rH   r   r   rq   columns_to_droprH   rI   r          z(DuckDBLazyFrame.drop.<locals>.<listcomp>)r   rx   r   rt   ru   )rG   rx   r   r   rH   r   rI   drop   s   zDuckDBLazyFrame.dropN_c                 K  s   |d ur
d}t || S )Nz.`backend` argument is not supported for DuckDB)r   )rG   rz   r   rj   rH   rH   rI   rc      s   zDuckDBLazyFrame.lazyc              
     sz   t t| g|R    fdd| jD }|dd   D  z
| | jj| W S  ty< } zt	|| d d }~ww )Nc                   s,   g | ]}| v r  ||nt|qS rH   )popr   r   r   new_columns_maprH   rI   r      s    z0DuckDBLazyFrame.with_columns.<locals>.<listcomp>c                 s  r   rA   r   )r   rq   valuerH   rH   rI   r      s    z/DuckDBLazyFrame.with_columns.<locals>.<genexpr>)
dictr   rx   extenditemsr   rt   ru   r   r
   )rG   r   resultr   rH   r   rI   with_columns   s   
zDuckDBLazyFrame.with_columns	predicatec              
   C  sF   || d }z
|  | j|W S  ty" } zt|| |d }~ww )Nr   )r   rt   filterr   r
   )rG   r   maskr   rH   rH   rI   r      s   zDuckDBLazyFrame.filterdict[str, DType]c                   sL   j d u rttjjj_ tj  fddtjjjjD S )Nc                   s    i | ]\}}|t |j qS rH   )r   rC   )r   column_nameduckdb_dtypedeferred_time_zonerG   rH   rI   
<dictcomp>   s    z*DuckDBLazyFrame.schema.<locals>.<dictcomp>)rD   r   ziprx   rt   typesr   r   rO   rH   r   rI   schema   s   

zDuckDBLazyFrame.schema	list[str]c                 C  s.   | j d u r| jd urt| jn| jj| _ | j S rA   )rE   rD   listr   rt   rx   rO   rH   rH   rI   rx      s   


zDuckDBLazyFrame.columnspd.DataFramec                 C  rL   rA   )rt   r;   rO   rH   rH   rI   	to_pandas  rP   zDuckDBLazyFrame.to_pandaspa.Tablec                 C  s   |   tjjS rA   )rc   r   r   r   rt   rO   rH   rH   rI   to_arrow  s   zDuckDBLazyFrame.to_arrowc                 C  s   | j | j|dS rZ   )	__class__rt   )rG   r=   rH   rH   rI   _with_version     zDuckDBLazyFrame._with_versionc                 C  s   | j || jdS rZ   )r   rC   )rG   r;   rH   rH   rI   r     r   zDuckDBLazyFrame._with_nativekeys$Sequence[str] | Sequence[DuckDBExpr]drop_null_keysr*   c                C  s   ddl m} || ||dS )Nr   r)   )r   )narwhals._duckdb.group_byr*   )rG   r   r   r*   rH   rH   rI   group_by  s   zDuckDBLazyFrame.group_bymappingMapping[str, str]c                   s,   | j } fdd|jD }| | j j| S )Nc                 3  s2    | ]}| v rt | | nt |V  qd S rA   )r   r   r   r   rH   rI   r     s
     
z)DuckDBLazyFrame.rename.<locals>.<genexpr>)rt   rx   r   ru   )rG   r   r;   r   rH   r   rI   rename  s
   
zDuckDBLazyFrame.renameotherhowr7   left_onSequence[str] | Noneright_onsuffixc                C  s  |dkrdn|}|dkr)| j dk rd| j  }t|| jd|jd}n+|d us/J |d us5J dd	 t||D }	tt|	}
| jdj|jd|
|d
}|dv rdd | j	D }|j	D ]I}|| j	v }|dkr||s||
td| d qc|dks|r|d u s||vr|
td| d| |  qc|d u s||vr|
t| qc|j| | jj}n
|d| jj}| |S )Nfulloutercross)r~   r~   r   z;'duckdb>=1.1.4' is required for cross-join, found version: lhsrhsc                 s  4    | ]\}}t d | dt d| dkV  qdS lhs.""rhs."Nr   r   leftrightrH   rH   rI   r   5  
    
z'DuckDBLazyFrame.join.<locals>.<genexpr>)	conditionr   >   r   r   innerr   c                 S  s   g | ]
}t d | dqS )r   r   r   )r   xrH   rH   rI   r   B  r   z(DuckDBLazyFrame.join.<locals>.<listcomp>r   r   lhs.*)rN   NotImplementedErrorrt   	set_aliasr   r   r   r   joinrx   appendr   r   ru   r   )rG   r   r   r   r   r   
native_howrj   relitr   ru   rq   
col_in_lhsresrH   rH   rI   r   !  sD   	




&
zDuckDBLazyFrame.joinby_leftby_rightstrategyr6   c             	   C  sZ  | j }|j }	g }
|d ur|d ur|
dd t||D  ng  }}|dkr:|
td| dtd| dk n|dkrR|
td| dtd| dk nd}t|tt|
}d	g}|	jD ]5}||jv r|d u su||h|vr|d| d
| | d qc|d u s||h|vr|t	t| qcdd
| d| d}| t|S )Nc                 s  r   r   r   r   rH   rH   rI   r   b  r   z,DuckDBLazyFrame.join_asof.<locals>.<genexpr>backwardr   r   r   forwardzKOnly 'backward' and 'forward' strategies are currently supported for DuckDBr   z" as "z
            SELECT ,zD
            FROM lhs
            ASOF LEFT JOIN rhs
            ON 
            )rt   r   r   r   r   r   r   r   rx   rr   r   r   rU   sql)rG   r   r   r   r   r   r   r   r   r   
conditionsrj   r   ru   rq   queryrH   rH   rI   	join_asofS  s:   

((


zDuckDBLazyFrame.join_asofc                 C  s   | j S rA   )r   rO   rH   rH   rI   collect_schema  rn   zDuckDBLazyFrame.collect_schemasubsetkeepr8   order_byc          	      C  s   |p| j }| | }r|td| j dd}|r#|dkr#tdt|nd }|dkr9ttdt ||p3d||d	}nttd
||pAd||d	}| | j	
t ||t|tdkj|gddS )N   
row_index_)prefixlastTnonecountrH   )
descending
nulls_last
row_numberr~   Fr   )rx   _check_columns_existr   r   lenr   r	   r   r   rt   ru   r   r   r   r   r   )	rG   r  r  r  subset_errortmp_nameflagsexprrH   rH   rI   unique  s6   

zDuckDBLazyFrame.uniquebyr  bool | Sequence[bool]r  c                G  sN   t |t|}|rdd t||D }n
dd t||D }| | jj| S )Nc                 s  4    | ]\}}|st | nt |  V  qd S rA   )r   r  descr   rq   r  rH   rH   rI   r     r   z'DuckDBLazyFrame.sort.<locals>.<genexpr>c                 s  r  rA   )r   nulls_firstr  r  rH   rH   rI   r     r   )r   r  r   r   rt   sort)rG   r  r  r  r   rH   rH   rI   r    s   zDuckDBLazyFrame.sortkIterable[str]reversec          	      C  s   | j }t|}t|trt| t|}n	tdd |D }ttd||tdt|d}|t	|k}d| d}| 
t|S )Nc                 s  s    | ]}| V  qd S rA   rH   )r   revrH   rH   rI   r     s    z(DuckDBLazyFrame.top_k.<locals>.<genexpr>r  T)r  r  r  z@
            SELECT *
            FROM _rel
            QUALIFY z	
        )rt   r   rT   r>   r   r  tupler   r	   r   r   rU   r   )	rG   r   r  r"  _relr  r  r   r  rH   rH   rI   top_k  s    
zDuckDBLazyFrame.top_kc                 C  s8   |d ur|n| j }ttdd |D }| | j|S )Nc                 s  s    | ]	}t | V  qd S rA   )r   	isnotnullr   rH   rH   rI   r     s    z-DuckDBLazyFrame.drop_nulls.<locals>.<genexpr>)rx   r   r   r   rt   r   )rG   r  r  keep_conditionrH   rH   rI   
drop_nulls  s   zDuckDBLazyFrame.drop_nullsc                   s   | j j}|  }D ]}|| }||jkrd| d}t|q
tdkr,d}t|td  | j}| j	}t
d}	  td @ |	k}
||
j fdd|D  }||
 jfd	d|D  }| ||S )
Nz-`explode` operation not supported for dtype `z`, expected List typer~   zExploding on multiple columns is not supported with DuckDB backend since we cannot guarantee that the exploded columns have matching element counts.r   r  c                 3  s,    | ]}|v rt d  |n|V  qdS )unnestN)r	   r   r   col_to_exploderx   rH   rI   r     s
    
z*DuckDBLazyFrame.explode.<locals>.<genexpr>c                 3  s*    | ]}| v rt d |n|V  qd S rA   )r   r   r   )rx   rH   rI   r     s
    
)rC   dtypesr  Listr   r  r   r   rt   rx   r   r'  r	   r   ru   r   union)rG   rx   r-  r   rq   dtyperj   r   original_columnszeronot_null_conditionnon_null_relnull_relrH   r+  rI   explode  s:   



zDuckDBLazyFrame.explodeonindexvariable_name
value_namec           
        s   |d u rg n| |d u r fdd| j D n|}|dkr"d}t||dkr,d}t|t| }| j}d| dt| dt| d	}	| t|	jg  || S )
Nc                   s   g | ]}| vr|qS rH   rH   )r   cindex_rH   rI   r      s    z+DuckDBLazyFrame.unpivot.<locals>.<listcomp> z:`variable_name` cannot be empty string for duckdb backend.z7`value_name` cannot be empty string for duckdb backend.z)
            unpivot _rel
            on z'
            into
                name z
                value r   )	rx   r   r   rt   r   r   rU   r   ru   )
rG   r7  r8  r9  r:  on_rj   
unpivot_onr%  r  rH   r<  rI   unpivot  s(    zDuckDBLazyFrame.unpivot)r~      c                 C  sH   |d u r
d}t |ttd|dtd |}| | j|t S )Nz5Cannot pass `order_by` to `with_row_index` for DuckDBr  )r  r~   )		TypeErrorr   r	   r   r   r   rt   ru   r   )rG   rq   r  rj   r  rH   rH   rI   with_row_index  s   zDuckDBLazyFrame.with_row_indexfilestr | Path | BytesIOc                 C  s    | j }d| d}t| d S )Nz7
            COPY (SELECT * FROM _rel)
            TO 'z+'
            (FORMAT parquet)
            )rt   rU   r   )rG   rE  r%  r  rH   rH   rI   sink_parquet#  s
   zDuckDBLazyFrame.sink_parquet)r;   r<   r=   r   r:   r>   r?   r@   )r?   rK   )rQ   rR   r?   rS   )rX   r<   rY   r1   r?   r%   )r^   r   r_   r   r?   r`   )r?   r%   )r?   r#   )r?   r,   )rq   rr   r?   r.   )r?   rw   )rz   r{   r|   r   r?   r'   )r   r   r?   r%   )r   rr   r?   r%   )r   r(   r?   r%   )rx   r   r   r>   r?   r%   rA   )rz   r@   r   r@   r?   r%   )r   r(   r?   r%   )r?   r   )r?   r   )r?   r   )r?   r   )r=   r   r?   r%   )r;   r<   r?   r%   )r   r   r   r>   r?   r*   )r   r   r?   r%   )r   r%   r   r7   r   r   r   r   r   rr   r?   r%   )r   r%   r   rr   r   rr   r   r   r   r   r   r6   r   rr   r?   r%   )r  r   r  r8   r  r   r?   r%   )r  rr   r  r  r  r>   r?   r%   )r   r   r  r!  r"  r  r?   r%   )r  r   r?   r%   )rx   r   r?   r%   )
r7  r   r8  r   r9  rr   r:  rr   r?   r%   )rq   rr   r  r   r?   r%   )rE  rF  r?   r@   )2__name__
__module____qualname__r   DUCKDBrM   rJ   propertyrN   staticmethodrW   classmethodr]   rh   rk   rl   rm   rp   rv   ry   r   r   r   r   ru   r   rc   r   r   r   rx   r   r   r   r   r   r   r   r  r  r  r  r&  r)  r6  rA  r   backend_versionrD  rG  rH   rH   rH   rI   r9   <   s`    

	






&


	
	



	






2
/

"



)!	r9   )r(   r<   r`   )Q
__future__r   	functoolsr   operatorr   typingr   r   rU   r   narwhals._duckdb.utilsr   r	   r
   r   r   r   r   r   r   narwhals._sql.dataframer   narwhals._utilsr   r   r   r   r   r   r   r   narwhals.dependenciesr   narwhals.exceptionsr   collections.abcr   r   r   r    ior!   pathlibr"   r   r#   pandaspdpyarrowpar$   typing_extensionsr%   r&   narwhals._compliant.typingr'   narwhals._duckdb.exprr(   r   r*   ro   r,   rs   r.   r/   narwhals._typingr0   r1   narwhals.dataframer2   narwhals.dtypesr3   re   r5   rg   narwhals.typingr6   r7   r8   r9   rH   rH   rH   rI   <module>   sL    ,(

