o
    +&]iI                  
   @  sX  U d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZmZmZmZmZmZ dd
lmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0 er|ddl1Z1ddlm2Z2m3Z3 ddlm4Z4 ddl5Z6ddl7Z8ddl9Z:ddl.m;Z;m<Z<m=Z=m>Z> ddl?m@Z@mAZAmBZB ddlCmDZD e1jEdkrddlmFZFmGZG nddlHmFZFmGZG e1jEdkrddlmIZI nddlHmIZI e1jEdkrddlmJZJ nddlHmJZJ e1jEdkrdd lmKZK ndd lHmKZK ed! ZLd"eMd#< ed$ ZNd"eMd%< ed& ZOd"eMd'< eeNd(f ZPd"eMd)< eeLePeOf ZQd"eMd*< ed+ ZRd"eMd,< ed- ZSd"eMd.< ed/ ZTd"eMd0< ed1 ZUd"eMd2< ed3 ZVd"eMd4< eeLeNeOeReSeTeUeVf ZWd"eMd5< eGd6eLeNeOeReSeTeUeV	ZXeed7 d8f ZYd"eMd9< 	 G d:d; d;eee/f ZZddAdBZ[G dCdD dDeZee/f Z\e	EddEdEdFddPdQZ]edEdEdFddTdQZ]	Uddej^j_dFddWdQZ]dXdYdd]d^Z`edd`daZaeddcdaZaeddedaZaddfdaZaddjdkZbddmdnZcddsdtZdddwdxZeddzd{Zfdd}d~ZgedddZhedddZhedddZhdddZhdddZidS )a  
Backend for ``alt.datasets.Loader``.

Notes
-----
Extending would be more ergonomic if `read`, `scan`, `_constraints` were available under a single export::

    from altair.datasets import ext, reader
    import polars as pl

    impls = (
        ext.read(pl.read_parquet, ext.is_parquet),
        ext.read(pl.read_csv, ext.is_csv),
        ext.read(pl.read_json, ext.is_json),
    )
    user_reader = reader(impls)
    user_reader.dataset("airports")
    )annotations)Counter)Mapping)import_module)	find_spec)chain)Path)TYPE_CHECKINGAnyClassVarGenericLiteralcastoverload)build_opener)v1)IntoDataFrameTIntoExpr)Requirement)	_readimpl)CsvCacheDatasetCacheSchemaCache_iter_metadata)
is_parquet)AltairDatasetsErrormodule_not_found)
IntoFrameTis_availableN)CallableSequence)OpenerDirector)BaseImplRReadScan)Dataset	ExtensionMetadata)OneOrSeq)      )TypeIsTypeVar)r*      )Unpack)r*      )LiteralString)r*   
   )	TypeAliaspolarsr3   _Polarspandas_Pandaspyarrow_PyArrowpandas[pyarrow]
_PandasAny_Backendcudf_CuDFdask_Daskduckdb_DuckDBibis_Ibispyspark_PySpark
_NwSupport_NwSupportT)	supportedunsupportedSequence[Dataset]_SupportProfilec                   @  s4  e Zd ZU dZded< 	 ded< 	 ded< 	 ded	< 	 e Zd
ed< eej	d d Z
ded< dZddZd[ddZd\ddZd]d!d"Zed^d$d%Zd_d(d)Z	*d`dad1d2Z	*d`dbd3d4Zdcd6d7Zed8d9ddd=d>Zeded@d>ZdAd9dfdDd>Z	*d`dgdFdGZdhdJdKZedidMdNZdjdSdTZdkdXdYZd*S )lReaderz
    Modular file reader, targeting remote & local tabular resources.

    .. warning::
        Use ``reader(...)`` instead of instantiating ``Reader`` directly.
    Sequence[Read[IntoDataFrameT]]_readSequence[Scan[IntoFrameT]]_scanstr_namenw.Implementation_implementationzClassVar[OpenerDirector]_opener	_metadatazmetadata.parquetzClassVar[Path]_metadata_pathreadscannameimplementationreturnNonec                 C  s(   || _ || _|| _|| _t|d| _d S )N)r\   )rO   rQ   rS   rU   r   _schema_cache)selfrY   rZ   r[   r\    ra   U/var/www/html/IGF-ODF-V3/venv/lib/python3.10/site-packages/altair/datasets/_reader.py__init__   s
   zReader.__init__c                 C  s|   ddl m} d}d}d||dd | jD | }| jr1|d||d	d | jD | 7 }d
| j d| jd| S )Nr   )indentz    
zread
c                 s      | ]}t |V  qd S NrR   .0elra   ra   rb   	<genexpr>       z"Reader.__repr__.<locals>.<genexpr>z
scan
c                 s  rf   rg   rh   ri   ra   ra   rb   rl      rm   zReader[z] )textwraprd   joinrO   rQ   rS   rU   )r`   rd   PREFIXNLbodyra   ra   rb   __repr__   s   "&zReader.__repr__metar(   Callable[..., IntoDataFrameT]c                C  s   |  || jS rg   )_solverO   r`   rt   ra   ra   rb   read_fn   s   zReader.read_fnMetadata | Path | strCallable[..., IntoFrameT]c                C  s(   t |tr|ndt|i}| || jS )Nsuffix)
isinstancer   _into_suffixrv   rQ   rw   ra   ra   rb   scan_fn   s   zReader.scan_fnr   c                 C  s   t | S rg   )r   r`   ra   ra   rb   cache   s   zReader.cachee	Exceptionc                 C  sH   dt |v r"dt t|jv r"d| d| d| d| }t|||)zuHandle PyArrow date parsing errors with informative error messages, see https://github.com/apache/arrow/issues/41488.zCSV conversion error to dater8   z-PyArrow cannot parse date format in dataset 'z'. This is a known limitation of PyArrow's CSV reader for non-ISO date formats.

Alternatives:
1. Use a different backend: data.z(engine='pandas') or data.zV(engine='polars')
2. Convert dates manually after loading as strings

Original error: )rR   type
__module__r   )r`   r   r[   messagera   ra   rb   _handle_pyarrow_date_error   s   
z!Reader._handle_pyarrow_date_errorNDataset | LiteralStringr{   Extension | Nonekwdsr
   r   c                K  s   |  ||}tt|}| |}| ||}| j r?| j|}z	||fi |W S  ty> }	 z| 	|	|  d }	~	ww | j
|d %}
z||
fi |W W  d    S  tyk }	 z| 	|	|  d }	~	ww 1 sow   Y  d S )Nurl)_querynextr   rx   _merge_kwdsr   	is_active_maybe_downloadr   r   rV   open)r`   r[   r{   r   framert   fnfn_kwdsfpr   fra   ra   rb   dataset   s.   

zReader.datasetc                C  sl   |  ||}tt|}t| rtdst||d }t|t	r&|S dt
|jd|d}t|)N
vegafusionr   zExpected 'str' but got z
from .)r   r   r   r   itemsr   r   from_urlr|   rR   r   __name__	TypeError)r`   r[   r{   r   rt   r   msgra   ra   rb   r      s   

z
Reader.urlr&   c                C  sX   ddl }ddlm} | |dddd}|d }d| d	| }|| dS )
a  
        Learn more about a dataset, opening `vega-datasets/datapackage.md`_ with the default browser.

        Additional info *may* include: `description`_, `schema`_, `sources`_, `licenses`_.

        .. _vega-datasets/datapackage.md:
            https://github.com/vega/vega-datasets/blob/main/datapackage.md
        .. _description:
            https://datapackage.org/standard/data-resource/#description
        .. _schema:
            https://datapackage.org/standard/table-schema/#schema
        .. _sources:
            https://datapackage.org/standard/data-package/#sources
        .. _licenses:
            https://datapackage.org/standard/data-package/#licenses
        r   N)VERSIONS	file_namer    zvega-datasetsz,https://github.com/vega/vega-datasets/blob/vz/datapackage.md#)
webbrowseraltair.utilsr   r   
get_columnitemreplacer   )r`   r[   r   r   reftagr   ra   ra   rb   open_markdown   s   zReader.open_markdown.)showr   Literal[False]rL   c                C     d S rg   ra   r`   r   ra   ra   rb   profile     zReader.profileLiteral[True]c                C  r   rg   ra   r   ra   ra   rb   r     r   Fbool_SupportProfile | Nonec                C  s   t tdd | jD }|  jdg|R  }tdd | jD }t|| t||d}|r?ddl	}|j	|dd	d
 dS |S )z
        Describe which datasets can be loaded as tabular data.

        Parameters
        ----------
        show
            Print a densely formatted repr *instead of* returning a mapping.
        c                 s      | ]}|j V  qd S rg   )_relevant_columnsrj   implra   ra   rb   rl         z!Reader.profile.<locals>.<genexpr>dataset_namec                 s  r   rg   )_include_exprr   ra   ra   rb   rl   !  r   )rJ   rI   r   NTF)compact
sort_dicts)
setr   from_iterablerO   _scan_metadataselectnwany_horizontal_dataset_namespprint)r`   r   relevant_columnsr   inc_exprresultr   ra   ra   rb   r     s   	
nw.DataFrame[IntoDataFrameT]c                C  s<   t ||}| jdi | }| s|S d|}t|)a  
        Query a tabular version of `vega-datasets/datapackage.json`_.

        Applies a filter, erroring out when no results would be returned.

        .. _vega-datasets/datapackage.json:
            https://github.com/vega/vega-datasets/blob/main/datapackage.json
        zFound no results for:
    Nra   )_into_constraintsr   collectis_empty
ValueError)r`   r[   r{   constraintsr   r   ra   ra   rb   r   -  s   

zReader._querydict[str, Any]Mapping[str, Any]c                C  s.   | j  r| j | }r|r||B n|}|S )z
        Extend user-provided arguments with dataset & library-specfic defaults.

        .. important:: User-provided arguments have a higher precedence.
        )r_   r   schema_kwds)r`   rt   r   schemara   ra   rb   r   @  s
   
zReader._merge_kwdsnw.LazyFrame[IntoFrameT]c                 C  s   | j }t| || S rg   )rX   r   from_nativer~   lazy)r`   r   ra   ra   rb   _metadata_frameL  s   zReader._metadata_frame
predicatesOneOrSeq[IntoExpr]r   Unpack[Metadata]c                 O  s    |s|r| j j|i |S | j S rg   )r   filter)r`   r   r   ra   ra   rb   r   Q  s   zReader._scan_metadataimplsSequence[BaseImpl[R]]Callable[..., R]c                  sR   |    fdd|D }t|d }r"t|r ||| j|S t|| j)z
        Return the first function that satisfies dataset constraints.

        See Also
        --------
        ``altair.datasets._readimpl.BaseImpl.unwrap_or_skip``
        c                 3  s"    | ]}|   rV  qd S rg   )unwrap_or_skipr   r   somera   rb   rl   c  s     z Reader._solve.<locals>.<genexpr>N)r   r   _is_errfrom_tabularrS   r   )r`   rt   r   it	fn_or_errra   r   rb   rv   X  s   
zReader._solve)
rY   rN   rZ   rP   r[   rR   r\   rT   r]   r^   r]   rR   )rt   r(   r]   ru   )rt   ry   r]   rz   )r]   r   )r   r   r[   rR   r]   r^   rg   )r[   r   r{   r   r   r
   r]   r   )r[   r   r{   r   r]   rR   )r[   r&   r]   r^   )r   r   r]   rL   )r   r   r]   r^   )r   r   r]   r   )r[   r   r{   r   r]   r   )rt   r(   r   r   r]   r   r]   r   )r   r   r   r   r]   r   )rt   r(   r   r   r]   r   )r   r   __qualname____doc____annotations___build_openerrV   r   __file__parentrX   rc   rs   rx   r~   propertyr   r   r   r   r   r   r   r   r   r   r   rv   ra   ra   ra   rb   rM   t   sJ   
 









rM   r   nw.LazyFramer   r   r]   c                 G  s   | j | d d S )Nr   )r   r   r   r   to_list)r   r   ra   ra   rb   r   k  s   r   c                      s:   e Zd Zd fddZedddZedd	d
Z  ZS )_NoParquetReaderr]   rR   c                   s   t    d| jS )Nz
csv_cache
    )superrs   	csv_cacher   	__class__ra   rb   rs   y  s   z_NoParquetReader.__repr__r   c                 C  s   t | ds	t | _| jS )N
_csv_cache)hasattrr   r   r   ra   ra   rb   r   |  s   
z_NoParquetReader.csv_cacher   c                 C  s,   t d| jj}| j}ttj||d S )Nr   )backend)r   r   rotatedrU   r   maybe_convert_dtypes	from_dictr   )r`   datar   ra   ra   rb   r     s   z _NoParquetReader._metadata_framer   )r]   r   r   )r   r   r   rs   r   r   r   __classcell__ra   ra   r   rb   r   x  s    r   .r[   r\   read_fnsrN   scan_fns	tuple[()]r[   
str | Noner\   rT   4Reader[IntoDataFrameT, nw.LazyFrame[IntoDataFrameT]]c                C  r   rg   ra   r   r   r[   r\   ra   ra   rb   reader     r  rP   "Reader[IntoDataFrameT, IntoFrameT]c                C  r   rg   ra   r   ra   ra   rb   r    r  ra   YReader[IntoDataFrameT, IntoFrameT] | Reader[IntoDataFrameT, nw.LazyFrame[IntoDataFrameT]]c                C  sz   |pt dd | D dd d }|tjju rtt|}|r't| |||S t|  }r4t| |||S t	t
 | d||S )Nc                 s  r   rg   )_inferred_packageri   ra   ra   rb   rl     r   zreader.<locals>.<genexpr>   r   ra   )r   most_commonr   ImplementationUNKNOWN_into_implementationr   rM   _steal_eager_parquetr   r   )r   r   r[   r\   stolenra   ra   rb   r    s   $
)r4   r:   r6   r8   )priorityr  Sequence[_Backend]Reader[Any, Any]c                 C  s*   dd | D }t |d }r|S t| )a  
    Return the first available reader in order of `priority`.

    Notes
    -----
    - ``"polars"``: can natively load every dataset (including ``(Geo|Topo)JSON``)
    - ``"pandas[pyarrow]"``: can load *most* datasets, guarantees ``.parquet`` support
    - ``"pandas"``: supports ``.parquet``, if `fastparquet`_ is installed
    - ``"pyarrow"``: least reliable

    .. _fastparquet:
        https://github.com/dask/fastparquet
    c                 s  s$    | ]}t t|rt|V  qd S rg   )r   _requirements_from_backend)rj   r[   ra   ra   rb   rl     s   " z infer_backend.<locals>.<genexpr>N)r   r   from_priority)r  r   r  ra   ra   rb   infer_backend  s   
r  "Reader[pl.DataFrame, pl.LazyFrame]c                C  r   rg   ra   r[   ra   ra   rb   r    r   r  "Reader[pd.DataFrame, pd.DataFrame]c                C  r   rg   ra   r  ra   ra   rb   r    r   Reader[pa.Table, pa.Table]c                C  r   rg   ra   r  ra   ra   rb   r    r   c                C  s   t | sd| }t|t| }| dkr#t \}}t||| |dS | dkr0tt | |dS | dkr=tt | |dS | dkrJtt | |dS dS )zu
    Reader initialization dispatcher.

    FIXME: Works, but defining these in mixed shape functions seems off.
    zUnknown backend r4   r   r:   r6   r8   N)	_is_backendr   r
  r   pl_onlyr  
pd_pyarrowpd_onlypa_any)r[   r   r\   rdscra   ra   rb   r    s   
objr
   TypeIs[_Backend]c                 C  s   | dv S )N>   r6   r4   r8   r:   ra   r  ra   ra   rb   r       r  !TypeIs[type[AltairDatasetsError]]c                 C  s   | t u S rg   )r   r!  ra   ra   rb   r     r"  r   r   r{   r   r(   c                C  sx   i }d| v r| |d< |S |du r| |d< |S | dr"| |d}|S ddlm} ddlm} d	||d
|}t|)z.Transform args into a mapping to column names.r   r   Nr   )r   r{   r   )get_args)r'   zExpected 'suffix' to be one of z,
but got: )
startswithtypingr$  altair.datasets._typingr'   r   )r[   r{   mr$  r'   r   ra   ra   rb   r     s"   

r   r   %_NwSupport | _PandasAny | Requirementc                C  s8   t | }tj|}|tjjur|S d|d}t|)NzPackage z  is not supported by `narwhals`.)_import_guardedr   r  from_backendr	  r   )r   primaryr   r   ra   ra   rb   r
    s   r
  
Path | strc                C  s6   t | tr| jS t | tr| S dt| j}t|)NzUnexpected type )r|   r   r{   rR   r   r   r   )r  r   ra   ra   rb   r}     s   

r}   3Sequence[Scan[nw.LazyFrame[IntoDataFrameT]]] | Nonec                C  s(   t dd | D d  }rt|fS d S )Nc                 s  s    | ]
}|j tkr|V  qd S rg   )includer   )rj   r  ra   ra   rb   rl     s    z'_steal_eager_parquet.<locals>.<genexpr>)r   r   	into_scan)r   convertablera   ra   rb   r    s   r  reqc                C  r   rg   ra   r2  ra   ra   rb   r*  $  r   r*  c                C  r   rg   ra   r3  ra   ra   rb   r*  (  r   r   r1   c                C  r   rg   ra   r3  ra   ra   rb   r*  ,  r   c                C  sB   t | }|D ]}t| }rt|j qtt| ||d|d S )N)missingr   )r  r   r   r[   r   rR   )r2  requiresr[   specra   ra   rb   r*  0  s   Requirement | strtuple[Any, ...]c                C  s&   t | tr	t| n| } | jg| jR S rg   )r|   rR   r   r[   extrasr3  ra   ra   rb   r  :  s   r  )r   r   r   r   r]   rK   ).)
r   rN   r   r   r[   r   r\   rT   r]   r   )
r   rN   r   rP   r[   r   r\   rT   r]   r  )ra   )
r   rN   r   rP   r[   r   r\   rT   r]   r  )r  r  r]   r  )r[   r5   r]   r  )r[   r;   r]   r  )r[   r9   r]   r  )r[   r<   r]   r  )r  r
   r]   r   )r  r
   r]   r#  )r[   r   r{   r   r]   r(   )r   r)  r]   rT   )r  r-  r]   r
   )r   rN   r]   r.  )r2  r;   r]   r7   )r2  rH   r]   rH   )r2  r   r]   r1   )r2  r
   r]   r1   )r2  r7  r]   r8  )jr   
__future__r   collectionsr   collections.abcr   	importlibr   importlib.utilr   	itertoolsr   pathlibr   r&  r	   r
   r   r   r   r   r   urllib.requestr   r   narwhals.stabler   r   narwhals.stable.v1.typingr   r   packaging.requirementsr   altair.datasetsr   altair.datasets._cacher   r   r   r   altair.datasets._constraintsr   altair.datasets._exceptionsr   r   altair.datasets._readimplr   r   sysr   r    r!   r6   pdr4   plr8   par"   r#   r$   r%   r'  r&   r'   r(   !altair.vegalite.v6.schema._typingr)   version_infor,   r-   typing_extensionsr/   r1   r3   r5   r   r7   r9   r;   r<   r>   r@   rB   rD   rF   rG   rH   rL   rM   r   r   r  r  r	  r  r  r  r   r   r
  r}   r  r*  r  ra   ra   ra   rb   <module>   s    $



 
x	









