You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
ORPA-pyOpenRPA/WPy32-3720/python-3.7.2/Lib/site-packages/dask/bytes/pyarrow.py

60 lines
1.4 KiB

6 years ago
from __future__ import print_function, division, absolute_import
import posixpath
from .glob import generic_glob
from ..base import tokenize
import pyarrow as pa
class HDFS3Wrapper(pa.filesystem.DaskFileSystem):
    """Adapter that lets an `hdfs3.HDFileSystem` be handed to pyarrow methods.

    Both predicates below simply forward to the wrapped filesystem held in
    ``self.fs`` (presumably assigned by the pyarrow base class -- confirm
    against ``pa.filesystem.DaskFileSystem``).
    """

    def isfile(self, path):
        """Return whatever the wrapped filesystem's ``isfile`` reports for *path*."""
        wrapped = self.fs
        return wrapped.isfile(path)

    def isdir(self, path):
        """Return whatever the wrapped filesystem's ``isdir`` reports for *path*."""
        wrapped = self.fs
        return wrapped.isdir(path)
# Version string that, going by its name, is the oldest pyarrow release this
# module supports. It is not referenced anywhere else in the code shown here;
# presumably importers compare against it -- TODO confirm.
_MIN_PYARROW_VERSION_SUPPORTED = '0.8.1.dev81'
def update_hdfs_options(options):
    """Rename the ``'username'`` entry of *options* to ``'user'``, in place.

    The ``'username'`` key is always removed. Its value is stored under
    ``'user'`` (overwriting any existing ``'user'`` entry) unless that value
    is ``None`` or the key was absent, in which case ``'user'`` is left
    untouched.

    Parameters
    ----------
    options : dict
        Keyword options; this mapping is mutated, not copied.

    Returns
    -------
    dict
        The very same ``options`` object that was passed in.
    """
    user = options.pop('username', None)
    if user is None:
        # Nothing to rename: key missing or explicitly set to None.
        return options
    options['user'] = user
    return options
class PyArrowHadoopFileSystem(object):
    """Filesystem facade that delegates every operation to ``self.fs``.

    ``self.fs`` is either a ``pa.hdfs.HadoopFileSystem`` built by
    ``__init__`` or an already-existing filesystem object supplied through
    ``from_pyarrow``.
    """

    # Path separator exposed as a class attribute.
    sep = "/"

    def __init__(self, **kwargs):
        """Create the underlying ``pa.hdfs.HadoopFileSystem``.

        The keyword arguments are first passed through
        ``update_hdfs_options`` and the result is forwarded to the pyarrow
        constructor.
        """
        hdfs_options = update_hdfs_options(kwargs)
        self.fs = pa.hdfs.HadoopFileSystem(**hdfs_options)

    @classmethod
    def from_pyarrow(cls, fs):
        """Wrap an existing filesystem object *fs* without calling ``__init__``."""
        # Allocate a bare instance so no new HDFS connection is attempted.
        instance = object.__new__(cls)
        instance.fs = fs
        return instance

    def open(self, path, mode='rb', **kwargs):
        """Open *path* via the wrapped filesystem, forwarding *mode* and extras."""
        backend = self.fs
        return backend.open(path, mode=mode, **kwargs)

    def glob(self, path):
        """Return the sorted list of matches ``generic_glob`` yields for *path*."""
        matches = list(generic_glob(self.fs, posixpath, path))
        matches.sort()
        return matches

    def mkdirs(self, path):
        """Call the wrapped ``mkdir`` for *path* with ``create_parents=True``."""
        backend = self.fs
        return backend.mkdir(path, create_parents=True)

    def ukey(self, path):
        """Return a token built from *path* and its ``'last_modified'`` info entry."""
        details = self.fs.info(path)
        return tokenize(path, details['last_modified'])

    def size(self, path):
        """Return the ``'size'`` entry of the wrapped filesystem's info for *path*."""
        details = self.fs.info(path)
        return details['size']

    def _get_pyarrow_filesystem(self):
        """Return the wrapped filesystem object itself."""
        return self.fs