You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
60 lines
1.4 KiB
60 lines
1.4 KiB
6 years ago
|
from __future__ import print_function, division, absolute_import
|
||
|
|
||
|
import posixpath
|
||
|
|
||
|
from .glob import generic_glob
|
||
|
from ..base import tokenize
|
||
|
|
||
|
import pyarrow as pa
|
||
|
|
||
|
|
||
|
class HDFS3Wrapper(pa.filesystem.DaskFileSystem):
    """Wrapper around `hdfs3.HDFileSystem` that allows it to be passed to
    pyarrow methods.

    Both methods below forward to the wrapped filesystem stored on
    ``self.fs``.
    """

    def isdir(self, path):
        """Return whatever ``self.fs.isdir`` reports for *path*."""
        wrapped = self.fs
        return wrapped.isdir(path)

    def isfile(self, path):
        """Return whatever ``self.fs.isfile`` reports for *path*."""
        wrapped = self.fs
        return wrapped.isfile(path)
|
||
|
|
||
|
|
||
|
# Oldest pyarrow version string declared as supported by this module.
# NOTE(review): not referenced in the visible code -- presumably compared
# against the installed pyarrow version by whoever imports it; confirm.
_MIN_PYARROW_VERSION_SUPPORTED = '0.8.1.dev81'
|
||
|
|
||
|
|
||
|
def update_hdfs_options(options):
    """Rename the ``'username'`` option to ``'user'`` in place.

    The ``'username'`` key is always removed from *options*. When its value
    is not ``None`` it is stored back under ``'user'``, replacing any
    existing ``'user'`` entry.

    Parameters
    ----------
    options : dict
        Keyword options; this mapping is mutated.

    Returns
    -------
    dict
        The same *options* object that was passed in.
    """
    user_name = options.pop('username', None)
    if user_name is None:
        # Nothing to translate; the key (if present) has been dropped.
        return options
    options['user'] = user_name
    return options
|
||
|
|
||
|
|
||
|
class PyArrowHadoopFileSystem(object):
    """Adapter that holds a pyarrow Hadoop filesystem on ``self.fs`` and
    forwards a small set of file-system operations to it."""

    sep = "/"

    def __init__(self, **kwargs):
        """Create the underlying ``pa.hdfs.HadoopFileSystem``.

        The keyword arguments are first passed through
        ``update_hdfs_options`` and the result is forwarded to pyarrow.
        """
        hdfs_kwargs = update_hdfs_options(kwargs)
        self.fs = pa.hdfs.HadoopFileSystem(**hdfs_kwargs)

    @classmethod
    def from_pyarrow(cls, fs):
        """Build an instance around an existing filesystem object *fs*
        without running ``__init__``."""
        instance = object.__new__(cls)
        instance.fs = fs
        return instance

    def open(self, path, mode='rb', **kwargs):
        """Open *path* on the wrapped filesystem, forwarding *mode* and any
        extra keyword arguments."""
        wrapped = self.fs
        return wrapped.open(path, mode=mode, **kwargs)

    def glob(self, path):
        """Return the sorted list of matches ``generic_glob`` yields for
        the pattern *path*."""
        matches = list(generic_glob(self.fs, posixpath, path))
        matches.sort()
        return matches

    def mkdirs(self, path):
        """Call ``mkdir`` on the wrapped filesystem with
        ``create_parents=True`` and return its result."""
        wrapped = self.fs
        return wrapped.mkdir(path, create_parents=True)

    def ukey(self, path):
        """Return a token computed from *path* and the ``'last_modified'``
        entry of the wrapped filesystem's info for it."""
        details = self.fs.info(path)
        return tokenize(path, details['last_modified'])

    def size(self, path):
        """Return the ``'size'`` entry of the wrapped filesystem's info for
        *path*."""
        details = self.fs.info(path)
        return details['size']

    def _get_pyarrow_filesystem(self):
        """Return the wrapped filesystem object stored on ``self.fs``."""
        wrapped = self.fs
        return wrapped
|