# The glob functionality here copies (and heavily modifies) code from the # `glob` module to allow for use with arrow's hdfs. These functions are subject # to the license found at https://docs.python.org/3/license.html, which is also # included below: # # PSF LICENSE AGREEMENT FOR PYTHON 3.6.4 # ====================================== # # 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), # and the Individual or Organization ("Licensee") accessing and otherwise # using Python 3.6.4 software in source or binary form and its associated # documentation. # # 2. Subject to the terms and conditions of this License Agreement, PSF hereby # grants Licensee a nonexclusive, royalty-free, world-wide license to # reproduce, analyze, test, perform and/or display publicly, prepare # derivative works, distribute, and otherwise use Python 3.6.4 alone or in # any derivative version, provided, however, that PSF's License Agreement # and PSF's notice of copyright, i.e., "Copyright c 2001-2016 Python # Software Foundation; All Rights Reserved" are retained in Python 3.6.4 # alone or in any derivative version prepared by Licensee. # # 3. In the event Licensee prepares a derivative work that is based on or # incorporates Python 3.6.4 or any part thereof, and wants to make the # derivative work available to others as provided herein, then Licensee # hereby agrees to include in any such work a brief summary of the changes # made to Python 3.6.4. # # 4. PSF is making Python 3.6.4 available to Licensee on an "AS IS" basis. PSF # MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF # EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION # OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR # THAT THE USE OF PYTHON 3.6.4 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. # # 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.6.4 FOR # ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF # MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 3.6.4, OR ANY # DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. # # 6. This License Agreement will automatically terminate upon a material breach # of its terms and conditions. # # 7. Nothing in this License Agreement shall be deemed to create any # relationship of agency, partnership, or joint venture between PSF and # Licensee. This License Agreement does not grant permission to use PSF # trademarks or trade name in a trademark sense to endorse or promote # products or services of Licensee, or any third party. # # 8. By copying, installing or otherwise using Python 3.6.4, Licensee agrees to # be bound by the terms and conditions of this License Agreement. # # These functions are under copyright by the Python Software Foundation # # Copyright 2001-2018 Python Software Foundation; All Rights Reserved import fnmatch import re def generic_glob(fs, path_impl, pathname): """A filesystem agnostic glob implemention. Parameters ---------- fs : filesystem The filesystem to search. path_impl : os.path like The path module implementation to use. Designed to pass in ``posixpath`` or ``ntpath`` modules directly. pathname : str The path or pattern to glob Returns ------- paths : list A list of paths matching the given path or pattern. """ dirname, basename = path_impl.split(pathname) if not dirname: raise ValueError("glob pattern must be an absolute path") if not _has_magic(pathname): if (not basename and _safe_isdir(fs, dirname) or basename and fs.exists(pathname)): return [pathname] return [] if basename and _has_magic(dirname): # Directory is a pattern, collect all matching directories dirs = [d for d in generic_glob(fs, path_impl, dirname) if _safe_isdir(fs, d)] else: # No basename (pattern ends in `/`, must match directories only) # or no magic in dirname (use dirname directly) dirs = [dirname] if _safe_isdir(fs, dirname) else [] glob_in_dir = _glob_pattern if _has_magic(basename) else _glob_path return [path_impl.join(dirname2, name) for dirname2 in dirs for name in glob_in_dir(fs, path_impl, dirname2, basename)] def _safe_isdir(fs, dirname): try: return fs.isdir(dirname) except OSError: # pyarrow isdir raises if the directory doesn't exist return False def _glob_pattern(fs, path_impl, dirname, pattern): names = [path_impl.split(f)[1] for f in fs.ls(dirname)] if not _ishidden(pattern): names = [x for x in names if not _ishidden(x)] return fnmatch.filter(names, pattern) def _glob_path(fs, path_impl, dirname, basename): if (not basename and _safe_isdir(fs, dirname) or basename and fs.exists(path_impl.join(dirname, basename))): return [basename] return [] _magic_check = re.compile('([*?[])') def _has_magic(s): return _magic_check.search(s) is not None def _ishidden(path): return path[0] == '.'