| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 |
|
|---|
| 19 |
|
|---|
| 20 |
|
|---|
| 21 |
|
|---|
| 22 |
|
|---|
| 23 |
import fnmatch |
|---|
| 24 |
import itertools |
|---|
| 25 |
import os |
|---|
| 26 |
import stat |
|---|
| 27 |
import sys |
|---|
| 28 |
|
|---|
| 29 |
|
|---|
| 30 |
import config |
|---|
| 31 |
|
|---|
| 32 |
|
|---|
| 33 |
class ReadError(Exception):
    """
    Raised by `DirectoryTree.read` when the root directory of the tree
    can't be scanned.
    """
|---|
| 35 |
|
|---|
| 36 |
|
|---|
| 37 |
def dir_level(path):
    """
    Return the nesting "depth" of `path`, measured as the number of
    `os.sep` characters it contains.

    A path that contains exactly one separator and ends with it (for
    example the bare file system root) is reported as depth 0.
    """
    separator_count = path.count(os.sep)
    # Special case: a single trailing separator counts as the top level.
    if path.endswith(os.sep) and separator_count == 1:
        return 0
    return separator_count
|---|
| 47 |
|
|---|
| 48 |
|
|---|
| 49 |
class _ItemFamily(object):
    """
    Hold the relatives of a single item: its parent and its previous
    and next siblings.
    """
    def __init__(self):
        # Every relation starts out as the empty string.
        self.parent = ""
        self.previous = ""
        self.next = ""

    def __repr__(self):
        template = '<%s object with parent="%s", previous="%s", next="%s">'
        values = (type(self).__name__, self.parent, self.previous, self.next)
        return template % values
|---|
| 59 |
|
|---|
| 60 |
|
|---|
| 61 |
class DirectoryTree(object):
    """
    Listing of the directories and regular files below a root
    directory.

    The attribute `root` holds the normalized absolute path of the
    root directory. The attribute `items` holds the list of full item
    paths collected by the last successful call of `read`; before the
    first call it's an empty list.
    """

    # Default for the `depth` arguments, meaning "practically
    # unlimited". `sys.maxint` exists only on Python 2, so fall back
    # to `sys.maxsize` where it's missing.
    _UNLIMITED_DEPTH = getattr(sys, "maxint", None) or sys.maxsize

    # Sort keys used in `walk`; directories sort before regular files.
    _DIRECTORY = 1
    _REGULAR_FILE = 2

    def __init__(self, root):
        """
        Remember the normalized absolute path of `root`. The file
        system isn't accessed until `read` or `walk` is called.
        """
        self.root = os.path.abspath(os.path.normpath(root))
        # Set here so that `str(tree)` also works before `read()`.
        self.items = []

    def walk(self, path, depth=_UNLIMITED_DEPTH, _max_level=None):
        """
        Return a generator which recursively yields the items
        (directories and files) with their full paths, starting at the
        root `path`. On each level, the items are sorted with
        directories first, then regular files. An item is yielded
        before the items below it.

        If `depth` is given, it's taken for the maximum recursion
        depth of the algorithm, i. e. if `depth` is 1, no recursion is
        done, only the directories and files in `path` are listed. By
        default, the directory `path` with all its nested
        subdirectories is visited.

        `_max_level` is for internal use by the recursion only; it
        carries the deepest directory level (see `dir_level`) that may
        still be listed.

        Like with `os.walk`, if a directory is really a symbolic link,
        it will be listed but not visited to avoid infinite link
        cycles.

        Directories that can't be listed are skipped silently. Items
        that can't be examined with `os.stat` and items which are
        neither directories nor regular files are left out.
        """
        path = os.path.abspath(path)
        if _max_level is None:
            _max_level = dir_level(path) + depth + 1

        try:
            names = os.listdir(path)
        except OSError:
            # Unreadable directory: contribute nothing to the listing.
            return

        # Collect (sort key, name) pairs for the supported item types.
        entries = []
        for name in names:
            try:
                # `os.stat` follows symbolic links, so a link to a
                # directory is classified as a directory.
                mode = os.stat(os.path.join(path, name)).st_mode
            except OSError:
                continue
            if stat.S_ISDIR(mode):
                entries.append((self._DIRECTORY, name))
            elif stat.S_ISREG(mode):
                entries.append((self._REGULAR_FILE, name))
        # Sort by type first (directories, then files), then by name.
        entries.sort()

        for kind, name in entries:
            full_path = os.path.join(path, name)
            yield full_path
            # Descend only into real directories, never into links.
            if kind != self._DIRECTORY or os.path.islink(full_path):
                continue
            if dir_level(full_path) + 1 < _max_level:
                for nested_item in self.walk(full_path,
                                             _max_level=_max_level):
                    yield nested_item

    def _ignore_item(self, item):
        """
        Return `True` if the path `item` should be omitted from the
        list of directories and files, else return `False`.

        An item is omitted if it matches at least one of the patterns
        in `config.ignore_patterns` according to `fnmatch.fnmatch`.
        """
        for pattern in config.ignore_patterns:
            if fnmatch.fnmatch(item, pattern):
                return True
        return False

    def read(self, depth=_UNLIMITED_DEPTH):
        """
        Read a directory tree `depth` levels deep. A level 1 means
        just the flat directory contents. If the root directory
        (`self.root`) can't be scanned, raise a `ReadError`.

        If the method executes successfully, the instance attribute
        `items` will be a list of the read file system items, in the
        order produced by `walk`. The items will have the ignore
        patterns in `config.ignore_patterns` already applied.

        Note that this method sets only `items`; it doesn't build a
        mapping from items to their parent or sibling items.
        """
        if not os.access(self.root, os.R_OK):
            raise ReadError("root '%s' can't be scanned" % self.root)

        self.items = [item for item in self.walk(self.root, depth=depth)
                      if not self._ignore_item(item)]

    def __str__(self):
        """Return the paths in `items`, one per line."""
        return "\n".join(self.items)
|---|
| 161 |
|
|---|
| 162 |
|
|---|
| 163 |
if __name__ == '__main__':
    # Manual smoke run: take the settings from the environment, then
    # read the complete tree below a fixed directory.
    config.set_from_environment()
    tree = DirectoryTree(root="/home/schwa/sd/pypy-dist")
    tree.read()
|---|
| 168 |
|
|---|