root/dirtree.py

Revision 331:fac137cd9111, 6.3 kB (checked in by Stefan Schwarzer <sschwarzer@sschwarzer.net>, 1 year ago)
Documented that the "main program" is actually test code.
Line 
1 # Copyright (C) 2007, Stefan Schwarzer
2 #
3 # Permission is hereby granted, free of charge, to any person
4 # obtaining a copy of this software and associated documentation files
5 # (the "Software"), to deal in the Software without restriction,
6 # including without limitation the rights to use, copy, modify, merge,
7 # publish, distribute, sublicense, and/or sell copies of the Software,
8 # and to permit persons to whom the Software is furnished to do so,
9 # subject to the following conditions:
10 #
11 # The above copyright notice and this permission notice shall be
12 # included in all copies or substantial portions of the Software.
13 #
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 # SOFTWARE.
22
23 import fnmatch
24 import itertools
25 import os
26 import stat
27 import sys
28
29 # Websourcebrowser modules
30 import config
31
32
class ReadError(Exception):
    """
    Raised by `DirectoryTree.read` when the root directory of the
    tree can't be scanned.
    """
35
36
def dir_level(path):
    """
    Return the nesting "depth" of the string `path`, measured as the
    number of directory separators (`os.sep`) it contains.

    A path whose only separator is its last character (for example
    the separator on its own) is treated as depth 0.
    """
    separator_count = path.count(os.sep)
    only_trailing_separator = (separator_count == 1 and
                               path.endswith(os.sep))
    if only_trailing_separator:
        return 0
    return separator_count
47
48
class _ItemFamily(object):
    """
    Record the relatives of an item: its parent and its previous and
    next siblings. All three attributes start out as empty strings.
    """
    def __init__(self):
        self.parent = ""
        self.previous = ""
        self.next = ""

    def __repr__(self):
        template = '<%s object with parent="%s", previous="%s", next="%s">'
        class_name = self.__class__.__name__
        return template % (class_name, self.parent, self.previous, self.next)
59
60
class DirectoryTree(object):
    """
    Listing of the directories and files below a root directory.

    Attributes:

    `root` is the absolute, normalized path given to the constructor.

    `items` is the list of item paths found by the last successful
    call to `read`; it is empty until `read` has been called.
    """
    # Default for "no depth limit". `sys.maxint` doesn't exist in
    #  Python 3 and `sys.maxsize` doesn't exist before Python 2.6, so
    #  use whichever one the running interpreter provides.
    _UNLIMITED_DEPTH = getattr(sys, "maxsize", None) or sys.maxint

    def __init__(self, root):
        self.root = os.path.abspath(os.path.normpath(root))
        # start with an empty listing so that `str()` on the object
        #  works even before `read` was called
        self.items = []

    def walk(self, path, depth=_UNLIMITED_DEPTH, _max_level=None):
        """
        Return a generator which recursively yields the items
        (directories and files) with their full paths, starting at the
        root `path`. On each level, the items are sorted with
        directories first, then regular files; within each group
        they're sorted by name.

        If `depth` is given, it's taken for the maximum recursion
        depth of the algorithm, i. e. if `depth` is 1, no recursion is
        done, only the directories and files in `path` are listed. By
        default, the directory `path` with all its nested
        subdirectories is visited.

        `_max_level` is used internally for the recursive calls and
        shouldn't be passed in by other callers.

        Items which are neither directories nor regular files
        (sockets, device files etc.) and items which can't be
        examined with `os.stat` are left out. If `path` itself can't
        be listed, nothing is yielded.

        Like with `os.walk`, if a directory is really a symbolic link,
        it will be listed but not visited to avoid infinite link
        cycles.
        """
        path = os.path.abspath(path)
        if _max_level is None:
            # only computed for the outermost call; the recursive calls
            #  get the absolute limit passed in
            _max_level = dir_level(path) + depth + 1
        # `listdir` doesn't return items with path separators
        try:
            names = os.listdir(path)
        except OSError:
            return
        # keep only directories and files and prepend a priority value
        #  for sorting (1 for directories, 2 for regular files)
        entries = []
        for name in names:
            try:
                # `os.stat` follows links, so a link to a directory
                #  counts as a directory here
                item_mode = os.stat(os.path.join(path, name)).st_mode
            except OSError:
                continue
            if stat.S_ISDIR(item_mode):
                entries.append((1, name))
            elif stat.S_ISREG(item_mode):
                entries.append((2, name))
        # sort by priority, then basename
        entries.sort()
        # yield the sorted items, if necessary, recursively
        for priority, name in entries:
            item = os.path.join(path, name)
            yield item
            is_real_directory = (priority == 1 and not os.path.islink(item))
            levels_left = (dir_level(item) + 1 < _max_level)
            if is_real_directory and levels_left:
                for inner_item in self.walk(item, _max_level=_max_level):
                    yield inner_item

    def _ignore_item(self, item):
        """
        Return `True` if the path `item` should be omitted from the
        list of directories and files, else return `False`.

        An item is omitted if it matches at least one of the `fnmatch`
        patterns in `config.ignore_patterns`. The patterns are
        matched against the string `item` as a whole.
        """
        for pattern in config.ignore_patterns:
            if fnmatch.fnmatch(item, pattern):
                return True
        return False

    def read(self, depth=_UNLIMITED_DEPTH):
        """
        Read a directory tree `depth` levels deep. A level 1 means
        just the flat directory contents. If the root directory
        (`self.root`) can't be scanned because it isn't a readable
        directory, raise a `ReadError`.

        If the method executes successfully, the instance attribute
        `items` will contain a list of the read file system items
        (full paths as returned by `walk`). The items will have the
        ignore patterns in `config.ignore_patterns` already applied.
        Each item is checked on its own, so the contents of an ignored
        directory are still listed unless they match a pattern
        themselves.

        Note that this method sets only `items`; it doesn't compute a
        mapping with the parent and sibling relations of the items.
        """
        # `walk` silently yields nothing for a path it can't list, so
        #  the check for a usable root has to happen here
        root_is_readable_dir = (os.path.isdir(self.root) and
                                os.access(self.root, os.R_OK))
        if not root_is_readable_dir:
            raise ReadError("root '%s' can't be scanned" % self.root)
        # read and filter items
        self.items = [item for item in self.walk(self.root, depth=depth)
                      if not self._ignore_item(item)]

    def __str__(self):
        """
        Return the items from the last `read`, one path per line.
        """
        return "\n".join(self.items)
161
162
if __name__ == "__main__":
    # This is test code, not a command line interface: it applies
    #  `config.set_from_environment()` and then reads one hard-coded
    #  directory tree without printing anything.
    config.set_from_environment()
    tree = DirectoryTree(root="/home/schwa/sd/pypy-dist")
    tree.read()
168
Note: See TracBrowser for help on using the browser.