root/browser.py

Revision 339:a46365f70a13, 12.5 kB (checked in by Stefan Schwarzer <sschwarzer@sschwarzer.net>, 1 year ago)
Improved XHTML compatibility. Remaining errors (misplaced <body> and
premature </html>) seem to come from bugs in W3C's parser.
  • Property exe set to *
Line 
1 #! /usr/bin/env python
2 # encoding: UTF-8
3 # Copyright (C) 2007, Stefan Schwarzer
4 #
5 # Permission is hereby granted, free of charge, to any person
6 # obtaining a copy of this software and associated documentation files
7 # (the "Software"), to deal in the Software without restriction,
8 # including without limitation the rights to use, copy, modify, merge,
9 # publish, distribute, sublicense, and/or sell copies of the Software,
10 # and to permit persons to whom the Software is furnished to do so,
11 # subject to the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 # SOFTWARE.
24
25 import BaseHTTPServer
26 import cgi
27 import email.Utils
28 import httplib
29 import mimetypes
30 import os
31 import sys
32 import time
33 import urllib
34 import urlparse
35
36 # Websourcebrowser modules
37 import coding
38 import config
39 import converter
40 import pygmentsfinder
41 import session
42 import template
43 import urlpath
44
45
def actual_css_path():
    """
    Return the file system path of the CSS file to use.

    The path is built from the directory of this module and the
    file name in `config.CSS_FILE_NAME`.
    """
    # the CSS file is assumed to live next to this Python file
    module_file = sys.modules[__name__].__file__
    module_dir = os.path.dirname(os.path.abspath(module_file))
    return os.path.join(module_dir, config.CSS_FILE_NAME)
54
55
class SourceBrowserHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """
    HTTP request handler of Websourcebrowser.

    GET requests below the special directory (`config.SPECIAL_DIR`)
    are treated as builtin commands; all other URLs are mapped to
    the file system below `config.root` and rendered as HTML.
    """
    #protocol_version = "HTTP/1.1"
    server_version = "Websourcebrowser 0.x"
    sys_version = ""

    #
    # deal with builtin commands
    #
    def handle_builtin(self, url):
        """
        Handle a special command which is built into Websourcebrowser.

        Currently, there are the following commands:

        - the CSS file name: return the CSS file of Websourcebrowser
          to the web client

        - help: return the help page

        - source_placeholder: return the placeholder page for the
          source code frame

        - raw_data: returns the raw data of a file to the web client;
          the file is named by the query parameter `url`

        Any other command, and a `raw_data` request for something
        which isn't a regular file under the root directory, is
        answered with a "404 Not Found" response.
        """
        # determine builtin command/file; skip to string after the
        #  special directory (i. e. drop "/<special dir>/")
        builtin_url = url[2+len(config.SPECIAL_DIR):]
        parse_result = urlparse.urlparse(builtin_url)
        builtin = parse_result[2]
        query_mapping = cgi.parse_qs(parse_result[4])
        if builtin == config.CSS_FILE_NAME:
            css_path = actual_css_path()
            self.emit_data(self.get_file(css_path),
                           content_type="text/css",
                           mtime=os.path.getmtime(css_path))
        elif builtin == "help":
            self.emit_data(template.HELP, title=u"Websourcebrowser help",
                           h1_class="Other")
        elif builtin == "source_placeholder":
            self.emit_data(template.SOURCE_PLACEHOLDER,
                           title=u"Source code placeholder",
                           h1_class="Other")
        elif builtin == "raw_data":
            raw_url = query_mapping.get('url', ["/"])[0]
            # `urlpath.to_file_system` takes an encoded URL, so provide it
            raw_url = urllib.quote(raw_url)
            try:
                source_path = urlpath.to_file_system(config.root, raw_url)
            except urlpath.NotUnderRoot:
                self.emit_data(http_status=httplib.NOT_FOUND)
                return
            # a missing file or a directory can't be read as raw data;
            #  answer with an error instead of failing with an `IOError`
            if not os.path.isfile(source_path):
                self.emit_data(http_status=httplib.NOT_FOUND)
                return
            content_type = mimetypes.guess_type(source_path)[0]
            if content_type is None:
                content_type = "application/octet-stream"
            self.emit_data(data=self.get_file(source_path, raw=True),
                           content_type=content_type)
        else:
            # unknown command: always send some response to the client
            self.emit_data(http_status=httplib.NOT_FOUND)

    #
    # HTTP-specific processing
    #
    def log_message(self, *args, **kwargs):
        """
        Log a message via the base class, but only if `config.logging`
        is true.
        """
        if config.logging:
            return BaseHTTPServer.BaseHTTPRequestHandler.log_message(
                   self, *args, **kwargs)

    def emit_data(self, data=u"", title=u"", content_type="text/html",
                   h1_class="Path", http_status=httplib.OK, mtime=None,
                   refresh_html=u"", use_header_and_footer=True):
        """
        Emit the `data` on the HTTP output stream with a content type
        of `content_type`. If `h1_class` is set, it's used to set the
        CSS class of the h1 heading. If the integer `http_status` is
        set, it's the HTTP status code of the response. The default is
        200 (OK). If `mtime` is set, it's taken as a floating point
        value like from `time.time()` to set a Last-Modified header.
        `title` and `refresh_html` are inserted into the header
        template. If `use_header_and_footer` is true (the default),
        prepend/append the header and footer templates from the
        `template` module.

        For HTML responses with a 4xx or 5xx status, `h1_class`,
        `title` and `data` are replaced by values describing the
        error.
        """
        self.send_response(http_status)
        is_text = content_type.startswith("text/")
        is_html = (content_type == "text/html")
        if is_text:
            header_content_type = "%s; charset=%s" % (content_type,
                                                      coding.DEFAULT_ENCODING)
        else:
            header_content_type = content_type
        self.send_header("Content-type", header_content_type)
        # compare with `None` so that a timestamp of 0 is also sent
        if mtime is not None:
            # HTTP dates have to use "GMT" as time zone, not "-0000"
            self.send_header("Last-Modified",
                             email.Utils.formatdate(mtime, usegmt=True))
        if content_type != "text/css":
            self.send_header("Cache-Control", "no-cache")
        self.end_headers()
        if is_html:
            http_status_group = str(http_status)[:1]
            if http_status_group in ("4", "5"):
                # client or server error: show the standard texts
                h1_class = "Error"
                title = httplib.responses[http_status]
                data = self.responses[http_status][1]
            if use_header_and_footer:
                self.wfile.write(coding.encode(template.HEADER % {
                  'title': cgi.escape(title),
                  'project_title': cgi.escape(config.project_title),
                  'h1_class': h1_class,
                  'special_dir': config.SPECIAL_DIR,
                  'refresh_html': refresh_html}))
        if is_text:
            self.wfile.write(coding.encode(data))
        else:
            # non-text data is written as is
            self.wfile.write(data)
        if is_html and use_header_and_footer:
            self.wfile.write(coding.encode(template.FOOTER))

    def _url_and_params(self, url):
        """
        Return a tuple, splitting the URL `url` into an URL without
        query string and the parameters which
        `session.default_session.get_from_url` extracts from `url`.
        """
        params = session.default_session.get_from_url(url)
        parsed_url = list(urlparse.urlparse(url))
        # index 4 is the query part of the parsed URL
        parsed_url[4] = ""
        return urlparse.urlunparse(parsed_url), params

    def do_GET(self):
        """
        Handle HTTP GET request.

        Possible error responses are "403 Forbidden" for clients
        which aren't allowed, "400 Bad Request" for URLs outside the
        root directory and "404 Not Found" for paths which don't
        exist.
        """
        # reject forbidden clients; the CSS file is served to everyone
        if config.allowed_clients is not config.ALL_CLIENTS and \
          self.client_address[0] not in config.allowed_clients and \
          self.path != "/%s/%s" % (config.SPECIAL_DIR, config.CSS_FILE_NAME):
            self.emit_data(http_status=httplib.FORBIDDEN)
            return
        # handle builtin command
        if self.path.startswith("/%s/" % config.SPECIAL_DIR):
            self.handle_builtin(self.path)
            return
        # separate URL and query parameters
        url, params = self._url_and_params(self.path)
        try:
            path = urlpath.to_file_system(config.root, url)
        except urlpath.NotUnderRoot:
            # don't show items "above" the current directory; thereby
            #  avoiding information disclosure (double dot attack)
            self.emit_data(http_status=httplib.BAD_REQUEST)
            # the response is complete and `path` is undefined, so
            #  don't go on
            return
        # defaults
        use_header_and_footer = True
        h1_class = "File"
        if not os.path.exists(path):
            self.emit_data(http_status=httplib.NOT_FOUND)
            return
        if os.path.isdir(path):
            # by default, use frames
            use_frames = (params.get('frames', "yes") == "yes")
            if use_frames:
                # emit the frameset
                # don't use frames in windows which are part of the frameset
                params['frames'] = "no"
                dir_url = session.default_session.add_to_url(url, params)
                html = template.FRAMESET % {
                  'project_title': cgi.escape(config.project_title),
                  'special_dir': config.SPECIAL_DIR,
                  'dir_url':
                    cgi.escape('%s#%s' % (dir_url, params.get('anchor', ""))),
                  'dir_target': config.DIR_WINDOW_TARGET,
                  'source_target': config.SOURCE_WINDOW_TARGET}
                use_header_and_footer = False
            else:
                # emit the frame
                html = converter.dir_to_html(path, params)
                h1_class = "Dir"
        elif self.is_image_path(path):
            html = converter.image_to_html(url)
        else:
            # read only the start of the file to guess if it's binary
            data = self.get_file(path, raw=True,
                                 size=self._binary_test_size)
            if self.is_binary(data):
                data = self.get_file(path, raw=True)
                html = converter.binary_to_html(data)
            else:
                text = self.get_file(path)
                html = converter.text_to_html(text, path)
        # assume title is UTF-8-encoded though we don't know for sure
        title = coding.decode(url[1:]) or u"."
        title = urllib.unquote(title)
        self.emit_data(html, title=title, mtime=os.path.getmtime(path),
                       h1_class=h1_class,
                       use_header_and_footer=use_header_and_footer)

    #
    # utility methods
    #
    def get_file(self, path, raw=False, size=None):
        """
        Read the file identified by `path` and return its contents
        decoded as unicode string unless `raw` is true (by default,
        `raw` is false). If `size` is given, that many
        bytes/characters are read; by default, the whole file is read.

        Errors from opening or reading the file (`IOError`) are not
        caught here.
        """
        if raw:
            mode = 'rb'
        else:
            mode = 'r'
        f = open(path, mode)
        try:
            if size is None:
                data = f.read()
            else:
                data = f.read(size)
        finally:
            f.close()
        if raw:
            return data
        return coding.decode(data)

    def is_image_path(self, path):
        """
        Return `True` if the `path` presumably represents an image,
        else return `False`. The decision is based on the MIME type
        guessed from the path name; the file itself isn't read.
        """
        mime_type = mimetypes.guess_type(path)[0]
        if mime_type is None:
            return False
        # the main type is the part before the slash, e. g. "image/png"
        return mime_type.split("/", 1)[0] == "image"

    # number of bytes to read to test for binary data
    _binary_test_size = 1024

    def is_binary(self, data):
        """
        Return `True`, if `data` assumedly represents binary data,
        not text data. Else return `False`.

        `data` is a byte string. Empty data counts as text.
        """
        if not data:
            return False
        # assume binary data if over 5 % control codes
        threshold = 0.05
        # newline, carriage return and tab are common in text files
        control_code_count = sum(
          1 for byte in data
          if ord(byte) < 32 and byte not in "\n\r\t")
        return (control_code_count / float(len(data)) > threshold)
286
287
def startup_info():
    """
    Print information on the server configuration to standard output.

    The output covers the host and port, the allowed client
    addresses, ignored invalid client addresses (if any), whether
    Pygments is used and the key combination to stop the server.
    """
    address = (config.http_host, config.http_port)
    print("Trying to listen on host %s, port %d." % address)
    print("Type http://%s:%d/ into the address field of your "
          "webbrowser." % address)
    if config.allowed_clients is config.ALL_CLIENTS:
        clients = "all (running as public server)."
    else:
        clients = "%s." % (", ".join(sorted(config.allowed_clients)))
    print("Access is allowed from these IP addresses: %s" % clients)
    if config.invalid_clients:
        print("Ignored invalid client addresses: %s." %
              ", ".join(config.invalid_clients))
    if pygmentsfinder.found_pygments:
        print("Using Pygments library for syntax highlighting.")
    else:
        print("Pygments library not found - no syntax highlighting possible.")
        if config.line_numbers:
            print("Pygments library not found - omitting line numbers.")
    # don't test with `'win' in sys.platform`; that's also true for
    #  "darwin" (Mac OS X) and "cygwin"
    if sys.platform.startswith("win"):
        exit_key = "Ctrl-Break"
    else:
        exit_key = "Ctrl-C"
    print("Press %s to exit." % exit_key)
312
313
def _main():
    """
    Read the configuration, print the startup information and run
    the HTTP server until it's interrupted.

    The configuration is taken from the environment first, then
    from the command line arguments.
    """
    config.set_from_environment()
    config.set_from_args()
    startup_info()
    server_address = (config.http_host, config.http_port)
    httpd = BaseHTTPServer.HTTPServer(server_address, SourceBrowserHandler)
    try:
        httpd.serve_forever()
    finally:
        # release the listening socket, also on `KeyboardInterrupt`
        httpd.server_close()
321
def main():
    """
    Run the server via `_main`; if the user interrupts the program
    from the keyboard, print a short message instead of a traceback.
    """
    try:
        _main()
    except KeyboardInterrupt:
        print("Aborted by keyboard interrupt from user.")


# start the server only if this file is run as a script
if __name__ == "__main__":
    main()
331
Note: See TracBrowser for help on using the browser.