lib/tornado/httpserver.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2009 Facebook
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License"); you may
   6 # not use this file except in compliance with the License. You may obtain
   7 # a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14 # License for the specific language governing permissions and limitations
  15 # under the License.
  16
  17 """A non-blocking, single-threaded HTTP server."""
  18
  19 import cgi
  20 import errno
  21 import httputil
  22 import ioloop
  23 import iostream
  24 import logging
  25 import os
  26 import socket
  27 import time
  28 import urlparse
  29
  30 try:
  31     import fcntl
  32 except ImportError:
  33     if os.name == 'nt':
  34         import win32_support as fcntl
  35     else:
  36         raise
  37
  38 try:
  39     import ssl # Python 2.6+
  40 except ImportError:
  41     ssl = None
  42
  43 class HTTPServer(object):
  44     """A non-blocking, single-threaded HTTP server.
  45
  46     A server is defined by a request callback that takes an HTTPRequest
  47     instance as an argument and writes a valid HTTP response with
  48     request.write(). request.finish() finishes the request (but does not
  49     necessarily close the connection in the case of HTTP/1.1 keep-alive
  50     requests). A simple example server that echoes back the URI you
  51     requested:
  52
  53         import httpserver
  54         import ioloop
  55
  56         def handle_request(request):
  57            message = "You requested %s\n" % request.uri
  58            request.write("HTTP/1.1 200 OK\r\nContent-Length: %d\r\n\r\n%s" % (
  59                          len(message), message))
  60            request.finish()
  61
  62         http_server = httpserver.HTTPServer(handle_request)
  63         http_server.listen(8888)
  64         ioloop.IOLoop.instance().start()
  65
  66     HTTPServer is a very basic connection handler. Beyond parsing the
  67     HTTP request body and headers, the only HTTP semantics implemented
  68     in HTTPServer is HTTP/1.1 keep-alive connections. We do not, however,
  69     implement chunked encoding, so the request callback must provide a
  70     Content-Length header or implement chunked encoding for HTTP/1.1
  71     requests for the server to run correctly for HTTP/1.1 clients. If
  72     the request handler is unable to do this, you can provide the
  73     no_keep_alive argument to the HTTPServer constructor, which will
  74     ensure the connection is closed on every request no matter what HTTP
  75     version the client is using.
  76
  77     If xheaders is True, we support the X-Real-Ip and X-Scheme headers,
  78     which override the remote IP and HTTP scheme for all requests. These
  79     headers are useful when running Tornado behind a reverse proxy or
  80     load balancer.
  81
  82     HTTPServer can serve HTTPS (SSL) traffic with Python 2.6+ and OpenSSL.
  83     To make this server serve SSL traffic, send the ssl_options dictionary
  84     argument with the arguments required for the ssl.wrap_socket() method,
  85     including "certfile" and "keyfile":
  86
  87        HTTPServer(applicaton, ssl_options={
  88            "certfile": os.path.join(data_dir, "mydomain.crt"),
  89            "keyfile": os.path.join(data_dir, "mydomain.key"),
  90        })
  91
  92     By default, listen() runs in a single thread in a single process. You
  93     can utilize all available CPUs on this machine by calling bind() and
  94     start() instead of listen():
  95
  96         http_server = httpserver.HTTPServer(handle_request)
  97         http_server.bind(8888)
  98         http_server.start() # Forks multiple sub-processes
  99         ioloop.IOLoop.instance().start()
 100
 101     start() detects the number of CPUs on this machine and "pre-forks" that
 102     number of child processes so that we have one Tornado process per CPU,
 103     all with their own IOLoop. You can also pass in the specific number of
 104     child processes you want to run with if you want to override this
 105     auto-detection.
 106     """
 107     def __init__(self, request_callback, no_keep_alive=False, io_loop=None,
 108                  xheaders=False, ssl_options=None):
 109         """Initializes the server with the given request callback.
 110
 111         If you use pre-forking/start() instead of the listen() method to
 112         start your server, you should not pass an IOLoop instance to this
 113         constructor. Each pre-forked child process will create its own
 114         IOLoop instance after the forking process.
 115         """
 116         self.request_callback = request_callback
 117         self.no_keep_alive = no_keep_alive
 118         self.io_loop = io_loop
 119         self.xheaders = xheaders
 120         self.ssl_options = ssl_options
 121         self._socket = None
 122         self._started = False
 123
 124     def listen(self, port, address=""):
 125         """Binds to the given port and starts the server in a single process.
 126
 127         This method is a shortcut for:
 128
 129             server.bind(port, address)
 130             server.start(1)
 131
 132         """
 133         self.bind(port, address)
 134         self.start(1)
 135
 136     def bind(self, port, address=""):
 137         """Binds this server to the given port on the given IP address.
 138
 139         To start the server, call start(). If you want to run this server
 140         in a single process, you can call listen() as a shortcut to the
 141         sequence of bind() and start() calls.
 142         """
 143         assert not self._socket
 144         self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
 145         flags = fcntl.fcntl(self._socket.fileno(), fcntl.F_GETFD)
 146         flags |= fcntl.FD_CLOEXEC
 147         fcntl.fcntl(self._socket.fileno(), fcntl.F_SETFD, flags)
 148         self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
 149         self._socket.setblocking(0)
 150         self._socket.bind((address, port))
 151         self._socket.listen(128)
 152
 153     def start(self, num_processes=1):
 154         """Starts this server in the IOLoop.
 155
 156         By default, we run the server in this process and do not fork any
 157         additional child process.
 158
 159         If num_processes is None or <= 0, we detect the number of cores
 160         available on this machine and fork that number of child
 161         processes. If num_processes is given and > 1, we fork that
 162         specific number of sub-processes.
 163
 164         Since we use processes and not threads, there is no shared memory
 165         between any server code.
 166         """
 167         assert not self._started
 168         self._started = True
 169         if num_processes is None or num_processes <= 0:
 170             # Use sysconf to detect the number of CPUs (cores)
 171             try:
 172                 num_processes = os.sysconf("SC_NPROCESSORS_CONF")
 173             except ValueError:
 174                 logging.error("Could not get num processors from sysconf; "
 175                               "running with one process")
 176                 num_processes = 1
 177         if num_processes > 1 and ioloop.IOLoop.initialized():
 178             logging.error("Cannot run in multiple processes: IOLoop instance "
 179                           "has already been initialized. You cannot call "
 180                           "IOLoop.instance() before calling start()")
 181             num_processes = 1
 182         if num_processes > 1:
 183             logging.info("Pre-forking %d server processes", num_processes)
 184             for i in range(num_processes):
 185                 if os.fork() == 0:
 186                     self.io_loop = ioloop.IOLoop.instance()
 187                     self.io_loop.add_handler(
 188                         self._socket.fileno(), self._handle_events,
 189                         ioloop.IOLoop.READ)
 190                     return
 191             os.waitpid(-1, 0)
 192         else:
 193             if not self.io_loop:
 194                 self.io_loop = ioloop.IOLoop.instance()
 195             self.io_loop.add_handler(self._socket.fileno(),
 196                                      self._handle_events,
 197                                      ioloop.IOLoop.READ)
 198
 199     def stop(self):
 200       self.io_loop.remove_handler(self._socket.fileno())
 201       self._socket.close()
 202
 203     def _handle_events(self, fd, events):
 204         while True:
 205             try:
 206                 connection, address = self._socket.accept()
 207             except socket.error, e:
 208                 if e[0] in (errno.EWOULDBLOCK, errno.EAGAIN):
 209                     return
 210                 raise
 211             if self.ssl_options is not None:
 212                 assert ssl, "Python 2.6+ and OpenSSL required for SSL"
 213                 connection = ssl.wrap_socket(
 214                     connection, server_side=True, **self.ssl_options)
 215             try:
 216                 stream = iostream.IOStream(connection, io_loop=self.io_loop)
 217                 HTTPConnection(stream, address, self.request_callback,
 218                                self.no_keep_alive, self.xheaders)
 219             except:
 220                 logging.error("Error in connection callback", exc_info=True)
 221
 222
 223 class HTTPConnection(object):
 224     """Handles a connection to an HTTP client, executing HTTP requests.
 225
 226     We parse HTTP headers and bodies, and execute the request callback
 227     until the HTTP conection is closed.
 228     """
 229     def __init__(self, stream, address, request_callback, no_keep_alive=False,
 230                  xheaders=False):
 231         self.stream = stream
 232         self.address = address
 233         self.request_callback = request_callback
 234         self.no_keep_alive = no_keep_alive
 235         self.xheaders = xheaders
 236         self._request = None
 237         self._request_finished = False
 238         self.stream.read_until("\r\n\r\n", self._on_headers)
 239
 240     def write(self, chunk):
 241         assert self._request, "Request closed"
 242         if not self.stream.closed():
 243             self.stream.write(chunk, self._on_write_complete)
 244
 245     def finish(self):
 246         assert self._request, "Request closed"
 247         self._request_finished = True
 248         if not self.stream.writing():
 249             self._finish_request()
 250
 251     def _on_write_complete(self):
 252         if self._request_finished:
 253             self._finish_request()
 254
 255     def _finish_request(self):
 256         if self.no_keep_alive:
 257             disconnect = True
 258         else:
 259             connection_header = self._request.headers.get("Connection")
 260             if self._request.supports_http_1_1():
 261                 disconnect = connection_header == "close"
 262             elif ("Content-Length" in self._request.headers
 263                     or self._request.method in ("HEAD", "GET")):
 264                 disconnect = connection_header != "Keep-Alive"
 265             else:
 266                 disconnect = True
 267         self._request = None
 268         self._request_finished = False
 269         if disconnect:
 270             self.stream.close()
 271             return
 272         self.stream.read_until("\r\n\r\n", self._on_headers)
 273
 274     def _on_headers(self, data):
 275         eol = data.find("\r\n")
 276         start_line = data[:eol]
 277         method, uri, version = start_line.split(" ")
 278         if not version.startswith("HTTP/"):
 279             raise Exception("Malformed HTTP version in HTTP Request-Line")
 280         headers = httputil.HTTPHeaders.parse(data[eol:])
 281         self._request = HTTPRequest(
 282             connection=self, method=method, uri=uri, version=version,
 283             headers=headers, remote_ip=self.address[0])
 284
 285         content_length = headers.get("Content-Length")
 286         if content_length:
 287             content_length = int(content_length)
 288             if content_length > self.stream.max_buffer_size:
 289                 raise Exception("Content-Length too long")
 290             if headers.get("Expect") == "100-continue":
 291                 self.stream.write("HTTP/1.1 100 (Continue)\r\n\r\n")
 292             self.stream.read_bytes(content_length, self._on_request_body)
 293             return
 294
 295         self.request_callback(self._request)
 296
 297     def _on_request_body(self, data):
 298         self._request.body = data
 299         content_type = self._request.headers.get("Content-Type", "")
 300         if self._request.method == "POST":
 301             if content_type.startswith("application/x-www-form-urlencoded"):
 302                 arguments = cgi.parse_qs(self._request.body)
 303                 for name, values in arguments.iteritems():
 304                     values = [v for v in values if v]
 305                     if values:
 306                         self._request.arguments.setdefault(name, []).extend(
 307                             values)
 308             elif content_type.startswith("multipart/form-data"):
 309                 if 'boundary=' in content_type:
 310                     boundary = content_type.split('boundary=',1)[1]
 311                     if boundary: self._parse_mime_body(boundary, data)
 312                 else:
 313                     logging.warning("Invalid multipart/form-data")
 314         self.request_callback(self._request)
 315
 316     def _parse_mime_body(self, boundary, data):
 317         # The standard allows for the boundary to be quoted in the header,
 318         # although it's rare (it happens at least for google app engine
 319         # xmpp).  I think we're also supposed to handle backslash-escapes
 320         # here but I'll save that until we see a client that uses them
 321         # in the wild.
 322         if boundary.startswith('"') and boundary.endswith('"'):
 323             boundary = boundary[1:-1]
 324         if data.endswith("\r\n"):
 325             footer_length = len(boundary) + 6
 326         else:
 327             footer_length = len(boundary) + 4
 328         parts = data[:-footer_length].split("--" + boundary + "\r\n")
 329         for part in parts:
 330             if not part: continue
 331             eoh = part.find("\r\n\r\n")
 332             if eoh == -1:
 333                 logging.warning("multipart/form-data missing headers")
 334                 continue
 335             headers = httputil.HTTPHeaders.parse(part[:eoh])
 336             name_header = headers.get("Content-Disposition", "")
 337             if not name_header.startswith("form-data;") or \
 338                not part.endswith("\r\n"):
 339                 logging.warning("Invalid multipart/form-data")
 340                 continue
 341             value = part[eoh + 4:-2]
 342             name_values = {}
 343             for name_part in name_header[10:].split(";"):
 344                 name, name_value = name_part.strip().split("=", 1)
 345                 name_values[name] = name_value.strip('"').decode("utf-8")
 346             if not name_values.get("name"):
 347                 logging.warning("multipart/form-data value missing name")
 348                 continue
 349             name = name_values["name"]
 350             if name_values.get("filename"):
 351                 ctype = headers.get("Content-Type", "application/unknown")
 352                 self._request.files.setdefault(name, []).append(dict(
 353                     filename=name_values["filename"], body=value,
 354                     content_type=ctype))
 355             else:
 356                 self._request.arguments.setdefault(name, []).append(value)
 357
 358
 359 class HTTPRequest(object):
 360     """A single HTTP request.
 361
 362     GET/POST arguments are available in the arguments property, which
 363     maps arguments names to lists of values (to support multiple values
 364     for individual names). Names and values are both unicode always.
 365
 366     File uploads are available in the files property, which maps file
 367     names to list of files. Each file is a dictionary of the form
 368     {"filename":..., "content_type":..., "body":...}. The content_type
 369     comes from the provided HTTP header and should not be trusted
 370     outright given that it can be easily forged.
 371
 372     An HTTP request is attached to a single HTTP connection, which can
 373     be accessed through the "connection" attribute. Since connections
 374     are typically kept open in HTTP/1.1, multiple requests can be handled
 375     sequentially on a single connection.
 376     """
 377     def __init__(self, method, uri, version="HTTP/1.0", headers=None,
 378                  body=None, remote_ip=None, protocol=None, host=None,
 379                  files=None, connection=None):
 380         self.method = method
 381         self.uri = uri
 382         self.version = version
 383         self.headers = headers or httputil.HTTPHeaders()
 384         self.body = body or ""
 385         if connection and connection.xheaders:
 386             # Squid uses X-Forwarded-For, others use X-Real-Ip
 387             self.remote_ip = self.headers.get(
 388                 "X-Real-Ip", self.headers.get("X-Forwarded-For", remote_ip))
 389             self.protocol = self.headers.get("X-Scheme", protocol) or "http"
 390         else:
 391             self.remote_ip = remote_ip
 392             self.protocol = protocol or "http"
 393         self.host = host or self.headers.get("Host") or "127.0.0.1"
 394         self.files = files or {}
 395         self.connection = connection
 396         self._start_time = time.time()
 397         self._finish_time = None
 398
 399         scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
 400         self.path = path
 401         self.query = query
 402         arguments = cgi.parse_qs(query)
 403         self.arguments = {}
 404         for name, values in arguments.iteritems():
 405             values = [v for v in values if v]
 406             if values: self.arguments[name] = values
 407
 408     def supports_http_1_1(self):
 409         """Returns True if this request supports HTTP/1.1 semantics"""
 410         return self.version == "HTTP/1.1"
 411
 412     def write(self, chunk):
 413         """Writes the given chunk to the response stream."""
 414         assert isinstance(chunk, str)
 415         self.connection.write(chunk)
 416
 417     def finish(self):
 418         """Finishes this HTTP request on the open connection."""
 419         self.connection.finish()
 420         self._finish_time = time.time()
 421
 422     def full_url(self):
 423         """Reconstructs the full URL for this request."""
 424         return self.protocol + "://" + self.host + self.uri
 425
 426     def request_time(self):
 427         """Returns the amount of time it took for this request to execute."""
 428         if self._finish_time is None:
 429             return time.time() - self._start_time
 430         else:
 431             return self._finish_time - self._start_time
 432
 433     def __repr__(self):
 434         attrs = ("protocol", "host", "method", "uri", "version", "remote_ip",
 435                  "remote_ip", "body")
 436         args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
 437         return "%s(%s, headers=%s)" % (
 438             self.__class__.__name__, args, dict(self.headers))
 439