# Example: a minimal HTTP echo server built on h11 + Trio.
# A simple HTTP server implemented using h11 and Trio:
# http://trio.readthedocs.io/en/latest/index.html
#
# All requests get echoed back a JSON document containing information about
# the request.
#
# This is a rather involved example, since it attempts to both be
# fully-HTTP-compliant and also demonstrate error handling.
#
# The main difference between an HTTP client and an HTTP server is that in a
# client, if something goes wrong, you can just throw away that connection and
# make a new one. In a server, you're expected to handle all kinds of garbage
# input and internal errors and recover with grace and dignity. And that's
# what this code does.
#
# I recommend pushing on it to see how it works -- e.g. watch what happens if
# you visit http://localhost:8080 in a webbrowser that supports keep-alive,
# hit reload a few times, and then wait for the keep-alive to time out on the
# server.
#
# Or try using curl to start a chunked upload and then hit control-C in the
# middle of the upload:
#
# (for CHUNK in $(seq 10); do echo $CHUNK; sleep 1; done) \
# | curl -T - http://localhost:8080/foo
#
# (Note that curl will send Expect: 100-Continue, too.)
#
# Or, heck, try letting curl complete successfully ;-).
# Some potential improvements, if you wanted to try and extend this to a real
# general-purpose HTTP server (and to give you some hints about the many
# considerations that go into making a robust HTTP server):
#
# - The timeout handling is rather crude -- we impose a flat 10 second timeout
# on each request (starting from the end of the previous
# response). Something finer-grained would be better. Also, if a timeout is
# triggered we unconditionally send a 500 Internal Server Error; it would be
# better to keep track of whether the timeout is the client's fault, and if
# so send a 408 Request Timeout.
#
# - The error handling policy here is somewhat crude as well. It handles a lot
# of cases perfectly, but there are corner cases where the ideal behavior is
# more debatable. For example, if a client starts uploading a large
# request, uses 100-Continue, and we send an error response, then we'll shut
# down the connection immediately (for well-behaved clients) or after
# spending TIMEOUT seconds reading and discarding their upload (for
# ill-behaved ones that go on and try to upload their request anyway). And
# for clients that do this without 100-Continue, we'll send the error
# response and then shut them down after TIMEOUT seconds. This might or
# might not be your preferred policy, though -- maybe you want to shut such
# clients down immediately (even if this risks their not seeing the
# response), or maybe you're happy to let them continue sending all the data
# and wasting your bandwidth if this is what it takes to guarantee that they
# see your error response. Up to you, really.
#
# - Another example of a debatable choice: if a response handler errors out
# without having done *anything* -- hasn't started responding, hasn't read
# the request body -- then this connection actually is salvageable, if the
# server sends an error response + reads and discards the request body. This
# code sends the error response, but it doesn't try to salvage the
# connection by reading the request body, it just closes the
# connection. This is quite possibly the best option, but again this is a
# policy decision.
#
# - Our error pages always include the exception text. In real life you might
# want to log the exception but not send that information to the client.
#
# - Our error responses perhaps should include Connection: close when we know
# we're going to close this connection.
#
# - We don't support the HEAD method, but ought to.
#
# - We should probably do something cleverer with buffering responses and
# TCP_CORK and suchlike.
import datetime
import email.utils
import json
from itertools import count
import trio
import h11
# Largest chunk we will ask the socket for in a single read.
MAX_RECV = 2**16
# Flat per-request timeout in seconds; also bounds the read-and-discard
# drain during connection shutdown.
TIMEOUT = 10
# We are using email.utils.format_datetime to generate the Date header.
# It may sound weird, but it actually follows the RFC.
# Please see: https://stackoverflow.com/a/59416334/14723771
#
# See also:
# [1] https://www.rfc-editor.org/rfc/rfc9110#section-5.6.7
# [2] https://www.rfc-editor.org/rfc/rfc7231#section-7.1.1.1
# [3] https://www.rfc-editor.org/rfc/rfc5322#section-3.3
def format_date_time(dt=None):
    """Generate an RFC 7231 / RFC 9110 IMF-fixdate string for *dt*.

    If *dt* is None, the current UTC time is used. Suitable for the
    HTTP Date header.
    """
    when = dt if dt is not None else datetime.datetime.now(datetime.timezone.utc)
    return email.utils.format_datetime(when, usegmt=True)
################################################################
# I/O adapter: h11 <-> trio
################################################################
# The core of this could be factored out to be usable for trio-based clients
# too, as well as servers. But as a simplified pedagogical example we don't
# attempt this here.
class TrioHTTPWrapper:
    """Adapter gluing an h11.Connection (the HTTP/1.1 state machine) to a
    Trio stream, with server-side conveniences: standard response headers,
    100-Continue handling, graceful shutdown, and per-connection logging.
    """
    # Class-level counter shared by all instances; hands out a unique id
    # per connection for debugging output.
    _next_id = count()
    def __init__(self, stream):
        # stream: the Trio stream for one accepted connection.
        self.stream = stream
        # h11 state machine, configured for the server role.
        self.conn = h11.Connection(h11.SERVER)
        # Our Server: header
        self.ident = " ".join(
            ["h11-example-trio-server/{}".format(h11.__version__), h11.PRODUCT_ID]
        ).encode("ascii")
        # A unique id for this connection, to include in debugging output
        # (useful for understanding what's going on if there are multiple
        # simultaneous clients).
        self._obj_id = next(TrioHTTPWrapper._next_id)
    async def send(self, event):
        """Serialize *event* via h11 and write the bytes to the stream.

        If the write fails for any reason, mark the h11 connection as
        broken (so later sends raise cleanly) and re-raise.
        """
        # The code below doesn't send ConnectionClosed, so we don't bother
        # handling it here either -- it would require that we do something
        # appropriate when 'data' is None.
        assert type(event) is not h11.ConnectionClosed
        data = self.conn.send(event)
        try:
            await self.stream.send_all(data)
        except BaseException:
            # If send_all raises an exception (especially trio.Cancelled),
            # we have no choice but to give it up.
            self.conn.send_failed()
            raise
    async def _read_from_peer(self):
        """Read one chunk from the socket and feed it to h11.

        Sends "100 Continue" first if the client is waiting for one.
        A dead peer is reported to h11 as b"" (end-of-stream).
        """
        if self.conn.they_are_waiting_for_100_continue:
            self.info("Sending 100 Continue")
            go_ahead = h11.InformationalResponse(
                status_code=100, headers=self.basic_headers()
            )
            await self.send(go_ahead)
        try:
            data = await self.stream.receive_some(MAX_RECV)
        except ConnectionError:
            # They've stopped listening. Not much we can do about it here.
            data = b""
        self.conn.receive_data(data)
    async def next_event(self):
        """Return the next h11 event, reading more data from the peer as
        many times as needed until a complete event is available.
        """
        while True:
            event = self.conn.next_event()
            if event is h11.NEED_DATA:
                await self._read_from_peer()
                continue
            return event
    async def shutdown_and_clean_up(self):
        """Half-close the stream, drain the peer for up to TIMEOUT seconds,
        then close the socket unconditionally.
        """
        # When this method is called, it's because we definitely want to kill
        # this connection, either as a clean shutdown or because of some kind
        # of error or loss-of-sync bug, and we no longer care if that violates
        # the protocol or not. So we ignore the state of self.conn, and just
        # go ahead and do the shutdown on the socket directly. (If you're
        # implementing a client you might prefer to send ConnectionClosed()
        # and let it raise an exception if that violates the protocol.)
        #
        try:
            await self.stream.send_eof()
        except trio.BrokenResourceError:
            # They're already gone, nothing to do
            return
        # Wait and read for a bit to give them a chance to see that we closed
        # things, but eventually give up and just close the socket.
        # XX FIXME: possibly we should set SO_LINGER to 0 here, so
        # that in the case where the client has ignored our shutdown and
        # declined to initiate the close themselves, we do a violent shutdown
        # (RST) and avoid the TIME_WAIT?
        # it looks like nginx never does this for keepalive timeouts, and only
        # does it for regular timeouts (slow clients I guess?) if explicitly
        # enabled ("Default: reset_timedout_connection off")
        with trio.move_on_after(TIMEOUT):
            try:
                while True:
                    # Attempt to read until EOF
                    got = await self.stream.receive_some(MAX_RECV)
                    if not got:
                        break
            except trio.BrokenResourceError:
                pass
            finally:
                await self.stream.aclose()
    def basic_headers(self):
        """Return the Date and Server headers required on every response."""
        # HTTP requires these headers in all responses (client would do
        # something different here)
        return [
            ("Date", format_date_time().encode("ascii")),
            ("Server", self.ident),
        ]
    def info(self, *args):
        """Print *args* prefixed with this connection's debug id."""
        # Little debugging method
        print("{}:".format(self._obj_id), *args)
################################################################
# Server main loop
################################################################
# General theory:
#
# If everything goes well:
# - we'll get a Request
# - our response handler will read the request body and send a full response
# - that will either leave us in MUST_CLOSE (if the client doesn't
# support keepalive) or DONE/DONE (if the client does).
#
# But then there are many, many different ways that things can go wrong
# here. For example:
# - we don't actually get a Request, but rather a ConnectionClosed
# - exception is raised from somewhere (naughty client, broken
# response handler, whatever)
# - depending on what went wrong and where, we might or might not be
# able to send an error response, and the connection might or
# might not be salvageable after that
# - response handler doesn't fully read the request or doesn't send a
# full response
#
# But these all have one thing in common: they involve us leaving the
# nice easy path up above. So we can just proceed on the assumption
# that the nice easy thing is what's happening, and whenever something
# goes wrong do our best to get back onto that path, and h11 will keep
# track of how successful we were and raise new errors if things don't work
# out.
async def http_serve(stream):
    """Connection handler: serve requests on one TCP connection in a loop,
    re-using the connection when the protocol allows, and shutting it down
    cleanly when it doesn't (or when anything goes wrong).
    """
    http = TrioHTTPWrapper(stream)
    http.info("Got new connection")
    while True:
        # A fresh cycle requires both sides of the state machine to be IDLE.
        assert http.conn.states == {h11.CLIENT: h11.IDLE, h11.SERVER: h11.IDLE}
        try:
            with trio.fail_after(TIMEOUT):
                http.info("Server main loop waiting for request")
                ev = await http.next_event()
                http.info("Server main loop got event:", ev)
                if type(ev) is h11.Request:
                    await send_echo_response(http, ev)
        except Exception as exc:
            # Anything that escapes the handler: try to tell the client.
            http.info("Error during response handler: {!r}".format(exc))
            await maybe_send_error_response(http, exc)
        if http.conn.our_state is h11.MUST_CLOSE:
            http.info("connection is not reusable, so shutting down")
            await http.shutdown_and_clean_up()
            return
        # Otherwise attempt to reset the state machine for the next request.
        try:
            http.info("trying to re-use connection")
            http.conn.start_next_cycle()
        except h11.ProtocolError:
            # Loss of sync -- report it and bail out.
            current = http.conn.states
            http.info("unexpected state", current, "-- bailing out")
            await maybe_send_error_response(
                http, RuntimeError("unexpected state {}".format(current))
            )
            await http.shutdown_and_clean_up()
            return
################################################################
# Actual response handlers
################################################################
# Helper function
async def send_simple_response(wrapper, status_code, content_type, body):
    """Send a complete response: status line + headers, then *body* (bytes),
    then end-of-message. Content-Length is derived from *body*.
    """
    wrapper.info("Sending", status_code, "response with", len(body), "bytes")
    headers = wrapper.basic_headers() + [
        ("Content-Type", content_type),
        ("Content-Length", str(len(body))),
    ]
    events = (
        h11.Response(status_code=status_code, headers=headers),
        h11.Data(data=body),
        h11.EndOfMessage(),
    )
    for event in events:
        await wrapper.send(event)
async def maybe_send_error_response(wrapper, exc):
    """Best-effort: send a plain-text error response describing *exc*,
    but only if the protocol state still permits us to respond.
    """
    # If we can't send an error, oh well, nothing to be done
    wrapper.info("trying to send error response...")
    if wrapper.conn.our_state not in {h11.IDLE, h11.SEND_RESPONSE}:
        wrapper.info("...but I can't, because our state is", wrapper.conn.our_state)
        return
    try:
        # Pick a status code appropriate to what went wrong.
        if isinstance(exc, h11.RemoteProtocolError):
            status = exc.error_status_hint
        elif isinstance(exc, trio.TooSlowError):
            status = 408  # Request Timeout
        else:
            status = 500
        await send_simple_response(
            wrapper, status, "text/plain; charset=utf-8", str(exc).encode("utf-8")
        )
    except Exception as send_exc:
        # Best-effort only -- if even the error response fails, just log it.
        wrapper.info("error while sending error response:", send_exc)
async def send_echo_response(wrapper, request):
    """Read the entire request body, then reply with a JSON document that
    echoes back the request's method, target, headers, and body.

    Raises RuntimeError for methods other than GET/POST.
    """
    wrapper.info("Preparing echo response")
    if request.method not in {b"GET", b"POST"}:
        # Laziness: we should send a proper 405 Method Not Allowed with the
        # appropriate Accept: header, but we don't.
        raise RuntimeError("unsupported method")
    # Drain the request body completely before responding.
    body_chunks = []
    while True:
        ev = await wrapper.next_event()
        if type(ev) is h11.EndOfMessage:
            break
        assert type(ev) is h11.Data
        body_chunks.append(ev.data.decode("ascii"))
    echo = {
        "method": request.method.decode("ascii"),
        "target": request.target.decode("ascii"),
        "headers": [
            (name.decode("ascii"), value.decode("ascii"))
            for (name, value) in request.headers
        ],
        "body": "".join(body_chunks),
    }
    payload = json.dumps(
        echo, sort_keys=True, indent=4, separators=(",", ": ")
    ).encode("utf-8")
    await send_simple_response(
        wrapper, 200, "application/json; charset=utf-8", payload
    )
async def serve(port):
    """Listen for TCP connections on *port* and serve each with http_serve."""
    print(f"listening on http://localhost:{port}")
    try:
        await trio.serve_tcp(http_serve, port)
    except KeyboardInterrupt:
        print("KeyboardInterrupt - shutting down")
################################################################
# Run the server
################################################################
# Start the Trio event loop and run the server (only when executed as a
# script, not when imported as a module).
if __name__ == "__main__":
    trio.run(serve, 8080)