1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
|
import functools
import sys
from sentry_sdk.hub import Hub, _should_send_default_pii
from sentry_sdk.utils import (
ContextVar,
capture_internal_exceptions,
event_from_exception,
)
from sentry_sdk._compat import PY2, reraise, iteritems
from sentry_sdk.tracing import Span
from sentry_sdk.sessions import auto_session_tracking
from sentry_sdk.integrations._wsgi_common import _filter_headers
from sentry_sdk._types import MYPY
if MYPY:
from typing import Callable
from typing import Dict
from typing import Iterator
from typing import Any
from typing import Tuple
from typing import Optional
from typing import TypeVar
from typing import Protocol
from sentry_sdk.utils import ExcInfo
from sentry_sdk._types import EventProcessor
WsgiResponseIter = TypeVar("WsgiResponseIter")
WsgiResponseHeaders = TypeVar("WsgiResponseHeaders")
WsgiExcInfo = TypeVar("WsgiExcInfo")
class StartResponse(Protocol):
def __call__(self, status, response_headers, exc_info=None):
# type: (str, WsgiResponseHeaders, Optional[WsgiExcInfo]) -> WsgiResponseIter
pass
_wsgi_middleware_applied = ContextVar("sentry_wsgi_middleware_applied")
if PY2:
def wsgi_decoding_dance(s, charset="utf-8", errors="replace"):
# type: (str, str, str) -> str
return s.decode(charset, errors)
else:
def wsgi_decoding_dance(s, charset="utf-8", errors="replace"):
# type: (str, str, str) -> str
return s.encode("latin1").decode(charset, errors)
def get_host(environ):
# type: (Dict[str, str]) -> str
"""Return the host for the given WSGI environment. Yanked from Werkzeug."""
if environ.get("HTTP_HOST"):
rv = environ["HTTP_HOST"]
if environ["wsgi.url_scheme"] == "http" and rv.endswith(":80"):
rv = rv[:-3]
elif environ["wsgi.url_scheme"] == "https" and rv.endswith(":443"):
rv = rv[:-4]
elif environ.get("SERVER_NAME"):
rv = environ["SERVER_NAME"]
if (environ["wsgi.url_scheme"], environ["SERVER_PORT"]) not in (
("https", "443"),
("http", "80"),
):
rv += ":" + environ["SERVER_PORT"]
else:
# In spite of the WSGI spec, SERVER_NAME might not be present.
rv = "unknown"
return rv
def get_request_url(environ):
# type: (Dict[str, str]) -> str
"""Return the absolute URL without query string for the given WSGI
environment."""
return "%s://%s/%s" % (
environ.get("wsgi.url_scheme"),
get_host(environ),
wsgi_decoding_dance(environ.get("PATH_INFO") or "").lstrip("/"),
)
class SentryWsgiMiddleware(object):
__slots__ = ("app",)
def __init__(self, app):
# type: (Callable[[Dict[str, str], Callable[..., Any]], Any]) -> None
self.app = app
def __call__(self, environ, start_response):
# type: (Dict[str, str], Callable[..., Any]) -> _ScopedResponse
if _wsgi_middleware_applied.get(False):
return self.app(environ, start_response)
_wsgi_middleware_applied.set(True)
try:
hub = Hub(Hub.current)
with auto_session_tracking(hub):
with hub:
with capture_internal_exceptions():
with hub.configure_scope() as scope:
scope.clear_breadcrumbs()
scope._name = "wsgi"
scope.add_event_processor(
_make_wsgi_event_processor(environ)
)
span = Span.continue_from_environ(environ)
span.op = "http.server"
span.transaction = "generic WSGI request"
with hub.start_span(span) as span:
try:
rv = self.app(
environ,
functools.partial(
_sentry_start_response, start_response, span
),
)
except BaseException:
reraise(*_capture_exception(hub))
finally:
_wsgi_middleware_applied.set(False)
return _ScopedResponse(hub, rv)
def _sentry_start_response(
old_start_response, # type: StartResponse
span, # type: Span
status, # type: str
response_headers, # type: WsgiResponseHeaders
exc_info=None, # type: Optional[WsgiExcInfo]
):
# type: (...) -> WsgiResponseIter
with capture_internal_exceptions():
status_int = int(status.split(" ", 1)[0])
span.set_http_status(status_int)
if exc_info is None:
# The Django Rest Framework WSGI test client, and likely other
# (incorrect) implementations, cannot deal with the exc_info argument
# if one is present. Avoid providing a third argument if not necessary.
return old_start_response(status, response_headers)
else:
return old_start_response(status, response_headers, exc_info)
def _get_environ(environ):
# type: (Dict[str, str]) -> Iterator[Tuple[str, str]]
"""
Returns our whitelisted environment variables.
"""
keys = ["SERVER_NAME", "SERVER_PORT"]
if _should_send_default_pii():
# make debugging of proxy setup easier. Proxy headers are
# in headers.
keys += ["REMOTE_ADDR"]
for key in keys:
if key in environ:
yield key, environ[key]
# `get_headers` comes from `werkzeug.datastructures.EnvironHeaders`
#
# We need this function because Django does not give us a "pure" http header
# dict. So we might as well use it for all WSGI integrations.
def _get_headers(environ):
# type: (Dict[str, str]) -> Iterator[Tuple[str, str]]
"""
Returns only proper HTTP headers.
"""
for key, value in iteritems(environ):
key = str(key)
if key.startswith("HTTP_") and key not in (
"HTTP_CONTENT_TYPE",
"HTTP_CONTENT_LENGTH",
):
yield key[5:].replace("_", "-").title(), value
elif key in ("CONTENT_TYPE", "CONTENT_LENGTH"):
yield key.replace("_", "-").title(), value
def get_client_ip(environ):
# type: (Dict[str, str]) -> Optional[Any]
"""
Infer the user IP address from various headers. This cannot be used in
security sensitive situations since the value may be forged from a client,
but it's good enough for the event payload.
"""
try:
return environ["HTTP_X_FORWARDED_FOR"].split(",")[0].strip()
except (KeyError, IndexError):
pass
try:
return environ["HTTP_X_REAL_IP"]
except KeyError:
pass
return environ.get("REMOTE_ADDR")
def _capture_exception(hub):
# type: (Hub) -> ExcInfo
exc_info = sys.exc_info()
# Check client here as it might have been unset while streaming response
if hub.client is not None:
e = exc_info[1]
# SystemExit(0) is the only uncaught exception that is expected behavior
should_skip_capture = isinstance(e, SystemExit) and e.code in (0, None)
if not should_skip_capture:
event, hint = event_from_exception(
exc_info,
client_options=hub.client.options,
mechanism={"type": "wsgi", "handled": False},
)
hub.capture_event(event, hint=hint)
return exc_info
class _ScopedResponse(object):
__slots__ = ("_response", "_hub")
def __init__(self, hub, response):
# type: (Hub, Iterator[bytes]) -> None
self._hub = hub
self._response = response
def __iter__(self):
# type: () -> Iterator[bytes]
iterator = iter(self._response)
while True:
with self._hub:
try:
chunk = next(iterator)
except StopIteration:
break
except BaseException:
reraise(*_capture_exception(self._hub))
yield chunk
def close(self):
# type: () -> None
with self._hub:
try:
self._response.close() # type: ignore
except AttributeError:
pass
except BaseException:
reraise(*_capture_exception(self._hub))
def _make_wsgi_event_processor(environ):
# type: (Dict[str, str]) -> EventProcessor
# It's a bit unfortunate that we have to extract and parse the request data
# from the environ so eagerly, but there are a few good reasons for this.
#
# We might be in a situation where the scope/hub never gets torn down
# properly. In that case we will have an unnecessary strong reference to
# all objects in the environ (some of which may take a lot of memory) when
# we're really just interested in a few of them.
#
# Keeping the environment around for longer than the request lifecycle is
# also not necessarily something uWSGI can deal with:
# https://github.com/unbit/uwsgi/issues/1950
client_ip = get_client_ip(environ)
request_url = get_request_url(environ)
query_string = environ.get("QUERY_STRING")
method = environ.get("REQUEST_METHOD")
env = dict(_get_environ(environ))
headers = _filter_headers(dict(_get_headers(environ)))
def event_processor(event, hint):
# type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any]
with capture_internal_exceptions():
# if the code below fails halfway through we at least have some data
request_info = event.setdefault("request", {})
if _should_send_default_pii():
user_info = event.setdefault("user", {})
if client_ip:
user_info.setdefault("ip_address", client_ip)
request_info["url"] = request_url
request_info["query_string"] = query_string
request_info["method"] = method
request_info["env"] = env
request_info["headers"] = headers
return event
return event_processor
|