import string
from urllib import parse

from hypothesis import strategies

HTTP_METHODS = ["OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT"]

# Unicode characters from the "Letter" categories, plus Nl (letter-like numerals)
letters = strategies.characters(
    whitelist_categories=("Lu", "Ll", "Lt", "Lm", "Lo", "Nl")
)


def http_method():
    return strategies.sampled_from(HTTP_METHODS)


def _http_path_portion():
    # Unreserved URL characters (RFC 3986), so the path needs no percent-encoding.
    alphabet = string.ascii_letters + string.digits + "-._~"
    return strategies.text(min_size=1, max_size=128, alphabet=alphabet)


def http_path():
    """
    Returns a URL path (not encoded).
    """
    return strategies.lists(_http_path_portion(), min_size=0, max_size=10).map(
        lambda s: "/" + "/".join(s)
    )


def http_body():
    """
    Returns random binary body data.
    """
    return strategies.binary(min_size=0, max_size=1500)


def valid_bidi(value):
    """
    Rejects strings with nonsensical Unicode text-direction flags.

    Relying on random Unicode characters means that some combinations don't make
    sense from a text-direction point of view; this little helper rejects those
    by checking that the string survives IDNA encoding.
    """
    try:
        value.encode("idna")
    except UnicodeError:
        return False
    else:
        return True
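
# For example, a label that mixes right-to-left and left-to-right scripts, such
# as "a\u05d0" (Latin "a" followed by Hebrew alef), fails the idna codec's bidi
# checks, so valid_bidi filters it out.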


def _domain_label():
    # DNS limits each label to 63 octets.
    return strategies.text(alphabet=letters, min_size=1, max_size=63).filter(valid_bidi)


def international_domain_name():
    """
    Returns a byte string of a domain name, IDNA-encoded.
    """
    return strategies.lists(_domain_label(), min_size=2).map(
        lambda s: (".".join(s)).encode("idna")
    )
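
# For reference, IDNA encoding maps each Unicode label to an ASCII "xn--" form,
# e.g. "münchen".encode("idna") == b"xn--mnchen-3ya".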


def _query_param():
    return strategies.text(alphabet=letters, min_size=1, max_size=255).map(
        lambda s: s.encode("utf8")
    )


def query_params():
    """
    Returns a list of two-tuples of byte strings, ready for encoding with urlencode.

    We're aiming for a total URL length below 2083 characters, so this strategy
    ensures that the urlencoded query string is no longer than 1500 characters.
    """
    return strategies.lists(
        strategies.tuples(_query_param(), _query_param()), min_size=0
    ).filter(lambda x: len(parse.urlencode(x)) < 1500)
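
# For reference, urlencode percent-escapes the byte strings, e.g.
# parse.urlencode([(b"caf\xc3\xa9", b"q")]) == "caf%C3%A9=q".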


def header_name():
    """
    Strategy returning something that looks like an HTTP header field name.

    https://en.wikipedia.org/wiki/List_of_HTTP_header_fields suggests they are
    between 4 and 20 characters long.
    """
    return strategies.text(
        alphabet=string.ascii_letters + string.digits + "-", min_size=1, max_size=30
    ).map(lambda s: s.encode("utf-8"))


def header_value():
    """
    Strategy returning something that looks like an HTTP header value.

    "For example, the Apache 2.3 server by default limits the size of each field
    to 8190 bytes"
    https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
    """
    return (
        strategies.text(
            alphabet=string.ascii_letters
            + string.digits
            + string.punctuation.replace(",", "")
            + " \t",  # space and horizontal tab are legal in header values
            min_size=1,
            max_size=8190,
        )
        .map(lambda s: s.encode("utf-8"))
        .filter(lambda s: len(s) < 8190)
    )


def headers():
    """
    Strategy returning a list of tuples, containing HTTP header fields and their values.

    "[Apache 2.3] there can be at most 100 header fields in a single request."
    https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
    """
    return strategies.lists(
        strategies.tuples(header_name(), header_value()), min_size=0, max_size=100
    )
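

# Usage sketch: how the strategies above can drive a Hypothesis property test.
# The test name and the checked properties are illustrative only; they simply
# sanity-check the generated values rather than exercise any particular server.
from hypothesis import given


@given(method=http_method(), path=http_path(), params=query_params())
def test_generated_request_parts(method, path, params):
    assert method in HTTP_METHODS
    assert path.startswith("/")
    # query_params() already filters to keep the encoded query string short.
    assert len(parse.urlencode(params)) < 1500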