1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
|
import datetime
import pytz
import sys
from collections import namedtuple
def __merge_dicts(*dict_args):
    """
    Shallow-merge any number of dicts into one brand-new dict.

    Adapted from: https://stackoverflow.com/a/26853961
    The arguments are applied from left to right, so when the same key
    appears in several of them, the value from the right-most one wins.
    The inputs themselves are never modified.
    """
    # Only needed for Python <3.5 support. In Python 3.5+, you can use the {**a, **b} syntax.
    merged = {}
    for source in dict_args:
        merged.update(source)
    return merged
# Describes one numeric component of a timestamp: how many characters it may
# occupy (min_width/max_width) and which values it may take (min_value/max_value).
NumberField = namedtuple('NumberField', ['min_width', 'max_width', 'min_value', 'max_value'])

NUMBER_FIELDS = {
    "year": NumberField(min_width=4, max_width=4, min_value=1, max_value=9999),
    "month": NumberField(min_width=2, max_width=2, min_value=1, max_value=12),
    "day": NumberField(min_width=2, max_width=2, min_value=1, max_value=31),
    # Intentionally missing leap year case
    "ordinal_day": NumberField(min_width=3, max_width=3, min_value=1, max_value=365),
    "iso_week": NumberField(min_width=2, max_width=2, min_value=1, max_value=53),
    "iso_day": NumberField(min_width=1, max_width=1, min_value=1, max_value=7),
    # 24 = special midnight value
    "hour": NumberField(min_width=2, max_width=2, min_value=0, max_value=24),
    "minute": NumberField(min_width=2, max_width=2, min_value=0, max_value=59),
    # 60 = Leap second
    "second": NumberField(min_width=2, max_width=2, min_value=0, max_value=60),
    # Can have unbounded characters
    "microsecond": NumberField(min_width=1, max_width=None, min_value=0, max_value=None),
    "tzhour": NumberField(min_width=2, max_width=2, min_value=0, max_value=23),
    "tzminute": NumberField(min_width=2, max_width=2, min_value=0, max_value=59),
}

# One `str.format` template per field (ex. "{month:0>2}") that left-pads the
# value with zeros up to the field's max_width. Fields without a max_width are
# given a width of 1, i.e. they are effectively not padded.
PADDED_NUMBER_FIELD_FORMATS = {
    name: "{{{field_name}:0>{max_width}}}".format(
        field_name=name,
        max_width=1 if spec.max_width is None else spec.max_width,
    )
    for name, spec in NUMBER_FIELDS.items()
}
def __generate_valid_formats(year=2014, month=2, day=3, iso_week=6, iso_day=1, ordinal_day=34, hour=1, minute=23, second=45, microsecond=123456, tzhour=4, tzminute=30):
    """
    Given a set of values, generates the 400+ different combinations of those values within a valid ISO 8601 string.

    Yields 3-tuples of:
      - a Python format string (ex. "{year}-{month}-{day}T{hour}:{minute}"),
      - the set of field names used in that format string,
      - the keyword arguments you could pass to the `datetime.datetime` constructor to build
        the datetime that a timestamp in that format represents.

    These can be used by `generate_valid_timestamp_and_datetime` and `generate_invalid_timestamp` to produce test cases.

    Week date formats are only generated on Python 3.8+, because the expected values are computed
    with `datetime.datetime.fromisocalendar`.
    """
    valid_basic_calendar_date_formats = [
        ("{year}{month}{day}", set(["year", "month", "day"]), {"year": year, "month": month, "day": day})
    ]
    valid_extended_calendar_date_formats = [
        ("{year}-{month}", set(["year", "month"]), {"year": year, "month": month, "day": 1}),
        ("{year}-{month}-{day}", set(["year", "month", "day"]), {"year": year, "month": month, "day": day}),
    ]
    valid_basic_week_date_formats = [
        ("{year}W{iso_week}", set(["year", "iso_week"]), {"year": year, "iso_week": iso_week, "iso_day": 1}),
        ("{year}W{iso_week}{iso_day}", set(["year", "iso_week", "iso_day"]), {"year": year, "iso_week": iso_week, "iso_day": iso_day})
    ]
    valid_extended_week_date_formats = [
        ("{year}-W{iso_week}", set(["year", "iso_week"]), {"year": year, "iso_week": iso_week, "iso_day": 1}),
        ("{year}-W{iso_week}-{iso_day}", set(["year", "iso_week", "iso_day"]), {"year": year, "iso_week": iso_week, "iso_day": iso_day})
    ]
    valid_basic_ordinal_date_formats = [
        ("{year}{ordinal_day}", set(["year", "ordinal_day"]), {"year": year, "ordinal_day": ordinal_day}),
    ]
    valid_extended_ordinal_date_formats = [
        ("{year}-{ordinal_day}", set(["year", "ordinal_day"]), {"year": year, "ordinal_day": ordinal_day}),
    ]

    # None = date only (no time portion at all)
    valid_date_and_time_separators = [None, "T", "t", " "]

    valid_basic_time_formats = [
        ("{hour}", set(["hour"]), {"hour": hour}),
        ("{hour}{minute}", set(["hour", "minute"]), {"hour": hour, "minute": minute}),
        ("{hour}{minute}{second}", set(["hour", "minute", "second"]), {"hour": hour, "minute": minute, "second": second})
    ]
    valid_extended_time_formats = [
        ("{hour}", set(["hour"]), {"hour": hour}),
        ("{hour}:{minute}", set(["hour", "minute"]), {"hour": hour, "minute": minute}),
        ("{hour}:{minute}:{second}", set(["hour", "minute", "second"]), {"hour": hour, "minute": minute, "second": second}),
    ]
    valid_subseconds = [
        ("", set(), {}),
        (".{microsecond}", set(["microsecond"]), {"microsecond": microsecond}),  # TODO: Generate the trimmed 0's version?
        (",{microsecond}", set(["microsecond"]), {"microsecond": microsecond}),
    ]
    valid_tz_info_formats = [
        ("", set(), {}),
        ("Z", set(), {"tzinfo": pytz.UTC}),
        ("z", set(), {"tzinfo": pytz.UTC}),
        ("-{tzhour}", set(["tzhour"]), {"tzinfo": pytz.FixedOffset(-1 * tzhour * 60)}),
        ("+{tzhour}", set(["tzhour"]), {"tzinfo": pytz.FixedOffset(1 * tzhour * 60)}),
        ("-{tzhour}{tzminute}", set(["tzhour", "tzminute"]), {"tzinfo": pytz.FixedOffset(-1 * ((tzhour * 60) + tzminute))}),
        ("+{tzhour}{tzminute}", set(["tzhour", "tzminute"]), {"tzinfo": pytz.FixedOffset(1 * ((tzhour * 60) + tzminute))}),
        ("-{tzhour}:{tzminute}", set(["tzhour", "tzminute"]), {"tzinfo": pytz.FixedOffset(-1 * ((tzhour * 60) + tzminute))}),
        ("+{tzhour}:{tzminute}", set(["tzhour", "tzminute"]), {"tzinfo": pytz.FixedOffset(1 * ((tzhour * 60) + tzminute))})
    ]

    # Basic dates are only paired with basic times, and extended dates with extended times.
    format_date_time_combinations = [
        (valid_basic_calendar_date_formats, valid_basic_time_formats),
        (valid_extended_calendar_date_formats, valid_extended_time_formats),
        (valid_basic_ordinal_date_formats, valid_basic_time_formats),
        (valid_extended_ordinal_date_formats, valid_extended_time_formats),
    ]
    if sys.version_info >= (3, 8):
        # We rely on datetime.datetime.fromisocalendar
        # to generate the expected values, but that was added in Python 3.8
        format_date_time_combinations += [
            (valid_basic_week_date_formats, valid_basic_time_formats),
            (valid_extended_week_date_formats, valid_extended_time_formats)
        ]

    for valid_calendar_date_formats, valid_time_formats in format_date_time_combinations:
        for calendar_format, calendar_fields, calendar_params in valid_calendar_date_formats:
            # The datetime constructor only understands year/month/day, so week dates and
            # ordinal dates are converted. `__merge_dicts` returns a new dict, so the
            # entries in the tables above are never modified.
            if "iso_week" in calendar_fields:
                dt = datetime.datetime.fromisocalendar(calendar_params["year"], calendar_params["iso_week"], calendar_params["iso_day"])
                # The ISO week-numbering year can differ from the calendar year close to a
                # year boundary (ex. 2014-W01-1 is 2013-12-30), so the year has to come from
                # the converted date as well.
                calendar_params = __merge_dicts(calendar_params, {"year": dt.year, "month": dt.month, "day": dt.day})
                del calendar_params["iso_week"]
                del calendar_params["iso_day"]

            if "ordinal_day" in calendar_fields:
                dt = datetime.datetime(calendar_params["year"], 1, 1) + datetime.timedelta(days=(calendar_params["ordinal_day"] - 1))
                calendar_params = __merge_dicts(calendar_params, {"month": dt.month, "day": dt.day})
                del calendar_params["ordinal_day"]

            for date_and_time_separator in valid_date_and_time_separators:
                if date_and_time_separator is None:
                    # Date only
                    yield (calendar_format, calendar_fields, calendar_params)
                    continue

                for time_format, time_fields, time_params in valid_time_formats:
                    has_seconds = "second" in time_fields
                    for subsecond_format, subsecond_fields, subsecond_params in valid_subseconds:
                        if not has_seconds and subsecond_format != "":
                            # Subseconds can only follow a seconds field. Without one, only the empty
                            # subsecond entry is used (arbitrary choice), so that no duplicates are yielded.
                            continue
                        for tz_info_format, tz_info_fields, tz_info_params in valid_tz_info_formats:
                            full_format = calendar_format + date_and_time_separator + time_format + subsecond_format + tz_info_format
                            fields = set().union(calendar_fields, time_fields, subsecond_fields, tz_info_fields)
                            datetime_params = __merge_dicts(calendar_params, time_params, subsecond_params, tz_info_params)
                            yield (full_format, fields, datetime_params)
def __pad_params(**kwargs):
    """
    Pads parameters to the required field widths.

    Returns a new dict with the same keys as `kwargs`. Values whose key has a
    template in PADDED_NUMBER_FIELD_FORMATS are rendered through that template;
    every other value is passed through unchanged.
    """
    padded = {}
    for name, raw_value in kwargs.items():
        template = PADDED_NUMBER_FIELD_FORMATS.get(name)
        if template is None:
            padded[name] = raw_value
        else:
            padded[name] = template.format(**{name: raw_value})
    return padded
def generate_valid_timestamp_and_datetime(year=2014, month=2, day=3, iso_week=6, iso_day=1, ordinal_day=34, hour=1, minute=23, second=45, microsecond=123456, tzhour=4, tzminute=30):
    """
    Given a set of values, generates the 400+ different combinations of those values within a valid ISO 8601 string, and the corresponding datetime.

    Yields `(timestamp, expected_datetime)` tuples: one for every format produced by `__generate_valid_formats`.

    This can be used to generate test cases of valid ISO 8601 timestamps.
    Note that this will produce many test cases that exercise the exact same code pathways (i.e., offer no additional coverage).
    Given a knowledge of the code, this is excessive, but these serve as a good set of black box tests (i.e., You could apply these to any ISO 8601 parse).
    """
    kwargs = {
        "year": year,
        "month": month,
        "day": day,
        "iso_week": iso_week,
        "iso_day": iso_day,
        "ordinal_day": ordinal_day,
        "hour": hour,
        "minute": minute,
        "second": second,
        "microsecond": microsecond,
        "tzhour": tzhour,
        "tzminute": tzminute,
    }

    # Pad each field to the appropriate width. The padded values are the same for
    # every format, so they are computed once rather than once per format.
    padded_kwargs = __pad_params(**kwargs)

    for timestamp_format, _fields, datetime_params in __generate_valid_formats(**kwargs):
        timestamp = timestamp_format.format(**padded_kwargs)
        yield (timestamp, datetime.datetime(**datetime_params))
def generate_invalid_timestamp(year=2014, month=2, day=3, iso_week=6, iso_day=1, ordinal_day=34, hour=1, minute=23, second=45, microsecond=123456, tzhour=4, tzminute=30):
    """
    Generates invalid ISO 8601 timestamps by mangling valid ones.

    Yields `(timestamp, reason)` tuples, where `reason` is a short human readable description
    of what was done to make the timestamp invalid.

    At the very least, each field can be invalid in the following ways:
    - Have too few characters
    - Have too many characters
    - Contain invalid characters
    - Have a value that is too small
    - Have a value that is too large

    This function takes each valid format (from `__generate_valid_formats()`), and mangles each field within the format to be invalid in each of the above ways.
    It also tests the case of trailing characters after each format.
    Fields are processed in sorted order, so the output order does not depend on set iteration order.

    Note that this will produce many test cases that exercise the exact same code pathways (i.e., offer no additional coverage).
    Given a knowledge of the code, this is excessive, but these serve as a good set of black box tests (i.e., You could apply these to any ISO 8601 parse).

    This does not produce every invalid timestamp format though. For simplicity of the code, it does not cover the cases of:
    - The fields having 0 characters (Many fields (like day, minute, second etc.) are optional. So unless the field follows a separator, it is valid to have 0 characters)
    - Invalid day numbers for a given month (ex. "2014-02-31")
    - Invalid separators (ex. "2014=04=01")
    - Ordinal dates in leap years
    - Missing/Mismatched separators (ex. "2014-0101T0000:00")
    - Hour = 24, but not Special midnight case (ex. "24:00:01")
    - Timestamps that bear no resemblance to ISO 8601
    These cases will need to be tested separately
    """
    kwargs = {
        "year": year,
        "month": month,
        "day": day,
        "iso_week": iso_week,
        "iso_day": iso_day,
        "ordinal_day": ordinal_day,
        "hour": hour,
        "minute": minute,
        "second": second,
        "microsecond": microsecond,
        "tzhour": tzhour,
        "tzminute": tzminute,
    }

    # The correctly padded values are the same for every format, so compute them once.
    padded_kwargs = __pad_params(**kwargs)

    for timestamp_format, fields, _datetime_params in __generate_valid_formats(**kwargs):
        # `fields` is a set of strings, whose iteration order can change from run to run.
        # Sorting keeps the generated test cases in a reproducible order.
        for field_name in sorted(fields):
            field = NUMBER_FIELDS.get(field_name, None)
            if field is None:
                continue

            padded_value = padded_kwargs[field_name]
            # Start from the valid values. Only this field's entry is ever overwritten below.
            mangled_kwargs = dict(padded_kwargs)

            # Too few characters
            for length in range(1, field.min_width):
                if timestamp_format.startswith("{year}W{iso_week}{iso_day}") and field_name == "iso_week":
                    # If you reduce the iso_week field to 1 character, then the iso_day will make it into
                    # a valid "{year}W{iso_week}" timestamp
                    continue
                if timestamp_format.startswith("{year}{month}{day}") and (field_name == "month" or field_name == "day"):
                    # If you reduce the month or day field to 1 character, then it will make it into
                    # a valid "{year}{ordinal_day}" timestamp
                    continue
                if timestamp_format.startswith("{year}{month}{day}") and field_name == "year" and length == 3:
                    # If you reduce the year field to 3 characters, then it will make it into
                    # a valid "{year}{ordinal_day}" timestamp
                    continue
                if timestamp_format.startswith("{year}-{ordinal_day}") and field_name == "ordinal_day" and length == 2:
                    # If you reduce the ordinal_day field to 2 characters, then it will make it into
                    # a valid "{year}-{month}" timestamp
                    continue
                str_value = padded_value[0:length]
                mangled_kwargs[field_name] = "{{:0>{length}}}".format(length=length).format(str_value)
                timestamp = timestamp_format.format(**mangled_kwargs)
                yield (timestamp, "{0} has too few characters".format(field_name))

            # Too many characters
            if field.max_width is not None:
                # If you extend the month field to 3 characters, then it will make it into
                # a valid "{year}-{ordinal_day}" timestamp. Only this one mangling is skipped;
                # the remaining manglings of the month field below are still generated.
                becomes_valid_ordinal_date = timestamp_format.startswith("{year}-{month}") and field_name == "month"
                if not becomes_valid_ordinal_date:
                    mangled_kwargs[field_name] = "{{:0>{length}}}".format(length=field.max_width + 1).format(kwargs[field_name])
                    timestamp = timestamp_format.format(**mangled_kwargs)
                    yield (timestamp, "{0} has too many characters".format(field_name))

            # Too small of value (negative values cannot be expressed, so they are not generated)
            if (field.min_value - 1) >= 0:
                mangled_kwargs[field_name] = __pad_params(**{field_name: field.min_value - 1})[field_name]
                timestamp = timestamp_format.format(**mangled_kwargs)
                yield (timestamp, "{0} has too small value".format(field_name))

            # Too large of value
            if field.max_value is not None:
                mangled_kwargs[field_name] = __pad_params(**{field_name: field.max_value + 1})[field_name]
                timestamp = timestamp_format.format(**mangled_kwargs)
                yield (timestamp, "{0} has too large value".format(field_name))

            # Invalid characters
            max_invalid_characters = field.max_width if field.max_width is not None else 1
            # ex. 2014 -> a, aa, aaa
            for length in range(1, max_invalid_characters):
                mangled_kwargs[field_name] = "a" * length
                timestamp = timestamp_format.format(**mangled_kwargs)
                yield (timestamp, "{0} has invalid characters".format(field_name))
            # ex. 2014 -> aaaa, 2aaa, 20aa, 201a
            for length in range(0, max_invalid_characters):
                str_value = padded_value[0:length]
                mangled_kwargs[field_name] = "{{:a<{length}}}".format(length=max_invalid_characters).format(str_value)
                timestamp = timestamp_format.format(**mangled_kwargs)
                yield (timestamp, "{0} has invalid characters".format(field_name))

        # Trailing characters (once per format). The reason names the format, because the
        # extra characters are not related to any single field.
        timestamp = timestamp_format.format(**padded_kwargs) + "EXTRA"
        yield (timestamp, "{0} has extra characters".format(timestamp_format))
|