1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
|
"""This submodule contains file format related utility code for Prance."""
__author__ = "Jens Finkhaeuser"
__copyright__ = "Copyright (c) 2016-2021 Jens Finkhaeuser"
__license__ = "MIT"
__all__ = ()
class ParseError(ValueError):
    """
    Signal that a specification string could not be parsed.

    The parse helpers in this module raise it in place of the underlying
    parser's own exception. It derives from ValueError, so handlers written
    for ValueError also catch it.
    """
def __format_preferences(filename, content_type):  # noqa: N802
    """
    Detect the format based on file name and content type.

    Each parameter may be None, so a heuristic can be used in the end.

    The most likely format is selected as follows:

    1) If there is neither file name nor content type, use a heuristic.
    2) If there is a file name but no content type, use the file extension.
    3) If there is no file name, but a content type, use the content type.
    4) If both are present, prefer the content type.
    5) Every other known format is still returned after the best one, so
       callers can fall back when a file name or content type is wrong.

    File extensions and content types are matched case-insensitively, and
    whitespace around the content type is ignored.

    :param str filename: [optional] File name whose extension hints at the
        format.
    :param str content_type: [optional] Content type hinting at the format;
        anything after the first ";" is ignored.
    :return: A tuple of format strings, in the optimal order to try.
    :rtype: tuple
    """
    best = None
    if filename and not content_type:
        from os.path import splitext

        # Compare in lower case so that e.g. "SPEC.YAML" is recognised too.
        ext = splitext(filename)[1].lower()
        for extensions, name in __EXT_TO_FORMAT.items():
            if ext in extensions:
                best = name
    elif content_type:
        # Split off the first part of the content type; for us, that's
        # enough. Normalise case and surrounding whitespace so that values
        # such as "Application/JSON ; charset=utf-8" still match.
        mime = content_type.split(";")[0].strip().lower()
        for mimes, name in __MIME_TO_FORMAT.items():
            if mime in mimes:
                best = name

    # If we have no best format yet, we need to use a heuristic. This is
    # tricky; Swagger is largely YAML-based, but JSON is used for remote
    # references. In the end, JSON is probably more likely to match.
    if not best:
        best = "JSON"

    # Now assemble an ordered list of formats to return, with the best
    # format first.
    formats = list(__EXT_TO_FORMAT.values())
    formats.remove(best)
    formats.insert(0, best)
    return tuple(formats)
# Basic parse functions
def __parse_yaml(spec_str):  # noqa: N802
    """
    Parse a YAML document using ruamel.yaml's safe loader.

    :param spec_str: The YAML text; it is converted with str() before
        loading.
    :return: The parsed document.
    :raises ParseError: when ruamel.yaml reports a parser or scanner error.
    """
    from ruamel.yaml import YAML, parser, scanner

    try:
        yaml = YAML(typ="safe")
        return yaml.load(str(spec_str))
    except (parser.ParserError, scanner.ScannerError) as err:
        # Lexical (scanner) failures must be reported like grammar (parser)
        # failures; otherwise they escape as raw ruamel exceptions and
        # callers that catch ParseError cannot fall back to another format.
        raise ParseError(str(err)) from err
def __parse_json(spec_str):  # noqa: N802
    """
    Parse a JSON document.

    :param spec_str: The JSON text; it is converted with str() before
        decoding.
    :return: The decoded document.
    :raises ParseError: when decoding raises a ValueError.
    """
    from json import loads

    try:
        decoded = loads(str(spec_str))
    except ValueError as failure:
        raise ParseError(str(failure))
    else:
        return decoded
# Basic serialization functions
def __serialize_yaml(specs):  # noqa: N802
    """
    Serialize the given specs to a YAML string.

    :param specs: The specifications to dump.
    :return: The YAML document.
    :rtype: str
    """
    from io import BytesIO

    from ruamel.yaml import YAML

    # Dump into an in-memory byte buffer, then decode the bytes to text.
    sink = BytesIO()
    YAML().dump(specs, sink)
    raw = sink.getvalue()
    return raw.decode("UTF-8")
def __serialize_json(specs):  # noqa: N802
    """
    Serialize the given specs to a JSON string indented by two spaces.

    :param specs: The specifications to dump.
    :return: The JSON document.
    :rtype: str
    """
    from json import dumps

    # ensure_ascii is switched off so that non-ASCII characters are written
    # as-is instead of being turned into \uXXXX escapes.
    return str(dumps(specs, indent=2, ensure_ascii=False))
# Map file name extensions to format names. Each key is a tuple of equivalent
# extensions; format_info() reports the first entry of a tuple as the
# preferred extension. __format_preferences() builds its result from the
# values of this table, so the entry order here is the default order of the
# non-preferred formats.
__EXT_TO_FORMAT = {
    (".yaml", ".yml"): "YAML",
    (".json", ".js"): "JSON",
}

# Map content types to format names. Each key is a tuple of equivalent
# content types; format_info() reports the first entry of a tuple as the
# preferred content type.
__MIME_TO_FORMAT = {
    ("application/json", "application/javascript"): "JSON",
    ("application/yaml", "text/yaml"): "YAML",
}

# Map format names to the function that parses a string of that format.
__FORMAT_TO_PARSER = {
    "YAML": __parse_yaml,
    "JSON": __parse_json,
}

# Map format names to the function that serializes specs to that format.
__FORMAT_TO_SERIALIZER = {
    "YAML": __serialize_yaml,
    "JSON": __serialize_json,
}
def format_info(format_name):
    """
    Look up the preferred content type and extension of a format.

    Valid formats are `YAML` or `JSON`; the name is matched
    case-insensitively.

    :param str format_name: The name of the format.
    :return: The preferred content type and file name extension, or
        (None, None) if the format name is not supported.
    :rtype: tuple
    """
    wanted = format_name.upper()

    def _preferred(table):
        # Return the first alias of the entry mapped to the wanted format.
        # The whole table is scanned, so the last matching entry wins.
        chosen = None
        for aliases, fmt in table.items():
            if fmt == wanted:
                chosen = aliases[0]
        return chosen

    return _preferred(__MIME_TO_FORMAT), _preferred(__EXT_TO_FORMAT)
def parse_spec_details(spec_str, filename=None, **kwargs):
    """
    Parse the given spec string and report which format matched.

    The candidate formats are ordered by the file name and the optional
    `content_type` keyword argument; JSON is tried first when neither gives
    a hint. Each candidate is tried in turn until one parses the string.

    :param str spec_str: The specifications as string.
    :param str filename: [optional] Filename to determine the format from.
    :param str content_type: [optional] Content type to determine the format
        from.
    :return: The specifications, mime type, and extension.
    :rtype: tuple
    :raises ParseError: when parsing fails.
    """
    candidates = __format_preferences(filename, kwargs.get("content_type"))

    for fmt in candidates:
        parse = __FORMAT_TO_PARSER[fmt]
        try:
            parsed = parse(spec_str)
        except ParseError:
            # This format did not match; move on to the next candidate.
            continue
        mime, suffix = format_info(fmt)
        return parsed, mime, suffix

    # None of the known formats could parse the string.
    raise ParseError("Could not detect format of spec string!")
def parse_spec(spec_str, filename=None, **kwargs):
    """
    Parse the given spec string and return only the parsed specs.

    This legacy entry point delegates to parse_spec_details and drops the
    detected mime type and extension.

    :param str spec_str: The specifications as string.
    :param str filename: [optional] Filename to determine the format from.
    :param str content_type: [optional] Content type to determine the format
        from.
    :return: The specifications.
    :rtype: dict
    :raises ParseError: when parsing fails.
    """
    specs, _mime, _suffix = parse_spec_details(spec_str, filename, **kwargs)
    return specs
def serialize_spec(specs, filename=None, **kwargs):
    """
    Serialize the given spec to a string.

    The output format is the most likely one according to the file name and
    the optional `content_type` keyword argument; JSON is used when neither
    gives a hint.

    :param dict specs: The specifications as dict.
    :param str filename: [optional] Filename to determine the format from.
    :param str content_type: [optional] Content type to determine the format
        from.
    :return: The serialized specifications.
    :rtype: str
    """
    # Only the top-ranked format is used: unlike parsing, there is nothing
    # to fall back to when serializing.
    preferred = __format_preferences(filename, kwargs.get("content_type"))[0]
    return __FORMAT_TO_SERIALIZER[preferred](specs)
|