1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
from __future__ import annotations
import argparse
import warnings
from http.cookies import SimpleCookie
from shlex import split
from typing import TYPE_CHECKING, Any, NoReturn
from urllib.parse import urlparse
from w3lib.http import basic_auth_header
if TYPE_CHECKING:
from collections.abc import Sequence
class DataAction(argparse.Action):
    """Argparse action that stores the option value minus one leading ``$``.

    NOTE(review): the ``$`` is presumably what remains of shell ``$'...'``
    quoting after the command has been tokenized -- confirm.
    """

    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str | Sequence[Any] | None,
        option_string: str | None = None,
    ) -> None:
        """Store ``str(values)`` on *namespace*, dropping a single ``$`` prefix."""
        text = str(values)
        # Only the first "$" is removed; any later ones are part of the data.
        if text.startswith("$"):
            text = text[1:]
        setattr(namespace, self.dest, text)
class CurlParser(argparse.ArgumentParser):
    """Argument parser whose ``error`` hook raises instead of returning."""

    def error(self, message: str) -> NoReturn:
        """Raise ``ValueError`` carrying the parser's error *message*.

        :param str message: description of the parsing problem
        :raises ValueError: always
        """
        raise ValueError(f"There was an error parsing the curl command: {message}")
# Flags that are accepted by the parser but take no value; each one is
# registered below as a plain boolean switch.
safe_to_ignore_arguments = [
    # `--compressed` argument is not safe to ignore, but it's included here
    # because the `HttpCompressionMiddleware` is enabled by default
    ["--compressed"],
    ["-s", "--silent"],
    ["-v", "--verbose"],
    ["-#", "--progress-bar"],
]

# Module-level parser for the subset of curl options handled in this file.
curl_parser = CurlParser()

# The single positional argument is the target URL.
curl_parser.add_argument("url")

# Repeatable options are collected into lists; the others keep one value.
curl_parser.add_argument("-H", "--header", dest="headers", action="append")
curl_parser.add_argument("-X", "--request", dest="method")
curl_parser.add_argument("-b", "--cookie", dest="cookies", action="append")
curl_parser.add_argument("-d", "--data", "--data-raw", dest="data", action=DataAction)
curl_parser.add_argument("-u", "--user", dest="auth")

for argument in safe_to_ignore_arguments:
    curl_parser.add_argument(*argument, action="store_true")
def _parse_headers_and_cookies(
parsed_args: argparse.Namespace,
) -> tuple[list[tuple[str, bytes]], dict[str, str]]:
headers: list[tuple[str, bytes]] = []
cookies: dict[str, str] = {}
for header in parsed_args.headers or ():
name, val = header.split(":", 1)
name = name.strip()
val = val.strip()
if name.title() == "Cookie":
for name, morsel in SimpleCookie(val).items():
cookies[name] = morsel.value
else:
headers.append((name, val))
for cookie_param in parsed_args.cookies or ():
# curl can treat this parameter as either "key=value; key2=value2" pairs, or a filename.
# Scrapy will only support key-value pairs.
if "=" not in cookie_param:
continue
for name, morsel in SimpleCookie(cookie_param).items():
cookies[name] = morsel.value
if parsed_args.auth:
user, password = parsed_args.auth.split(":", 1)
headers.append(("Authorization", basic_auth_header(user, password)))
return headers, cookies
def curl_to_request_kwargs(
    curl_command: str, ignore_unknown_options: bool = True
) -> dict[str, Any]:
    """Convert a cURL command syntax to Request kwargs.

    :param str curl_command: string containing the curl command
    :param bool ignore_unknown_options: If true, only a warning is emitted when
                                        cURL options are unknown. Otherwise
                                        raises an error. (default: True)
    :return: dictionary of Request kwargs
    :raises ValueError: if the command is empty or does not start with
                        ``curl``, if its arguments cannot be parsed, or if
                        unknown options are present and
                        *ignore_unknown_options* is false
    """
    curl_args = split(curl_command)
    # An empty or whitespace-only command produces no tokens at all; report
    # it the same way as any other command that is not a curl invocation.
    if not curl_args or curl_args[0] != "curl":
        raise ValueError('A curl command must start with "curl"')

    parsed_args, argv = curl_parser.parse_known_args(curl_args[1:])

    if argv:
        msg = f"Unrecognized options: {', '.join(argv)}"
        if ignore_unknown_options:
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn(msg, stacklevel=2)
        else:
            raise ValueError(msg)

    url = parsed_args.url

    # curl automatically prepends 'http' if the scheme is missing, but Request
    # needs the scheme to work. urlparse() can report a scheme for
    # "host:port" inputs such as "localhost:8080", so a scheme only counts
    # when it is followed by "://".
    scheme = urlparse(url).scheme
    if not scheme or not url.lower().startswith(f"{scheme}://"):
        url = "http://" + url

    # An explicitly empty payload (-d '') still counts as data.
    has_data = parsed_args.data is not None

    if parsed_args.method:
        method = parsed_args.method.upper()
    else:
        # if the "data" is specified but the "method" is not specified,
        # the default method is 'POST'
        method = "POST" if has_data else "GET"

    result: dict[str, Any] = {"method": method, "url": url}

    headers, cookies = _parse_headers_and_cookies(parsed_args)
    if headers:
        result["headers"] = headers
    if cookies:
        result["cookies"] = cookies
    if has_data:
        result["body"] = parsed_args.data

    return result
|