1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
from typing import Optional, Union, TYPE_CHECKING
import unicodedata
from .exceptions_types import EmailSyntaxError, ValidatedEmail
from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
# `_Resolver` is only used to annotate the `dns_resolver` parameter below.
# At runtime it is a plain `object` placeholder so that dns.resolver does not
# have to be imported here; static type checkers see the real Resolver class.
if not TYPE_CHECKING:
    _Resolver = object
else:
    import dns.resolver
    _Resolver = dns.resolver.Resolver
def validate_email(
    email: Union[str, bytes],
    /,  # prior arguments are positional-only
    *,  # subsequent arguments are keyword-only
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: bool = False,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    allow_display_name: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[_Resolver] = None
) -> ValidatedEmail:
    """
    Validate an email address and return a ValidatedEmail describing it.

    This is the main function of the module. ``email`` may be a str or an
    ASCII-encoded bytes object. Every option left as None falls back to the
    package-level default of the same (upper-cased) name; ``timeout`` falls
    back to DEFAULT_TIMEOUT only when no ``dns_resolver`` was supplied.

    The returned object carries the original address (without any display
    name), the display name, the normalized local part and domain, the
    combined normalized address and, when the address does not need SMTPUTF8,
    its ASCII form. When deliverability is checked, the MX information
    reported by the deliverability helper is attached as well.

    Raises EmailSyntaxError for the syntax problems detected here. Mandatory
    syntax checks run before the checks for optional features (quoted local
    part, domain literal, display name), so the former take precedence.
    """
    # The defaults live in the package namespace and are imported at call time.
    from . import (
        ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME,
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT,
    )

    # Resolve every unset option to its package-level default.
    allow_smtputf8 = ALLOW_SMTPUTF8 if allow_smtputf8 is None else allow_smtputf8
    allow_quoted_local = ALLOW_QUOTED_LOCAL if allow_quoted_local is None else allow_quoted_local
    allow_domain_literal = ALLOW_DOMAIN_LITERAL if allow_domain_literal is None else allow_domain_literal
    allow_display_name = ALLOW_DISPLAY_NAME if allow_display_name is None else allow_display_name
    check_deliverability = CHECK_DELIVERABILITY if check_deliverability is None else check_deliverability
    test_environment = TEST_ENVIRONMENT if test_environment is None else test_environment
    globally_deliverable = GLOBALLY_DELIVERABLE if globally_deliverable is None else globally_deliverable
    if dns_resolver is None and timeout is None:
        timeout = DEFAULT_TIMEOUT

    # Bytes input is accepted but has to be pure ASCII, because that is how
    # the bytes work on the wire with SMTP.
    if not isinstance(email, str):
        try:
            email = email.decode("ascii")
        except ValueError as decode_error:
            raise EmailSyntaxError("The email address is not valid ASCII.") from decode_error

    # Break the address into display name (or None), local part (before the
    # @-sign) and domain part (after the @-sign). Usually there is a single
    # @-sign, but the "quoted string" local part form (RFC 5321 4.1.2) permits
    # @-signs inside the quotes.
    name, local, domain, quoted = split_email(email)

    # All results are gathered on this instance.
    result = ValidatedEmail()
    # The display name, if any, is dropped here so that the length tests at
    # the end only see the address itself.
    original_local = ('"' + local + '"') if quoted else local
    result.original = original_local + "@" + domain
    result.display_name = name

    # Check the local part's syntax and obtain its normalized form. For a
    # quoted original whose decoded content is a valid unquoted local part,
    # the normalized (unescaped) local part comes back.
    local_part_checks = {
        "allow_smtputf8": allow_smtputf8,
        "allow_empty_local": allow_empty_local,
        "quoted_local_part": quoted,
    }
    local_info = validate_email_local_part(local, **local_part_checks)
    result.local_part = local_info["local_part"]
    result.ascii_local_part = local_info["ascii_local_part"]
    result.smtputf8 = local_info["smtputf8"]

    # RFC 6532 section 3.1 calls for Unicode NFC normalization, so the
    # NFC-normalized local part is what gets returned. Callers may use it in
    # place of the original string, hence it is validated again when it differs.
    nfc_local = unicodedata.normalize("NFC", result.local_part)
    if nfc_local != result.local_part:
        try:
            validate_email_local_part(nfc_local, **local_part_checks)
        except EmailSyntaxError as syntax_error:
            raise EmailSyntaxError(f"After Unicode normalization: {syntax_error!s}") from syntax_error
        result.local_part = nfc_local

    # Reject a disallowed quoted local part only now, after the mandatory
    # local-part checks above have had the chance to raise first.
    if quoted and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")

    # Certain mailbox names must be treated case-insensitively (RFC 2142),
    # so those are normalized to lowercase.
    ascii_local = result.ascii_local_part
    if ascii_local is not None:
        lowered_ascii_local = ascii_local.lower()
        if lowered_ascii_local in CASE_INSENSITIVE_MAILBOX_NAMES and result.local_part is not None:
            result.ascii_local_part = lowered_ascii_local
            result.local_part = result.local_part.lower()

    # Check the domain part's syntax and obtain its normalized form.
    if len(domain) == 0:
        raise EmailSyntaxError("There must be something after the @-sign.")

    # A bracketed domain is a domain literal; it also disables the
    # deliverability checks further down.
    is_domain_literal = domain.startswith("[") and domain.endswith("]")
    if is_domain_literal:
        # Parse the address between the brackets first, then apply the
        # optional-feature check.
        literal_info = validate_email_domain_literal(domain[1:-1])
        if not allow_domain_literal:
            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
        result.domain = literal_info["domain"]
        result.ascii_domain = literal_info["domain"]  # Domain literals are always ASCII.
        result.domain_address = literal_info["domain_address"]
    else:
        # Regular domain name: get back both the internationalized and the
        # ASCII normalized forms.
        name_info = validate_email_domain_name(
            domain,
            test_environment=test_environment,
            globally_deliverable=globally_deliverable,
        )
        result.domain = name_info["domain"]
        result.ascii_domain = name_info["ascii_domain"]

    # Assemble the complete normalized address.
    result.normalized = result.local_part + "@" + result.domain

    # An ASCII form exists only when the address does not require SMTPUTF8.
    if result.smtputf8:
        result.ascii_email = None
    else:
        if not result.ascii_domain:
            raise Exception("Missing ASCII domain.")
        result.ascii_email = (result.ascii_local_part or "") + "@" + result.ascii_domain

    # Check the length of the address.
    validate_email_length(result)

    # The display name check is the final syntax check, since checks against
    # optional parsing features always come last.
    if name is not None and not allow_display_name:
        raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

    # Deliverability is skipped when it is disabled, when running in a test
    # environment, or when the domain is a literal (nothing to look up).
    if not check_deliverability or test_environment or is_domain_literal:
        return result

    # Imported lazily because `deliverability` is slow to import (due to dns.resolver).
    from .deliverability import validate_email_deliverability

    # Query DNS and copy the resulting metadata onto the returned object.
    dns_info = validate_email_deliverability(
        result.ascii_domain, result.domain, timeout, dns_resolver
    )
    mx_records = dns_info.get("mx")
    if mx_records is not None:
        result.mx = mx_records
    result.mx_fallback_type = dns_info.get("mx_fallback_type")

    return result
|