Major fixes and new features
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -0,0 +1,98 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Export the main method, helper methods, and the public data types.
|
||||
from .exceptions_types import ValidatedEmail, EmailNotValidError, \
|
||||
EmailSyntaxError, EmailUndeliverableError
|
||||
from .validate_email import validate_email
|
||||
from .version import __version__
|
||||
|
||||
# Names exported by ``from email_validator import *``: the main entry point,
# the result type, the exception hierarchy, the lazily-loading
# ``caching_resolver`` wrapper defined below, and the package version.
__all__ = ["validate_email",
           "ValidatedEmail", "EmailNotValidError",
           "EmailSyntaxError", "EmailUndeliverableError",
           "caching_resolver", "__version__"]
|
||||
|
||||
|
||||
def caching_resolver(*args, **kwargs):
    """Forward to ``deliverability.caching_resolver``, importing it on demand.

    All positional and keyword arguments are passed through unchanged and
    the callee's return value is returned as-is.
    """
    # The deliverability module is slow to import (it pulls in dns.resolver),
    # so it is only loaded once a resolver is actually requested.
    from . import deliverability as _deliverability

    return _deliverability.caching_resolver(*args, **kwargs)
|
||||
|
||||
|
||||
# These global attributes are a part of the library's API and can be
# changed by library users.

# Default values for keyword arguments.
# NOTE(review): the command-line tool maps environment variables with these
# names onto lowercase keyword arguments of validate_email, so each constant
# presumably backs the keyword of the same lowercase name -- confirm against
# validate_email.

ALLOW_SMTPUTF8 = True
ALLOW_QUOTED_LOCAL = False
ALLOW_DOMAIN_LITERAL = False
GLOBALLY_DELIVERABLE = True
CHECK_DELIVERABILITY = True
TEST_ENVIRONMENT = False
DEFAULT_TIMEOUT = 15  # secs; the deliverability module uses it as the DNS resolver lifetime when no timeout is given
|
||||
|
||||
# IANA Special Use Domain Names
# Last Updated 2021-09-21
# https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.txt
#
# The domain names without dots would be caught by the check that the domain
# name in an email address must have a period, but this list will also catch
# subdomains of these domains, which are also reserved.
SPECIAL_USE_DOMAIN_NAMES = [
    # The "arpa" entry here is consolidated from a lot of arpa subdomains
    # for private address (i.e. non-routable IP addresses like 172.16.x.x)
    # reverse mapping, plus some other subdomains. Although RFC 6761 says
    # that application software should not treat these domains as special,
    # they are private-use domains and so cannot have globally deliverable
    # email addresses, which is an assumption of this library, and probably
    # all of arpa is similarly special-use, so we reject it all.
    "arpa",

    # RFC 6761 says applications "SHOULD NOT" treat the "example" domains
    # as special, i.e. applications should accept these domains.
    #
    # The domain "example" alone fails our syntax validation because it
    # lacks a dot (we assume no one has an email address on a TLD directly).
    # "@example.com/net/org" will currently fail DNS-based deliverability
    # checks because IANA publishes a NULL MX for these domains, and
    # "@mail.example[.com/net/org]" and other subdomains will fail DNS-
    # based deliverability checks because IANA does not publish MX or A
    # DNS records for these subdomains.
    # "example", # i.e. "www.example"
    # "example.com",
    # "example.net",
    # "example.org",

    # RFC 6761 says that applications are permitted to treat this domain
    # as special and that DNS should return an immediate negative response,
    # so we also immediately reject this domain, which also follows the
    # purpose of the domain.
    "invalid",

    # RFC 6762 says that applications "may" treat ".local" as special and
    # that "name resolution APIs and libraries SHOULD recognize these names
    # as special," and since ".local" has no global definition, we reject
    # it, as we expect email addresses to be globally routable.
    "local",

    # RFC 6761 says that applications (like this library) are permitted
    # to treat "localhost" as special, and since it cannot have a globally
    # deliverable email address, we reject it.
    "localhost",

    # RFC 7686 says "applications that do not implement the Tor protocol
    # SHOULD generate an error upon the use of .onion and SHOULD NOT
    # perform a DNS lookup."
    "onion",

    # Although RFC 6761 says that application software should not treat
    # these domains as special, it also warns users that the address may
    # resolve differently in different systems, and therefore it cannot
    # have a globally routable email address, which is an assumption of
    # this library, so we reject "@test" and "@*.test" addresses, unless
    # the test_environment keyword argument is given, to allow their use
    # in application-level test environments. These domains will generally
    # fail deliverability checks because "test" is not an actual TLD.
    "test",
]
|
||||
@@ -0,0 +1,59 @@
|
||||
# A command-line tool for testing.
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# python -m email_validator test@example.org
|
||||
# python -m email_validator < LIST_OF_ADDRESSES.TXT
|
||||
#
|
||||
# Provide email addresses to validate either as a command-line argument
|
||||
# or in STDIN separated by newlines. Validation errors will be printed for
|
||||
# invalid email addresses. When passing an email address on the command
|
||||
# line, if the email address is valid, information about it will be printed.
|
||||
# When using STDIN, no output will be given for valid email addresses.
|
||||
#
|
||||
# Keyword arguments to validate_email can be set in environment variables
|
||||
# of the same name but uppercase (see below).
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
from .validate_email import validate_email
|
||||
from .deliverability import caching_resolver
|
||||
from .exceptions_types import EmailNotValidError
|
||||
|
||||
|
||||
# Environment variable values (compared case-insensitively, with surrounding
# whitespace ignored) that switch a boolean option off.
_FALSE_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})


def _env_flag(value):
    """Interpret an environment variable string as a boolean flag."""
    return value.strip().lower() not in _FALSE_ENV_VALUES


def main(dns_resolver=None):
    """Validate email addresses from the command line or from STDIN.

    With no command-line argument, each line of STDIN is validated and only
    failures are printed (as ``<address> <error>``). With an argument, that
    address is validated and either its details are printed as JSON or the
    validation error is printed.

    Options for validate_email are read from environment variables named
    like the option but in uppercase. Boolean options are off when the
    variable is empty, "0", "false", "no" or "off" (any letter case) and on
    for any other value.

    :param dns_resolver: resolver forwarded to validate_email; this argument
        exists for tests.
    """
    # Set options from environment variables.
    options = {}
    for varname in ('ALLOW_SMTPUTF8', 'ALLOW_QUOTED_LOCAL', 'ALLOW_DOMAIN_LITERAL',
                    'GLOBALLY_DELIVERABLE', 'CHECK_DELIVERABILITY', 'TEST_ENVIRONMENT'):
        if varname in os.environ:
            # bool() of a string is True for every non-empty value, including
            # "0" and "false", so the value is parsed explicitly instead.
            options[varname.lower()] = _env_flag(os.environ[varname])
    # NOTE(review): this forwards a ``default_timeout`` keyword; confirm that
    # validate_email accepts that name. validate_email_deliverability rejects
    # a timeout passed together with a dns_resolver, which STDIN mode always
    # supplies -- verify this combination against validate_email.
    for varname in ('DEFAULT_TIMEOUT',):
        if varname in os.environ:
            options[varname.lower()] = float(os.environ[varname])

    if len(sys.argv) == 1:
        # Validate the email addresses passed line-by-line on STDIN.
        # A single caching resolver is shared across all of the lines.
        dns_resolver = dns_resolver or caching_resolver()
        for line in sys.stdin:
            email = line.strip()
            try:
                validate_email(email, dns_resolver=dns_resolver, **options)
            except EmailNotValidError as e:
                print(f"{email} {e}")
    else:
        # Validate the email address passed on the command line.
        email = sys.argv[1]
        try:
            result = validate_email(email, dns_resolver=dns_resolver, **options)
        except EmailNotValidError as e:
            print(e)
        else:
            print(json.dumps(result.as_dict(), indent=2, sort_keys=True, ensure_ascii=False))
|
||||
|
||||
|
||||
# Run the command-line tool only when this module is executed directly
# (e.g. ``python -m email_validator``), not when it is imported.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,127 @@
|
||||
from typing import Optional, Any, Dict
|
||||
|
||||
from .exceptions_types import EmailUndeliverableError
|
||||
|
||||
import dns.resolver
|
||||
import dns.exception
|
||||
|
||||
|
||||
def caching_resolver(*, timeout: Optional[int] = None, cache=None):
    """Create a dnspython resolver that caches answers.

    :param timeout: resolver lifetime in seconds; when None the package-level
        DEFAULT_TIMEOUT is used.
    :param cache: cache object to attach to the resolver; when not given (or
        falsy) a new ``dns.resolver.LRUCache`` is created.
    :returns: the configured ``dns.resolver.Resolver``.
    """
    lifetime = timeout
    if lifetime is None:
        # Looked up at call time so that a DEFAULT_TIMEOUT changed by the
        # library user is honored.
        from . import DEFAULT_TIMEOUT as lifetime

    configured = dns.resolver.Resolver()
    configured.lifetime = lifetime  # type: ignore # timeout, in seconds
    configured.cache = cache or dns.resolver.LRUCache()  # type: ignore
    return configured
|
||||
|
||||
|
||||
def validate_email_deliverability(domain: str, domain_i18n: str, timeout: Optional[int] = None, dns_resolver=None):
    """Check through DNS whether ``domain`` appears able to receive email.

    The domain is checked for MX records. If there is no MX record, an A and
    then an AAAA record is tried, which is a deprecated fallback for
    deliverability; in that fallback case an SPF reject-all TXT record also
    marks the domain as undeliverable.

    :param domain: the domain name used for the DNS queries.
    :param domain_i18n: the form of the domain name shown in error messages.
    :param timeout: resolver lifetime in seconds; only valid when
        ``dns_resolver`` is not given. Defaults to the package-level
        DEFAULT_TIMEOUT.
    :param dns_resolver: resolver object providing ``resolve(domain, rdtype)``;
        when None, dnspython's default resolver is used.
    :returns: a dict with ``"mx"`` (a list of ``(priority, host)`` tuples) and
        ``"mx_fallback_type"`` (None, "A" or "AAAA"), plus ``"spf"`` when an
        SPF record was seen during the fallback; or
        ``{"unknown-deliverability": reason}`` when no nameserver answered or
        the query timed out.
    :raises EmailUndeliverableError: when the domain does not exist, does not
        accept or send email, or an unexpected error occurs during the check.
    :raises ValueError: when both ``timeout`` and ``dns_resolver`` are passed.
    """
    # If no dns.resolver.Resolver was given, get dnspython's default resolver.
    # Override the default resolver's timeout. This may affect other uses of
    # dnspython in this process.
    if dns_resolver is None:
        from . import DEFAULT_TIMEOUT
        if timeout is None:
            timeout = DEFAULT_TIMEOUT
        dns_resolver = dns.resolver.get_default_resolver()
        dns_resolver.lifetime = timeout
    elif timeout is not None:
        raise ValueError("It's not valid to pass both timeout and dns_resolver.")

    deliverability_info: Dict[str, Any] = {}

    try:
        try:
            # Try resolving for MX records (RFC 5321 Section 5).
            response = dns_resolver.resolve(domain, "MX")

            # For reporting, put them in priority order and remove the trailing dot in the qnames.
            mtas = sorted([(r.preference, str(r.exchange).rstrip('.')) for r in response])

            # RFC 7505: Null MX (0, ".") records signify the domain does not accept email.
            # Remove null MX records from the mtas list (but we've stripped trailing dots,
            # so the 'exchange' is just "") so we can check if there are no non-null MX
            # records remaining.
            mtas = [(preference, exchange) for preference, exchange in mtas
                    if exchange != ""]
            if not mtas:  # null MX only, if there were no MX records originally a NoAnswer exception would have occurred
                raise EmailUndeliverableError(f"The domain name {domain_i18n} does not accept email.")

            deliverability_info["mx"] = mtas
            deliverability_info["mx_fallback_type"] = None

        except dns.resolver.NoAnswer:
            # If there was no MX record, fall back to an A record. (RFC 5321 Section 5)
            try:
                response = dns_resolver.resolve(domain, "A")
                deliverability_info["mx"] = [(0, str(r)) for r in response]
                deliverability_info["mx_fallback_type"] = "A"

            except dns.resolver.NoAnswer:

                # If there was no A record, fall back to an AAAA record.
                # (It's unclear if SMTP servers actually do this.)
                try:
                    response = dns_resolver.resolve(domain, "AAAA")
                    deliverability_info["mx"] = [(0, str(r)) for r in response]
                    deliverability_info["mx_fallback_type"] = "AAAA"

                except dns.resolver.NoAnswer:
                    # If there was no MX, A, or AAAA record, then mail to
                    # this domain is not deliverable, although the domain
                    # name has other records (otherwise NXDOMAIN would
                    # have been raised).
                    raise EmailUndeliverableError(f"The domain name {domain_i18n} does not accept email.")

            # Check for a SPF (RFC 7208) reject-all record ("v=spf1 -all") which indicates
            # no emails are sent from this domain (similar to a Null MX record
            # but for sending rather than receiving). In combination with the
            # absence of an MX record, this is probably a good sign that the
            # domain is not used for email.
            try:
                response = dns_resolver.resolve(domain, "TXT")
                for rec in response:
                    # A TXT record may be split into several strings; join them first.
                    value = b"".join(rec.strings)
                    if value.startswith(b"v=spf1 "):
                        deliverability_info["spf"] = value.decode("ascii", errors='replace')
                        if value == b"v=spf1 -all":
                            raise EmailUndeliverableError(f"The domain name {domain_i18n} does not send email.")
            except dns.resolver.NoAnswer:
                # No TXT records means there is no SPF policy, so we cannot take any action.
                pass

    except dns.resolver.NXDOMAIN:
        # The domain name does not exist --- there are no records of any sort
        # for the domain name.
        raise EmailUndeliverableError(f"The domain name {domain_i18n} does not exist.")

    except dns.resolver.NoNameservers:
        # All nameservers failed to answer the query. This might be a problem
        # with local nameservers, maybe? We'll allow the domain to go through.
        return {
            "unknown-deliverability": "no_nameservers",
        }

    except dns.exception.Timeout:
        # A timeout could occur for various reasons, so don't treat it as a failure.
        return {
            "unknown-deliverability": "timeout",
        }

    except EmailUndeliverableError:
        # Don't let these get clobbered by the wider except block below.
        raise

    except Exception as e:
        # Unhandled conditions should not propagate. The original exception
        # is chained explicitly so the underlying cause stays visible.
        raise EmailUndeliverableError(
            "There was an error while checking if the domain name in the email address is deliverable: " + str(e)
        ) from e

    return deliverability_info
|
||||
@@ -0,0 +1,144 @@
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class EmailNotValidError(ValueError):
    """Base class for every exception this module raises.

    Derives from ValueError, so code catching ValueError also catches it.
    """
|
||||
|
||||
|
||||
class EmailSyntaxError(EmailNotValidError):
    """Raised when an email address is rejected because of its form."""
|
||||
|
||||
|
||||
class EmailUndeliverableError(EmailNotValidError):
    """Raised when an email address is rejected because its domain name does not appear deliverable."""
|
||||
|
||||
|
||||
class ValidatedEmail(object):
    """The validate_email function returns objects of this type holding the normalized form of the email address
    and other information.

    Fields are plain attributes set through the constructor's keyword
    arguments. Deprecated access paths (the ``email`` and ``original_email``
    attributes and dict-style indexing) remain for backwards compatibility.
    """

    # The email address that was passed to validate_email. (If passed as bytes, this will be a string.)
    original: str

    # The normalized email address, which should always be used in preference to the original address.
    # The normalized address converts an IDNA ASCII domain name to Unicode, if possible, and performs
    # Unicode normalization on the local part and on the domain (if originally Unicode). It is the
    # concatenation of the local_part and domain attributes, separated by an @-sign.
    normalized: str

    # The local part of the email address after Unicode normalization.
    local_part: str

    # The domain part of the email address after Unicode normalization or conversion to
    # Unicode from IDNA ascii.
    domain: str

    # If the domain part is a domain literal, the IPv4Address or IPv6Address object.
    domain_address: object

    # If not None, a form of the email address that uses 7-bit ASCII characters only.
    ascii_email: Optional[str]

    # If not None, the local part of the email address using 7-bit ASCII characters only.
    ascii_local_part: Optional[str]

    # A form of the domain name that uses 7-bit ASCII characters only.
    ascii_domain: str

    # If True, the SMTPUTF8 feature of your mail relay will be required to transmit messages
    # to this address. This flag is True just when ascii_local_part is missing. Otherwise it
    # is False.
    smtputf8: bool

    # If a deliverability check is performed and if it succeeds, a list of (priority, domain)
    # tuples of MX records specified in the DNS for the domain.
    mx: list

    # If no MX records are actually specified in DNS and instead are inferred, through an obsolete
    # mechanism, from A or AAAA records, the value is the type of DNS record used instead (`A` or `AAAA`).
    mx_fallback_type: str

    # Legacy dict-style keys accepted by __getitem__, mapped to the attribute
    # that now holds the value. Note that some of the names changed when they
    # became attributes.
    _LEGACY_KEYS = {
        "email": "normalized",
        "email_ascii": "ascii_email",
        "local": "local_part",
        "domain": "ascii_domain",
        "domain_i18n": "domain",
        "smtputf8": "smtputf8",
        "mx": "mx",
        "mx-fallback": "mx_fallback_type",
    }

    def __init__(self, **kwargs):
        """Set every keyword argument as an attribute. Tests use this constructor."""
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        return f"<ValidatedEmail {self.normalized}>"

    def __getattr__(self, key):
        """For backwards compatibility, support old field names.

        Only called when normal attribute lookup fails, so it also turns
        access to any field that was never set into an AttributeError.
        """
        if key == "original_email":
            return self.original
        if key == "email":
            return self.normalized
        raise AttributeError(key)

    @property
    def email(self):
        """Deprecated alias of ``normalized``; emits a DeprecationWarning."""
        # stacklevel=2 attributes the warning to the caller, matching __getitem__.
        warnings.warn("ValidatedEmail.email is deprecated and will be removed, use ValidatedEmail.normalized instead", DeprecationWarning, stacklevel=2)
        return self.normalized

    def __getitem__(self, key):
        """For backwards compatibility, some fields are also exposed through a dict-like interface.

        Emits a DeprecationWarning on every use and raises KeyError, carrying
        the offending key, for keys outside the legacy set.
        """
        warnings.warn("dict-like access to the return value of validate_email is deprecated and may not be supported in the future.", DeprecationWarning, stacklevel=2)
        try:
            attribute = self._LEGACY_KEYS[key]
        except (KeyError, TypeError):
            # TypeError covers unhashable keys, which are simply unknown keys here.
            raise KeyError(key) from None
        return getattr(self, attribute)

    def __eq__(self, other):
        """Compare field by field; fields that were never set compare as None. Tests use this.

        Defining __eq__ without __hash__ leaves instances unhashable.
        """
        if not isinstance(other, ValidatedEmail):
            return False
        return (
            self.normalized == other.normalized
            and self.local_part == other.local_part
            and self.domain == other.domain
            and getattr(self, 'ascii_email', None) == getattr(other, 'ascii_email', None)
            and getattr(self, 'ascii_local_part', None) == getattr(other, 'ascii_local_part', None)
            and getattr(self, 'ascii_domain', None) == getattr(other, 'ascii_domain', None)
            and self.smtputf8 == other.smtputf8
            # MX lists are compared order-insensitively through their sorted repr.
            and repr(sorted(self.mx) if getattr(self, 'mx', None) else None)
            == repr(sorted(other.mx) if getattr(other, 'mx', None) else None)
            and getattr(self, 'mx_fallback_type', None) == getattr(other, 'mx_fallback_type', None)
        )

    def as_constructor(self):
        """Return source text of a constructor call listing the fields that are set.

        This helps producing the README.
        """
        return "ValidatedEmail(" \
            + ",".join("\n {}={}".format(
                       key,
                       repr(getattr(self, key)))
                       for key in ('normalized', 'local_part', 'domain',
                                   'ascii_email', 'ascii_local_part', 'ascii_domain',
                                   'smtputf8', 'mx', 'mx_fallback_type')
                       if hasattr(self, key)
                       ) \
            + ")"

    def as_dict(self):
        """Convenience method for accessing ValidatedEmail as a dict.

        Returns a new dict of the instance attributes in which a truthy
        ``domain_address`` is replaced by its ``repr()`` string.
        """
        # Work on a copy: writing into self.__dict__ would overwrite the
        # instance's own domain_address attribute with a string.
        d = dict(self.__dict__)
        if d.get('domain_address'):
            d['domain_address'] = repr(d['domain_address'])
        return d
|
||||
@@ -0,0 +1,52 @@
|
||||
# These constants are defined by the email specifications.
|
||||
|
||||
import re
|
||||
|
||||
# RFC 5322 3.2.3 permits these characters in email addresses (ignoring
# internationalization), in runs separated by dots. ATEXT is the body of a
# regex character class, so it is spliced between square brackets below.
ATEXT = r'a-zA-Z0-9_!#\$%&\'\*\+\-/=\?\^`\{\|\}~'
ATEXT_RE = re.compile(f'[.{ATEXT}]')  # one ATEXT character or a dot
DOT_ATOM_TEXT = re.compile(rf'[{ATEXT}]+(?:\.[{ATEXT}]+)*\Z')

# RFC 6531 3.3 extends the allowed characters in internationalized
# addresses to also include three specific ranges of UTF8 defined in
# RFC 3629 section 4, which appear to be the Unicode code points from
# U+0080 to U+10FFFF.
ATEXT_INTL = ATEXT + "\u0080-\U0010FFFF"
ATEXT_INTL_RE = re.compile(f'[.{ATEXT_INTL}]')  # one ATEXT_INTL character or a dot
DOT_ATOM_TEXT_INTL = re.compile(rf'[{ATEXT_INTL}]+(?:\.[{ATEXT_INTL}]+)*\Z')

# The domain part of the email address, after IDNA (ASCII) encoding,
# must also satisfy the requirements of RFC 952/RFC 1123 2.1 which
# restrict the allowed characters of hostnames further.
ATEXT_HOSTNAME_INTL = re.compile("[a-zA-Z0-9\\-\\.\u0080-\U0010FFFF]")
# A label starts and ends with a letter or digit; hyphens may appear in between.
HOSTNAME_LABEL = r'(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])'
DOT_ATOM_TEXT_HOSTNAME = re.compile(rf'{HOSTNAME_LABEL}(?:\.{HOSTNAME_LABEL})*\Z')
DOMAIN_NAME_REGEX = re.compile(r"[A-Za-z]\Z")  # all TLDs currently end with a letter
|
||||
|
||||
# Domain literal (RFC 5322 3.4.1)
# "dtext" is %d33-90 / %d94-126, i.e. printable ASCII except "[", "\" and "]".
# The first range must therefore end at U+005A ("Z"). It previously ended at
# U+00FA, which also admitted the three excluded characters and non-ASCII
# code points up to U+00FA.
DOMAIN_LITERAL_CHARS = re.compile(r"[\u0021-\u005A\u005E-\u007E]")
|
||||
|
||||
# Quoted-string local part (RFC 5321 4.1.2, internationalized by RFC 6531 3.3)
# The permitted characters in a quoted string are the characters in the range
# 32-126, except that quotes and (literal) backslashes can only appear when escaped
# by a backslash. When internationalized, UTF8 strings are also permitted except
# the ASCII characters that are not previously permitted (see above).
# QUOTED_LOCAL_PART_ADDR = re.compile(r"^\"((?:[\u0020-\u0021\u0023-\u005B\u005D-\u007E]|\\[\u0020-\u007E])*)\"@(.*)")
#
# re.DOTALL makes the domain group capture everything after the @-sign.
# Without it "." stops at a newline, and because the pattern is applied with
# match() the rest of the input after that newline would be silently dropped
# instead of being passed on and rejected as part of the domain.
QUOTED_LOCAL_PART_ADDR = re.compile(r"^\"((?:[^\"\\]|\\.)*)\"@(.*)", re.DOTALL)
QTEXT_INTL = re.compile(r"[\u0020-\u007E\u0080-\U0010FFFF]")
|
||||
|
||||
# Length constants
# RFC 3696 + errata 1003 + errata 1690 (https://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690)
# explains the maximum length of an email address is 254 octets.
EMAIL_MAX_LENGTH = 254
LOCAL_PART_MAX_LENGTH = 64  # RFC 5321 4.5.3.1.1; compared against the character count of the local part
DNS_LABEL_LENGTH_LIMIT = 63  # in "octets", RFC 1035 2.3.1
DOMAIN_MAX_LENGTH = 255  # in "octets", RFC 1035 2.3.4 and RFC 5321 4.5.3.1.2
|
||||
|
||||
# RFC 2142
# Mailbox names listed by the RFC, grouped by the section that defines them.
# Section 3 lists "marketing"; the former "marking" entry was a misspelling.
CASE_INSENSITIVE_MAILBOX_NAMES = [
    'info', 'marketing', 'sales', 'support',  # section 3
    'abuse', 'noc', 'security',  # section 4
    'postmaster', 'hostmaster', 'usenet', 'news', 'webmaster', 'www', 'uucp', 'ftp',  # section 5
]
|
||||
557
venv/lib/python3.12/site-packages/email_validator/syntax.py
Normal file
557
venv/lib/python3.12/site-packages/email_validator/syntax.py
Normal file
@@ -0,0 +1,557 @@
|
||||
from .exceptions_types import EmailSyntaxError
|
||||
from .rfc_constants import EMAIL_MAX_LENGTH, LOCAL_PART_MAX_LENGTH, DOMAIN_MAX_LENGTH, \
|
||||
DOT_ATOM_TEXT, DOT_ATOM_TEXT_INTL, ATEXT_RE, ATEXT_INTL_RE, ATEXT_HOSTNAME_INTL, QTEXT_INTL, \
|
||||
DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX, DOMAIN_LITERAL_CHARS, \
|
||||
QUOTED_LOCAL_PART_ADDR
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
import idna # implements IDNA 2008; Python's codec is only IDNA 2003
|
||||
import ipaddress
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def split_email(email):
    """Split an address into ``(local_part, domain_part, was_quoted)``.

    Typical addresses contain a single @-sign. The awkward "quoted string"
    local part form (RFC 5321 4.1.2) additionally allows @-signs and escaped
    quotes inside the quotes, so that form is recognized first and split at
    the @-sign following the closing quote.

    Raises EmailSyntaxError when an unquoted address does not contain exactly
    one @-sign.
    """
    quoted = QUOTED_LOCAL_PART_ADDR.match(email)
    if quoted is None:
        # Unquoted form: split at the one and only at-sign.
        if email.count('@') != 1:
            raise EmailSyntaxError("The email address is not valid. It must have exactly one @-sign.")
        local_part, _, domain_part = email.partition('@')
        return local_part, domain_part, False

    escaped_local, domain_part = quoted.groups()

    # With the surrounding quotes removed the backslash-escaping is no longer
    # needed, so it is stripped to return the local part in normalized form.
    return re.sub(r"\\(.)", "\\1", escaped_local), domain_part, True
|
||||
|
||||
|
||||
def get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH):
    """Build the parenthesized "N characters too many" note for length errors.

    The excess is ``len(addr) - limit``. When ``utf8`` is true the count is
    reported as a lower bound ("at least N ...").
    """
    excess = len(addr) - limit
    qualifier = "at least " if utf8 else ""
    noun = "characters" if excess > 1 else "character"
    return f"({qualifier}{excess} {noun} too many)"
|
||||
|
||||
|
||||
def safe_character_display(c):
    """Return a representation of the character ``c`` that is safe to show in an error message.

    Letters, numbers, punctuation and symbols are returned quoted. Any other
    character is returned as its Unicode name or, if it has no name, as a
    ``U+XXXX`` hex code (four digits up to U+FFFF, eight digits above).
    """
    # Return safely displayable characters in quotes.
    if c == '\\':
        return f"\"{c}\""  # can't use repr because it escapes it
    if unicodedata.category(c)[0] in ("L", "N", "P", "S"):
        return repr(c)

    # Construct a hex string in case the unicode name doesn't exist.
    # The bound is inclusive so that U+FFFF itself gets the four-digit form.
    codepoint = ord(c)
    width = 4 if codepoint <= 0xFFFF else 8
    fallback = f"U+{codepoint:0{width}X}"

    # Return the character name or, if it has no name, the hex string.
    return unicodedata.name(c, fallback)
|
||||
|
||||
|
||||
def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_empty_local: bool = False,
                              quoted_local_part: bool = False):
    """Validates the syntax of the local part of an email address.

    :param local: the local part (the text before the @-sign), already
        unquoted if it was originally a quoted string.
    :param allow_smtputf8: when False, a local part containing characters
        outside the ASCII set is rejected.
    :param allow_empty_local: when True, an empty local part is accepted.
    :param quoted_local_part: True if the local part was originally quoted,
        which enables the more permissive quoted-string rules.
    :returns: a dict with the keys ``"local_part"`` (normalized, re-quoted if
        it is only valid as a quoted string), ``"ascii_local_part"`` (the same
        value, or None when SMTPUTF8 is required) and ``"smtputf8"``.
    :raises EmailSyntaxError: if the local part is empty (and that is not
        allowed), too long, or contains invalid or unsupported characters.
    """

    if len(local) == 0:
        if not allow_empty_local:
            raise EmailSyntaxError("There must be something before the @-sign.")
        else:
            # The caller allows an empty local part. Useful for validating certain
            # Postfix aliases.
            return {
                "local_part": local,
                "ascii_local_part": local,
                "smtputf8": False,
            }

    # Check the length of the local part by counting characters.
    # (RFC 5321 4.5.3.1.1)
    # We're checking the number of characters here. If the local part
    # is ASCII-only, then that's the same as bytes (octets). If it's
    # internationalized, then the UTF-8 encoding may be longer, but
    # that may not be relevant. We will check the total address length
    # instead.
    if len(local) > LOCAL_PART_MAX_LENGTH:
        reason = get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH)
        raise EmailSyntaxError(f"The email address is too long before the @-sign {reason}.")

    # Check the local part against the non-internationalized regular expression.
    # Most email addresses match this regex so it's probably fastest to check this first.
    # (RFC 5322 3.2.3)
    # All local parts matching the dot-atom rule are also valid as a quoted string
    # so if it was originally quoted (quoted_local_part is True) and this regex matches,
    # it's ok.
    # (RFC 5321 4.1.2 / RFC 5322 3.2.4).
    if DOT_ATOM_TEXT.match(local):
        # It's valid. And since it's just the permitted ASCII characters,
        # it's normalized and safe. If the local part was originally quoted,
        # the quoting was unnecessary and it'll be returned as normalized to
        # non-quoted form.

        # Return the local part and flag that SMTPUTF8 is not needed.
        return {
            "local_part": local,
            "ascii_local_part": local,
            "smtputf8": False,
        }

    # The local part failed the basic dot-atom check. Try the extended character set
    # for internationalized addresses. It's the same pattern but with additional
    # characters permitted.
    # RFC 6531 section 3.3.
    # `valid` records which form the local part was accepted under
    # ("dot-atom" or "quoted"); it stays None if neither form matched.
    valid: Optional[str] = None
    requires_smtputf8 = False
    if DOT_ATOM_TEXT_INTL.match(local):
        # But international characters in the local part may not be permitted.
        if not allow_smtputf8:
            # Check for invalid characters against the non-internationalized
            # permitted character set.
            # (RFC 5322 3.2.3)
            bad_chars = set(
                safe_character_display(c)
                for c in local
                if not ATEXT_RE.match(c)
            )
            if bad_chars:
                raise EmailSyntaxError("Internationalized characters before the @-sign are not supported: " + ", ".join(sorted(bad_chars)) + ".")

            # Although the check above should always find something, fall back to this just in case.
            raise EmailSyntaxError("Internationalized characters before the @-sign are not supported.")

        # It's valid.
        valid = "dot-atom"
        requires_smtputf8 = True

    # There are no syntactic restrictions on quoted local parts, so if
    # it was originally quoted, it is probably valid. More characters
    # are allowed, like @-signs, spaces, and quotes, and there are no
    # restrictions on the placement of dots, as in dot-atom local parts.
    elif quoted_local_part:
        # Check for invalid characters in a quoted string local part.
        # (RFC 5321 4.1.2. RFC 5322 lists additional permitted *obsolete*
        # characters which are *not* allowed here. RFC 6531 section 3.3
        # extends the range to UTF8 strings.)
        bad_chars = set(
            safe_character_display(c)
            for c in local
            if not QTEXT_INTL.match(c)
        )
        if bad_chars:
            raise EmailSyntaxError("The email address contains invalid characters in quotes before the @-sign: " + ", ".join(sorted(bad_chars)) + ".")

        # See if any characters are outside of the ASCII range.
        bad_chars = set(
            safe_character_display(c)
            for c in local
            if not (32 <= ord(c) <= 126)
        )
        if bad_chars:
            requires_smtputf8 = True

            # International characters in the local part may not be permitted.
            if not allow_smtputf8:
                raise EmailSyntaxError("Internationalized characters before the @-sign are not supported: " + ", ".join(sorted(bad_chars)) + ".")

        # It's valid.
        valid = "quoted"

    # If the local part matches the internationalized dot-atom form or was quoted,
    # perform normalization and additional checks for Unicode strings.
    if valid:
        # RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
        # so we'll return the normalized local part in the return value.
        local = unicodedata.normalize("NFC", local)

        # Check that the local part is a valid, safe, and sensible Unicode string.
        # Some of this may be redundant with the range U+0080 to U+10FFFF that is checked
        # by DOT_ATOM_TEXT_INTL and QTEXT_INTL. Other characters may be permitted by the
        # email specs, but they may not be valid, safe, or sensible Unicode strings.
        # See the function for rationale.
        check_unsafe_chars(local, allow_space=(valid == "quoted"))

        # Try encoding to UTF-8. Failure is possible with some characters like
        # surrogate code points, but those are checked above. Still, we don't
        # want to have an unhandled exception later.
        try:
            local.encode("utf8")
        except ValueError:
            raise EmailSyntaxError("The email address contains an invalid character.")

        # If this address passes only by the quoted string form, re-quote it
        # and backslash-escape quotes and backslashes (removing any unnecessary
        # escapes). Per RFC 5321 4.1.2, "all quoted forms MUST be treated as equivalent,
        # and the sending system SHOULD transmit the form that uses the minimum quoting possible."
        if valid == "quoted":
            local = '"' + re.sub(r'(["\\])', r'\\\1', local) + '"'

        return {
            "local_part": local,
            "ascii_local_part": local if not requires_smtputf8 else None,
            "smtputf8": requires_smtputf8,
        }

    # It's not a valid local part. Let's find out why.
    # (Since quoted local parts are all valid or handled above, these checks
    # don't apply in those cases.)

    # Check for invalid characters.
    # (RFC 5322 3.2.3, plus RFC 6531 3.3)
    bad_chars = set(
        safe_character_display(c)
        for c in local
        if not ATEXT_INTL_RE.match(c)
    )
    if bad_chars:
        raise EmailSyntaxError("The email address contains invalid characters before the @-sign: " + ", ".join(sorted(bad_chars)) + ".")

    # Check for dot errors imposed by the dot-atom rule.
    # (RFC 5322 3.2.3)
    check_dot_atom(local, 'An email address cannot start with a {}.', 'An email address cannot have a {} immediately before the @-sign.', is_hostname=False)

    # All of the reasons should already have been checked, but just in case
    # we have a fallback message.
    raise EmailSyntaxError("The email address contains invalid characters before the @-sign.")
|
||||
|
||||
|
||||
def check_unsafe_chars(s, allow_space=False):
    """Reject strings containing unsafe or non-sensible Unicode characters.

    Every character of ``s`` is classified by its Unicode general category.
    Offending characters are collected and reported together.

    :param s: the string (local part or domain) to inspect.
    :param allow_space: when true, characters in category ``Zs`` (space
        separators, which includes the ASCII space) are tolerated. This is
        meant for quoted-string local parts, where spaces are permitted.
    :raises EmailSyntaxError: if at least one unsafe character was found.
    """
    offenders = set()

    for position, char in enumerate(s):
        general_category = unicodedata.category(char)
        major_class = general_category[0]

        # Letters, numbers, punctuation, and symbols are always fine.
        if major_class in ("L", "N", "P", "S"):
            continue

        # A combining mark is only a problem in the first position, where
        # it would attach to whatever text precedes the string once it is
        # concatenated with something else.
        if major_class == "M":
            if position == 0:
                offenders.add(char)
            continue

        # Space separators are acceptable only when the caller opted in.
        # Unquoted ASCII spaces are rejected elsewhere by the atom regex;
        # non-ASCII spaces are rejected here in the same spirit. All
        # Unicode spaces are treated alike for consistency.
        if general_category == "Zs":
            if not allow_space:
                offenders.add(char)
            continue

        # Whatever remains is rejected:
        #  * line/paragraph separators (Zl, Zp), which act like line breaks;
        #  * control, format, surrogate, private-use and unassigned code
        #    points (C*), which can alter rendering (e.g. bidirectional
        #    formatting) or make no sense in a public address;
        #  * any category a future Unicode version might introduce.
        offenders.add(char)

    if offenders:
        listing = ", ".join(safe_character_display(ch) for ch in sorted(offenders))
        raise EmailSyntaxError("The email address contains unsafe characters: "
                               + listing + ".")
|
||||
|
||||
|
||||
def check_dot_atom(label, start_descr, end_descr, is_hostname):
    """Enforce the period (and, for hostnames, hyphen) placement rules.

    :param label: the local part or domain being checked.
    :param start_descr: message template, formatted with the offending
        character's name, used when ``label`` begins with that character.
    :param end_descr: message template, formatted the same way, used when
        ``label`` ends with that character.
    :param is_hostname: when true, the hyphen rules for hostnames are
        applied in addition to the period rules.
    :raises EmailSyntaxError: on the first rule that is violated.
    """

    def reject_at_edges(char, noun):
        # The trailing position is tested before the leading one, so a
        # label that is just the character itself reports the "end" message.
        if label.endswith(char):
            raise EmailSyntaxError(end_descr.format(noun))
        if label.startswith(char):
            raise EmailSyntaxError(start_descr.format(noun))

    # Dot-atom rules (RFC 5322 3.2.3).
    reject_at_edges(".", "period")
    if ".." in label:
        raise EmailSyntaxError("An email address cannot have two periods in a row.")

    if not is_hostname:
        return

    # Hostname rules (RFC 952).
    reject_at_edges("-", "hyphen")
    if any(pair in label for pair in (".-", "-.")):
        raise EmailSyntaxError("An email address cannot have a period and a hyphen next to each other.")
|
||||
|
||||
|
||||
def validate_email_domain_name(domain, test_environment=False, globally_deliverable=True):
    """Validate the syntax of the domain part of an email address.

    :param domain: the text after the @-sign (not a bracketed literal).
    :param test_environment: when true, the ``test`` name is exempt from the
        "must contain a period" rule and from the special-use name list.
    :param globally_deliverable: when true, additionally require a period in
        the domain and a match against ``DOMAIN_NAME_REGEX``.
    :returns: a dict with ``"ascii_domain"`` (the IDNA ASCII form) and
        ``"domain"`` (the form produced by decoding that ASCII form).
    :raises EmailSyntaxError: if any check fails. Errors coming from the
        ``idna`` package are chained as the cause.
    """

    # Check for invalid characters before normalization.
    # (RFC 952 plus RFC 6531 section 3.3 for internationalized addresses)
    bad_chars = {
        safe_character_display(c)
        for c in domain
        if not ATEXT_HOSTNAME_INTL.match(c)
    }
    if bad_chars:
        raise EmailSyntaxError("The part after the @-sign contains invalid characters: " + ", ".join(sorted(bad_chars)) + ".")

    # Check for unsafe characters.
    # Some of this may be redundant with the range U+0080 to U+10FFFF that is checked
    # by DOT_ATOM_TEXT_INTL. Other characters may be permitted by the email specs, but
    # they may not be valid, safe, or sensible Unicode strings.
    check_unsafe_chars(domain)

    # Perform UTS-46 normalization, which includes casefolding, NFC normalization,
    # and converting all label separators (the period/full stop, fullwidth full stop,
    # ideographic full stop, and halfwidth ideographic full stop) to basic periods.
    # It will also raise an exception if there is an invalid character in the input,
    # such as "⒈" which is invalid because it would expand to include a period.
    try:
        domain = idna.uts46_remap(domain, std3_rules=False, transitional=False)
    except idna.IDNAError as e:
        raise EmailSyntaxError(f"The part after the @-sign contains invalid characters ({e}).") from e

    # The domain part is made up of period-separated "labels." Each label must
    # have at least one character and cannot start or end with dashes, which
    # means there are some surprising restrictions on periods and dashes.
    # Check that before we do IDNA encoding because the IDNA library gives
    # unfriendly errors for these cases, but after UTS-46 normalization because
    # it can insert periods and hyphens (from fullwidth characters).
    # (RFC 952, RFC 1123 2.1, RFC 5322 3.2.3)
    check_dot_atom(domain, 'An email address cannot have a {} immediately after the @-sign.', 'An email address cannot end with a {}.', is_hostname=True)

    # Check for RFC 5890's invalid R-LDH labels, which are labels that start
    # with two characters other than "xn" and two dashes.
    for label in domain.split("."):
        if re.match(r"(?!xn)..--", label, re.I):
            raise EmailSyntaxError("An email address cannot have two letters followed by two dashes immediately after the @-sign or after a period, except Punycode.")

    if DOT_ATOM_TEXT_HOSTNAME.match(domain):
        # This is a valid non-internationalized domain.
        ascii_domain = domain
    else:
        # If international characters are present in the domain name, convert
        # the domain to IDNA ASCII. If internationalized characters are present,
        # the MTA must either support SMTPUTF8 or the mail client must convert the
        # domain name to IDNA before submission.
        #
        # Unfortunately this step incorrectly 'fixes' domain names with leading
        # periods by removing them, so we have to check for this above. It also gives
        # a funky error message ("No input") when there are two periods in a
        # row, also checked separately above.
        #
        # For ASCII-only domains, the transformation does nothing and is safe to
        # apply. However, to ensure we don't rely on the idna library for basic
        # syntax checks, we don't use it if it's not needed.
        try:
            ascii_domain = idna.encode(domain, uts46=False).decode("ascii")
        except idna.IDNAError as e:
            if "Domain too long" in str(e):
                # We can't really be more specific because UTS-46 normalization means
                # the length check is applied to a string that is different from the
                # one the user supplied. Also it is unclear whether the length check
                # applies to the internationalized form, the IDNA ASCII form, or both.
                raise EmailSyntaxError("The email address is too long after the @-sign.") from e
            raise EmailSyntaxError(f"The part after the @-sign contains invalid characters ({e}).") from e

        # Check the syntax of the string returned by idna.encode.
        # It should never fail.
        if not DOT_ATOM_TEXT_HOSTNAME.match(ascii_domain):
            raise EmailSyntaxError("The email address contains invalid characters after the @-sign after IDNA encoding.")

    # Check the length of the domain name in bytes.
    # (RFC 1035 2.3.4 and RFC 5321 4.5.3.1.2)
    # We're checking the number of bytes ("octets") here, which can be much
    # higher than the number of characters in internationalized domains,
    # on the assumption that the domain may be transmitted without SMTPUTF8
    # as IDNA ASCII. (This is also checked by idna.encode, so this exception
    # is never reached for internationalized domains.)
    if len(ascii_domain) > DOMAIN_MAX_LENGTH:
        reason = get_length_reason(ascii_domain, limit=DOMAIN_MAX_LENGTH)
        raise EmailSyntaxError(f"The email address is too long after the @-sign {reason}.")

    # Also check the label length limit.
    # (RFC 1035 2.3.1)
    for label in ascii_domain.split("."):
        if len(label) > DNS_LABEL_LENGTH_LIMIT:
            reason = get_length_reason(label, limit=DNS_LABEL_LENGTH_LIMIT)
            raise EmailSyntaxError(f"After the @-sign, periods cannot be separated by so many characters {reason}.")

    if globally_deliverable:
        # All publicly deliverable addresses have domain names with at least
        # one period, at least for gTLDs created since 2013 (per the ICANN Board
        # New gTLD Program Committee, https://www.icann.org/en/announcements/details/new-gtld-dotless-domain-names-prohibited-30-8-2013-en).
        # We'll consider the lack of a period a syntax error
        # since that will match people's sense of what an email address looks
        # like. We'll skip this in test environments to allow '@test' email
        # addresses.
        if "." not in ascii_domain and not (ascii_domain == "test" and test_environment):
            raise EmailSyntaxError("The part after the @-sign is not valid. It should have a period.")

        # We also know that all TLDs currently end with a letter.
        if not DOMAIN_NAME_REGEX.search(ascii_domain):
            raise EmailSyntaxError("The part after the @-sign is not valid. It is not within a valid top-level domain.")

    # Check special-use and reserved domain names.
    # Some might fail DNS-based deliverability checks, but that
    # can be turned off, so we should fail them all sooner.
    # See the references in __init__.py.
    # The list is imported at call time rather than at module import time.
    from . import SPECIAL_USE_DOMAIN_NAMES
    for d in SPECIAL_USE_DOMAIN_NAMES:
        # See the note near the definition of SPECIAL_USE_DOMAIN_NAMES.
        if d == "test" and test_environment:
            continue

        if ascii_domain == d or ascii_domain.endswith("." + d):
            raise EmailSyntaxError("The part after the @-sign is a special-use or reserved name that cannot be used with email.")

    # We may have been given an IDNA ASCII domain to begin with. Check
    # that the domain actually conforms to IDNA. It could look like IDNA
    # but not be actual IDNA. For ASCII-only domains, the conversion out
    # of IDNA just gives the same thing back.
    #
    # This gives us the canonical internationalized form of the domain.
    try:
        domain_i18n = idna.decode(ascii_domain.encode('ascii'))
    except idna.IDNAError as e:
        raise EmailSyntaxError(f"The part after the @-sign is not valid IDNA ({e}).") from e

    # Check for invalid characters after normalization. These
    # should never arise. See the similar checks earlier in this function.
    # NOTE(review): this re-checks `domain` (the UTS-46 remapped input), not
    # `domain_i18n` (the value actually returned) -- confirm which was intended.
    bad_chars = {
        safe_character_display(c)
        for c in domain
        if not ATEXT_HOSTNAME_INTL.match(c)
    }
    if bad_chars:
        raise EmailSyntaxError("The part after the @-sign contains invalid characters: " + ", ".join(sorted(bad_chars)) + ".")
    check_unsafe_chars(domain)

    # Return the IDNA ASCII-encoded form of the domain, which is how it
    # would be transmitted on the wire (except when used with SMTPUTF8
    # possibly), as well as the canonical Unicode form of the domain,
    # which is better for display purposes. This should also take care
    # of RFC 6532 section 3.1's suggestion to apply Unicode NFC
    # normalization to addresses.
    return {
        "ascii_domain": ascii_domain,
        "domain": domain_i18n,
    }
|
||||
|
||||
|
||||
def validate_email_length(addrinfo):
    """Raise if the whole address exceeds ``EMAIL_MAX_LENGTH``.

    Two measurements are taken, in this order:

    1. the ASCII form (``addrinfo.ascii_email``), when there is one;
    2. the UTF-8 encoding of ``addrinfo.normalized``.

    :param addrinfo: object exposing ``ascii_email`` and ``normalized``.
    :raises EmailSyntaxError: if either measurement is over the limit.
    """
    # If the address has an ASCII representation, assume it may travel as
    # ASCII (SMTPUTF8 cannot be assumed on every hop), so the limit applies
    # to ASCII characters, which are the same as octets. The
    # internationalized form may have far fewer characters because IDNA
    # ASCII is verbose, and the number of octets over the limit need not
    # equal the number of characters over it. For an internationalized
    # address, therefore, only a generic explanation is possible unless the
    # character count alone already exceeds the limit.
    ascii_form = addrinfo.ascii_email
    if ascii_form and len(ascii_form) > EMAIL_MAX_LENGTH:
        normalized = addrinfo.normalized
        if ascii_form == normalized:
            reason = get_length_reason(ascii_form)
        elif len(normalized) > EMAIL_MAX_LENGTH:
            # More characters than the limit means the ASCII form is
            # certainly too long as well.
            reason = get_length_reason(normalized, utf8=True)
        else:
            reason = "(when converted to IDNA ASCII)"
        raise EmailSyntaxError(f"The email address is too long {reason}.")

    # Additionally require that the UTF-8 encoding (not IDNA ASCII and not
    # the count of Unicode characters) fits within the limit. If the address
    # is transmitted using SMTPUTF8, the limit probably applies to the UTF-8
    # octets. When an ASCII form exists and differs from the
    # internationalized form, the latter is not expected to be longer, so
    # the check above would suffice; without an ASCII form the UTF-8
    # encoding must be measured. It can be up to about four times longer
    # than the number of characters.
    #
    # See the length checks on the local part and the domain.
    normalized = addrinfo.normalized
    utf8_octets = normalized.encode("utf8")
    if len(utf8_octets) <= EMAIL_MAX_LENGTH:
        return

    if len(normalized) > EMAIL_MAX_LENGTH:
        # More characters than the limit means the UTF-8 encoding is
        # certainly too long as well.
        reason = get_length_reason(normalized, utf8=True)
    else:
        reason = "(when encoded in bytes)"
    raise EmailSyntaxError(f"The email address is too long {reason}.")
|
||||
|
||||
|
||||
def validate_email_domain_literal(domain_literal):
    """Parse a domain literal and return its address and normalized form.

    This is the obscure bracketed-address syntax of RFC 5321 4.1.3 and
    RFC 5322 3.4.1.

    :param domain_literal: the text between the square brackets (the
        brackets themselves are not included).
    :returns: a dict with ``"domain_address"`` (an ``ipaddress.IPv4Address``
        or ``ipaddress.IPv6Address``) and ``"domain"`` (the re-bracketed,
        normalized literal).
    :raises EmailSyntaxError: if the literal is neither an IPv4 address nor
        an ``IPv6:``-tagged IPv6 address. Parsing errors from ``ipaddress``
        are chained as the cause.
    """

    # Try to parse the domain literal as an IPv4 address.
    # There is no tag for IPv4 addresses, so we can never
    # be sure if the user intends an IPv4 address.
    if re.match(r"^[0-9\.]+$", domain_literal):
        try:
            addr = ipaddress.IPv4Address(domain_literal)
        except ValueError as e:
            raise EmailSyntaxError(f"The address in brackets after the @-sign is not valid: It is not an IPv4 address ({e}) or is missing an address literal tag.") from e

        # Return the IPv4Address object and the address re-wrapped in brackets.
        return {
            "domain_address": addr,
            "domain": f"[{addr}]",
        }

    # If it begins with "IPv6:" it's an IPv6 address.
    if domain_literal.startswith("IPv6:"):
        try:
            # Strip the 5-character "IPv6:" tag before parsing.
            addr = ipaddress.IPv6Address(domain_literal[5:])
        except ValueError as e:
            raise EmailSyntaxError(f"The IPv6 address in brackets after the @-sign is not valid ({e}).") from e

        # Return the IPv6Address object and construct a normalized
        # domain literal.
        return {
            "domain_address": addr,
            "domain": f"[IPv6:{addr.compressed}]",
        }

    # Nothing else is valid; the rest only selects the best error message.

    if ":" not in domain_literal:
        raise EmailSyntaxError("The part after the @-sign in brackets is not an IPv4 address and has no address literal tag.")

    # The tag (the part before the colon) has character restrictions,
    # but since it must come from a registry of tags (in which only "IPv6" is defined),
    # there's no need to check the syntax of the tag. See RFC 5321 4.1.2.

    # Check for permitted ASCII characters. This actually doesn't matter
    # since an exception is raised right after regardless.
    bad_chars = {
        safe_character_display(c)
        for c in domain_literal
        if not DOMAIN_LITERAL_CHARS.match(c)
    }
    if bad_chars:
        raise EmailSyntaxError("The part after the @-sign contains invalid characters in brackets: " + ", ".join(sorted(bad_chars)) + ".")

    # There are no other domain literal tags.
    # https://www.iana.org/assignments/address-literal-tags/address-literal-tags.xhtml
    raise EmailSyntaxError("The part after the @-sign contains an invalid address literal tag in brackets.")
|
||||
@@ -0,0 +1,146 @@
|
||||
from typing import Optional, Union
|
||||
|
||||
from .exceptions_types import EmailSyntaxError, ValidatedEmail
|
||||
from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
|
||||
from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
|
||||
|
||||
|
||||
def validate_email(
    email: Union[str, bytes],
    /,  # prior arguments are positional-only
    *,  # subsequent arguments are keyword-only
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: bool = False,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[object] = None
) -> ValidatedEmail:
    """
    Given an email address, and some options, returns a ValidatedEmail instance
    with information about the address if it is valid or, if the address is not
    valid, raises an EmailNotValidError. This is the main function of the module.

    Options left as ``None`` take their value from the package-level globals
    (``ALLOW_SMTPUTF8``, ``ALLOW_QUOTED_LOCAL``, ``ALLOW_DOMAIN_LITERAL``,
    ``CHECK_DELIVERABILITY``, ``TEST_ENVIRONMENT``, ``GLOBALLY_DELIVERABLE``,
    ``DEFAULT_TIMEOUT``), which are read on every call.

    :param email: the address, as ``str`` or as ASCII-encoded ``bytes``.
    :param allow_smtputf8: forwarded to the local-part validator.
    :param allow_empty_local: forwarded to the local-part validator.
    :param allow_quoted_local: if false, a quoted local part is rejected
        (after the local part's own syntax checks have run).
    :param allow_domain_literal: if false, a bracketed address literal after
        the @-sign is rejected (after the literal itself has been parsed).
    :param check_deliverability: if true (and ``test_environment`` is false,
        and the domain is not a literal), run the deliverability check and
        copy its results onto the returned object.
    :param test_environment: forwarded to the domain-name validator; also
        disables the deliverability check.
    :param globally_deliverable: forwarded to the domain-name validator.
    :param timeout: forwarded to the deliverability check. The default is
        applied only when no ``dns_resolver`` is given either.
    :param dns_resolver: forwarded to the deliverability check.
    :raises EmailSyntaxError: when the address fails a syntax check in this
        function; the called validators may raise as well.
    """

    # Fill in default values of arguments.
    from . import ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, \
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
    if allow_smtputf8 is None:
        allow_smtputf8 = ALLOW_SMTPUTF8
    if allow_quoted_local is None:
        allow_quoted_local = ALLOW_QUOTED_LOCAL
    if allow_domain_literal is None:
        allow_domain_literal = ALLOW_DOMAIN_LITERAL
    if check_deliverability is None:
        check_deliverability = CHECK_DELIVERABILITY
    if test_environment is None:
        test_environment = TEST_ENVIRONMENT
    if globally_deliverable is None:
        globally_deliverable = GLOBALLY_DELIVERABLE
    if timeout is None and dns_resolver is None:
        timeout = DEFAULT_TIMEOUT

    # Allow email to be a str or bytes instance. If bytes,
    # it must be ASCII because that's how the bytes work
    # on the wire with SMTP.
    if not isinstance(email, str):
        try:
            email = email.decode("ascii")
        except ValueError as e:
            # Keep the decoding error attached as the explicit cause.
            raise EmailSyntaxError("The email address is not valid ASCII.") from e

    # Split the address into the local part (before the @-sign)
    # and the domain part (after the @-sign). Normally, there
    # is only one @-sign. But the awkward "quoted string" local
    # part form (RFC 5321 4.1.2) allows @-signs in the local
    # part if the local part is quoted.
    local_part, domain_part, is_quoted_local_part \
        = split_email(email)

    # Collect return values in this instance.
    ret = ValidatedEmail()
    ret.original = email

    # Validate the email address's local part syntax and get a normalized form.
    # If the original address was quoted and the decoded local part is a valid
    # unquoted local part, then we'll get back a normalized (unescaped) local
    # part.
    local_part_info = validate_email_local_part(local_part,
                                                allow_smtputf8=allow_smtputf8,
                                                allow_empty_local=allow_empty_local,
                                                quoted_local_part=is_quoted_local_part)
    ret.local_part = local_part_info["local_part"]
    ret.ascii_local_part = local_part_info["ascii_local_part"]
    ret.smtputf8 = local_part_info["smtputf8"]

    # If a quoted local part isn't allowed but is present, now raise an exception.
    # This is done after any exceptions raised by validate_email_local_part so
    # that mandatory checks have highest precedence.
    if is_quoted_local_part and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")

    # Some local parts are required to be case-insensitive, so we should normalize
    # to lowercase.
    # RFC 2142
    if ret.ascii_local_part is not None \
       and ret.ascii_local_part.lower() in CASE_INSENSITIVE_MAILBOX_NAMES \
       and ret.local_part is not None:
        ret.ascii_local_part = ret.ascii_local_part.lower()
        ret.local_part = ret.local_part.lower()

    # Validate the email address's domain part syntax and get a normalized form.
    is_domain_literal = False
    if len(domain_part) == 0:
        raise EmailSyntaxError("There must be something after the @-sign.")

    elif domain_part.startswith("[") and domain_part.endswith("]"):
        # Parse the address in the domain literal and get back a normalized domain.
        # The literal is parsed before the permission check, so a malformed
        # literal reports its own error first.
        domain_part_info = validate_email_domain_literal(domain_part[1:-1])
        if not allow_domain_literal:
            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
        ret.domain = domain_part_info["domain"]
        ret.ascii_domain = domain_part_info["domain"]  # Domain literals are always ASCII.
        ret.domain_address = domain_part_info["domain_address"]
        is_domain_literal = True  # Prevent deliverability checks.

    else:
        # Check the syntax of the domain and get back a normalized
        # internationalized and ASCII form.
        domain_part_info = validate_email_domain_name(domain_part, test_environment=test_environment, globally_deliverable=globally_deliverable)
        ret.domain = domain_part_info["domain"]
        ret.ascii_domain = domain_part_info["ascii_domain"]

    # Construct the complete normalized form.
    ret.normalized = ret.local_part + "@" + ret.domain

    # If the email address has an ASCII form, add it.
    if not ret.smtputf8:
        if not ret.ascii_domain:
            raise Exception("Missing ASCII domain.")
        ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain
    else:
        ret.ascii_email = None

    # Check the length of the address.
    validate_email_length(ret)

    if check_deliverability and not test_environment:
        # Validate the email address's deliverability using DNS
        # and update the returned ValidatedEmail object with metadata.

        if is_domain_literal:
            # There is nothing to check --- skip deliverability checks.
            return ret

        # Lazy load `deliverability` as it is slow to import (due to dns.resolver)
        from .deliverability import validate_email_deliverability
        deliverability_info = validate_email_deliverability(
            ret.ascii_domain, ret.domain, timeout, dns_resolver
        )
        for key, value in deliverability_info.items():
            setattr(ret, key, value)

    return ret
|
||||
@@ -0,0 +1 @@
|
||||
__version__ = "2.1.0"
|
||||
Reference in New Issue
Block a user