From b94f13cba85e5d72e2f41c5f6c222af63ea7e9b9 Mon Sep 17 00:00:00 2001
From: caedenph
Date: Sat, 29 Jul 2023 22:44:43 +0100
Subject: [PATCH] feat(strings): Create is_valid_email_address algorithm

---
 strings/is_valid_email_address.py | 62 ++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py
index 8ce960760..f7aa9002e 100644
--- a/strings/is_valid_email_address.py
+++ b/strings/is_valid_email_address.py
@@ -4,6 +4,8 @@ Implements an is valid email address algorithm
 @ https://en.wikipedia.org/wiki/Email_address
 """
 
+import re
+import string
 
 email_tests: tuple[tuple[str, bool], ...] = (
     ("simple@example.com", True),
@@ -16,16 +18,17 @@ email_tests: tuple[tuple[str, bool], ...] = (
     ("example-indeed@strange-example.com", True),
     ("test/test@test.com", True),
     (
-        "1234567890123456789012345678901234567890123456789012345678901234567890@example.com",
+        "123456789012345678901234567890123456789012345678901234567890123@example.com",
         True,
     ),
     ("admin@mailserver1", True),
     ("example@s.example", True),
     ("Abc.example.com", False),
     ("A@b@c@example.com", False),
+    ("abc@example..com", False),
     ("a(c)d,e:f;gi[j\\k]l@example.com", False),
     (
-        "12345678901234567890123456789012345678901234567890123456789012345678901@example.com",
+        "12345678901234567890123456789012345678901234567890123456789012345@example.com",
         False,
     ),
     ("i.like.underscores@but_its_not_allowed_in_this_part", False),
@@ -45,19 +48,65 @@ def is_valid_email_address(email: str) -> bool:
     is associated with a display-name. For example, "john.smith"
     The domain is stricter than the local part and follows the @ symbol.
 
+    Global email checks:
+     1. There can only be one @ symbol in the email address. Technically if the
+        @ symbol is quoted in the local-part, then it is valid, however this
+        implementation ignores "" for now.
+        (See https://en.wikipedia.org/wiki/Email_address#:~:text=If%20quoted,)
+     2. The local-part and the domain are limited to a certain number of octets.
+        Only ASCII characters pass the character checks below, and each ASCII
+        character is exactly one octet, so checking the string length suffices.
+    Checks for the local-part:
+     3. The local-part may contain: upper and lowercase latin letters, digits 0 to 9,
+        and the printable characters !#$%&'*+-/=?^_`{|}~
+     4. The local-part may also contain a "." in any place that is not the first or
+        last character, and may not have more than one "." consecutively.
+
+    Checks for the domain:
+     5. The domain may contain: upper and lowercase latin letters and digits 0 to 9
+     6. Hyphen "-", provided that it is not the first or last character
+     7. The domain may also contain a "." in any place that is not the first or
+        last character, and may not have more than one "." consecutively.
+
     >>> for email, valid in email_tests:
     ...     assert is_valid_email_address(email) is valid
     """
 
-    # Make sure that there is only one @ symbol in the email address
+    # (1.) Make sure that there is only one @ symbol in the email address
     if email.count("@") != 1:
         return False
 
     local_part, domain = email.split("@")
-    # Check octet length of the local part and domain
+    # (2.) Check octet length of the local part and domain
     if len(local_part) > MAX_LOCAL_PART_OCTETS or len(domain) > MAX_DOMAIN_OCTETS:
         return False
 
+    # (3.) Validate the characters in the local-part
+    if any(
+        char not in string.ascii_letters + string.digits + ".!#$%&'*+-/=?^_`{|}~"
+        for char in local_part
+    ):
+        return False
+
+    # (4.) Validate the placement of "." characters
+    if (
+        local_part.startswith(".")
+        or local_part.endswith(".")
+        or re.search(r"\.\.+", local_part)
+    ):
+        return False
+
+    # (5.) Validate the characters in the domain
+    if any(char not in string.ascii_letters + string.digits + ".-" for char in domain):
+        return False
+
+    # (6.) Validate the placement of "-" characters
+    if domain.startswith("-") or domain.endswith("-"):
+        return False
+
+    # (7.) Validate the placement of "." characters
+    if domain.startswith(".") or domain.endswith(".") or re.search(r"\.\.+", domain):
+        return False
     return True
 
 
@@ -66,6 +115,7 @@ if __name__ == "__main__":
 
     doctest.testmod()
 
-    for email, _ in email_tests:
+    for email, valid in email_tests:
         is_valid = is_valid_email_address(email)
-        print(f"Email address {email} is {'not' if is_valid is False else ''} valid")
+        assert is_valid == valid, f"{email} is {is_valid}"
+        print(f"Email address {email} is {'not ' if is_valid is False else ''}valid")