sloria / environs

simplified environment variable parsing
MIT License
1.21k stars 85 forks source link

Cannot validate URLs missing the user field component #326

Closed ddennerline3 closed 9 months ago

ddennerline3 commented 9 months ago

The test code below demonstrates an unconfirmed bug in the marshmallow library that occurs when the user component of a URL is missing (but a password is present). I have reported the issue to the marshmallow team. In case anyone else encounters this issue, hopefully this saves them some time. Maybe this issue can remain open until the marshmallow bug is fixed.

The code below contains a workaround that uses a urlparse() call for validation.

# Internal imports
import logging
import os
from urllib.parse import urlparse, ParseResult

# External imports
from environs import Env, EnvValidationError
from marshmallow import Schema, fields, ValidationError
import pytest

# Own imports

logger = logging.getLogger(__name__)

# These unit tests demonstrate a bug I found while using the environs package
# (the root cause appears to be in marshmallow's URL validation).

def test_env_url2_happy(mocker):
    """Parse REDIS_URL with a custom ``url2`` parser built on ``urlparse``.

    The URL has a password but an empty user component. A parser registered
    under the name ``url2`` hands the raw value to ``urlparse``, and the
    resulting scheme and password are checked.
    """
    redis_url = "rediss://:pass1234@master.host-a-1.host-b.host-c.host-d.amazonaws.com:6379"
    mocker.patch.dict(os.environ, {"REDIS_URL": redis_url})

    env = Env()
    env.read_env()  # read .env file, if it exists

    # Register the workaround parser; it becomes available as ``env.url2``.
    @env.parser_for("url2")
    def parse_with_urlparse(value) -> ParseResult:
        return urlparse(value)

    parsed = env.url2("REDIS_URL")
    assert parsed.scheme == "rediss"
    assert parsed.password == "pass1234"

def test_env_url_unhappy(mocker):
    """Show that the built-in ``env.url`` rejects a URL with an empty user.

    The same REDIS_URL value (password present, user component empty) is
    expected to raise ``EnvValidationError`` when read through ``env.url``.
    """
    redis_url = "rediss://:pass1234@master.host-a-1.host-b.host-c.host-d.amazonaws.com:6379"
    mocker.patch.dict(os.environ, {"REDIS_URL": redis_url})

    env = Env()
    env.read_env()  # read .env file, if it exists

    with pytest.raises(EnvValidationError):
        env.url("REDIS_URL")

def test_marshmallow_long_url():
    """Test the URL validation that is called by environs.

    A long ``rediss`` URL with a password but an empty user component is
    loaded through a marshmallow ``fields.Url`` field and is expected to
    raise ``ValidationError``.
    """
    # For reference: the issue report identifies the following as the regular
    # expression that fails inside marshmallow. It is kept as a comment only,
    # because the test never evaluated it (it was an unused local).
    #   r"^(?:[a-z0-9\.\-\+]*)://(?:[^:@]+?(:[^:@]*?)?@|)(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|localhost|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\[[A-F0-9]*:[A-F0-9:]+\]|(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.?))(?::\d+)?(?:/?|[/?]\S+)\Z"
    url_invalid = "rediss://:abcDEF!H-IJKLMN.OpqrStUVwxyZ1234@master.host-a-1.host-b.host-c.host-d.amazonaws.com:6379"

    class UrlTest(Schema):
        my_url = fields.Url(schemes=["redis", "rediss"], require_tld=False)

    with pytest.raises(ValidationError):
        UrlTest().load({"my_url": url_invalid})

def test_marshmallow_url():
    """Test marshmallow validating URLs with and without a username.

    A URL with both user and password loads successfully; the same style of
    URL with an empty user component is expected to raise ``ValidationError``.
    ``urlparse`` is then used on the rejected URL to show it can still be
    split into its components.
    """

    class UrlTest(Schema):
        my_url = fields.Url(schemes=["redis", "rediss"], require_tld=False)

    # URL with both user and password: accepted by the schema.
    url_happy = "rediss://bill:pass1234@www.amazonaws.com:6379"
    url_test = UrlTest().load({"my_url": url_happy})
    assert "bill:pass1234" in url_test["my_url"]

    # Defined outside the ``pytest.raises`` body so that the context wraps
    # only the call expected to raise, and the name is unambiguously bound
    # for the urlparse check below.
    url_unhappy = "rediss://:pass1234@www.amazonaws.com:6379"
    with pytest.raises(ValidationError):
        UrlTest().load({"my_url": url_unhappy})

    # See if urlparse can handle the URL that marshmallow rejected
    parse_result = urlparse(url_unhappy)
    assert not parse_result.username
    assert parse_result.password == "pass1234"
sloria commented 9 months ago

thanks for reporting! further discussion can happen in the marshmallow issue. closing this for now