Closed simonw closed 8 months ago
Could solve it with: https://playwright.dev/python/docs/network#http-authentication
context = browser.new_context(
http_credentials={"username": "bill", "password": "pa55w0rd"}
)
page = context.new_page()
page.goto("https://example.com")
Via:
I can't see a way to tell `page.goto()` not to throw an `ERR_INVALID_AUTH_CREDENTIALS` error if it encounters an authentication-needed page, so I don't think I can get `shot-scraper auth` to work. I'll need to add new options instead.
Got this working:
diff --git a/shot_scraper/cli.py b/shot_scraper/cli.py
index faf45e5..22c91b7 100644
--- a/shot_scraper/cli.py
+++ b/shot_scraper/cli.py
@@ -65,6 +65,12 @@ def bypass_csp_option(fn):
return fn
+def http_auth_options(fn):
+ click.option("--auth-username", help="Username for HTTP Basic authentication")(fn)
+ click.option("--auth-password", help="Password for HTTP Basic authentication")(fn)
+ return fn
+
+
def skip_or_fail(response, skip, fail):
if skip and fail:
raise click.ClickException("--skip and --fail cannot be used together")
@@ -201,6 +207,7 @@ def cli():
@skip_fail_options
@bypass_csp_option
@silent_option
+@http_auth_options
def shot(
url,
auth,
@@ -230,6 +237,8 @@ def shot(
fail,
bypass_csp,
silent,
+ auth_username,
+ auth_password,
):
"""
Take a single screenshot of a page or portion of a page.
@@ -291,6 +300,8 @@ def shot(
timeout=timeout,
reduced_motion=reduced_motion,
bypass_csp=bypass_csp,
+ auth_username=auth_username,
+ auth_password=auth_password,
)
if interactive or devtools:
use_existing_page = True
@@ -341,6 +352,8 @@ def _browser_context(
timeout=None,
reduced_motion=False,
bypass_csp=False,
+ auth_username=None,
+ auth_password=None,
):
browser_kwargs = dict(headless=not interactive, devtools=devtools)
if browser == "chromium":
@@ -363,6 +376,11 @@ def _browser_context(
context_args["user_agent"] = user_agent
if bypass_csp:
context_args["bypass_csp"] = bypass_csp
+ if auth_username and auth_password:
+ context_args["http_credentials"] = {
+ "username": auth_username,
+ "password": auth_password,
+ }
context = browser_obj.new_context(**context_args)
if timeout:
context.set_default_timeout(timeout)
Then:
shot-scraper https://datasette-auth-passwords-http-basic-demo.datasette.io/ \
--auth-username root \
--auth-password 'password!'
Which produced:
Need to add it to:
multi
pdf
accessibility
html
javascript
Got this working:
shot-scraper javascript https://datasette-auth-passwords-http-basic-demo.datasette.io/ \
--auth-username root \
--auth-password 'password!' \
'document.title'
And this:
echo "- url: https://datasette-auth-passwords-http-basic-demo.datasette.io/\n output: /tmp/out.png" | \
shot-scraper multi - --auth-username root --auth-password 'password!'
And:
shot-scraper pdf https://datasette-auth-passwords-http-basic-demo.datasette.io/ \
--auth-username root \
--auth-password 'password!'
And:
shot-scraper accessibility https://datasette-auth-passwords-http-basic-demo.datasette.io/ \
--auth-username root \
--auth-password 'password!'
Output starts:
{
"role": "WebArea",
"name": "datasette-auth-passwords HTTP Basic auth demo: _internal, public",
"children": [
{
"role": "link",
"name": "home"
},
{
"role": "DisclosureTriangle",
"name": "Menu"
},
{
"role": "text",
"name": "Root"
},
{
"role": "heading",
"name": "datasette-auth-passwords HTTP Basic auth demo",
"level": 1
},
{
"role": "link",
"name": "_internal"
}
And:
shot-scraper html https://datasette-auth-passwords-http-basic-demo.datasette.io/ \
--auth-username root \
--auth-password 'password!'
Works beautifully, thank you very much! And since I typically delete toots after a few weeks, I'll add a screenshot of the toot you refer to above here for posterity. :-)
Idea suggested here: https://mastodon.social/@jpmens/111879657240670040
I thought this would work with `shot-scraper auth`, but...
Produces: