oxinabox / DataDeps.jl

reproducible data setup for reproducible science
Other
150 stars 43 forks source link

ERROR: HTTP.ExceptionRequest.StatusError #140

Closed briochemc closed 3 years ago

briochemc commented 3 years ago

I have recently been hitting HTTP errors when trying to download datasets with DataDeps (I noticed it via the tests of the AIBECS package). I am not sure this is a DataDeps.jl issue, but this minimal working example (MWE), taken from your DataDeps blog post, throws:

julia> register(DataDep("FastText en",
           """
           Dataset: FastText Word Embeddings for English.
           Author: Bojanowski et. al. (Facebook)
           License: CC-SA 3.0
           Website: https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md

           300 dimentional FastText word embeddings, trained on Wikipedia
           Citation: P. Bojanowski*, E. Grave*, A. Joulin, T. Mikolov, Enriching Word Vectors with Subword Information

           Notice: this file is over 6.2GB
           """,
           "https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec",
           "ba5420ac217fb34f15f58ded0d911a4370dfb1f3341fa7511a49ae74c87de282"
       ));

julia> readdir(datadep"FastText en")
This program has requested access to the data dependency FastText en.
which is not currently installed. It can be installed automatically, and you will not see this message again.

Dataset: FastText Word Embeddings for English.
Author: Bojanowski et. al. (Facebook)
License: CC-SA 3.0
Website: https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md

300 dimentional FastText word embeddings, trained on Wikipedia
Citation: P. Bojanowski*, E. Grave*, A. Joulin, T. Mikolov, Enriching Word Vectors with Subword Information

Notice: this file is over 6.2GB

Do you want to download the dataset from https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec to "/Users/benoitpasquier/.julia/datadeps/FastText en"?
[y/n]
y
┌ Info: Downloading
│   source = "https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec"
│   dest = "/Users/benoitpasquier/.julia/datadeps/FastText en/wiki.en.vec"
│   progress = NaN
│   time_taken = "0.07 s"
│   time_remaining = "NaN s"
│   average_speed = "3.343 KiB/s"
│   downloaded = "243 bytes"
│   remaining = "∞ B"
└   total = "∞ B"
┌ Info: Downloading
│   source = "https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec"
│   dest = "/Users/benoitpasquier/.julia/datadeps/FastText en/wiki.en.vec"
│   progress = NaN
│   time_taken = "0.0 s"
│   time_remaining = "NaN s"
│   average_speed = "∞ B/s"
│   downloaded = "243 bytes"
│   remaining = "∞ B"
└   total = "∞ B"
┌ Info: Downloading
│   source = "https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec"
│   dest = "/Users/benoitpasquier/.julia/datadeps/FastText en/wiki.en.vec"
│   progress = NaN
│   time_taken = "0.0 s"
│   time_remaining = "NaN s"
│   average_speed = "∞ B/s"
│   downloaded = "243 bytes"
│   remaining = "∞ B"
└   total = "∞ B"
┌ Info: Downloading
│   source = "https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec"
│   dest = "/Users/benoitpasquier/.julia/datadeps/FastText en/wiki.en.vec"
│   progress = NaN
│   time_taken = "0.0 s"
│   time_remaining = "NaN s"
│   average_speed = "∞ B/s"
│   downloaded = "243 bytes"
│   remaining = "∞ B"
└   total = "∞ B"
┌ Info: Downloading
│   source = "https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec"
│   dest = "/Users/benoitpasquier/.julia/datadeps/FastText en/wiki.en.vec"
│   progress = NaN
│   time_taken = "0.0 s"
│   time_remaining = "NaN s"
│   average_speed = "∞ B/s"
│   downloaded = "243 bytes"
│   remaining = "∞ B"
└   total = "∞ B"
ERROR: HTTP.ExceptionRequest.StatusError(403, "GET", "/fasttext-vectors/wiki.en.vec", HTTP.Messages.Response:
"""
HTTP/1.1 403 Forbidden
x-amz-request-id: 68F00KSBS5HQ1CMT
x-amz-id-2: sy+mHTgoZjkycB5llAhjAoHmEszNjWVEq3u0cYReZJHC/x2h/hN2/D0X+p0FW8Niv2T+pJUx6uk=
Content-Type: application/xml
Transfer-Encoding: chunked
Date: Wed, 07 Apr 2021 06:32:42 GMT
Server: AmazonS3

""")
Stacktrace:
  [1] request(::Type{HTTP.ExceptionRequest.ExceptionLayer{HTTP.ConnectionRequest.ConnectionPoolLayer{HTTP.StreamRequest.StreamLayer{Union{}}}}}, ::URIs.URI, ::Vararg{Any, N} where N; kw::Base.Iterators.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:iofunction, :reached_redirect_limit), Tuple{HTTP.var"#23#29"{Float32, String, String, HTTP.var"#format_bytes_per_second#28"{HTTP.var"#format_bytes#26"}, HTTP.var"#format_seconds#27", HTTP.var"#format_bytes#26", HTTP.var"#format_progress#25"}, Bool}}})
    @ HTTP.ExceptionRequest ~/.julia/packages/HTTP/cxgat/src/ExceptionRequest.jl:22
  [2] (::Base.var"#70#72"{Base.var"#70#71#73"{ExponentialBackOff, HTTP.RetryRequest.var"#2#3"{Bool, HTTP.Messages.Request}, typeof(HTTP.request)}})(::Type, ::Vararg{Any, N} where N; kwargs::Base.Iterators.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:iofunction, :reached_redirect_limit), Tuple{HTTP.var"#23#29"{Float32, String, String, HTTP.var"#format_bytes_per_second#28"{HTTP.var"#format_bytes#26"}, HTTP.var"#format_seconds#27", HTTP.var"#format_bytes#26", HTTP.var"#format_progress#25"}, Bool}}})
    @ Base ./error.jl:301
  [3] #request#1
    @ ~/.julia/packages/HTTP/cxgat/src/RetryRequest.jl:44 [inlined]
  [4] request(::Type{HTTP.MessageRequest.MessageLayer{HTTP.RetryRequest.RetryLayer{HTTP.ExceptionRequest.ExceptionLayer{HTTP.ConnectionRequest.ConnectionPoolLayer{HTTP.StreamRequest.StreamLayer{Union{}}}}}}}, method::String, url::URIs.URI, headers::Vector{Pair{SubString{String}, SubString{String}}}, body::Nothing; http_version::VersionNumber, target::String, parent::Nothing, iofunction::Function, kw::Base.Iterators.Pairs{Symbol, Bool, Tuple{Symbol}, NamedTuple{(:reached_redirect_limit,), Tuple{Bool}}})
    @ HTTP.MessageRequest ~/.julia/packages/HTTP/cxgat/src/MessageRequest.jl:66
  [5] request(::Type{HTTP.BasicAuthRequest.BasicAuthLayer{HTTP.MessageRequest.MessageLayer{HTTP.RetryRequest.RetryLayer{HTTP.ExceptionRequest.ExceptionLayer{HTTP.ConnectionRequest.ConnectionPoolLayer{HTTP.StreamRequest.StreamLayer{Union{}}}}}}}}, method::String, url::URIs.URI, headers::Vector{Pair{SubString{String}, SubString{String}}}, body::Nothing; kw::Base.Iterators.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:reached_redirect_limit, :iofunction), Tuple{Bool, HTTP.var"#23#29"{Float32, String, String, HTTP.var"#format_bytes_per_second#28"{HTTP.var"#format_bytes#26"}, HTTP.var"#format_seconds#27", HTTP.var"#format_bytes#26", HTTP.var"#format_progress#25"}}}})
    @ HTTP.BasicAuthRequest ~/.julia/packages/HTTP/cxgat/src/BasicAuthRequest.jl:28
  [6] request(::Type{HTTP.RedirectRequest.RedirectLayer{HTTP.BasicAuthRequest.BasicAuthLayer{HTTP.MessageRequest.MessageLayer{HTTP.RetryRequest.RetryLayer{HTTP.ExceptionRequest.ExceptionLayer{HTTP.ConnectionRequest.ConnectionPoolLayer{HTTP.StreamRequest.StreamLayer{Union{}}}}}}}}}, method::String, url::URIs.URI, headers::Vector{Pair{SubString{String}, SubString{String}}}, body::Nothing; redirect_limit::Int64, forwardheaders::Bool, kw::Base.Iterators.Pairs{Symbol, HTTP.var"#23#29"{Float32, String, String, HTTP.var"#format_bytes_per_second#28"{HTTP.var"#format_bytes#26"}, HTTP.var"#format_seconds#27", HTTP.var"#format_bytes#26", HTTP.var"#format_progress#25"}, Tuple{Symbol}, NamedTuple{(:iofunction,), Tuple{HTTP.var"#23#29"{Float32, String, String, HTTP.var"#format_bytes_per_second#28"{HTTP.var"#format_bytes#26"}, HTTP.var"#format_seconds#27", HTTP.var"#format_bytes#26", HTTP.var"#format_progress#25"}}}})
    @ HTTP.RedirectRequest ~/.julia/packages/HTTP/cxgat/src/RedirectRequest.jl:24
  [7] request(method::String, url::String, h::Vector{Pair{SubString{String}, SubString{String}}}, b::Nothing; headers::Vector{Pair{SubString{String}, SubString{String}}}, body::Nothing, query::Nothing, kw::Base.Iterators.Pairs{Symbol, HTTP.var"#23#29"{Float32, String, String, HTTP.var"#format_bytes_per_second#28"{HTTP.var"#format_bytes#26"}, HTTP.var"#format_seconds#27", HTTP.var"#format_bytes#26", HTTP.var"#format_progress#25"}, Tuple{Symbol}, NamedTuple{(:iofunction,), Tuple{HTTP.var"#23#29"{Float32, String, String, HTTP.var"#format_bytes_per_second#28"{HTTP.var"#format_bytes#26"}, HTTP.var"#format_seconds#27", HTTP.var"#format_bytes#26", HTTP.var"#format_progress#25"}}}})
    @ HTTP ~/.julia/packages/HTTP/cxgat/src/HTTP.jl:315
  [8] #open#7
    @ ~/.julia/packages/HTTP/cxgat/src/HTTP.jl:349 [inlined]
  [9] open
    @ ~/.julia/packages/HTTP/cxgat/src/HTTP.jl:349 [inlined]
 [10] #download#22
    @ ~/.julia/packages/HTTP/cxgat/src/download.jl:101 [inlined]
 [11] #fetch_http#26
    @ ~/.julia/packages/DataDeps/ooWXe/src/fetch_helpers.jl:80 [inlined]
 [12] fetch_http(remotepath::String, localdir::String)
    @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/fetch_helpers.jl:79
 [13] fetch_default(remotepath::String, localdir::String)
    @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/fetch_helpers.jl:33
 [14] run_fetch
    @ ~/.julia/packages/DataDeps/ooWXe/src/resolution_automatic.jl:99 [inlined]
 [15] download(datadep::DataDep{String, String, typeof(DataDeps.fetch_default), typeof(identity)}, localdir::String; remotepath::String, i_accept_the_terms_of_use::Nothing, skip_checksum::Bool)
    @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/resolution_automatic.jl:78
 [16] download
    @ ~/.julia/packages/DataDeps/ooWXe/src/resolution_automatic.jl:70 [inlined]
 [17] handle_missing
    @ ~/.julia/packages/DataDeps/ooWXe/src/resolution_automatic.jl:10 [inlined]
 [18] _resolve(datadep::DataDep{String, String, typeof(DataDeps.fetch_default), typeof(identity)}, calling_filepath::String)
    @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/resolution.jl:83
 [19] resolve(datadep::DataDep{String, String, typeof(DataDeps.fetch_default), typeof(identity)}, inner_filepath::String, calling_filepath::String)
    @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/resolution.jl:29
 [20] resolve(datadep_name::String, inner_filepath::String, calling_filepath::String)
    @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/resolution.jl:54
 [21] resolve(namepath::String, calling_filepath::String)
    @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/resolution.jl:73
 [22] top-level scope
    @ REPL[7]:1

Would you mind taking a look?

briochemc commented 3 years ago

OK, as indicated by Fredrik Ekre on Slack, the URL itself returns a 403 (Forbidden), and that's the issue — not DataDeps.jl. Sorry for the noise!