nathom / streamrip

A scriptable music downloader for Qobuz, Tidal, SoundCloud, and Deezer
GNU General Public License v3.0

[BUG] <Slow speed, download behaviour> #601

Closed GGromadzki closed 7 months ago

GGromadzki commented 9 months ago

Describe the bug

In versions >2.0 I get a very slow download speed, about 10 MB/s total (not per track). The speed is divided equally across all tracks: if I download 10 tracks at once, each downloads at about 1.1 MB/s, sometimes dropping lower, but all at once (see screenshot). Before, I had 40-60 MB/s. Also, if I download a few albums at once, the albums are downloaded in parallel instead of one full album after another.

Command Used

rip url https://open.qobuz.com/album/a50f9yqub56ic https://open.qobuz.com/album/agj113lv19ezb

Debug Traceback

Normal command "rip url ...."

Config File

[downloads]
# Folder where tracks are downloaded to
folder = "F:\\Album"
# Put Qobuz albums in a 'Qobuz' folder, Tidal albums in 'Tidal' etc.
source_subdirectories = false

# Download (and convert) tracks all at once, instead of sequentially. 
# If you are converting the tracks, or have fast internet, this will 
# substantially improve processing speed.
concurrency = true
# The maximum number of tracks to download at once
# If you have very fast internet, you will benefit from a higher value.
# A value that is too high for your bandwidth may cause slowdowns
# Set to -1 for no limit
max_connections = 10
# Max number of API requests per source to handle per minute
# Set to -1 for no limit
requests_per_minute = 120

[qobuz]
# 1: 320kbps MP3, 2: 16/44.1, 3: 24/<=96, 4: 24/>=96
quality = 3
# This will download booklet pdfs that are included with some albums
download_booklets = true

# Authenticate to Qobuz using auth token? Value can be true/false only
use_auth_token = false
# Enter your userid if the above use_auth_token is set to true, else enter your email
email_or_userid = "fgbfgbgfbfgbf"
# Enter your auth token if the above use_auth_token is set to true, else enter the md5 hash of your plaintext password
password_or_token = "fgbfgbfgbfgb"
# Do not change
app_id = "bgfbfgb"
# Do not change
secrets = ["fgbfgbfgb", "fb44tb4b4b4"]

[tidal]
# 0: 256kbps AAC, 1: 320kbps AAC, 2: 16/44.1 "HiFi" FLAC, 3: 24/44.1 "MQA" FLAC
quality = 3
# This will download videos included in Video Albums.
download_videos = true

# Do not change any of the fields below
user_id = ""
country_code = ""
access_token = ""
refresh_token = ""
# Tokens last 1 week after refresh. This is the Unix timestamp of the expiration
# time. If you haven't used streamrip in more than a week, you may have to log
# in again using `rip config --tidal`
token_expiry = ""

[deezer]
# 0, 1, or 2
# This only applies to paid Deezer subscriptions. Those using deezloader
# are automatically limited to quality = 1
quality = 2
# An authentication cookie that allows streamrip to use your Deezer account
# See https://github.com/nathom/streamrip/wiki/Finding-Your-Deezer-ARL-Cookie
# for instructions on how to find this
arl = ""
# This allows for free 320kbps MP3 downloads from Deezer
# If an arl is provided, deezloader is never used
use_deezloader = true
# This warns you when the paid deezer account is not logged in and rip falls
# back to deezloader, which is unreliable
deezloader_warnings = true

[soundcloud]
# Only 0 is available for now
quality = 0
# This changes periodically, so it needs to be updated
client_id = ""
app_version = ""

[youtube]
# Only 0 is available for now
quality = 0
# Download the video along with the audio
download_videos = false
# The path to download the videos to
video_downloads_folder = "C:\\Users\\usssser\\StreamripDownloads\\YouTubeVideos"

[database]
# Create a database that contains all the track IDs downloaded so far
# Any time a track logged in the database is requested, it is skipped
# This can be disabled temporarily with the --no-db flag
downloads_enabled = true
# Path to the downloads database 
downloads_path = "C:\\Users\\usssser\\AppData\\Roaming\\streamrip\\downloads.db"
# If a download fails, the item ID is stored here. Then, `rip repair` can be
# called to retry the downloads
failed_downloads_enabled = true
failed_downloads_path = "C:\\Users\\usssser\\AppData\\Roaming\\streamrip\\failed_downloads.db"

# Convert tracks to a codec after downloading them.
[conversion]
enabled = false
# FLAC, ALAC, OPUS, MP3, VORBIS, or AAC
codec = "ALAC"
# In Hz. Tracks are downsampled if their sampling rate is greater than this. 
# Value of 48000 is recommended to maximize quality and minimize space
sampling_rate = 48000
# Only 16 and 24 are available. It is only applied when the bit depth is higher
# than this value.
bit_depth = 24
# Only applicable for lossy codecs
lossy_bitrate = 320

# Filter a Qobuz artist's discography. Set to 'true' to turn on a filter.
# This will also be applied to other sources, but is not guaranteed to work correctly
[qobuz_filters]
# Remove Collectors Editions, live recordings, etc.
extras = false
# Picks the highest quality out of albums with identical titles.
repeats = false
# Remove EPs and Singles
non_albums = false
# Remove albums whose artist is not the one requested
features = false
# Skip non studio albums
non_studio_albums = false
# Only download remastered albums
non_remaster = false

[artwork]
# Write the image to the audio file
embed = false
# The size of the artwork to embed. Options: thumbnail, small, large, original.
# "original" images can be up to 30MB, and may fail embedding. 
# Using "large" is recommended.
embed_size = "large"
# If this is set to a value > 0, max(width, height) of the embedded art will be set to this value in pixels
# Proportions of the image will remain the same
embed_max_width = -1
# Save the cover image at the highest quality as a separate jpg file
save_artwork = true
# If this is set to a value > 0, max(width, height) of the saved art will be set to this value in pixels
# Proportions of the image will remain the same
saved_max_width = -1

[metadata]
# Sets the value of the 'ALBUM' field in the metadata to the playlist's name. 
# This is useful if your music library software organizes tracks based on album name.
set_playlist_to_album = true
# If part of a playlist, sets the `tracknumber` field in the metadata to the track's 
# position in the playlist instead of its position in its album
renumber_playlist_tracks = true
# The following metadata tags won't be applied
# See https://github.com/nathom/streamrip/wiki/Metadata-Tag-Names for more info
exclude = []

# Changes the folder and file names generated by streamrip.
[filepaths]
# Create folders for single tracks within the downloads directory using the folder_format
# template
add_singles_to_folder = false
# Available keys: "albumartist", "title", "year", "bit_depth", "sampling_rate",
# "id", and "albumcomposer"
folder_format = "{albumartist}/[{year}] {title}"
# Available keys: "tracknumber", "artist", "albumartist", "composer", "title",
# and "albumcomposer", "explicit"
track_format = "{tracknumber}. {title}"
# Only allow printable ASCII characters in filenames.
restrict_characters = false
# Truncate the filename if it is greater than this number of characters
# Setting this to false may cause downloads to fail on some systems
truncate_to = 120

# Last.fm playlists are downloaded by searching for the titles of the tracks
[lastfm]
# The source on which to search for the tracks.
source = "qobuz"
# If no results were found with the primary source, the item is searched for 
# on this one.
fallback_source = ""

[cli]
# Print "Downloading {Album name}" etc. to screen
text_output = true
# Show resolve, download progress bars
progress_bars = true
# The maximum number of search results to show in the interactive menu
max_search_results = 100

[misc]
# Metadata to identify this config file. Do not change.
version = "2.0.3"
# Print a message if a new version of streamrip is available 
check_for_updates = true

Operating System

Windows 11

streamrip version

2.0.4

Screenshots and recordings

[screenshot]

Additional context

No response

Luisfelipeberchielli commented 9 months ago

Same problem here.

nathom commented 9 months ago

Might be a CPU issue. How does CPU usage look while it's running?

nathom commented 9 months ago

Also see if lowering the maximum concurrent downloads value changes the numbers.

GGromadzki commented 9 months ago

At 5: [screenshot] At 7: [screenshot] At 14: [screenshot] At 14, with CPU usage (18-track album): [screenshot] The track order is messed up (not 1-2-3-4-5-...), so with a few albums at the same time it is somewhat random: a few tracks from the 1st album, a few from the 2nd, a few from the 3rd...

GGromadzki commented 9 months ago

After updating to 2.0.5, an album with 6 tracks: [screenshot] After downloading 3 tracks of 6: [screenshot]

After the download finishes (happens every time since 2.0.x):

Exception ignored in: <function _ProactorBasePipeTransport.__del__ at 0x0000025790E216C0>
Traceback (most recent call last):
  File "C:\Users\Grzegorz_PC\AppData\Local\Programs\Python\Python310\lib\asyncio\proactor_events.py", line 116, in __del__
    self.close()
  File "C:\Users\Grzegorz_PC\AppData\Local\Programs\Python\Python310\lib\asyncio\proactor_events.py", line 108, in close
    self._loop.call_soon(self._call_connection_lost, None)
  File "C:\Users\Grzegorz_PC\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 745, in call_soon
    self._check_closed()
  File "C:\Users\Grzegorz_PC\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 510, in _check_closed
    raise RuntimeError('Event loop is closed')
RuntimeError: Event loop is closed

(the same "Event loop is closed" traceback repeats several more times)

GGromadzki commented 9 months ago

EDIT: after upgrading Python from 3.10 to 3.12 (a lot of hassle due to the system PATH), the above errors are gone. Download speed is still capped at about 10-11 MB/s (all downloads concurrent). [screenshot]

NighthawkUK commented 9 months ago

@GGromadzki @nathom I think this is a Qobuz thing; I too had slower downloads, but speeds improved greatly by using Deezer URLs.

GGromadzki commented 9 months ago

Nope, it is not Qobuz. My speeds with Tidal: [screenshot]

Another app for Tidal: [screenshot]

endervad commented 9 months ago

I've just upgraded to 2.0.5, and I can confirm that speeds became much lower. My bandwidth is 100 Mbit/s, and before, it would be evenly split across all concurrent tracks. Now it's capped at around 10-30 Mbit/s total. This happens for me on Debian, on a separate home-server laptop with no heavy tasks. CPU load during a download is 150% out of 800%. I'm downloading from Qobuz, though as another user already mentioned, the source probably doesn't matter; I don't have other services to try.

nathom commented 9 months ago

@NighthawkUK @GGromadzki Can you try upgrading to Python 3.12 and installing streamrip there? It seems like thread context switching is the issue, and the newer version might help.

GGromadzki commented 9 months ago

I mentioned it a few days ago: I already have 3.12.1. Now I don't get the messages after finishing the download queue, but speeds are still capped. I will try it on the weekend on another CPU that is "pythonless" right now. [screenshot]

crankedguy commented 8 months ago

Same here, capped at 10-12 MB/s: with 6 concurrent downloads you get 6x2 MB/s, with 4 you get 4x3. Switching to 3.12 is not even possible, because search result lists are not even displayed; I had to switch to 3.11 for that (I did no further analysis, I sadly have no time for that right now). See also https://github.com/nathom/streamrip/issues/632

nathom commented 8 months ago

@crankedguy Can you try the branch for #648 and see if it improves Qobuz downloads on your machine?

crankedguy commented 8 months ago

Should I specifically test Qobuz downloads? Because this affects all downloads (at least Tidal as well).

crankedguy commented 8 months ago

@nathom My line is not completely free right now, and I can't cancel the process that will be using it for a few hours, but what I can tell you is that this version seems considerably faster: a quick test with 6 concurrent downloads on Qobuz used around 250 to 300 Mbit/s, which is approximately what was available on the line at the time. So this looks way better at first glance. I'd rather not test Tidal right now because I don't want to interfere with my other process.

nathom commented 8 months ago

@crankedguy Can you try experimenting with the chunk_size and yield_every variables to see the effect on download speeds? My internet is not fast enough to notice a difference.
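For context, these two knobs typically tune a loop shaped roughly like the sketch below: a chunked async download that periodically yields to the event loop. Only the names chunk_size and yield_every come from the comment above; the rest (the aiohttp-style client, the loop structure) is an assumption, not streamrip's actual implementation:

    import asyncio
    import aiohttp

    CHUNK_SIZE = 2 ** 17  # bytes per read; larger = fewer reads, chunkier progress
    YIELD_EVERY = 16      # yield control to the event loop every N chunks

    async def download(session: aiohttp.ClientSession, url: str, path: str) -> None:
        async with session.get(url) as resp:
            resp.raise_for_status()
            count = 0
            with open(path, "wb") as f:
                async for chunk in resp.content.iter_chunked(CHUNK_SIZE):
                    f.write(chunk)
                    count += 1
                    if count % YIELD_EVERY == 0:
                        # Yield so other concurrent downloads get scheduled.
                        # Yielding often smooths progress across tracks;
                        # yielding rarely reduces scheduling overhead.
                        await asyncio.sleep(0)

Raising chunk_size cuts per-read overhead; raising yield_every trades smoothness across concurrent tracks for throughput, which matches the jaggedness discussed later in this thread.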

crankedguy commented 8 months ago

Yeah, I'll see what I can do, but not today; for that testing to be effective I need an idle line.

crankedguy commented 8 months ago

By the way, do you expect a considerable effect from changing them from what you have implemented now? As I said, it already seemed to use the full available bandwidth, but I will have another look, as promised.

The thing is that I have to download a bigger playlist and have to restart it again and again: I fix one error until it "excepts or asserts" me out with the next one. This is very cumbersome, to say it nicely :) Did you ever consider bigger tasks running when you designed error handling this way? The app quits on pretty much every occasion possible (partly on purpose, partly due to implementation bugs), where in a lot of cases it doesn't have to.

Why do that? It is OK if you download the occasional file, but not for playlists, not for artist downloads, not for labels (which you removed, whether by error or on purpose I don't know), or anything that is more than one track or one album... honestly, not even for an album.

nathom commented 8 months ago

@crankedguy If you can point out where the errors are occurring I can take a look. It is supposed to gracefully handle exceptions and not quit the app.

crankedguy commented 8 months ago

Sorry to say, but I did not encounter a single graceful handling of any error; honestly, I'm not joking. OK, if you want to take a look and fix the app:

Tidal:

1. There are albums where the copyright field is empty, so resp.get("copyright") returns None, and you literally assert the user out and quit hard on this line:

    _copyright = typed(resp.get("copyright"), str)

Change it (in both occurrences) to the following in order to work:

    _copyright = None
    if resp.get("copyright"):
        _copyright = typed(resp.get("copyright"), str)


2. If an album has a single non-streamable file, you except the user out and quit hard instead of leaving that file be and downloading the rest. The user then has to download every other file individually, because otherwise he cannot download the album content at all. Very bad UX. (See the sketch at the end of this comment for the skip-and-continue behavior I mean.)

3. _api_request: you overwrite the limit parameter, hardcoded to 100. The user can call search with 1000 (the maximum allowed on Tidal is 300 I think; 300 works at least), but it does not matter, it gets overwritten. Change it so the default only applies when no params were passed:

    if params is None:
        params = {}
        params["limit"] = 100


4. last.fm playlists: I do not know for what use cases this "algorithm" should or can work. Maybe one can download some chart tracks with it, but with this approach it cannot lead to real results. I mean, come on, some parts of the application are quite sophisticated, but here you literally run a search with the title and artist and, if it finds "something", take the first entry of the result list as your track.
I'll tell you what happens in the real world with that. I have a playlist with over 900 electronic music tracks (where every other track is a "mix" or a "remix"). With your "algo" on Tidal (which has every single track of the playlist available and downloadable) I got a hit rate of 1%. One. If I used Qobuz as a fallback, I got a 100% hit rate, with 900 completely wrong tracks (of course). I implemented a real algorithm behind it and get a hit rate of 93% on Tidal alone, leaving Qobuz out of the equation. I won't write a PR, because it is partly specific to my setup and would involve refactoring I have no time for, but it is based on a generic algorithm that searches for and actually finds the correct track, not just ANY track. If you are interested I can outline the algorithm for you; if not, that is of course also OK with me.

5. Playlist parsing: there are cases where last.fm does not know a track, or rather has no link to it, so the track name on the website is not an <a href> but a <span>, which renders your regex useless. It then messes up the whole track list: because one href is missing, from the next title onwards each title gets the artist and each artist becomes the title. If you use an algorithm that actually looks for the correct track, this of course goes south. Change the regex to this in order to work (a quick check follows this list):
`title_tags = re.compile(r'(?:<a\s+href="[^"]+"\s+|<span\s+)title="([^"]+)"')`

6. On Windows, it does not even work with Python 3.12 here: it shows no menu, just a short blink. I had no time to investigate further because right now I need my downloads done.

7. There are strange async issues time and again, at least on Windows, with "Semaphore timeout period expired" errors. I have not yet figured out exactly when and why that happens either.
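A quick check of the corrected pattern from item 5. The HTML here is made up for illustration; only the regex itself comes from that item:

    import re

    # Matches the title attribute whether the track name is rendered as a
    # link (<a href=...>) or, when last.fm has no page for it, as a <span>.
    title_tags = re.compile(r'(?:<a\s+href="[^"]+"\s+|<span\s+)title="([^"]+)"')

    html = '<a href="https://www.last.fm/music/A/_/One" title="One"> <span title="Two">'
    print(title_tags.findall(html))  # -> ['One', 'Two']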

And in general, as said, I have not encountered a single graceful error since I started using it. Graceful would mean, for me: skip the file, log a warning or an error, and go on with whatever you are doing (except for really severe errors, of course). This is not happening, no matter what happens. Even a single 500 server error in one request of a playlist with hundreds of files is no reason to quit the application and make the user start the whole thing over (unless the error persists, of course). It is partly not really thought out, in my opinion. No offense, of course, it is your application. But to get bigger jobs done efficiently there are quite some things to be done, imho, and a bit more careful testing would also be beneficial. The copyright thing, for example, happens quite regularly.
I can give hints if wanted, but as said, I really don't intend to write PRs for this or to file countless reports one by one. I have to keep my "own" version anyway, because I am doing rather specific things like testing whether a file exists instead of using this db, etc.
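To illustrate what I mean by graceful (item 2 and the paragraph above): a minimal sketch, assuming tracks run as asyncio tasks. download_track is a hypothetical stand-in, not the app's actual API:

    import asyncio
    import logging

    logger = logging.getLogger(__name__)

    async def download_track(track: str) -> None:
        """Hypothetical stand-in for the real per-track download coroutine."""

    async def download_album(tracks: list[str]) -> None:
        results = await asyncio.gather(
            *(download_track(t) for t in tracks),
            return_exceptions=True,  # collect per-track failures instead of aborting
        )
        for track, result in zip(tracks, results):
            if isinstance(result, BaseException):
                # e.g. a non-streamable or geo-restricted track, or a lone 500
                logger.warning("Skipped %r: %s", track, result)

One failed track then costs a warning line, not the whole album.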

Cheers

crankedguy commented 8 months ago

Take this album as an example: https://tidal.com/browse/album/137689414. The first track is not available; it should just be skipped when downloading the album, otherwise you have to copy 14 links into a file or download them one by one. That is not efficient. And unavailable or geo-restricted files happen quite often, especially on Qobuz; I already did a fix for this in my own 1.9 version because it happened in hundreds of albums.

crankedguy commented 8 months ago

Re the branch and #648: it only looked good at first glance. I have now done some further, bigger, concurrent downloads, and this does not seem to be a reasonable approach, at least going by the download indicators. Pretty early on they start getting really jaggy/jumpy, with ups and downs in transfer speed (it is not my line; the other version I compare against is a constant, stream-like flow). There are weird pauses between the downloads, and the line is far from fully used, either. It seemed faster in total at first, but something does not look right; I don't know how to explain better how it looks. If you want me to test something in particular, tell me, but I sadly don't have the time to dig deeper into this problem myself either. I have had enough these last days, sorry.

Re the download process itself, 1.9 seems to work better and more smoothly overall.

nathom commented 8 months ago

@crankedguy Yes, the jaggedness is caused by the yield_every variable. Smaller values should make it smoother, but larger values might make it faster. I can't tell because my internet isn't fast enough.