alexgisby / imgur-album-downloader

Python script/class to download an entire Imgur album in one go into a folder of your choice.
MIT License
340 stars 59 forks source link

Each image is downloaded twice; I am giving the corrected script in the comment #37

Closed boywigh closed 5 years ago

boywigh commented 7 years ago

#!/usr/bin/env python3

# encoding: utf-8

""" imguralbum.py - Download a whole imgur album in one go.

Provides both a class and a command line utility in a single script to download Imgur albums.

MIT License Copyright Alex Gisby alex@solution10.com """

import math
import os
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
from collections import Counter

# Usage text for the command line utility. It is printed when the script is
# run without arguments and again after an ImgurAlbumException is reported.
help_message = """ Quickly and easily download an album from Imgur.

Format: $ python imguralbum.py [album URL] [destination folder]

Example: $ python imguralbum.py http://imgur.com/a/uOOju#6 /Users/alex/images

If you omit the dest folder name, the utility will create one with the same name as the album (for example for http://imgur.com/a/uOOju it'll create uOOju/ in the cwd) """

class ImgurAlbumException(Exception):
    """Error raised when an album URL is rejected or the album page cannot be read.

    Attributes:
        msg: The human-readable message passed to the constructor, or
            ``False`` when none was given.
    """

    def __init__(self, msg=False):
        # The constructor must be the dunder ``__init__``; a method merely
        # called ``init`` is never invoked and ``msg`` would stay unset.
        self.msg = msg

class ImgurAlbumDownloader:
    """Download every image of one Imgur album into a local folder.

    Constructing an instance validates the album URL, fetches the album's
    "blog layout" page and extracts the (hash, extension) pair of each image.
    ``save_images`` then downloads the files.

    Attributes:
        album_url: The URL passed to the constructor.
        protocol: ``"http"`` or ``"https"``, as captured from ``album_url``.
        album_key: The album identifier captured from ``album_url``. This is
            a plain string attribute. The former ``album_key()`` method was
            always shadowed by this attribute on a constructed instance and
            could never be called, so it has been dropped.
        imageIDst: Every (hash, extension) pair found in the page, duplicates
            included.
        imageIDs: The unique (hash, extension) pairs in page order.
        cnt: ``Counter`` mapping each extension to its number of occurrences
            in ``imageIDs``.
        image_callbacks: Functions called before each image is downloaded.
        complete_callbacks: Functions called once all images were processed.
    """

    # Album/gallery URL; group 1 is the protocol, group 4 the album key.
    _ALBUM_URL_RE = re.compile(
        r"(https?)://(www\.)?(?:m\.)?imgur\.com/(a|gallery)/([a-zA-Z0-9]+)(#[0-9]+)?"
    )
    # One image entry of the JSON embedded in the album page.
    _IMAGE_RE = re.compile(r'\{"hash":"([a-zA-Z0-9]+)".*?"ext":"(\.[a-zA-Z0-9]+)"')

    def __init__(self, album_url):
        """Validate ``album_url`` and read the list of images of the album.

        Raises:
            ImgurAlbumException: If the URL is not an Imgur album URL, or the
                album page cannot be fetched with HTTP status 200.
        """
        self.album_url = album_url

        # Callback members:
        self.image_callbacks = []
        self.complete_callbacks = []

        # Check the URL is actually imgur:
        match = self._ALBUM_URL_RE.match(album_url)
        if not match:
            raise ImgurAlbumException("URL must be a valid Imgur Album")

        self.protocol = match.group(1)
        self.album_key = match.group(4)

        # Read the no-script version of the page for all the images:
        fullListURL = "http://imgur.com/a/" + self.album_key + "/layout/blog"

        try:
            self.response = urllib.request.urlopen(url=fullListURL)
        except urllib.error.HTTPError as e:
            raise ImgurAlbumException(
                "Error reading Imgur: Error Code %d" % e.code
            ) from e
        except OSError as e:
            # URLError and socket errors carry no HTTP status code, so report
            # the underlying reason instead of reading a missing ``e.code``.
            raise ImgurAlbumException(
                "Error reading Imgur: %s" % getattr(e, "reason", e)
            ) from e

        try:
            response_code = self.response.getcode()
            if response_code != 200:
                raise ImgurAlbumException(
                    "Error reading Imgur: Error Code %d" % response_code
                )
            html = self.response.read().decode('utf-8')
        finally:
            # Always release the connection, even when the status is rejected.
            self.response.close()

        # Read in the images now so we can get stats and stuff:
        self.imageIDst = self._IMAGE_RE.findall(html)
        self.imageIDs = self._extract_images(html)
        self.cnt = Counter(ext for _, ext in self.imageIDs)

    @classmethod
    def _extract_images(cls, html):
        """Return the unique (hash, extension) pairs of ``html`` in page order."""
        # dict.fromkeys removes the duplicates (each image is listed more
        # than once in the page) while keeping first-seen order; a plain set
        # would scramble the album order and thus the numbered file names.
        return list(dict.fromkeys(cls._IMAGE_RE.findall(html)))

    def num_images(self):
        """
        Returns the number of images that are present in this album.
        """
        return len(self.imageIDs)

    def list_extensions(self):
        """
        Returns a list of (extension, count) tuples in descending order of count.
        """
        return self.cnt.most_common()

    def on_image_download(self, callback):
        """
        Allows you to bind a function that will be called just before an image is
        about to be downloaded. You'll be given the 1-indexed position of the image, its URL
        and its destination file in the callback like so:
            my_awesome_callback(1, "http://i.imgur.com/fGWX0.jpg", "~/Downloads/1-fGWX0.jpg")
        """
        self.image_callbacks.append(callback)

    def on_complete(self, callback):
        """
        Allows you to bind onto the end of the process, displaying any lovely messages
        to your users, or carrying on with the rest of the program. Whichever.
        """
        self.complete_callbacks.append(callback)

    def save_images(self, foldername=False):
        """
        Saves the images from the album into a folder given by foldername.
        If no foldername is given, it'll use the cwd and the album key.
        And if the folder doesn't exist, it'll try and create it.

        Files that already exist are skipped. A failed download is reported
        on stdout and does not stop the remaining downloads.
        """
        # Try and create the album folder:
        albumFolder = foldername if foldername else self.album_key
        os.makedirs(albumFolder, exist_ok=True)

        # Zero-pad the counter to the number of digits of the image count so
        # that the files sort in album order.
        width = len(str(len(self.imageIDs)))

        # And finally loop through and save the images:
        for counter, (image_hash, ext) in enumerate(self.imageIDs, start=1):
            filename = image_hash + ext
            image_url = "http://i.imgur.com/" + filename
            path = os.path.join(
                albumFolder, "%0*d-%s" % (width, counter, filename)
            )

            # Run the callbacks:
            for fn in self.image_callbacks:
                fn(counter, image_url, path)

            # Actually download the thing
            if os.path.isfile(path):
                print("Skipping, already exists.")
                continue

            try:
                urllib.request.urlretrieve(image_url, path)
            except Exception:
                # Best effort: report and carry on with the next image.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                print("Download failed.")
                # Remove a partial file, if one was created at all.
                if os.path.isfile(path):
                    os.remove(path)

        # Run the complete callbacks:
        for fn in self.complete_callbacks:
            fn()

# Command line entry point: download the album given as the first argument
# into the optional destination folder given as the second argument.
if __name__ == '__main__':
    args = sys.argv

    if len(args) == 1:
        # Print out the help message and exit:
        print(help_message)
        sys.exit()

    try:
        # Fire up the class:
        downloader = ImgurAlbumDownloader(args[1])

        print("Found {0} images in album".format(downloader.num_images()))

        for extension, count in downloader.list_extensions():
            print("Found {0} files with {1} extension".format(count, extension))

        # Called when an image is about to download:
        def print_image_progress(index, url, dest):
            print("Downloading Image %d" % index)
            print("    %s >> %s" % (url, dest))
        downloader.on_image_download(print_image_progress)

        # Called when the downloads are all done.
        def all_done():
            print("")
            print("Done!")
        downloader.on_complete(all_done)

        # Work out if we have a foldername or not. Extra trailing arguments
        # no longer cause the given folder to be ignored.
        albumFolder = args[2] if len(args) >= 3 else False

        # Enough talk, let's save!
        downloader.save_images(albumFolder)

    except ImgurAlbumException as e:
        # format() rather than "+" so a non-string msg cannot raise TypeError.
        print("Error: {0}".format(e.msg))
        print("")
        print("How to use")
        print("=============")
        print(help_message)
        sys.exit(1)

    sys.exit()