upbit / pixivpy

Pixiv API for Python
https://pypi.org/project/PixivPy3/#files
The Unlicense
1.77k stars 149 forks source link

download all images from user #236

Open compwron opened 2 years ago

compwron commented 2 years ago

Here is some messy code that uses this library to download all images for a particular user.

# First run:
#   pip install pixivpy-async
#   pip install requests
# The refresh token will eventually expire; to get a new one, follow the doc at
# https://gist.github.com/ZipFile/c9ebedb224406f4f11845ab700124362
# NOTE: to see NSFW art, log into your account and edit "Viewing restriction"
# at https://www.pixiv.net/setting_user.php

# Pixiv OAuth refresh token used for login (fill in before running).
TOKEN=""
# Maximum number of empty-page retries before giving up (used by gettem()).
ITER_LIMIT = 10
# Number of illustrations the Pixiv API returns per page.
ILLUSTRATIONS_PAGE = 30

import asyncio
from os.path import exists, splitext
from queue import Empty

from pixivpy_async import *

def calc_next_url(current_user_id, current_offset):
    """Build the app-api URL for one page of a user's illustrations."""
    base = "https://app-api.pixiv.net/v1/user/illusts"
    query = f"user_id={current_user_id}&filter=for_ios&type=illust&offset={current_offset}"
    return f"{base}?{query}"

async def download(aapi, illust):
    """Download every image belonging to a single illustration post.

    Args:
        aapi: AppPixivAPI-like client exposing an async ``download(url, name=...)``.
        illust: illustration dict from the Pixiv API; expected keys are
            ``create_date``, ``id``, ``user``, and either ``meta_single_page``
            (one image) or ``meta_pages`` (multi-image post).
    """
    create_date = illust["create_date"][:10].replace("-", "_")
    illust_id = illust["id"]  # renamed: don't shadow the builtin ``id``
    artist = f"{illust['user']['id']} {illust['user']['name']} {illust['user']['account']}"
    if illust.get("meta_single_page"):
        url = illust["meta_single_page"]["original_image_url"]
        # BUG FIX: the old name "{date}_01" had no post id and no extension,
        # so different posts created on the same day overwrote each other.
        await aapi.download(url, name=f"{create_date}_{illust_id}_01{splitext(url)[1]}")
        print(f"downloaded {artist} post {illust_id} image 1")
    elif illust.get("meta_pages"):
        for index, page in enumerate(illust["meta_pages"], start=1):
            url = page["image_urls"]["original"]
            await aapi.download(url, name=f"{create_date}_{illust_id}_{index:02d}{splitext(url)[1]}")
            print(f"downloaded {artist} post {illust_id} image {index}")
    else:
        # Neither meta_single_page nor meta_pages: there is nothing to fetch.
        # (The old message "already downloaded" was misleading.)
        print(f"{illust_id} has no downloadable images, skipping")

async def gettem(aapi, artist_id, current_offset, iter=0):
    """Fetch one page of a user's illustrations and download every image on it.

    When the API returns an empty ``illusts`` list (usually rate limiting),
    sleeps and retries up to ITER_LIMIT times before raising.

    Args:
        aapi: authenticated AppPixivAPI client.
        artist_id: Pixiv user id whose illustrations are fetched.
        current_offset: pagination offset, in multiples of ILLUSTRATIONS_PAGE.
        iter: current retry count (internal; callers pass 0).

    Raises:
        Exception: when the page is still empty after ITER_LIMIT retries.
    """
    print("Next page...")
    next_url = calc_next_url(artist_id, current_offset)
    print(next_url)
    await asyncio.sleep(30)  # crude throttle to avoid rate limiting
    next_qs = aapi.parse_qs(next_url)
    print(next_qs)
    json_result = await aapi.user_illusts(**next_qs)
    print("next url?", json_result.next_url, json_result["next_url"])
    if not json_result["illusts"]:
        print(f"Rate limited? Sleeping... iter: {iter} of limit {ITER_LIMIT}")
        await asyncio.sleep(10)
        if iter > ITER_LIMIT:
            raise Exception(f"nothing in illusts: {json_result}")
        # BUG FIX: the recursive retry was a coroutine that was never awaited,
        # so it never actually ran; also return afterwards instead of falling
        # through to iterate the (empty) illusts list below.
        await gettem(aapi, artist_id, current_offset - ILLUSTRATIONS_PAGE, iter + 1)
        return
    for illust in json_result["illusts"]:
        await download(aapi, illust)

async def main():
    """Download every illustration of ``artist_id``, one page at a time."""
    artist_id = 151689
    # Pages are ILLUSTRATIONS_PAGE items long; the first page (offset 0)
    # is fetched directly below, so paging starts at one page in.
    current_offset = ILLUSTRATIONS_PAGE
    async with PixivClient() as client:
        aapi = AppPixivAPI(client=client)
        await aapi.login(refresh_token=TOKEN)
        # First page: fetched without an explicit offset.
        json_result = await aapi.user_illusts(artist_id)
        for illust in json_result["illusts"]:
            await download(aapi, illust)
        print("next url?", json_result.next_url, json_result["next_url"])
        print(json_result["next_url"])
        # Remaining pages: advance the offset until the API errorsplodes.
        while True:
            print("still true")
            await gettem(aapi, artist_id, current_offset)
            current_offset += ILLUSTRATIONS_PAGE


# Guard the entry point so importing this module does not start downloads.
if __name__ == "__main__":
    asyncio.run(main())
eggplants commented 2 years ago

My example: https://github.com/eggplants/pixiv-bulk-downloader/blob/eaf30d6f65fc2a1db7452e0cefee1c544e19bebe/pbd/base.py#L32-L85

Xdynix commented 2 years ago

Not sure of the purpose of this thread, but here is mine. It uses tqdm to create a nice-looking progress bar.

Code

```python
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import requests
from tqdm import tqdm

from pixivpy3 import AppPixivAPI

USER_ID = '15919563'
DOWNLOAD_DIR = Path(r'SOME-WHERE')
REFRESH_TOKEN_FILE = Path(r'SOME-WHERE\refresh-token.txt')


def auth_pixiv_api(api: AppPixivAPI, refresh_token_file: Path):
    with refresh_token_file.open('rt') as f:
        refresh_token = f.read().strip()
    api.auth(refresh_token=refresh_token)
    with refresh_token_file.open('wt') as f:
        print(api.refresh_token, file=f)


def download(url: str, file: Path, headers=None, force=False):
    if file.exists() and not force:
        return
    with requests.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        with tqdm(
            total=int(response.headers.get('Content-Length', 0)),
            desc=f'Download: {file.name}',
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
            leave=False,
        ) as progress:
            file.parent.mkdir(exist_ok=True)
            with file.open('wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if not chunk:
                        continue
                    f.write(chunk)
                    progress.update(len(chunk))


def main():
    api = AppPixivAPI()
    auth_pixiv_api(api, REFRESH_TOKEN_FILE)
    with ThreadPoolExecutor(
        max_workers=5,
        initializer=tqdm.set_lock,
        initargs=(tqdm.get_lock(),),
    ) as executor:
        qs = {'user_id': USER_ID}
        root = DOWNLOAD_DIR / USER_ID
        while qs:
            json_result = api.user_illusts(**qs)
            qs = api.parse_qs(json_result.next_url)
            for illust in json_result.illusts:
                if illust.type == 'ugoira':
                    img_urls = []  # Skip ugoira
                elif illust.page_count == 1:
                    img_urls = [illust.meta_single_page.original_image_url]
                else:
                    img_urls = [
                        page.image_urls.original for page in illust.meta_pages
                    ]
                for url in img_urls:
                    executor.submit(
                        download,
                        url,
                        root / os.path.basename(url),
                        headers={'Referer': 'https://app-api.pixiv.net/'},
                        force=True,
                    )


if __name__ == '__main__':
    main()
```

I used to have a complex crawler that can even convert ugoira to gif, but now I don't use it anymore, so I don't continue to maintain it.

eggplants commented 2 years ago

@compwron Did you want a method to "download all images from user", or did you want to know how to implement it with pixivpy?