omnilib / aiomultiprocess

Take a modern Python codebase to the next level of performance.
https://aiomultiprocess.omnilib.dev

TypeError: cannot pickle 'TaskStepMethWrapper' object #208

Open · allrobot opened 5 months ago

allrobot commented 5 months ago

Description

Running the code reports:

C:\ProgramData\anaconda3\envs\python310\python.exe C:\Users\Administrator\Personal_scripts\pythonProject\temp.py 
Traceback (most recent call last):
  File "C:\ProgramData\anaconda3\envs\python310\lib\multiprocessing\queues.py", line 245, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "C:\ProgramData\anaconda3\envs\python310\lib\multiprocessing\reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'TaskStepMethWrapper' object
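
This error comes from pickling, not from the HTTP code itself: multiprocessing feeds queue items through _ForkingPickler, and a live aiohttp.ClientSession references the parent's running event loop and open sockets, which cannot be pickled. A minimal sketch that demonstrates the same failure directly (an assumption on my part: pickling a live session should raise a TypeError similar to the one in the traceback):

import asyncio
import pickle

import aiohttp

async def main() -> None:
    async with aiohttp.ClientSession() as session:
        # A live session references the running event loop, its pending-task
        # callbacks, and open sockets; none of these can be pickled, so we
        # expect a TypeError comparable to the one in the traceback above.
        try:
            pickle.dumps(session)
        except TypeError as exc:
            print(exc)

asyncio.run(main())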

Code:

import asyncio
import os
from urllib.parse import unquote, urlparse

import aiohttp
import aiomultiprocess
from bs4 import BeautifulSoup

main_url = "http://am.adianshi.com:6805"

download_dict = {}
# save_path = os.path.join(os.getcwd(), "download")
save_path = r"D:\BaiduNetdiskDownload\开卡教程"

async def get_detail_url(base_url: str, html: str) -> list[str]:
    # Parse the directory listing: record file links for download and
    # return the sub-folder links so the caller can crawl deeper.
    soup = BeautifulSoup(html, "lxml")
    file_hrefs = soup.select("div div li[class='item file'] a[href]")
    # Map the URL path onto a local folder under save_path.
    curlink = os.path.join(save_path, urlparse(unquote(base_url)).path[1:].replace('/', '\\'))
    download_dict[curlink] = []
    for file_href in file_hrefs:
        link = "http://am.adianshi.com:6805" + unquote(file_href['href'])
        download_dict[curlink].append(link)
    folder_hrefs = soup.select("div div li[class='item folder'] a[href]")

    item_folder = []
    for folder_href in folder_hrefs:
        link = "http://am.adianshi.com:6805" + folder_href['href']
        item_folder.append(link)

    # print(item_folder)
    return item_folder

async def fetch_detail(url: str, aiohttp_session: aiohttp.ClientSession, pool: aiomultiprocess.Pool):
    async with aiohttp_session.get(url) as response:
        html = await response.text()
        # Sub-folder links are collected but not yet crawled recursively.
        item_folder = await get_detail_url(url, html)

async def aiomultiprocess_main():
    async with aiohttp.ClientSession() as aiohttp_session:
        async with aiomultiprocess.Pool() as pool:
            # Both aiohttp_session and pool are bound to the parent process's
            # event loop and cannot be pickled; passing them as args to a
            # worker is what triggers the traceback above.
            task = pool.apply(fetch_detail, args=(main_url, aiohttp_session, pool))
            await task
            print(download_dict)

if __name__ == "__main__":
    asyncio.run(aiomultiprocess_main())
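
For what it's worth, a likely cause is that Pool.apply pickles the target coroutine function and its arguments to ship them to a worker process, and neither the ClientSession nor the Pool can cross that boundary. A minimal sketch of a workaround, assuming each worker opens its own session and returns plain picklable data (fetch_html is a hypothetical helper, not part of aiomultiprocess):

import asyncio

import aiohttp
import aiomultiprocess

main_url = "http://am.adianshi.com:6805"

async def fetch_html(url: str) -> str:
    # Only the picklable URL string crosses the process boundary; the
    # session is created inside the worker, on the worker's own loop.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            return await response.text()

async def main() -> None:
    async with aiomultiprocess.Pool() as pool:
        html = await pool.apply(fetch_html, args=(main_url,))
        print(len(html))

if __name__ == "__main__":
    asyncio.run(main())

Note too that download_dict lives in the parent process: a worker mutates its own copy, so results need to be returned from the worker (or sent through a queue) rather than collected in a module-level dict.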
