plotly / orca

Command line application for generating static images of interactive plotly charts
MIT License
292 stars 40 forks source link

Orca proceses don't close when using Plotlys "write_image()" functionality to store static images from subprocess #351

Open Amuoeba opened 3 years ago

Amuoeba commented 3 years ago

In my program I pass a pandas dataframes to a multiprocessing.Pool of workers. Each worker then creates a plotly figure and stores it on disk via fig.write_image("img_name").

If I use the orca engine fig.write_image(name,engine="orca") for storing images the subproceses created by the orca engine never close and after some time I start getting error same as this issue.

I have foudn out that by using the kaleido engine this issue is not present. Below is the code that reproduces the problem. To see how many proceses are still left running after main python process finishes open htop in terminal and press F4 and type orca to filter the proceses.

Does anyone know is this is a known issue of orca or am I doing something wrong in my multiprocessing code.

Orca version: plotly-orca 1.3.1

# General imports
import multiprocessing
import os
import time
import psutil
import pandas as pd
import random
import plotly.express as px
import plotly
import queue
# Project specific imports

# Imports from internal libraries

# Typing imports
from typing import TYPE_CHECKING

# if TYPE_CHECKING:

def rand_data_gen(size):
    df = pd.DataFrame(
        dict([(x, [random.random() for _ in range(size)]) for x in range(size)])
    )
    return df

def work_2(qe):
    item = qe.get(True)
    qdf = item[0]
    name = item[1]
    print(os.getpid(), "got", name)
    fig = px.imshow(qdf)
    fig.write_image(name)
    return True

def worker_main(qe):
    print(os.getpid(), "working")
    while True:
        item = qe.get(True)
        qdf = item[0]
        name = item[1]
        print(os.getpid(), "got", name)

        fig = px.imshow(qdf)
        fig.write_image(name,engine="orca")

if __name__ == '__main__':
    print(f'Running {__file__}')
    print(f"Script dir:  {os.path.dirname(os.path.abspath(__file__))}")
    print(f"Working dir: {os.path.abspath(os.getcwd())}")
    print(f"System memory: {psutil.virtual_memory()}")

    save_path = "test_images_2/"

    end_evt = multiprocessing.Event()
    the_queue = multiprocessing.Queue()

    the_pool = multiprocessing.Pool(5, worker_main, (the_queue,))
    # the_pool = multiprocessing.Pool(5, work_2, (the_queue,))
    i = 0
    for i in range(100):
        df = rand_data_gen(100)
        i += 1
        the_queue.put((df, f"{save_path}image{i}.png"))
        print(the_queue.qsize())

    count_sleep = 0

    while not the_queue.empty():
        count_sleep += 1
        print(f"\rWaiting for que to be empty: {count_sleep}", end="")
        time.sleep(1)

    end_evt.set()