KichangKim / DeepDanbooru

AI based multi-label girl image classification system, implemented by using TensorFlow.
MIT License
2.58k stars 258 forks source link

Vram usage too large #76

Open CrazyBoyM opened 1 year ago

CrazyBoyM commented 1 year ago

In my computer(A5000 card), it takes 19G VRAM. is it possible to reduce it? here is my code:

# from AUTOMATC1111
# maybe modified by Nyanko Lepsoni
# modified by crosstyan
import os.path
import re
import tempfile
import argparse
import glob
import zipfile
import deepdanbooru as dd
import tensorflow as tf
import numpy as np
import gradio as gr

from basicsr.utils.download_util import load_file_from_url
from PIL import Image
from tqdm import tqdm

re_special = re.compile(r"([\\()])")

def get_deepbooru_tags_model(model_path: str):
    if not os.path.exists(os.path.join(model_path, "project.json")):
        is_abs = os.path.isabs(model_path)
        if not is_abs:
            model_path = os.path.abspath(model_path)

        load_file_from_url(
            r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
            model_path,
        )
        with zipfile.ZipFile(
            os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r"
        ) as zip_ref:
            zip_ref.extractall(model_path)
        os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))

    tags = dd.project.load_tags_from_project(model_path)
    model = dd.project.load_model_from_project(model_path, compile_model=False)
    return model, tags

def get_deepbooru_tags_from_model(
    model,
    tags,
    pil_image,
    threshold,
    alpha_sort=False,
    use_spaces=True,
    use_escape=True,
    include_ranks=False,
):
    width = model.input_shape[2]
    height = model.input_shape[1]
    image = np.array(pil_image)
    image = tf.image.resize(
        image,
        size=(height, width),
        method=tf.image.ResizeMethod.AREA,
        preserve_aspect_ratio=True,
    )
    image = image.numpy()  # EagerTensor to np.array
    image = dd.image.transform_and_pad_image(image, width, height)
    image = image / 255.0
    image_shape = image.shape
    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))

    y = model.predict(image)[0]

    result_dict = {}

    for i, tag in enumerate(tags):
        result_dict[tag] = y[i]

    unsorted_tags_in_theshold = []
    result_tags_print = []
    for tag in tags:
        if result_dict[tag] >= threshold:
            if tag.startswith("rating:"):
                continue
            unsorted_tags_in_theshold.append((result_dict[tag], tag))
            result_tags_print.append(f"{result_dict[tag]} {tag}")

    # sort tags
    result_tags_out = []
    sort_ndx = 0
    if alpha_sort:
        sort_ndx = 1

    # sort by reverse by likelihood and normal for alpha, and format tag text as requested
    unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort))
    for weight, tag in unsorted_tags_in_theshold:
        tag_outformat = tag
        if use_spaces:
            tag_outformat = tag_outformat.replace("_", " ")
        if use_escape:
            tag_outformat = re.sub(re_special, r"\\\1", tag_outformat)
        if include_ranks:
            tag_outformat = f"({tag_outformat}:{weight:.3f})"

        result_tags_out.append(tag_outformat)

    # print("\n".join(sorted(result_tags_print, reverse=True)))

    return ", ".join(result_tags_out)

script_path = os.path.realpath(__file__)
model_path = os.path.join(os.path.dirname(script_path), "deepdanbooru-models")
model, tags = get_deepbooru_tags_model(model_path)
def image2prompt(image):
    prompt = get_deepbooru_tags_from_model(
        model,
        tags,
        image,
        0.75,
        alpha_sort=False,
        use_spaces=True,
        use_escape=True,
        include_ranks=False,
    )
    return prompt

if __name__ == "__main__":
    # gradio上传一张图像并读取
    image = gr.inputs.Image(label="image") 
    # gradio输出一串标签
    output = gr.outputs.Textbox(label="Prompt Text")

    # gradio界面
    interface = gr.Interface(fn=image2prompt, inputs=image, outputs=output, title="DeepDanbooru Prompt Generator", description="Upload an image to generate a prompt")
    interface.launch(server_port=6006)
KichangKim commented 1 year ago

Unfortunately, its model size is fixed. But I think 19GB is too big. In my environment, it takes only 1~2GB when estimating (NVIDIA Geforce 3070).

CrazyBoyM commented 1 year ago

Unfortunately, its model size is fixed. But I think 19GB is too big. In my environment, it takes only 1~2GB when estimating (NVIDIA Geforce 3070).

thanks for reply.Maybe it was caused by my code.

toriato commented 1 year ago

Tensorflow maps all VRAMs at startup by default. This could be the cause of the problem.

https://github.com/toriato/stable-diffusion-webui-wd14-tagger/blob/aa51c991b9bdf4d1a956f195aa642194222dc424/tagger/interrogator.py#L17-L21