wkpark / sd-webui-model-mixer

Checkpoint model mixer/merger extension
GNU Affero General Public License v3.0
105 stars 4 forks source link

[ENHANCEMENT] More Image Score Classifiers - A Guide #148

Open Koitenshin opened 5 months ago

Koitenshin commented 5 months ago

EDIT June 6th, 2024: I found a new one I recommend more than the others, with the same install instructions: make the folder, copy the init file, make the script, and copy the code into it. The code is in the second comment.

If you want more image score classifiers, do the following:

1) Make 3 new folders under "sd-webui-model-mixer\sd_modelmixer\classifiers" and name them: artwork, shadow_v2, & shadow_v2_strict

2) Copy the "__init__.py" file from "classifiers" into each folder you just made.

3) Make an empty text file in each folder named similarly to said folder: score_artwork.py, score_shadow_v2.py, & score_shadow_v2_strict.py

Here's the code that goes in "score_artwork.py"

# based on https://github.com/WhiteWipe/sd-webui-bayesian-merger/blob/main/sd_webui_bayesian_merger/models/ShadowScore.py
import os
import safetensors
import torch

from huggingface_hub import hf_hub_download
from modules import devices
from PIL import Image
from transformers import pipeline, AutoConfig, AutoProcessor, ConvNextV2ForImageClassification

# Module-level setup: runs once at import time and performs Hugging Face Hub
# access, so importing this script needs the files to be obtainable.

# Fetch the safetensors weights file for the Muinez/artwork-scorer repo.
pathname = hf_hub_download(repo_id="Muinez/artwork-scorer", filename="model.safetensors")

# NOTE(review): `import safetensors` alone does not import the
# `safetensors.torch` submodule; this presumably works because another import
# (e.g. transformers) has already loaded it -- confirm.
statedict = safetensors.torch.load_file(pathname)

# Build the classifier from the repo's config plus the explicitly loaded
# state dict (no pretrained name is given to from_pretrained for the model).
config = AutoConfig.from_pretrained(pretrained_model_name_or_path="Muinez/artwork-scorer")
model = ConvNextV2ForImageClassification.from_pretrained(pretrained_model_name_or_path=None, state_dict=statedict, config=config)
# Processor that is handed to the pipeline as its image_processor in score().
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path="Muinez/artwork-scorer")

def score(image, prompt="", use_cuda=True):
    """Score an image with the Muinez/artwork-scorer classifier.

    Args:
        image: A ``PIL.Image.Image``, a path to an existing image file, or
            any other object, which is handed to the pipeline unchanged.
        prompt: Not used by this scorer.
        use_cuda: Run inference on ``cuda:0`` when true; otherwise convert
            the model to float32 and run on the CPU.

    Returns:
        The value the pipeline reports for the ``'score'`` label.

    Raises:
        FileNotFoundError: ``image`` is a string that is not an existing file.
        IndexError: The pipeline output contains no ``'score'`` label.
    """
    # Resolve the input before touching the model so a bad path fails fast
    # with a clear error instead of an unbound local further down.
    if isinstance(image, str):
        if not os.path.isfile(image):
            raise FileNotFoundError(f"image file not found: {image!r}")
        pil_image = Image.open(image)
    else:
        # PIL images and any other pipeline-compatible input pass through.
        pil_image = image

    device = "cuda:0" if use_cuda else "cpu"
    try:
        if use_cuda:
            model.to("cuda")
        else:
            model.float()
            model.to("cpu")

        pipe = pipeline("image-classification", model=model, image_processor=processor, device=device)
        predictions = pipe(images=[pil_image])[0]
    finally:
        # Always hand the GPU back, even when inference raises.
        if use_cuda:
            model.to("cpu")
        devices.torch_gc()

    matches = [p['score'] for p in predictions if p['label'] == 'score']
    if not matches:
        raise IndexError("label 'score' missing from classifier output")
    result = matches[0]
    print(" > score =", result)

    return result

Here's the code that goes in "score_shadow_v2.py"

# based on https://github.com/WhiteWipe/sd-webui-bayesian-merger/blob/main/sd_webui_bayesian_merger/models/ShadowScore.py
import os
import safetensors
import torch

from huggingface_hub import hf_hub_download
from modules import devices
from PIL import Image
from transformers import pipeline, AutoConfig, AutoProcessor, ViTForImageClassification

# Module-level setup: runs once at import time and performs Hugging Face Hub
# access, so importing this script needs the files to be obtainable.

# Fetch the safetensors weights file for shadowlilac/aesthetic-shadow-v2.
pathname = hf_hub_download(repo_id="shadowlilac/aesthetic-shadow-v2", filename="model.safetensors")

# NOTE(review): `import safetensors` alone does not import the
# `safetensors.torch` submodule; this presumably works because another import
# (e.g. transformers) has already loaded it -- confirm.
statedict = safetensors.torch.load_file(pathname)

# Build the classifier from the repo's config plus the explicitly loaded
# state dict (no pretrained name is given to from_pretrained for the model).
config = AutoConfig.from_pretrained(pretrained_model_name_or_path="shadowlilac/aesthetic-shadow-v2")
model = ViTForImageClassification.from_pretrained(pretrained_model_name_or_path=None, state_dict=statedict, config=config)
# Processor that is handed to the pipeline as its image_processor in score().
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path="shadowlilac/aesthetic-shadow-v2")

def score(image, prompt="", use_cuda=True):
    """Score an image with the shadowlilac/aesthetic-shadow-v2 classifier.

    Args:
        image: A ``PIL.Image.Image``, a path to an existing image file, or
            any other object, which is handed to the pipeline unchanged.
        prompt: Not used by this scorer.
        use_cuda: Run inference on ``cuda:0`` when true; otherwise convert
            the model to float32 and run on the CPU.

    Returns:
        The value the pipeline reports for the ``'hq'`` label.

    Raises:
        FileNotFoundError: ``image`` is a string that is not an existing file.
        IndexError: The pipeline output contains no ``'hq'`` label.
    """
    # Resolve the input before touching the model so a bad path fails fast
    # with a clear error instead of an unbound local further down.
    if isinstance(image, str):
        if not os.path.isfile(image):
            raise FileNotFoundError(f"image file not found: {image!r}")
        pil_image = Image.open(image)
    else:
        # PIL images and any other pipeline-compatible input pass through.
        pil_image = image

    device = "cuda:0" if use_cuda else "cpu"
    try:
        if use_cuda:
            model.to("cuda")
        else:
            model.float()
            model.to("cpu")

        pipe = pipeline("image-classification", model=model, image_processor=processor, device=device)
        predictions = pipe(images=[pil_image])[0]
    finally:
        # Always hand the GPU back, even when inference raises.
        if use_cuda:
            model.to("cpu")
        devices.torch_gc()

    matches = [p['score'] for p in predictions if p['label'] == 'hq']
    if not matches:
        raise IndexError("label 'hq' missing from classifier output")
    result = matches[0]
    print(" > score =", result)

    return result

And here's the code that goes in "score_shadow_v2_strict.py"

# based on https://github.com/WhiteWipe/sd-webui-bayesian-merger/blob/main/sd_webui_bayesian_merger/models/ShadowScore.py
import os
import safetensors
import torch

from huggingface_hub import hf_hub_download
from modules import devices
from PIL import Image
from transformers import pipeline, AutoConfig, AutoProcessor, ViTForImageClassification

# Module-level setup: runs once at import time and performs Hugging Face Hub
# access, so importing this script needs the files to be obtainable.

# Fetch the safetensors weights file for shadowlilac/aesthetic-shadow-v2-strict.
pathname = hf_hub_download(repo_id="shadowlilac/aesthetic-shadow-v2-strict", filename="model.safetensors")

# NOTE(review): `import safetensors` alone does not import the
# `safetensors.torch` submodule; this presumably works because another import
# (e.g. transformers) has already loaded it -- confirm.
statedict = safetensors.torch.load_file(pathname)

# Build the classifier from the repo's config plus the explicitly loaded
# state dict (no pretrained name is given to from_pretrained for the model).
config = AutoConfig.from_pretrained(pretrained_model_name_or_path="shadowlilac/aesthetic-shadow-v2-strict")
model = ViTForImageClassification.from_pretrained(pretrained_model_name_or_path=None, state_dict=statedict, config=config)
# Processor that is handed to the pipeline as its image_processor in score().
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path="shadowlilac/aesthetic-shadow-v2-strict")

def score(image, prompt="", use_cuda=True):
    """Score an image with the shadowlilac/aesthetic-shadow-v2-strict classifier.

    Args:
        image: A ``PIL.Image.Image``, a path to an existing image file, or
            any other object, which is handed to the pipeline unchanged.
        prompt: Not used by this scorer.
        use_cuda: Run inference on ``cuda:0`` when true; otherwise convert
            the model to float32 and run on the CPU.

    Returns:
        The value the pipeline reports for the ``'hq'`` label.

    Raises:
        FileNotFoundError: ``image`` is a string that is not an existing file.
        IndexError: The pipeline output contains no ``'hq'`` label.
    """
    # Resolve the input before touching the model so a bad path fails fast
    # with a clear error instead of an unbound local further down.
    if isinstance(image, str):
        if not os.path.isfile(image):
            raise FileNotFoundError(f"image file not found: {image!r}")
        pil_image = Image.open(image)
    else:
        # PIL images and any other pipeline-compatible input pass through.
        pil_image = image

    device = "cuda:0" if use_cuda else "cpu"
    try:
        if use_cuda:
            model.to("cuda")
        else:
            model.float()
            model.to("cpu")

        pipe = pipeline("image-classification", model=model, image_processor=processor, device=device)
        predictions = pipe(images=[pil_image])[0]
    finally:
        # Always hand the GPU back, even when inference raises.
        if use_cuda:
            model.to("cpu")
        devices.torch_gc()

    matches = [p['score'] for p in predictions if p['label'] == 'hq']
    if not matches:
        raise IndexError("label 'hq' missing from classifier output")
    result = matches[0]
    print(" > score =", result)

    return result

None of them can compare to Image Reward but if you're having issues installing Image Reward like I was, these might help.

Koitenshin commented 5 months ago

TL;DR: After extensive testing I can only recommend:

  1. Image Reward (pain in the butt to install),
  2. score_laion_avs1 (the dataset is far bigger than score_laion-sac-logos-ava-v2),
  3. & score_imclasif-quality-v001 (the new one I am about to tell you about).

I managed to find a new one that people may or may not like. I personally find it much better than Image Reward score-wise, and there are no weird dependency hoops to jump through to get it installed.

I've tested it with my few best case, middle ground, and worst case images. Noisy images were given low scores; bad images were given low scores; photorealistic bad images were still scored a bit higher but not as high as Image Reward scores them.

I actually had to look up HuggingFace Hub's repo to find out how to get this one to work, as it kept corrupting the downloads or refusing to see the files. I wanted the "model.safetensors" that SFconvertbot issued a pull request for but hadn't been merged yet.

Follow the same instructions as above, but name the folder "imclasif-quality-v001" and the script inside as "score_imclasif-quality-v001.py"

You actually have two options with this scorer: 'good' is super strict and is exactly as I described above, good for making datasets for training. 'normal' gives relatively high scores to everything which means you'll need to use your own discernment more.

If you want the normal option, change 'good' in the script to 'normal'. Code is as follows:

# based on https://github.com/WhiteWipe/sd-webui-bayesian-merger/blob/main/sd_webui_bayesian_merger/models/ShadowScore.py
import os
import safetensors
import torch

from huggingface_hub import hf_hub_download
from modules import devices
from PIL import Image
from transformers import pipeline, AutoConfig, AutoProcessor, ViTForImageClassification

# Module-level setup: runs once at import time and performs Hugging Face Hub
# access, so importing this script needs the files to be obtainable.

# Fetch the safetensors weights file for sanali209/imclasif-quality-v001.
# The download is pinned to one specific revision hash -- presumably the
# not-yet-merged safetensors conversion the post mentions; confirm before
# changing or dropping it.
pathname = hf_hub_download(repo_id="sanali209/imclasif-quality-v001", filename="model.safetensors", revision="eaf21933abb930ac3390a77c2628151afca265f1")

# NOTE(review): `import safetensors` alone does not import the
# `safetensors.torch` submodule; this presumably works because another import
# (e.g. transformers) has already loaded it -- confirm.
statedict = safetensors.torch.load_file(pathname)

# Build the classifier from the repo's config plus the explicitly loaded
# state dict (no pretrained name is given to from_pretrained for the model).
# Unlike the weights, config and processor are loaded without a revision.
config = AutoConfig.from_pretrained(pretrained_model_name_or_path="sanali209/imclasif-quality-v001")
model = ViTForImageClassification.from_pretrained(pretrained_model_name_or_path=None, state_dict=statedict, config=config)
# Processor that is handed to the pipeline as its image_processor in score().
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path="sanali209/imclasif-quality-v001")

def score(image, prompt="", use_cuda=True):
    """Score an image with the sanali209/imclasif-quality-v001 classifier.

    Args:
        image: A ``PIL.Image.Image``, a path to an existing image file, or
            any other object, which is handed to the pipeline unchanged.
        prompt: Not used by this scorer.
        use_cuda: Run inference on ``cuda:0`` when true; otherwise convert
            the model to float32 and run on the CPU.

    Returns:
        The value the pipeline reports for the ``'good'`` label.

    Raises:
        FileNotFoundError: ``image`` is a string that is not an existing file.
        IndexError: The pipeline output contains no ``'good'`` label.
    """
    # Resolve the input before touching the model so a bad path fails fast
    # with a clear error instead of an unbound local further down.
    if isinstance(image, str):
        if not os.path.isfile(image):
            raise FileNotFoundError(f"image file not found: {image!r}")
        pil_image = Image.open(image)
    else:
        # PIL images and any other pipeline-compatible input pass through.
        pil_image = image

    device = "cuda:0" if use_cuda else "cpu"
    try:
        if use_cuda:
            model.to("cuda")
        else:
            model.float()
            model.to("cpu")

        pipe = pipeline("image-classification", model=model, image_processor=processor, device=device)
        predictions = pipe(images=[pil_image])[0]
    finally:
        # Always hand the GPU back, even when inference raises.
        if use_cuda:
            model.to("cpu")
        devices.torch_gc()

    # Swap 'good' for 'normal' here to read the more lenient label instead.
    matches = [p['score'] for p in predictions if p['label'] == 'good']
    if not matches:
        raise IndexError("label 'good' missing from classifier output")
    result = matches[0]
    print(" > score =", result)

    return result