cassiebreviu / StableDiffusion

Inference Stable Diffusion with C# and ONNX Runtime

How do I get cliptokenizer.onnx #32

Closed · williamlzw closed this 10 months ago

williamlzw commented 10 months ago

How do I get cliptokenizer.onnx? I am converting a 512-size Stable Diffusion 2.1 model, but this is not supported.

cassiebreviu commented 10 months ago

This is created with ONNX Runtime Extension. Learn more here: https://onnxruntime.ai/docs/extensions/
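(For anyone hitting load errors with that file: a model containing the extensions' custom ops needs the extensions library registered on the session before loading. A minimal Python sketch; the input name string_input matches the export script later in this thread and may differ for other builds:)

  import numpy as np
  import onnxruntime as ort
  from onnxruntime_extensions import get_library_path

  # Register the ORT extensions custom-op library so the session can
  # resolve the ai.onnx.contrib CLIPTokenizer op.
  so = ort.SessionOptions()
  so.register_custom_ops_library(get_library_path())
  sess = ort.InferenceSession("cliptokenizer.onnx", so)

  prompt = np.array(["a photo of an astronaut riding a horse"], dtype=object)
  outputs = sess.run(None, {"string_input": prompt})
  print(outputs[0])  # token ids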

williamlzw commented 10 months ago

What I mean is: where do I download the original PyTorch model? I now want to convert the Stable Diffusion tokenizer vocabulary. Thanks.

cassiebreviu commented 10 months ago

You are referencing this model in the repo, right? cliptokenizer.onnx

That model is generated with ORT extensions.
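(Aside, for later readers: recent onnxruntime_extensions releases can also export a Hugging Face tokenizer to ONNX in one step. A minimal sketch, assuming your installed version provides gen_processing_models:)

  from transformers import AutoTokenizer
  from onnxruntime_extensions import gen_processing_models

  hf_tok = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
  # pre_kwargs={} asks for the pre-processing (tokenizer) graph only.
  pre_model = gen_processing_models(hf_tok, pre_kwargs={})[0]
  with open("cliptokenizer.onnx", "wb") as f:
      f.write(pre_model.SerializeToString())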

williamlzw commented 10 months ago

How do I generate cliptokenizer.onnx for Stable Diffusion 2.1?

williamlzw commented 10 months ago
  import onnx
  from pathlib import Path
  from onnx import helper, onnx_pb as onnx_proto
  from transformers import CLIPTokenizerFast
  from onnxruntime_extensions import make_onnx_model

  def _get_file_content(path):
      # Read the vocab/merges file as raw bytes for embedding in node attributes.
      with open(path, "rb") as file:
          return file.read()

  def _create_test_model(**kwargs):
      vocab_file = kwargs["vocab_file"]
      merges_file = kwargs["merges_file"]
      max_length = kwargs["max_length"]

      # The custom CLIPTokenizer op takes a batch of strings and emits token ids,
      # plus an optional attention mask and offset mapping.
      input1 = helper.make_tensor_value_info(
          'string_input', onnx_proto.TensorProto.STRING, [None])
      output1 = helper.make_tensor_value_info(
          'input_ids', onnx_proto.TensorProto.INT64, ["batch_size", "num_input_ids"])
      output2 = helper.make_tensor_value_info(
          'attention_mask', onnx_proto.TensorProto.INT64, ["batch_size", "num_attention_masks"])
      output3 = helper.make_tensor_value_info(
          'offset_mapping', onnx_proto.TensorProto.INT64, ["batch_size", "num_offsets", 2])

      # Select which outputs to expose; offsets are only emitted alongside the mask.
      output_names = ['input_ids']
      outputs = [output1]
      if kwargs["attention_mask"]:
          output_names.append('attention_mask')
          outputs.append(output2)
          if kwargs["offset_map"]:
              output_names.append('offset_mapping')
              outputs.append(output3)

      # A single CLIPTokenizer node from the ai.onnx.contrib (ORT extensions)
      # domain, with the vocab and merges files embedded as attributes.
      node = [helper.make_node(
          'CLIPTokenizer', ['string_input'], output_names,
          vocab=_get_file_content(vocab_file),
          merges=_get_file_content(merges_file),
          name='bpetok', padding_length=max_length,
          domain='ai.onnx.contrib')]

      graph = helper.make_graph(node, 'test0', [input1], outputs)
      return make_onnx_model(graph)

  # This uses the original CLIP vocabulary. For Stable Diffusion 2.1 the
  # equivalent vocab.json/merges.txt live in the "tokenizer" subfolder of its
  # Hugging Face repo, e.g. CLIPTokenizerFast.from_pretrained(
  #     "stabilityai/stable-diffusion-2-1", subfolder="tokenizer").
  tokenizer = CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch32")
  temp_dir = Path('./temp_onnxclip')
  temp_dir.mkdir(parents=True, exist_ok=True)
  # save_vocabulary writes and returns the vocab.json and merges.txt paths.
  tokjson, merges = tokenizer.save_vocabulary(str(temp_dir))
  # padding_length=-1 leaves the token sequences unpadded.
  model = _create_test_model(vocab_file=tokjson, merges_file=merges,
                             max_length=-1, attention_mask=True, offset_map=False)
  onnx.save(model, 'test.onnx')
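
Continuing the script above, a quick sanity check that the exported tokenizer matches the Hugging Face one (assumes onnxruntime is installed alongside onnxruntime-extensions, and reuses the tokenizer variable from the script):

  import numpy as np
  import onnxruntime as _ort
  from onnxruntime_extensions import get_library_path as _get_library_path

  # Load the exported model with the custom-op library registered.
  so = _ort.SessionOptions()
  so.register_custom_ops_library(_get_library_path())
  sess = _ort.InferenceSession('test.onnx', so)

  text = ["a photo of an astronaut riding a horse"]
  input_ids, attention_mask = sess.run(
      None, {'string_input': np.array(text, dtype=object)})
  # The two lines below should line up token for token.
  print(input_ids.tolist())
  print(tokenizer(text)["input_ids"])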