kijai / ComfyUI-KwaiKolorsWrapper

Diffusers wrapper to run the Kwai-Kolors model
Apache License 2.0

Could ChatGLM3 run on just the CPU? #15

Open snow2zhou opened 1 month ago

snow2zhou commented 1 month ago

Could ChatGLM3 run on just the CPU?

lejunzhu commented 1 month ago

I managed to load ChatGLM3 on the CPU by changing the code like this:

diff --git a/nodes.py b/nodes.py
index 821fc4b..7ad2907 100755
--- a/nodes.py
+++ b/nodes.py
@@ -181,7 +181,7 @@ class DownloadAndLoadChatGLM3:
         text_encoder = ChatGLMModel.from_pretrained(
             text_encoder_path,
             torch_dtype=torch.float16
-            )
+            ).float()
         if precision == 'quant8':
             text_encoder.quantize(8)
         elif precision == 'quant4':
@@ -215,8 +215,8 @@ class KolorsTextEncode:
     CATEGORY = "KwaiKolorsWrapper"

     def encode(self, chatglm3_model, prompt, negative_prompt, num_images_per_prompt):
-        device = mm.get_torch_device()
-        offload_device = mm.unet_offload_device()
+        device = mm.text_encoder_device()
+        offload_device = mm.text_encoder_offload_device()
         mm.unload_all_models()
         mm.soft_empty_cache()
          # Function to randomly select an option from the brackets
@@ -327,10 +327,10 @@ class KolorsTextEncode:
         mm.soft_empty_cache()
         gc.collect()
         kolors_embeds = {
-            'prompt_embeds': prompt_embeds,
-            'negative_prompt_embeds': negative_prompt_embeds,
-            'pooled_prompt_embeds': text_proj,
-            'negative_pooled_prompt_embeds': negative_text_proj
+            'prompt_embeds': prompt_embeds.half(),
+            'negative_prompt_embeds': negative_prompt_embeds.half(),
+            'pooled_prompt_embeds': text_proj.half(),
+            'negative_pooled_prompt_embeds': negative_text_proj.half()
         }

         return (kolors_embeds,)
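
For context on what this patch does: ComfyUI's `mm.text_encoder_device()` and `mm.text_encoder_offload_device()` honor flags such as `--lowvram` and can resolve to the CPU, while `mm.get_torch_device()` always targets the main compute device. Because many fp16 kernels are slow or missing on CPU, the fp16 checkpoint is upcast to fp32 with `.float()` before encoding, and the resulting embeddings are cast back to fp16 with `.half()` so the GPU sampler still receives the dtype it expects. A minimal sketch of the same idea in plain torch (the helper names are illustrative, not the wrapper's API):

```python
import torch

# Minimal sketch of the idea behind the patch above; illustrative names,
# not the wrapper's actual code.

def load_text_encoder_for_cpu(model_cls, path):
    # The checkpoint is stored in fp16; .float() upcasts every parameter to
    # fp32 so the CPU forward pass hits well-supported fp32 kernels.
    return model_cls.from_pretrained(path, torch_dtype=torch.float16).float()

def cast_embeds_for_sampler(embeds):
    # The sampler still runs in fp16 on the GPU, so cast the fp32 CPU
    # outputs back down before handing them off.
    return {name: tensor.half() for name, tensor in embeds.items()}
```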
CHNtentes commented 1 month ago

I managed to load ChatGLM3 on the CPU with the same change:

  • comfyui commit a3dffc4
  • this plugin commit 8ebd13c
  • apply the patch from the previous comment
  • start comfyui with --lowvram
  • use the (Down)load ChatGLM3 Model node and load the fp16 model. Quantization somehow doesn't work on CPU.

Yeah, you should run the fp16 version on the CPU, since quantized models are not well supported on CPUs.
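
On the quantization point: ChatGLM3's int8/int4 `quantize()` path reportedly relies on custom CUDA kernels, which matches it failing on CPU. A hypothetical guard (not present in this repo) that falls back to fp16 when no CUDA device is available:

```python
import torch

# Hypothetical precision guard, assuming the 'fp16'/'quant8'/'quant4'
# options seen in the node above; not part of this repo.
def resolve_precision(requested: str) -> str:
    if requested in ("quant8", "quant4") and not torch.cuda.is_available():
        # Quantized ChatGLM3 effectively requires GPU kernels, so fall back.
        print(f"'{requested}' is not supported on CPU; using 'fp16' instead")
        return "fp16"
    return requested
```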