Status: Open — BolinSNLHM opened this issue 3 months ago
Describe the bug While working on benchmarking Hidet for CPU support, I noticed that during the tuning process, within a single source.cc/source.cu file, the block of #include statements:
#include <stdint.h>
#include <hidet/runtime/symbols.h>
#include <hidet/runtime/memory_planner.h>
#include <hidet/runtime/cpu/context.h>
#include <hidet/runtime/cuda/complex.h>
#include <hidet/runtime/cuda/context.h>
#include <hidet/runtime/logging.h>
Will appear multiple times, one before each candidate_xx namespace.
candidate_xx
You can refer to the attached source.cu file as an example.
source.cu
To Reproduce Script:
"""Reproduction script: compile a GPT-2 model with hidet and benchmark generation.

Tunes in search space 2 (which triggers the duplicated-#include behavior in the
generated source.cc/source.cu files), compiles the traced graph, then times
autoregressive generation starting from empty KV caches.
"""
import hidet
from typing import List
import hidet.testing

# Keep tuning artifacts in a local cache directory so the generated
# source files produced during tuning can be inspected afterwards.
hidet.option.cache_dir('./mycache-gpt2')
hidet.option.search_space(2)


def generate_hidet(model, text, input_ids, position_ids, past_keys, past_values, device, tokens_to_generate=10):
    """Autoregressively generate ``tokens_to_generate`` token ids using ``model``.

    The model returns the next token ids along with updated position ids and
    KV caches, which are fed back in on every step.
    """
    output_ids = []
    for _ in range(tokens_to_generate):
        input_ids, position_ids, past_keys, past_values = model(
            input_ids, position_ids, past_keys, past_values
        )
        output_ids.append(input_ids[0].item())
    return output_ids


gpt2_module = hidet.testing.models.gpt2.model(disable_cache=True)
gpt2_module.cuda()

# Symbolic inputs: token/position ids of symbolic length, plus KV caches with
# a symbolic previous-sequence-length dimension.
input_ids = hidet.symbol(['seq_length'], dtype=hidet.int32, device='cuda')
position_ids = hidet.symbol(['seq_length'], dtype=hidet.int32, device='cuda')
cache_shape = [gpt2_module.num_hidden_layers, gpt2_module.num_heads, 'prev_seq_length', gpt2_module.head_dim]
past_keys = hidet.symbol(cache_shape, dtype=hidet.float32, device='cuda')
past_values = hidet.symbol(cache_shape, dtype=hidet.float32, device='cuda')

# Trace, optimize, and build the flow graph; space=2 enables the tuning search.
outputs = gpt2_module(input_ids, position_ids, past_keys, past_values)
graph = hidet.trace_from(outputs, inputs=[input_ids, position_ids, past_keys, past_values])
graph = hidet.graph.optimize(graph)
compiled_model = graph.build(space=2)
compiled_model.save('./benchmark_outs2/compiled.hidet')

text = "This is just an example..."
hidet_tokenizer = hidet.testing.models.gpt2.tokenizer()
hidet_input_ids_list: List[int] = hidet_tokenizer(text)['input_ids']
hidet_input_ids = hidet.asarray(hidet_input_ids_list, dtype=hidet.int32, device='cuda')
hidet_position_ids = hidet.arange(hidet_input_ids.shape[0], dtype=hidet.int32, device='cuda')


def _empty_kv_cache():
    # Zero-length cache along the sequence axis, i.e. no past context yet.
    return hidet.zeros(
        [gpt2_module.num_hidden_layers, gpt2_module.num_heads, 0, gpt2_module.head_dim],
        dtype=hidet.float32,
        device='cuda',
    )


hidet_past_keys = _empty_kv_cache()
hidet_past_values = _empty_kv_cache()

# Benchmark one full generation of 40 tokens; fresh empty caches are built
# inside the lambda so every timed run starts from the same state.
hidet_latency = hidet.utils.benchmark_func(
    lambda: generate_hidet(
        compiled_model,
        text,
        hidet_input_ids,
        hidet_position_ids,
        _empty_kv_cache(),
        _empty_kv_cache(),
        'cuda',
        tokens_to_generate=40,
    ),
    repeat=1,
)
Describe the bug While working on benchmarking Hidet for CPU support, I noticed that during the tuning process, within a single source.cc/source.cu file, the block of #include statements:
Will appear multiple times, one before each
candidate_xx
namespace. You can refer to the attached
source.cu
file as an example. To Reproduce Script: