Closed steffencruz closed 1 year ago
Introduces DummySubtensor and DummyMetagraph (amongst others). Runs at least 10x faster.
DummySubtensor
DummyMetagraph
Works with the new config file subnet_config.yml as long as the number of queries is less than 500. Otherwise, we get the following error
subnet_config.yml
2023-05-04 21:34:01.261 | INFO | forward() 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 499/500 [00:02<00:00, 185.92it/s] Traceback (most recent call last): File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/main.py", line 96, in <module> main() File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/main.py", line 76, in main run_query(model=model, data=data) File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/query.py", line 95, in run_query run_train(model) File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/query.py", line 30, in run_train model.train(max_iter=1) File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/sources/neuron.py", line 413, in train self.forward( File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/sources/neuron.py", line 258, in forward scores = self.gating_model( unravelled_message ).to( self.device ) File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1215, in _call_impl hook_result = hook(self, input, result) File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/wandb/wandb_torch.py", line 110, in <lambda> lambda mod, inp, outp: parameter_log_hook( File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/wandb/wandb_torch.py", line 105, in parameter_log_hook self.log_tensor_stats(data.cpu(), "parameters/" + prefix + name) File "/Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/wandb/wandb_torch.py", line 231, in log_tensor_stats tensor = flat.histc(bins=self._num_bins, min=tmin, max=tmax) RuntimeError: "histogram_cpu" not implemented for 'Long' ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/main.py:96 in <module> │ │ │ │ 
93 │ │ 94 │ │ 95 if __name__ == "__main__": │ │ ❱ 96 │ main() │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/main.py:76 in main │ │ │ │ 73 │ # Run the queries │ │ 74 │ if config.get('query'): │ │ 75 │ │ print(f'{"- "*40}\nRunning queries:') │ │ ❱ 76 │ │ run_query(model=model, data=data) │ │ 77 │ │ print("\n>>> Queries executed successfully\n") │ │ 78 │ │ │ 79 │ # Run the analysis │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/query.py:95 in run_query │ │ │ │ 92 │ │ │ 93 │ method_name = template.method.get('name') │ │ 94 │ if method_name == 'train': │ │ ❱ 95 │ │ run_train(model) │ │ 96 │ elif method_name == 'forward': │ │ 97 │ │ run_forward(model, data) │ │ 98 │ elif method_name == 'inference': │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/query.py:30 in run_train │ │ │ │ 27 │ for i in tqdm.tqdm(range(max_iter)): │ │ 28 │ │ t0 = time.time() │ │ 29 │ │ qsize = model.history.qsize() │ │ ❱ 30 │ │ model.train(max_iter=1) │ │ 31 │ │ # add step time and step number to the added queue items │ │ 32 │ │ add_call_metrics(model.history.queue, t0, i, added_size=model.history.qsize()-qs │ │ 33 │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/sources/neuron.py:413 in train │ │ │ │ 410 │ │ │ if question == None: continue # no responses from network. │ │ 411 │ │ │ │ │ 412 │ │ │ # Ask the network to complete the random question, training the gating netwo │ │ ❱ 413 │ │ │ self.forward( │ │ 414 │ │ │ │ roles = ['system', 'user' ], │ │ 415 │ │ │ │ messages = [ self.config.neuron.base_prompt, question.completion ], │ │ 416 │ │ │ │ topk = self.config.neuron.training_topk, │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/sources/neuron.py:258 in forward │ │ │ │ 255 │ │ │ │ 256 │ │ # We run the gating network here to get the best uids │ │ 257 │ │ # Use the gating model to generate scores for each `uid`. 
│ │ ❱ 258 │ │ scores = self.gating_model( unravelled_message ).to( self.device ) │ │ 259 │ │ bittensor.logging.trace( 'scores', scores ) │ │ 260 │ │ │ │ 261 │ │ # Select the top `topk` `uids` based on the highest `scores`. │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/torch/nn/m │ │ odules/module.py:1215 in _call_impl │ │ │ │ 1212 │ │ result = forward_call(*input, **kwargs) │ │ 1213 │ │ if _global_forward_hooks or self._forward_hooks: │ │ 1214 │ │ │ for hook in (*_global_forward_hooks.values(), *self._forward_hooks.values()) │ │ ❱ 1215 │ │ │ │ hook_result = hook(self, input, result) │ │ 1216 │ │ │ │ if hook_result is not None: │ │ 1217 │ │ │ │ │ result = hook_result │ │ 1218 │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/wandb/wand │ │ b_torch.py:110 in <lambda> │ │ │ │ 107 │ │ log_track_params = log_track_init(log_freq) │ │ 108 │ │ try: │ │ 109 │ │ │ hook = module.register_forward_hook( │ │ ❱ 110 │ │ │ │ lambda mod, inp, outp: parameter_log_hook( │ │ 111 │ │ │ │ │ mod, inp, outp, log_track_params │ │ 112 │ │ │ │ ) │ │ 113 │ │ │ ) │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/wandb/wand │ │ b_torch.py:105 in parameter_log_hook │ │ │ │ 102 │ │ │ │ │ data = parameter.data │ │ 103 │ │ │ │ else: │ │ 104 │ │ │ │ │ data = parameter │ │ ❱ 105 │ │ │ │ self.log_tensor_stats(data.cpu(), "parameters/" + prefix + name) │ │ 106 │ │ │ │ 107 │ │ log_track_params = log_track_init(log_freq) │ │ 108 │ │ try: │ │ │ │ /Users/steffencruz/Desktop/py/bittensor/mirror_neuron/env/lib/python3.9/site-packages/wandb/wand │ │ b_torch.py:231 in log_tensor_stats │ │ │ │ 228 │ │ # in min()/max() above. Swap here to prevent a runtime error. 
│ │ 229 │ │ if tmin > tmax: │ │ 230 │ │ │ tmin, tmax = tmax, tmin │ │ ❱ 231 │ │ tensor = flat.histc(bins=self._num_bins, min=tmin, max=tmax) │ │ 232 │ │ tensor = tensor.cpu().clone().detach() │ │ 233 │ │ bins = torch.linspace(tmin, tmax, steps=self._num_bins + 1) │ │ 234 │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ RuntimeError: "histogram_cpu" not implemented for 'Long'
The above is a pytorch error, not wandb.
Introduces
DummySubtensor
and DummyMetagraph
(amongst others). Runs at least 10x faster. Works with the new config file
subnet_config.yml
as long as the number of queries is less than 500. Otherwise, we get the following error. The above is a PyTorch error, not a wandb one.