Open YourPeer opened 1 year ago
另一个问题是,当我运行HierFL时出现如下错误提示:
2023-08-08 23:23:46,263 simple_logger.py log_once [line:14] INFO Current_time:0
Traceback (most recent call last):
File "/home/archlab/lzr/easyFL-FLGo/main.py", line 31, in <module>
你好,由于hierFL的架构与传统一对多的中心服务器架构不同,它的benchmark设置与横向联邦并不共享。hierFL目前还没有tutorial,后续会补充相关的tutorial。若你想尝试运行hierFL,可以运行以下代码:
import flgo
import flgo.experiment.logger
import os
bmkname = 'hier_mnist'  # name of the benchmark
bmk_config = './config_hier_mnist.py'  # path to the prepared benchmark configuration file
# Generate the hierarchical benchmark only when no path named bmkname exists yet.
if not os.path.exists(bmkname):
    bmk = flgo.gen_hierarchical_benchmark(bmkname, bmk_config, target_path='.', data_type='cv', task_type='classification')
import flgo.algorithm.hierarchical as hierFL
# The task configuration refers to the benchmark by its name.
bmk = bmkname
task = './my_test_hier'
task_config = {
    'benchmark': bmk,
}
# Generate the task only when the task path does not exist yet.
if not os.path.exists(task):
    flgo.gen_task(task_config, task_path=task)
# Run the hierarchical FL algorithm (not fedavg) in the 'hierarchical' scene.
runner = flgo.init(
    task,
    hierFL,
    {
        'gpu': [0,],
        'log_file': True,
        'learning_rate': 0.1,
        'num_steps': 1,
        'batch_size': 128,
        'num_rounds': 10000,
        'proportion': 1.0,
        'train_holdout': 0.2,
        'local_test': True,
        'eval_interval': 1,
    },
    scene='hierarchical',
)
runner.run()
Traceback (most recent call last):
  File "/home/archlab/lzr/easyFL-FLGo/main.py", line 31, in <module>
    runner.run()
  File "/home/archlab/lzr/easyFL-FLGo/flgo/algorithm/fedbase.py", line 243, in run
    updated = self.iterate()
  File "/home/archlab/lzr/easyFL-FLGo/flgo/algorithm/fedasync.py", line 25, in iterate
    res = self.communicate(self.selected_clients, asynchronous=True)
  File "/home/archlab/lzr/easyFL-FLGo/flgo/simulator/base.py", line 481, in communicate_with_dropout
    return communicate(self, selected_clients, mtype, asynchronous)
  File "/home/archlab/lzr/easyFL-FLGo/flgo/simulator/base.py", line 481, in communicate_with_dropout
    return communicate(self, selected_clients, mtype, asynchronous)
  File "/home/archlab/lzr/easyFL-FLGo/flgo/simulator/base.py", line 573, in communicate_with_clock
    if pkgs[0].get('__cid', None) is None:
IndexError: list index out of range
该bug是由于没有检查pkgs是否为空数组引起的。我已在最新版本中修复该bug,感谢你的反馈。 注:异步算法的尝试需要配合系统异构性模拟器(用户响应时间异构)。这里给出一段测试代码:
import flgo
import os
# the target path of the task
task = './my_first_task'
# create task configuration: MNIST classification split IID across 100 clients
task_config = {
    'benchmark': {'name': 'flgo.benchmark.mnist_classification'},
    'partitioner': {'name': 'IIDPartitioner', 'para': {'num_clients': 100}},
}
# generate the task if the task doesn't exist
if not os.path.exists(task):
    flgo.gen_task(task_config, task)
import flgo.algorithm.fedasync as fedasync
# create the fedasync runner on the task
# NOTE(review): 'responsiveness' presumably enables the simulated heterogeneous
# client response times that the asynchronous algorithm needs -- confirm the
# meaning of 'UNI-5-1000' against the flgo simulator documentation.
runner = flgo.init(task, fedasync, {'gpu': [0,], 'log_file': True, 'num_steps': 5, 'responsiveness': 'UNI-5-1000'})
runner.run()
HierFL示例中使用的配置文件config_hier_mnist.py的内容如下:
import os
import torchvision
import torch
import flgo.benchmark
# Preprocessing applied to every image: convert to a tensor, then normalize
# with the given mean and standard deviation.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])
# Directory that holds the raw MNIST files (downloaded on demand below).
path = os.path.join(flgo.benchmark.path, 'RAW_DATA', 'MNIST')
# Define the training set instance and name it train_data
train_data = torchvision.datasets.MNIST(
    root=path,
    train=True,
    download=True,
    transform=transform,
)
# Define the test set instance and name it test_data
test_data = torchvision.datasets.MNIST(
    root=path,
    train=False,
    download=True,
    transform=transform,
)
class mlp(torch.nn.Module):
    """Three-layer fully connected classifier: 784 -> 200 -> 200 -> 10.

    The input is flattened per sample, so any input whose non-batch
    dimensions multiply to 784 (e.g. ``(N, 1, 28, 28)`` or ``(N, 28, 28)``)
    is accepted.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(784, 200)
        self.fc2 = torch.nn.Linear(200, 200)
        self.fc3 = torch.nn.Linear(200, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the raw class scores of shape ``(N, 10)`` for a batch ``x``."""
        # Flatten everything except the batch dimension. Unlike ``view`` with
        # a hand-computed feature size, this also works for non-contiguous
        # tensors and for inputs without an explicit channel dimension.
        x = torch.flatten(x, start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # No activation on the output layer: it returns unnormalized scores.
        x = self.fc3(x)
        return x
# Define the function that returns the model and name it get_model
def get_model():
    """Return a newly constructed ``mlp`` instance."""
    return mlp()
Traceback (most recent call last):
File "/home/archlab/lzr/easyFL-FLGo/main.py", line 31, in <module>
runner.run()
File "/home/archlab/lzr/easyFL-FLGo/flgo/algorithm/fedbase.py", line 243, in run
updated = self.iterate()
File "/home/archlab/lzr/easyFL-FLGo/flgo/algorithm/fedasync.py", line 25, in iterate
res = self.communicate(self.selected_clients, asynchronous=True)
File "/home/archlab/lzr/easyFL-FLGo/flgo/simulator/base.py", line 481, in communicate_with_dropout
return communicate(self, selected_clients, mtype, asynchronous)
File "/home/archlab/lzr/easyFL-FLGo/flgo/simulator/base.py", line 481, in communicate_with_dropout
return communicate(self, selected_clients, mtype, asynchronous)
File "/home/archlab/lzr/easyFL-FLGo/flgo/simulator/base.py", line 573, in communicate_with_clock
if pkgs[0].get('__cid', None) is None:
IndexError: list index out of range