After successfully installing Flower, I ran the example code below and got an error. (Versions: python=1.10.0, pytorch=2.2.1, Flower=1.7.0)
Steps/Code to Reproduce
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10
import flwr as fl
# Device used for training and evaluation; try "cuda" to train on GPU.
DEVICE = torch.device("cpu")
# The version strings are exposed as `__version__`; the bare `version`
# attribute (left after the underscores were stripped) is not a string.
print(
    f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}"
)
# Number of simulated clients.
NUM_CLIENTS = 10
def load_datasets(num_clients: int):
    """Build per-client train/validation loaders and one shared test loader.

    CIFAR-10 is loaded from (and, if needed, downloaded to) ``./dataset``.
    The training set is split into ``num_clients`` partitions, and every
    partition is split again into a training part and a validation part
    (one tenth of the partition, rounded down).

    Args:
        num_clients: Number of partitions to create.

    Returns:
        A tuple ``(trainloaders, valloaders, testloader)``: two lists holding
        one ``DataLoader`` per partition, and a single ``DataLoader`` over
        the CIFAR-10 test set.
    """
    # Download and transform CIFAR-10 (train and test)
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )
    trainset = CIFAR10("./dataset", train=True, download=True, transform=transform)
    testset = CIFAR10("./dataset", train=False, download=True, transform=transform)

    # Split training set into `num_clients` partitions to simulate different
    # local datasets. `random_split` requires the lengths to sum to
    # len(trainset), so the first `remainder` partitions get one extra sample
    # when the set size is not divisible by `num_clients`.
    partition_size, remainder = divmod(len(trainset), num_clients)
    lengths = [
        partition_size + (1 if i < remainder else 0) for i in range(num_clients)
    ]
    datasets = random_split(trainset, lengths, torch.Generator().manual_seed(42))

    # Split each partition into train/val and create DataLoader
    trainloaders = []
    valloaders = []
    for ds in datasets:
        len_val = len(ds) // 10  # 10 % validation set
        len_train = len(ds) - len_val
        ds_train, ds_val = random_split(
            ds, [len_train, len_val], torch.Generator().manual_seed(42)
        )
        trainloaders.append(DataLoader(ds_train, batch_size=32, shuffle=True))
        valloaders.append(DataLoader(ds_val, batch_size=32))
    testloader = DataLoader(testset, batch_size=32)
    return trainloaders, valloaders, testloader
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Compute the network output for the batch ``x``.

    Applies conv1 -> ReLU -> pool and conv2 -> ReLU -> pool, reshapes the
    result to ``16 * 5 * 5`` features per row, then applies fc1 and fc2
    (each followed by ReLU) and finally fc3.

    NOTE(review): this takes ``self`` but no class header precedes it at
    this point in the file; it presumably belongs inside ``Net`` — confirm.
    """
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    # Flatten so the fully connected layers receive one row per sample.
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x
def getparameters(net) -> List[np.ndarray]:
    """Return every entry of ``net.state_dict()`` as a NumPy array.

    The arrays are returned in ``state_dict`` order, after moving each
    tensor to the CPU.
    """
    # Only the values are needed, so iterate them directly; the original
    # `for , val in ...items()` had lost its `_` placeholder and did not parse.
    return [val.cpu().numpy() for val in net.state_dict().values()]


# Underscore spelling matching the sibling `set_parameters`; the original
# name is kept above so existing callers keep working.
get_parameters = getparameters
def set_parameters(net, parameters: List[np.ndarray]):
    """Load ``parameters`` into ``net`` in ``state_dict`` key order.

    Args:
        net: Module whose ``state_dict`` keys name the entries to overwrite.
        parameters: One array per ``state_dict`` entry, in the same order.

    The state dict is loaded with ``strict=True``.
    """
    # Pair each state-dict key with the array at the same position.
    params_dict = zip(net.state_dict().keys(), parameters)
    state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
    net.load_state_dict(state_dict, strict=True)
def train(net, trainloader, epochs: int):
    """Train the network on the training set.

    Uses cross-entropy loss and Adam with its default settings, and prints
    the loss and accuracy after every epoch.

    Args:
        net: Model to train (updated in place).
        trainloader: Iterable of ``(images, labels)`` batches; its
            ``dataset`` attribute is used to normalise the epoch loss.
        epochs: Number of passes over ``trainloader``.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images, labels in trainloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = net(images)
            # Reuse `outputs` instead of running a second forward pass, so
            # the loss and the accuracy below refer to the same predictions.
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Metrics
            # `.item()` keeps the running total a plain float rather than a
            # tensor still attached to every batch's autograd graph.
            epoch_loss += loss.item()
            total += labels.size(0)
            correct += (torch.max(outputs.data, 1)[1] == labels).sum().item()
        epoch_loss /= len(trainloader.dataset)
        epoch_acc = correct / total
        print(f"Epoch {epoch+1}: train loss {epoch_loss}, accuracy {epoch_acc}")
def test(net, testloader):
    """Evaluate the network on the entire test set.

    Args:
        net: Model to evaluate.
        testloader: Iterable of ``(images, labels)`` batches; its ``dataset``
            attribute is used to normalise the summed loss.

    Returns:
        ``(loss, accuracy)``: the summed per-batch cross-entropy divided by
        the dataset size, and the fraction of correctly predicted labels.
    """
    criterion = torch.nn.CrossEntropyLoss()
    correct, total, loss = 0, 0, 0.0
    net.eval()
    # The context manager is `torch.no_grad`; `torch.nograd` does not exist.
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            loss += criterion(outputs, labels).item()
            # torch.max returns (values, indices); only the indices are used.
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    loss /= len(testloader.dataset)
    accuracy = correct / total
    return loss, accuracy
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client holding one client's model and data loaders.

    NOTE(review): only the constructor is visible here; no fit / evaluate /
    parameter methods are defined in this class — confirm whether they were
    lost from the paste.
    """

    # The constructor must be named `__init__` (the underscores had been
    # stripped), otherwise `FlowerClient(cid, net, ...)` cannot take arguments.
    def __init__(self, cid, net, trainloader, valloader):
        """Store the client id, the model and both data loaders."""
        self.cid = cid
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader
# FedAvg strategy used by the simulated server.
strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,  # Sample 100% of available clients for training
    fraction_evaluate=0.5,  # Sample 50% of available clients for evaluation
    min_fit_clients=10,  # Never sample less than 10 clients for training
    min_evaluate_clients=5,  # Never sample less than 5 clients for evaluation
    min_available_clients=10,  # Wait until all 10 clients are available
)

# Specify the resources each of your clients need. By default, each
# client will be allocated 1x CPU and 0x GPUs
client_resources = {"num_cpus": 1, "num_gpus": 0.0}
if DEVICE.type == "cuda":
    # here we are assigning an entire GPU for each client.
    client_resources = {"num_cpus": 1, "num_gpus": 1.0}
    # Refer to our documentation for more details about Flower Simulations
    # and how to setup these `client_resources`.
Describe the bug
After successfully installing Flower, I ran the example code below and got the error shown in the screenshot. (Versions: python=1.10.0, pytorch=2.2.1, Flower=1.7.0) ![Error picture](https://github.com/adap/flower/assets/39286092/49c61cbe-9fdd-49d1-b0fd-b0d4fa6c50bb)
Steps/Code to Reproduce
from collections import OrderedDict from typing import Dict, List, Optional, Tuple
import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as transforms from torch.utils.data import DataLoader, random_split from torchvision.datasets import CIFAR10
import flwr as fl
# Device used for training and evaluation; try "cuda" to train on GPU.
DEVICE = torch.device("cpu")
# On one line the trailing comment swallowed this call; it is restored as its
# own statement, and the version strings are read from `__version__`.
print(
    f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}"
)
# Number of simulated clients.
NUM_CLIENTS = 10
def load_datasets(num_clients: int):
Download and transform CIFAR-10 (train and test)
# Build the train/validation loader lists and the test loader for NUM_CLIENTS clients.
trainloaders, valloaders, testloader = load_datasets(NUM_CLIENTS)
class Net(nn.Module):
    """Small convolutional network with two conv layers and three linear layers.

    The final linear layer has 10 outputs.
    """

    def __init__(self) -> None:
        """Create the convolution, pooling and fully connected layers."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 channels of 5x5 features are flattened before the first linear
        # layer; the `*` operators had been stripped from this expression.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the network output for the batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
def getparameters(net) -> List[np.ndarray]:
    """Return every entry of ``net.state_dict()`` as a NumPy array.

    The arrays are returned in ``state_dict`` order, after moving each
    tensor to the CPU.
    """
    # Only the values are needed; the original `for , val in ...items()` had
    # lost its `_` placeholder and did not parse.
    return [val.cpu().numpy() for val in net.state_dict().values()]


# Underscore spelling matching the sibling `set_parameters`; the original
# name is kept above so existing callers keep working.
get_parameters = getparameters
def set_parameters(net, parameters: List[np.ndarray]):
    """Load ``parameters`` into ``net`` in ``state_dict`` key order.

    Args:
        net: Module whose ``state_dict`` keys name the entries to overwrite.
        parameters: One array per ``state_dict`` entry, in the same order.

    The state dict is loaded with ``strict=True``.
    """
    # Pair each state-dict key with the array at the same position.
    params_dict = zip(net.state_dict().keys(), parameters)
    state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
    net.load_state_dict(state_dict, strict=True)
def train(net, trainloader, epochs: int):
    """Train the network on the training set.

    Uses cross-entropy loss and Adam with its default settings, and prints
    the loss and accuracy after every epoch.

    Args:
        net: Model to train (updated in place).
        trainloader: Iterable of ``(images, labels)`` batches; its
            ``dataset`` attribute is used to normalise the epoch loss.
        epochs: Number of passes over ``trainloader``.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images, labels in trainloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = net(images)
            # Reuse `outputs` instead of running a second forward pass.
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Metrics
            # The counters initialised above were never updated in this
            # copy; the per-batch updates and the epoch report are restored.
            epoch_loss += loss.item()
            total += labels.size(0)
            correct += (torch.max(outputs.data, 1)[1] == labels).sum().item()
        epoch_loss /= len(trainloader.dataset)
        epoch_acc = correct / total
        print(f"Epoch {epoch+1}: train loss {epoch_loss}, accuracy {epoch_acc}")
def test(net, testloader):
    """Evaluate the network on the entire test set.

    Args:
        net: Model to evaluate.
        testloader: Iterable of ``(images, labels)`` batches; its ``dataset``
            attribute is used to normalise the summed loss.

    Returns:
        ``(loss, accuracy)``: the summed per-batch cross-entropy divided by
        the dataset size, and the fraction of correctly predicted labels.
    """
    criterion = torch.nn.CrossEntropyLoss()
    correct, total, loss = 0, 0, 0.0
    net.eval()
    # The context manager is `torch.no_grad`; `torch.nograd` does not exist.
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            loss += criterion(outputs, labels).item()
            # torch.max returns (values, indices); only the indices are used.
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    loss /= len(testloader.dataset)
    accuracy = correct / total
    return loss, accuracy
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client holding one client's model and data loaders.

    NOTE(review): only the constructor is visible here; no fit / evaluate /
    parameter methods are defined in this class — confirm whether they were
    lost from the paste.
    """

    # The constructor must be named `__init__` (the underscores had been
    # stripped), otherwise `FlowerClient(cid, net, ...)` cannot take arguments.
    def __init__(self, cid, net, trainloader, valloader):
        """Store the client id, the model and both data loaders."""
        self.cid = cid
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader
def client_fn(cid) -> FlowerClient:
    """Create the ``FlowerClient`` for the client id ``cid``.

    A fresh ``Net`` is created on ``DEVICE``, and ``int(cid)`` selects that
    client's entries in ``trainloaders`` and ``valloaders``.
    """
    net = Net().to(DEVICE)
    trainloader = trainloaders[int(cid)]
    valloader = valloaders[int(cid)]
    return FlowerClient(cid, net, trainloader, valloader)
# Create FedAvg strategy
strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,  # Sample 100% of available clients for training
    fraction_evaluate=0.5,  # Sample 50% of available clients for evaluation
    min_fit_clients=10,  # Never sample less than 10 clients for training
    min_evaluate_clients=5,  # Never sample less than 5 clients for evaluation
    min_available_clients=10,  # Wait until all 10 clients are available
)

# Specify the resources each of your clients need. By default, each
# client will be allocated 1x CPU and 0x GPUs
# (An earlier `client_resources = None` / `{"num_gpus": 1}` assignment was
# always overwritten by the one below, so only this definition is kept.)
client_resources = {"num_cpus": 1, "num_gpus": 0.0}
if DEVICE.type == "cuda":
    # here we are assigning an entire GPU for each client.
    client_resources = {"num_cpus": 1, "num_gpus": 1.0}

# Start simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=5),
    strategy=strategy,
    client_resources=client_resources,
)
Expected Results
The example simulation should run to completion without raising the error described above.
Actual Results
None