apache / singa

a distributed deep learning platform
Apache License 2.0

AttributeError: module 'singa.singa_wrap' has no attribute 'Communicator' #975

Closed: lalitjain99 closed this issue 2 years ago

lalitjain99 commented 2 years ago

I am trying to implement distributed training using DistOpt but am getting an error:

    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
    sgd = opt.DistOpt(sgd)
    model.set_optimizer(sgd)
    dev = device.create_cuda_gpu_on(sgd.local_rank)


    ---------------------------------------------------------------------------
    AttributeError                            Traceback (most recent call last)
    <ipython-input> in <module>()
          1 sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
    ----> 2 sgd = opt.DistOpt(sgd)
          3 model.set_optimizer(sgd)
          4 dev = device.create_cuda_gpu_on(sgd.local_rank)

    /usr/local/lib/python3.7/dist-packages/singa/opt.py in __init__(self, opt, nccl_id, local_rank, world_size, buffSize)
        723         if nccl_id is None:
        724             # constructure for application using MPI
    --> 725             self.communicator = singa.Communicator(buffSize)
        726         else:
        727             # constructor for application using python multi-process module

    AttributeError: module 'singa.singa_wrap' has no attribute 'Communicator'

lzjpaul commented 2 years ago

Hi,

It is advised to use Miniconda 3 with Python 3.6, as described here:

https://singa.apache.org/docs/3.1.0/installation/

You can also try installing 3.1.0 using the conda option.
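
Since the traceback shows opt.py line 725 calling singa.Communicator, a quick way to tell whether an installed wheel can support DistOpt at all is to probe the native wrapper. This is a minimal sketch of my own (the hasattr probe is a suggestion, not an official SINGA API):

    # Probe whether the installed SINGA wheel exposes the distributed
    # Communicator that opt.DistOpt constructs internally.
    from singa import singa_wrap

    if hasattr(singa_wrap, "Communicator"):
        print("distributed support available: DistOpt should construct")
    else:
        print("no Communicator in singa_wrap: this build appears to have "
              "been compiled without distributed (MPI/NCCL) support")

If the probe fails, the AttributeError points at the build rather than the training script, so reinstalling a build compiled with distributed support is the direction to take.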

lalitjain99 commented 2 years ago

Hi Team,

I have installed 3.1.0 as per the documentation, but when I apply the DistOpt API it crashes. SINGA_Install_CPU(pip).zip

Below is the code:

from singa import singa_wrap as singa
from singa import device
from singa import tensor
from singa import opt
from singa import layer
from singa import model
import numpy as np
import time
import argparse
from PIL import Image

class MLP(model.Model):

    def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
        super(MLP, self).__init__()
        self.num_classes = num_classes
        self.dimension = 2

        self.relu = layer.ReLU()
        self.linear1 = layer.Linear(perceptron_size)
        self.linear2 = layer.Linear(num_classes)
        self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()

    def forward(self, inputs):
        y = self.linear1(inputs)
        y = self.relu(y)
        y = self.linear2(y)
        return y

    def train_one_batch(self, x, y, dist_option, spars):
        out = self.forward(x)
        loss = self.softmax_cross_entropy(out, y)

        if dist_option == 'plain':
            self.optimizer(loss)
        elif dist_option == 'half':
            self.optimizer.backward_and_update_half(loss)
        elif dist_option == 'partialUpdate':
            self.optimizer.backward_and_partial_update(loss)
        elif dist_option == 'sparseTopK':
            self.optimizer.backward_and_sparse_update(loss,
                                                      topK=True,
                                                      spars=spars)
        elif dist_option == 'sparseThreshold':
            self.optimizer.backward_and_sparse_update(loss,
                                                      topK=False,
                                                      spars=spars)
        return out, loss

    def set_optimizer(self, optimizer):
        self.optimizer = optimizer

def create_model(pretrained=False, **kwargs):
    """Constructs an MLP model.

    Args:
        pretrained (bool): If True, returns a pre-trained model.

    Returns:
        The created MLP model.
    """
    model = MLP(**kwargs)

    return model

__all__ = ['MLP', 'create_model']

if __name__ == "__main__":
    np.random.seed(0)

    # generate the boundary
    f = lambda x: (5 * x + 1)
    bd_x = np.linspace(-1.0, 1, 200)
    bd_y = f(bd_x)

    # generate the training data
    x = np.random.uniform(-1, 1, 400)
    y = f(x) + 2 * np.random.randn(len(x))

    # convert training data to 2d space
    label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32)
    data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32)

    dev = device.create_cuda_gpu_on(0)
    sgd = opt.SGD(0.1, 0.9, 1e-5)
    # sgd = opt.DistOpt(sgd)  # crashes with the AttributeError above
    tx = tensor.Tensor((400, 2), dev, tensor.float32)
    ty = tensor.Tensor((400,), dev, tensor.int32)
    model = MLP(data_size=2, perceptron_size=3, num_classes=2)

    # attach model to graph
    model.set_optimizer(sgd)
    model.compile([tx], is_train=True, sequential=True)
    model.train()

    for i in range(100):
        tx.copy_from_numpy(data)
        ty.copy_from_numpy(label)
        # 'plain' matches a branch in train_one_batch; the original 'fp32'
        # matched no branch, so no parameter update would have run
        out, loss = model(tx, ty, 'plain', spars=None)

        if i % 100 == 0:
            print("training loss = ", tensor.to_numpy(loss)[0])