UDC-GAC / venom

A Vectorized N:M Format for Unleashing the Power of Sparse Tensor Cores

dense with mask != sparse #5

Open ENXING opened 2 months ago

ENXING commented 2 months ago
# gpt2_pytorch.py (excerpt; n, m, v are script-level globals, and
# NMVectorSparsifier / sparse_dense_mul_dispatch come from the venom codebase)
import torch
import transformers

class SrnmSpmm(torch.nn.Module):
    def __init__(self, original: transformers.pytorch_utils.Conv1D):
        super().__init__()
        self.bias = original.bias

        # Convert weights from original module to SrNM
        w, mask, sp_to_dense = NMVectorSparsifier(n, m, v)(original.weight)
        w = w.wrapped_tensor
        # dense references kept for the comparisons in forward()
        self.orig = original.weight.half().to('cuda')
        self.mask = mask.half().to('cuda')
        self.sp_to_dense = sp_to_dense

        self.values = torch.nn.Parameter(w.values)
        #self.columns = self.register_buffer('columns', w.columns)
        self.columns = w.columns
        self.metadata = w.metadata
        #print(w.metadata)
        # note: the sparse dims are taken swapped (w.ncols rows, w.nrows cols)
        self.nrows_sp = w.ncols
        self.ncols_sp = w.nrows
        self.nnz      = w.nnz

    def forward(self, input):

        flattened_input = torch.flatten(input, start_dim=0, end_dim=-2)

        ncols_d  = flattened_input.T.shape[1]
        DM, _    = flattened_input.shape

        output = sparse_dense_mul_dispatch(self.values, self.columns, self.metadata, flattened_input.T, self.nrows_sp, self.ncols_sp,
                                           ncols_d, m, n, v, self.nnz, self.bias)
        print(f"{output.shape=},{self.bias.shape=}, {input.shape=} {self.orig.shape=}, {self.nrows_sp=}, {self.ncols_sp=}, {ncols_d=}, {m=}, {n=}, {v=}, {self.nnz=}")
        output = output.reshape((*input.shape[0:-1], -1))[..., :DM]

        # dense references: masked original weight, and the densified sparse tensor
        output2 = input @ (self.orig * self.mask)
        output3 = input @ self.sp_to_dense
        print(f"{torch.allclose(output2, output)=}")   # False
        print(f"{torch.allclose(output3, output)=}")   # False
        print(f"{torch.allclose(output2, output3)=}")  # False
        exit(1)

        return output
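
Two things I should probably rule out on my side first: self.bias is passed to sparse_dense_mul_dispatch but is never added to output2/output3, and torch.allclose with its default tolerances is very strict for fp16 results. A sketch of a fairer comparison (assuming, unverified, that the sparse kernel applies the bias it receives):

        # inside forward(), after the prints above (a sketch, not venom API)
        ref2 = input @ (self.orig * self.mask) + self.bias
        ref3 = input @ self.sp_to_dense.to(input.dtype).to(input.device) + self.bias
        for name, ref in (("masked dense", ref2), ("sp_to_dense", ref3)):
            ok = torch.allclose(output, ref, rtol=1e-2, atol=1e-2)
            print(name, ok, (output.float() - ref.float()).abs().max().item())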

# grouped_nmv_tensor.py (excerpt; SrNMTensor comes from the venom codebase)
import torch
import sten

class NMVectorSparsifier:
    def __init__(self, n, m, tileM):
        self.n = n
        self.m = m
        self.tileM = tileM

    def __call__(self, tensor, grad_fmt=None):
        # uncomment to use magnitude-pruning -> mask, columns
        #mask, columns = nm_vector_mask_sparsify(tensor, sparsifier.n, sparsifier.m, sparsifier.tileM)

        # uncomment to use random pruning (cuSparseLt-like approach) -> mask, columns
        nrows, ncols = tensor.shape
        # for every tileM x m block, select the first four columns (indices 0..3)
        columns = torch.zeros(nrows//self.tileM, ncols//self.m*4, dtype=torch.int32)
        columns = columns.reshape((-1, 4)) + torch.tensor([0, 1, 2, 3], dtype=torch.int32)
        columns = columns.reshape((nrows//self.tileM, ncols//self.m*4))

        # keep positions 0 and 2 of every m-wide block: a fixed 2-of-4 pattern
        # inside the selected columns, independent of the tensor's values
        mask = torch.zeros(tensor.shape, dtype=tensor.dtype)
        m = torch.cat((torch.tensor([1, 0, 1, 0]), torch.zeros(self.m - 4)), 0)
        mask = mask.reshape(-1, self.tileM, self.m) + m
        mask = mask.reshape(tensor.shape)

        sp = SrNMTensor(self.n, self.m, self.tileM, tensor, mask, columns, tensor.device)
        to_dense = sp.to_dense()
        sparse_mtx = sten.SparseTensorWrapper.wrapped_from_dense(
            sp,
            tensor,
            grad_fmt,
        )

        return sparse_mtx, mask, to_dense
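
To isolate the format conversion from the GPU kernel, the round trip can be checked on CPU in fp32, where the masked weight and the densified SrNM tensor should match exactly. A minimal sketch (n=2, m=8, v=64 are example values; the import path is assumed):

# roundtrip_check.py (sketch)
import torch
from grouped_nmv_tensor import NMVectorSparsifier   # assumed import path

n, m, v = 2, 8, 64                      # example V:N:M parameters
w = torch.randn(256, 256)               # fp32, CPU: no GPU kernel involved

sparse_w, mask, sp_to_dense = NMVectorSparsifier(n, m, v)(w)

# if the conversion is lossless, these hold exactly (same values, no rounding)
print(torch.equal(w * mask, sp_to_dense))
print((w * mask - sp_to_dense).abs().max())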

Hi, the code above extracts the mask and a densified copy of the sparse matrix so that I can study the code. Here is my finding: the original dense matrix with the mask applied is not the same as the sp_to_dense produced by SrNMTensor. Also, multiplying the input by the masked dense matrix does not match the sparse multiplication performed by sparse_dense_mul_dispatch. This is strange, as I expected them to be equal. Looking forward to your reply.
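
For what it's worth, a small diagnostic like the following (a hypothetical helper, not part of venom) would distinguish fp16 rounding noise from a real layout or logic mismatch:

# diff_report.py (hypothetical diagnostic)
import torch

def report(name, a, b, atol=1e-2):
    diff = (a.float() - b.float()).abs()
    frac = (diff > atol).float().mean().item()
    print(f"{name}: max|diff|={diff.max().item():.4f}, "
          f"mean|diff|={diff.mean().item():.6f}, frac>{atol}={frac:.3f}")

# usage inside forward():
#   report("sparse vs masked dense", output, output2)
#   report("sparse vs sp_to_dense",  output, output3)
# a tiny max|diff| with frac near 0 suggests rounding; large values point to a
# real mismatch (missing bias, transposed layout, wrong column selection, ...)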

Thanks!