Pressio / pressio

core C++ library
Other
45 stars 7 forks source link

issue with tpetra block and tpetra #607

Closed fnrizzi closed 1 year ago

fnrizzi commented 1 year ago
#include <gtest/gtest.h>
#include <Tpetra_BlockVector.hpp>
#include <Tpetra_BlockMultiVector.hpp>
#include <Tpetra_Map.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Tpetra_Map_decl.hpp>

using tcomm = Teuchos::Comm<int>;
using map_t = Tpetra::Map<>;
using vec_type = Tpetra::BlockVector<>;

struct TagA{};
struct TagB{};

class MyObject{
public:
  using state_type = vec_type;
  using residual_type = vec_type;

private:
  int rank_;
  int numProc_;
  const int localSize_ = 15;
  const int blockSize_ = 5;
  int numGlobalEntries_;
  Teuchos::RCP<const tcomm> comm_;
  Teuchos::RCP<const map_t> contigMap_;
  vec_type V_;

public:
  MyObject(){
    comm_ = Teuchos::rcp (new Teuchos::MpiComm<int>(MPI_COMM_WORLD));
    rank_ = comm_->getRank();
    numProc_ = comm_->getSize();
    numGlobalEntries_ = localSize_*numProc_;
    contigMap_ = Teuchos::rcp(new map_t(numGlobalEntries_, 0, comm_));
  }

  state_type createState() const{
    state_type V(*contigMap_, blockSize_);
    V.putScalar(0);
    return V;
  }

  residual_type createResidual() const{
    residual_type V(*contigMap_, blockSize_);
    V.putScalar(0);
    return V;
  }
};

template<class T1, class T2, class DT1, class DT2>
struct Registry
{
  DT1 d1_;
  DT2 d2_;

  template<class TagToFind, std::enable_if_t<std::is_same<TagToFind,T1>::value, int> = 0 >
  DT1 & get(){ return d1_; }

  template<class TagToFind, std::enable_if_t<std::is_same<TagToFind,T2>::value, int> = 0 >
  DT2 & get(){ return d2_; }
};

template<class T>
auto createRegistry(const T & system){
  using state_t    = typename T::state_type;
  using r_t        = typename T::residual_type;
  using registry_t = Registry<TagA, TagB, state_t, r_t>;
  registry_t reg{system.createState(), system.createResidual()};
  return reg;
}

void print(int rank, const std::string & s1, const std::string & s2, vec_type r)
{
  auto r_tpmv  = r.getVectorView();
  std::size_t r_0 = r.getMap()->getGlobalNumElements();
  std::size_t r_tpmv_0 = r_tpmv.getGlobalLength();

  if (rank==0){
    std::cout << "ops\n";
    std::cout << s1 << "ext = " << r_0 << "\n";
    std::cout << s2 << "ext = " << r_tpmv_0 << "\n";
  }
}

template<class RegistryType>
class Foo1{
  int rank_;
  RegistryType reg_;
public:
  Foo1(RegistryType && reg) : reg_(std::move(reg)){
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
    auto & r = reg_.template get<TagA>();
    print(rank_, "CONSTR-block ", "CONSTR-tpetra ", r);
  }

  void dummy() {
    auto & r = reg_.template get<TagA>();
    print(rank_, "DUMMY-block ", "DUMMY-tpetra ", r);
  }
};

template<class T>
auto create1a(const T& system)
{
  using state_t    = typename T::state_type;
  using r_t        = typename T::residual_type;
  using registry_t = Registry<TagA, TagB, state_t, r_t>;
  registry_t reg{system.createState(), system.createResidual()};
  return Foo1<registry_t>(std::move(reg));
}

TEST(something, caseA)
{
  MyObject object;
  auto reg = createRegistry(object);
  Foo1<decltype(reg)> fs2(std::move(reg));
  fs2.dummy();
  std::cout << "\n";
}

TEST(something, caseB)
{
  MyObject object;
  auto fs = create1a(object);
  fs.dummy();
  std::cout << "\n";
}

this prints:

[ RUN      ] something.caseA
ops
CONSTR-block ext = 15
CONSTR-tpetra ext = 75
ops
DUMMY-block ext = 15
DUMMY-tpetra ext = 75

[       OK ] something.caseA (0 ms)
[ RUN      ] something.caseB
ops
CONSTR-block ext = 15
CONSTR-tpetra ext = 75
ops
DUMMY-block ext = 15
DUMMY-tpetra ext = 140701992916208
fnrizzi commented 1 year ago

caseA behaves as expected, caseB does not

fnrizzi commented 1 year ago

Simplified to

#include <gtest/gtest.h>
#include <Tpetra_BlockVector.hpp>
#include <Tpetra_BlockMultiVector.hpp>
#include <Tpetra_Map.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Tpetra_Map_decl.hpp>

using tcomm = Teuchos::Comm<int>;
using map_t = Tpetra::Map<>;
using vec_type = Tpetra::BlockVector<>;

class MyObject{
public:
  using state_type = vec_type;
  using residual_type = vec_type;

private:
  int rank_;
  int numProc_;
  const int localSize_ = 15;
  const int blockSize_ = 5;
  int numGlobalEntries_;
  Teuchos::RCP<const tcomm> comm_;
  map_t contigMap_;

public:
  MyObject(){
    comm_ = Teuchos::rcp (new Teuchos::MpiComm<int>(MPI_COMM_WORLD));
    rank_ = comm_->getRank();
    numProc_ = comm_->getSize();
    numGlobalEntries_ = localSize_*numProc_;
    contigMap_ = map_t(numGlobalEntries_, 0, comm_);
  }

  state_type createState() const{
    state_type V(contigMap_, blockSize_);
    V.putScalar(0);
    return V;
  }
};

template<class DT1, class DT2>
struct Registry
{
  DT1 d1_;
  DT2 d2_;

  DT1 & getA(){ return d1_; }
  DT2 & getB(){ return d2_; }
};

template<class T>
auto createRegistry(const T & system)
{
  using state_t    = typename T::state_type;
  using r_t        = typename T::residual_type;
  using registry_t = Registry<state_t, r_t>;
  registry_t reg{system.createState(), system.createState()};
  return reg;
}

void print(int rank, const std::string & s1, const std::string & s2, vec_type r)
{
  auto r_tpmv  = r.getVectorView();
  std::size_t r_0 = r.getMap()->getGlobalNumElements();
  std::size_t r_tpmv_0 = r_tpmv.getGlobalLength();

  if (rank==0){
    std::cout << "ops\n";
    std::cout << s1 << "ext = " << r_0 << "\n";
    std::cout << s2 << "ext = " << r_tpmv_0 << "\n";
  }
}

template<class RegistryType>
class Foo{
  int rank_;
  RegistryType reg_;
public:
  Foo(RegistryType && reg) : reg_(std::move(reg))
  {
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
    auto & r = reg_.getA();
    print(rank_, "CONSTR-block-A ", "CONSTR-tpetra-A ", r);
    auto & r2 = reg_.getB();
    print(rank_, "CONSTR-block-B ", "CONSTR-tpetra-B ", r2);
  }

  void dummy() {
    auto & r = reg_.getA();
    print(rank_, "DUMMY-block-A ", "DUMMY-tpetra-A ", r);
    auto & r2 = reg_.getB();
    print(rank_, "DUMMY-block-B ", "DUMMY-tpetra-B ", r2);
  }
};

template<class T>
auto create1a(const T& system)
{
  using state_t    = typename T::state_type;
  using r_t        = typename T::residual_type;
  using registry_t = Registry<state_t, r_t>;
  registry_t reg{system.createState(), system.createState()};
  return Foo<registry_t>(std::move(reg));
}

TEST(something, caseA)
{
  MyObject object;
  auto reg = createRegistry(object);
  Foo<decltype(reg)> fs2(std::move(reg));
  fs2.dummy();
}

TEST(something, caseB)
{
  MyObject object;
  auto fs = create1a(object);
  fs.dummy();
}

this prints:

[ RUN      ] something.caseA
ops
CONSTR-block-A ext = 15
CONSTR-tpetra-A ext = 75
ops
CONSTR-block-B ext = 15
CONSTR-tpetra-B ext = 75
ops
DUMMY-block-A ext = 15
DUMMY-tpetra-A ext = 75
ops
DUMMY-block-B ext = 15
DUMMY-tpetra-B ext = 75
[       OK ] something.caseA (1 ms)
[ RUN      ] something.caseB
ops
CONSTR-block-A ext = 15
CONSTR-tpetra-A ext = 75
ops
CONSTR-block-B ext = 15
CONSTR-tpetra-B ext = 75
ops
DUMMY-block-A ext = 15
DUMMY-tpetra-A ext = 1
ops
DUMMY-block-B ext = 15
DUMMY-tpetra-B ext = 75
fnrizzi commented 1 year ago

(no issues whatsoever if i use std::vector instead of tpetra block but it is not a fair comparison)

fnrizzi commented 1 year ago

see https://github.com/trilinos/Trilinos/issues/11931

fnrizzi commented 1 year ago

currently bypassed using workaround