lix19937 / tensorrt-insight

deep insight tensorrt
1 stars 0 forks source link

nonzero impl #17

Open lix19937 opened 3 weeks ago

lix19937 commented 3 weeks ago

#include <iostream>
#include <iterator>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>

// Reference: https://forums.developer.nvidia.com/t/all-non-zero-element-indexes/164658
// Converting between raw device pointers and thrust::device_ptr:
//     thrust::device_ptr<int> dev_ptr = thrust::device_pointer_cast(raw_ptr);
//     thrust::device_ptr<int> dev_ptr = thrust::device_malloc<int>(N);
//     int * raw_ptr = thrust::raw_pointer_cast(dev_ptr);

/// Unary predicate that selects every value different from THRESHOLD.
///
/// With the default THRESHOLD of 0 this is a plain "is nonzero" test; any other
/// THRESHOLD turns it into "is not equal to THRESHOLD".
///
/// The call operator is const so the functor can be invoked through const
/// objects and const references, and it is callable from host and device code.
template <int THRESHOLD = 0>
struct is_nonzero {
  /// @param x value to test
  /// @return true when x differs from THRESHOLD, false otherwise
  __host__ __device__ bool operator()(const int x) const { return x != THRESHOLD; }
};

int main() {
  // this example computes indices for all the nonzero values in a sequence

  // sequence of zero and nonzero values   40000*4
  thrust::device_vector<int> stencil(8);
  stencil[0] = 0;
  stencil[1] = 1;
  stencil[2] = 1;
  stencil[3] = 0;
  stencil[4] = 0;
  stencil[5] = 1;
  stencil[6] = 0;
  stencil[7] = 1;

  thrust::copy(stencil.begin(), stencil.end(), std::ostream_iterator<int>(std::cout, " "));
  std::cout << "\n";

  // storage for the nonzero indices
  thrust::device_vector<int> indices(8);

  // compute indices of nonzero elements
  typedef thrust::device_vector<int>::iterator IndexIterator;

  // use make_counting_iterator to define the sequence [0, 8)
  IndexIterator indices_end = thrust::copy_if(
      thrust::device,
      thrust::make_counting_iterator(0),
      thrust::make_counting_iterator(6),
      stencil.begin(),
      indices.begin(),
      is_nonzero<0>());
  // thrust::identity<int>());

  // indices now contains [1,2,5,7]

  thrust::copy(indices.begin(), indices.end(), std::ostream_iterator<int>(std::cout, " "));
  std::cout << "\n";
  return 0;
}

// Build: nvcc -std=c++14 -arch=sm_86 ./test_nonzero.cu
lix19937 commented 1 week ago
See also TensorRT's INonZeroLayer: https://github.com/NVIDIA/TensorRT/issues/3550