Open lix19937 opened 3 weeks ago
#include <iostream>
#include <iterator>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>

// Reference: https://forums.developer.nvidia.com/t/all-non-zero-element-indexes/164658
//
// Handy conversions between raw device pointers and thrust::device_ptr:
//   thrust::device_ptr<int> dev_ptr = thrust::device_pointer_cast(raw_ptr);
//   thrust::device_ptr<int> dev_ptr = thrust::device_malloc<int>(N);
//   int* raw_ptr = thrust::raw_pointer_cast(dev_ptr);

/// Unary predicate that is true for every value different from THRESHOLD.
///
/// With THRESHOLD == 0 this selects the nonzero elements, which is how it is
/// used below. The call operator is const because the functor has no state.
template <int THRESHOLD>
struct is_nonzero {
  __host__ __device__ bool operator()(const int x) const {
    return x != THRESHOLD;
  }
};

/// Computes and prints the indices of all nonzero values in a small sequence.
///
/// Expected output:
///   0 1 1 0 0 1 0 1
///   1 2 5 7
int main() {
  // Sequence of zero and nonzero values (the original note mentions 40000*4
  // as a size of interest; this demo uses 8 elements).
  // Built from a host array so the data is handed to the device vector in one
  // construction instead of one element assignment at a time.
  const int host_stencil[] = {0, 1, 1, 0, 0, 1, 0, 1};
  thrust::device_vector<int> stencil(std::begin(host_stencil),
                                     std::end(host_stencil));
  const int count = static_cast<int>(stencil.size());

  thrust::copy(stencil.begin(), stencil.end(),
               std::ostream_iterator<int>(std::cout, " "));
  std::cout << "\n";

  // Storage for the nonzero indices; at most one index per input element.
  thrust::device_vector<int> indices(stencil.size());

  using IndexIterator = thrust::device_vector<int>::iterator;

  // The counting iterators define the candidate indices [0, count). The
  // stencil overload of copy_if keeps index i when the predicate holds for
  // stencil[i]. The range must cover the whole stencil, otherwise trailing
  // elements are never examined.
  const IndexIterator indices_end = thrust::copy_if(
      thrust::device,
      thrust::make_counting_iterator(0),
      thrust::make_counting_iterator(count),
      stencil.begin(),
      indices.begin(),
      is_nonzero<0>());  // thrust::identity<int>() is an alternative here

  // Only [indices.begin(), indices_end) holds results: [1, 2, 5, 7].
  // Printing up to indices.end() would also show the unused slots.
  thrust::copy(indices.begin(), indices_end,
               std::ostream_iterator<int>(std::cout, " "));
  std::cout << "\n";

  return 0;
}

// Build: nvcc -std=c++14 -arch=sm_86 ./test_nonzero.cu
INonZeroLayer https://github.com/NVIDIA/TensorRT/issues/3550