Closed psychocoderHPC closed 2 months ago
This decreases the register footprint to 64/71 for the example bufferCopy.
diff --git a/include/alpaka/exec/UniformElements.hpp b/include/alpaka/exec/UniformElements.hpp
index b7f6cd2ee54..cf06e312a1e 100644
--- a/include/alpaka/exec/UniformElements.hpp
+++ b/include/alpaka/exec/UniformElements.hpp
@@ -134,7 +134,6 @@ namespace alpaka
, extent_{extent}
, first_{std::min(first, extent)}
, index_{first_}
- , range_{std::min(first + elements, extent)}
{
}
@@ -149,20 +148,18 @@ namespace alpaka
{
// increment the index along the elements processed by the current thread
++index_;
- if(index_ < range_)
- return *this;
-
- // increment the thread index with the grid stride
- first_ += stride_;
- index_ = first_;
- range_ = std::min(first_ + elements_, extent_);
- if(index_ < extent_)
- return *this;
-
- // the iterator has reached or passed the end of the extent, clamp it to the extent
- first_ = extent_;
- index_ = extent_;
- range_ = extent_;
+ if(index_ >= std::min(first_ + elements_, extent_))
+ {
+ // increment the thread index with the grid stride
+ first_ += stride_;
+ index_ = first_;
+ if(index_ >= extent_)
+ {
+ // the iterator has reached or passed the end of the extent, clamp it to the extent
+ first_ = extent_;
+ index_ = extent_;
+ }
+ }
return *this;
}
@@ -192,7 +189,6 @@ namespace alpaka
// modified by the pre/post-increment operator
Idx first_;
Idx index_;
- Idx range_;
};
private:
https://github.com/alpaka-group/alpaka/pull/2377#issuecomment-2348309816 showed that the register footprint increased significantly when the examples were switched to the new iteration scheme. The reason is that the iterator carries a large amount of state, which drives up register usage.
This issue should evaluate possible optimizations to reduce the register footprint of the iterator.