Open ziyuhuang123 opened 1 month ago
I tried changing the shared-memory descriptor code here:
/// Build the shared-memory matrix descriptor for the data at `smem_ptr`.
///
/// @tparam PointerType  any pointer type accepted by `__cvta_generic_to_shared`.
/// @param  smem_ptr     generic pointer to the start of the operand.
/// @return a `GmmaDescriptor` whose start address, layout type, leading/stride
///         byte offsets and base offset bit-fields have been filled in.
///
/// NOTE(review): the "unused" remark on the leading byte offset and the stride
/// value of 64 look like they were chosen for the 128B-swizzle configuration;
/// confirm they are still valid now that swizzling is disabled.
template <class PointerType>
DEVICE GmmaDescriptor make_smem_desc(PointerType smem_ptr) {
  /// Layout selector: 0x0 disables swizzling, matching Swizzle<0,4,3>.
  /// (The earlier configuration used 0x1, i.e. 128B swizzle for Swizzle<3,4,3>.)
  constexpr uint32_t kLayoutNoSwizzle = 0x0;
  /// Leading byte offset; marked as not used by the original author.
  constexpr uint32_t kLeadingByteOffset = 0x1;
  /// Number of 128-bit rows between two core matrices.
  constexpr uint32_t kStrideByteOffset = 64;
  constexpr uint32_t kBaseOffset = 0x0;

  /// Shared-space address of the operand, narrowed to 32 bits.
  const uint32_t smem_addr =
      static_cast<uint32_t>(__cvta_generic_to_shared(smem_ptr));

  GmmaDescriptor desc;
  desc.bitfield.base_offset_ = kBaseOffset;
  desc.bitfield.stride_byte_offset_ = kStrideByteOffset;
  desc.bitfield.leading_byte_offset_ = kLeadingByteOffset;
  desc.bitfield.layout_type_ = kLayoutNoSwizzle;
  /// The address field stores the address with its low 4 bits dropped.
  desc.bitfield.start_address_ = smem_addr >> 4;
  return desc;
}
But the result is still incorrect.
I see this:
And I changed this to 0:
But I get an error:
Any suggestion? Thanks!