oneapi-src / oneDNN

oneAPI Deep Neural Network Library (oneDNN)
https://uxlfoundation.org
Apache License 2.0

Assertion `dynamic_cast<derived_type>(base) == base' failed #1901

Closed. wangzy0327 closed this issue 5 months ago.

wangzy0327 commented 6 months ago

Summary

I tried to run the matmul primitive, following the official example.

It failed with the following assertion:

```
derived_type dnnl::impl::utils::downcast(base_type *)
[derived_type = dnnl::impl::sycl::sycl_buffer_memory_storage_t *, base_type = dnnl::impl::memory_storage_t]:
Assertion `dynamic_cast<derived_type>(base) == base' failed.
Aborted (core dumped)
```

Version

oneDNN version: v3.2 (githash 04b180b)
SYCL compiler: 2022-06 release (githash 4043dda)
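
As a side note, the version and runtimes the binary actually links against can be confirmed at run time with `dnnl_version()` from the C API. The snippet below is illustrative and not part of the original report.

```
// Illustrative check: print the oneDNN version and runtime configuration
// of the library this binary is linked against.
#include <cstdio>
#include "oneapi/dnnl/dnnl.h"

int main() {
    const dnnl_version_t *v = dnnl_version();
    std::printf("oneDNN %d.%d.%d (hash %s), cpu_runtime=%u, gpu_runtime=%u\n",
            v->major, v->minor, v->patch, v->hash,
            (unsigned)v->cpu_runtime, (unsigned)v->gpu_runtime);
    return 0;
}
```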

Environment

Steps to reproduce

The reproducer program is as follows.

matmul.cpp
```
/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <algorithm>
#include <cmath>
#include <iostream>
#include <string>
#include <vector>

#include "example_utils.hpp"
#include "oneapi/dnnl/dnnl.hpp"

// compile
// clang++ -fsycl -fsycl-targets=mlisa-cambricon-bang matmul.cpp -o matmul.out -ldnnl
// execute
// ./matmul.out gpu

using namespace dnnl;

using tag = memory::format_tag;
using dt = memory::data_type;

void matmul_example(dnnl::engine::kind engine_kind) {
    // Create execution dnnl::engine.
    dnnl::engine engine(engine_kind, 0);

    // Create dnnl::stream.
    dnnl::stream engine_stream(engine);

    // Tensor dimensions.
    const memory::dim MB = 3, // batch size
            M = 128, K = 256, N = 512;

    // Source (src), weights, bias, and destination (dst) tensors dimensions.
    memory::dims src_dims = {MB, M, K};
    memory::dims weights_dims = {MB, K, N};
    memory::dims bias_dims = {1, 1, N};
    memory::dims dst_dims = {MB, M, N};

    // Allocate buffers.
    std::vector<float> src_data(product(src_dims));
    std::vector<float> weights_data(product(weights_dims));
    std::vector<float> bias_data(product(bias_dims));
    std::vector<float> dst_data(product(dst_dims));

    // Initialize src, weights, bias.
    std::generate(src_data.begin(), src_data.end(), []() {
        static int i = 0;
        return std::cos(i++ / 10.f);
    });
    std::generate(weights_data.begin(), weights_data.end(), []() {
        static int i = 0;
        return std::sin(i++ * 2.f);
    });
    std::generate(bias_data.begin(), bias_data.end(), []() {
        static int i = 0;
        return std::tanh(float(i++));
    });

    sycl::buffer<float, 3> buffer_src(
            static_cast<float *>(src_data.data()), sycl::range<3>(MB, M, K));
    sycl::buffer<float, 3> buffer_weights(
            static_cast<float *>(weights_data.data()), sycl::range<3>(MB, K, N));
    sycl::buffer<float, 3> buffer_bias(
            static_cast<float *>(bias_data.data()), sycl::range<3>(1, 1, N));
    sycl::buffer<float, 3> buffer_dst(
            static_cast<float *>(dst_data.data()), sycl::range<3>(MB, M, N));

    // Create memory descriptors and memory objects for src, weights, bias, and
    // dst.
    auto src_md = memory::desc(src_dims, dt::f32, tag::abc);
    auto weights_md = memory::desc(weights_dims, dt::f32, tag::abc);
    auto bias_md = memory::desc(bias_dims, dt::f32, tag::abc);
    auto dst_md = memory::desc(dst_dims, dt::f32, tag::abc);

    auto src_mem = memory(src_md, engine);
    auto weights_mem = memory(weights_md, engine);
    auto bias_mem = memory(bias_md, engine);
    auto dst_mem = memory(dst_md, engine);

    // auto src_mem = dnnl::sycl_interop::make_memory(src_md, engine, buffer_src);
    // auto weights_mem = dnnl::sycl_interop::make_memory(weights_md, engine, buffer_weights);
    // auto bias_mem = dnnl::sycl_interop::make_memory(bias_md, engine, buffer_bias);
    // auto dst_mem = dnnl::sycl_interop::make_memory(dst_md, engine, buffer_dst);

    // Write data to memory object's handles.
    write_to_dnnl_memory(src_data.data(), src_mem);
    write_to_dnnl_memory(weights_data.data(), weights_mem);
    write_to_dnnl_memory(bias_data.data(), bias_mem);

    // Create primitive post-ops (ReLU).
    // const float alpha = 0.f;
    // const float beta = 0.f;
    // post_ops matmul_ops;
    // matmul_ops.append_eltwise(algorithm::eltwise_relu, alpha, beta);
    // primitive_attr matmul_attr;
    // matmul_attr.set_post_ops(matmul_ops);

    auto matmul_d = matmul::desc(src_md, weights_md, bias_md, dst_md);

    // Create primitive descriptor.
    // auto matmul_pd = matmul::primitive_desc(
    //         engine, src_md, weights_md, bias_md, dst_md, matmul_attr);
    auto matmul_pd = matmul::primitive_desc(matmul_d, engine);

    // Create the primitive.
    auto matmul_prim = matmul(matmul_pd);

    // Primitive arguments.
    std::unordered_map<int, memory> matmul_args;
    matmul_args.insert({DNNL_ARG_SRC, src_mem});
    matmul_args.insert({DNNL_ARG_WEIGHTS, weights_mem});
    matmul_args.insert({DNNL_ARG_BIAS, bias_mem});
    matmul_args.insert({DNNL_ARG_DST, dst_mem});

    // Primitive execution: matrix multiplication with ReLU.
    matmul_prim.execute(engine_stream, matmul_args);

    // Wait for the computation to finalize.
    engine_stream.wait();

    // Read data from memory object's handle.
    read_from_dnnl_memory(dst_data.data(), dst_mem);
}

int main(int argc, char **argv) {
    return handle_example_errors(matmul_example, parse_engine_kind(argc, argv));
}
```
example_utils.hpp
```
/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef EXAMPLE_UTILS_HPP
#define EXAMPLE_UTILS_HPP

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>

#include "dnnl.hpp"
#include "dnnl_debug.h"

#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
#include "dnnl_ocl.hpp"
#elif DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
#include "dnnl_sycl.hpp"
#endif

#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP

#ifdef _MSC_VER
#define PRAGMA_MACRo(x) __pragma(x)
#define PRAGMA_MACRO(x) PRAGMA_MACRo(x)
#else
#define PRAGMA_MACRo(x) _Pragma(#x)
#define PRAGMA_MACRO(x) PRAGMA_MACRo(x)
#endif

// MSVC doesn't support collapse clause in omp parallel
#if defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#define collapse(x)
#endif

#define PRAGMA_OMP_PARALLEL_FOR_COLLAPSE(n) PRAGMA_MACRO(omp parallel for collapse(n))
#else // DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP
#define PRAGMA_OMP_PARALLEL_FOR_COLLAPSE(n)
#endif

dnnl::engine::kind validate_engine_kind(dnnl::engine::kind akind) {
    // Checking if a GPU exists on the machine
    if (akind == dnnl::engine::kind::gpu) {
        if (dnnl::engine::get_count(dnnl::engine::kind::gpu) == 0) {
            std::cout << "Application couldn't find GPU, please run with CPU "
                         "instead.\n";
            exit(0);
        }
    }
    return akind;
}

// Exception class to indicate that the example uses a feature that is not
// available on the current systems. It is not treated as an error then, but
// just notifies a user.
struct example_allows_unimplemented : public std::exception {
    example_allows_unimplemented(const char *message) noexcept
        : message(message) {}
    const char *what() const noexcept override { return message; }
    const char *message;
};

inline const char *engine_kind2str_upper(dnnl::engine::kind kind);

// Runs example function with signature void() and catches errors.
// Returns `0` on success, `1` on oneDNN error, and `2` on example error.
inline int handle_example_errors(
        std::initializer_list<dnnl::engine::kind> engine_kinds,
        std::function<void()> example) {
    int exit_code = 0;

    try {
        example();
    } catch (example_allows_unimplemented &e) {
        std::cout << e.message << std::endl;
        exit_code = 0;
    } catch (dnnl::error &e) {
        std::cout << "oneDNN error caught: " << std::endl
                  << "\tStatus: " << dnnl_status2str(e.status) << std::endl
                  << "\tMessage: " << e.what() << std::endl;
        exit_code = 1;
    } catch (std::exception &e) {
        std::cout << "Error in the example: " << e.what() << "." << std::endl;
        exit_code = 2;
    }

    std::string engine_kind_str;
    for (auto it = engine_kinds.begin(); it != engine_kinds.end(); ++it) {
        if (it != engine_kinds.begin()) engine_kind_str += "/";
        engine_kind_str += engine_kind2str_upper(*it);
    }

    std::cout << "Example " << (exit_code ? "failed" : "passed") << " on "
              << engine_kind_str << "." << std::endl;
    return exit_code;
}

// Same as above, but for functions with signature
// void(dnnl::engine::kind engine_kind, int argc, char **argv).
inline int handle_example_errors(
        std::function<void(dnnl::engine::kind, int, char **)> example,
        dnnl::engine::kind engine_kind, int argc, char **argv) {
    return handle_example_errors(
            {engine_kind}, [&]() { example(engine_kind, argc, argv); });
}

// Same as above, but for functions with signature void(dnnl::engine::kind).
inline int handle_example_errors(
        std::function<void(dnnl::engine::kind)> example,
        dnnl::engine::kind engine_kind) {
    return handle_example_errors(
            {engine_kind}, [&]() { example(engine_kind); });
}

inline dnnl::engine::kind parse_engine_kind(
        int argc, char **argv, int extra_args = 0) {
    // Returns default engine kind, i.e. CPU, if none given
    if (argc == 1) {
        return validate_engine_kind(dnnl::engine::kind::cpu);
    } else if (argc <= extra_args + 2) {
        std::string engine_kind_str = argv[1];
        // Checking the engine type, i.e. CPU or GPU
        if (engine_kind_str == "cpu") {
            return validate_engine_kind(dnnl::engine::kind::cpu);
        } else if (engine_kind_str == "gpu") {
            return validate_engine_kind(dnnl::engine::kind::gpu);
        }
    }

    // If all above fails, the example should be run properly
    std::cout << "Inappropriate engine kind." << std::endl
              << "Please run the example like this: " << argv[0] << " [cpu|gpu]"
              << (extra_args ? " [extra arguments]" : "") << "." << std::endl;
    exit(1);
}

inline const char *engine_kind2str_upper(dnnl::engine::kind kind) {
    if (kind == dnnl::engine::kind::cpu) return "CPU";
    if (kind == dnnl::engine::kind::gpu) return "GPU";
    assert(!"not expected");
    return "<Unknown engine>";
}

inline dnnl::memory::dim product(const dnnl::memory::dims &dims) {
    return std::accumulate(dims.begin(), dims.end(), (dnnl::memory::dim)1,
            std::multiplies<dnnl::memory::dim>());
}

// Read from memory, write to handle
inline void read_from_dnnl_memory(void *handle, dnnl::memory &mem) {
    dnnl::engine eng = mem.get_engine();
    size_t size = mem.get_desc().get_size();

    if (!handle) throw std::runtime_error("handle is nullptr.");

#ifdef DNNL_WITH_SYCL
    bool is_cpu_sycl = (DNNL_CPU_RUNTIME == DNNL_RUNTIME_SYCL
            && eng.get_kind() == dnnl::engine::kind::cpu);
    bool is_gpu_sycl = (DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
            && eng.get_kind() == dnnl::engine::kind::gpu);
    if (is_cpu_sycl || is_gpu_sycl) {
        auto mkind = dnnl::sycl_interop::get_memory_kind(mem);
        if (mkind == dnnl::sycl_interop::memory_kind::buffer) {
            auto buffer = dnnl::sycl_interop::get_buffer<uint8_t>(mem);
            auto src = buffer.get_access<::sycl::access::mode::read>();
            uint8_t *src_ptr = src.get_pointer();
            if (!src_ptr)
                throw std::runtime_error("get_pointer returned nullptr.");
            for (size_t i = 0; i < size; ++i)
                ((uint8_t *)handle)[i] = src_ptr[i];
        } else {
            assert(mkind == dnnl::sycl_interop::memory_kind::usm);
            uint8_t *src_ptr = (uint8_t *)mem.get_data_handle();
            if (!src_ptr)
                throw std::runtime_error("get_data_handle returned nullptr.");
            if (is_cpu_sycl) {
                for (size_t i = 0; i < size; ++i)
                    ((uint8_t *)handle)[i] = src_ptr[i];
            } else {
                auto sycl_queue
                        = dnnl::sycl_interop::get_queue(dnnl::stream(eng));
                sycl_queue.memcpy(handle, src_ptr, size).wait();
            }
        }
        return;
    }
#endif
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
    if (eng.get_kind() == dnnl::engine::kind::gpu) {
        void *mapped_ptr = mem.map_data();
        if (mapped_ptr) std::memcpy(handle, mapped_ptr, size);
        mem.unmap_data(mapped_ptr);
        return;
    }
#endif

    if (eng.get_kind() == dnnl::engine::kind::cpu) {
        uint8_t *src = static_cast<uint8_t *>(mem.get_data_handle());
        if (!src) throw std::runtime_error("get_data_handle returned nullptr.");
        for (size_t i = 0; i < size; ++i)
            ((uint8_t *)handle)[i] = src[i];
        return;
    }

    assert(!"not expected");
}

// Read from handle, write to memory
inline void write_to_dnnl_memory(void *handle, dnnl::memory &mem) {
    dnnl::engine eng = mem.get_engine();
    size_t size = mem.get_desc().get_size();

    if (!handle) throw std::runtime_error("handle is nullptr.");

#ifdef DNNL_WITH_SYCL
    bool is_cpu_sycl = (DNNL_CPU_RUNTIME == DNNL_RUNTIME_SYCL
            && eng.get_kind() == dnnl::engine::kind::cpu);
    bool is_gpu_sycl = (DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
            && eng.get_kind() == dnnl::engine::kind::gpu);
    if (is_cpu_sycl || is_gpu_sycl) {
        auto mkind = dnnl::sycl_interop::get_memory_kind(mem);
        if (mkind == dnnl::sycl_interop::memory_kind::buffer) {
            auto buffer = dnnl::sycl_interop::get_buffer<uint8_t>(mem);
            auto dst = buffer.get_access<::sycl::access::mode::write>();
            uint8_t *dst_ptr = dst.get_pointer();
            if (!dst_ptr)
                throw std::runtime_error("get_pointer returned nullptr.");
            for (size_t i = 0; i < size; ++i)
                dst_ptr[i] = ((uint8_t *)handle)[i];
        } else {
            assert(mkind == dnnl::sycl_interop::memory_kind::usm);
            uint8_t *dst_ptr = (uint8_t *)mem.get_data_handle();
            if (!dst_ptr)
                throw std::runtime_error("get_data_handle returned nullptr.");
            if (is_cpu_sycl) {
                for (size_t i = 0; i < size; ++i)
                    dst_ptr[i] = ((uint8_t *)handle)[i];
            } else {
                auto sycl_queue
                        = dnnl::sycl_interop::get_queue(dnnl::stream(eng));
                sycl_queue.memcpy(dst_ptr, handle, size).wait();
            }
        }
        return;
    }
#endif
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
    if (eng.get_kind() == dnnl::engine::kind::gpu) {
        void *mapped_ptr = mem.map_data();
        if (mapped_ptr) std::memcpy(mapped_ptr, handle, size);
        mem.unmap_data(mapped_ptr);
        return;
    }
#endif

    if (eng.get_kind() == dnnl::engine::kind::cpu) {
        uint8_t *dst = static_cast<uint8_t *>(mem.get_data_handle());
        if (!dst) throw std::runtime_error("get_data_handle returned nullptr.");
        for (size_t i = 0; i < size; ++i)
            dst[i] = ((uint8_t *)handle)[i];
        return;
    }

    assert(!"not expected");
}

#endif
```
Compile command:

```
/home/wzy/sycl_workspace/build-cuda-2022-06-debug/bin/clang++ \
    -g -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_70 \
    matmul.cpp -o matmul-v32-ano.out -ldnnl
```
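
Since the assertion refers to `sycl_buffer_memory_storage_t`, one variation worth trying is to request an explicit SYCL memory kind when creating the memory objects, instead of relying on the default chosen by `memory(md, engine)`. The sketch below would replace the four `memory(..., engine)` lines in `matmul_example()` above; it assumes the failure is a buffer/USM memory-kind mismatch and is an untested suggestion, not a confirmed fix.

```
// Sketch only: let oneDNN allocate buffer-backed storage explicitly via the
// SYCL interop API. Assumes the crash comes from a buffer/USM mismatch.
#include "oneapi/dnnl/dnnl_sycl.hpp"

auto src_mem = dnnl::sycl_interop::make_memory(
        src_md, engine, dnnl::sycl_interop::memory_kind::buffer);
auto weights_mem = dnnl::sycl_interop::make_memory(
        weights_md, engine, dnnl::sycl_interop::memory_kind::buffer);
auto bias_mem = dnnl::sycl_interop::make_memory(
        bias_md, engine, dnnl::sycl_interop::memory_kind::buffer);
auto dst_mem = dnnl::sycl_interop::make_memory(
        dst_md, engine, dnnl::sycl_interop::memory_kind::buffer);

// write_to_dnnl_memory()/read_from_dnnl_memory() in example_utils.hpp handle
// both buffer- and USM-backed memory objects, so the rest of the example can
// stay unchanged.
```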

Observed behavior

The output of running the program is as follows.

```
matmul-v32.out: /home/wzy/sycl_workspace/oneDNN/src/common/utils.hpp:474: derived_type dnnl::impl::utils::downcast(base_type *) [derived_type = dnnl::impl::sycl::sycl_buffer_memory_storage_t *, base_type = dnnl::impl::memory_storage_t]: Assertion `dynamic_cast<derived_type>(base) == base' failed.
Aborted (core dumped)
```
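
For context, `dnnl::impl::utils::downcast` in `src/common/utils.hpp` is essentially a checked `static_cast`: in debug builds it asserts, via `dynamic_cast`, that the object really is of the requested derived type. A rough paraphrase (not the exact oneDNN source) is shown below; the assertion firing here means the `memory_storage_t` handed to the SYCL GPU code path is not a `sycl_buffer_memory_storage_t`, i.e. the memory object ended up with a different (for example USM) storage than that path expects.

```
#include <cassert>

// Rough paraphrase of the helper named in the assertion, not the exact source.
template <typename derived_type, typename base_type>
inline derived_type downcast(base_type *base) {
    // Debug-only sanity check: the pointed-to object must really be of
    // derived_type, otherwise this assertion fires (as in this issue).
    assert(dynamic_cast<derived_type>(base) == base);
    return static_cast<derived_type>(base);
}
```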

Expected behavior

The example should run to completion and print "Example passed on GPU." instead of aborting with this assertion.

dzarukin commented 6 months ago

Hi @wangzy0327, have you tried the latest version of oneDNN?

wangzy0327 commented 6 months ago

@dzarukin I need to pin a stable version of oneDNN, and it has to be built in debug mode. I have tried to compile oneDNN v3.2 and v3.4 in debug mode on the NVIDIA SYCL platform, but it failed, similar to issue-5980.

dzarukin commented 6 months ago

> @dzarukin I need to pin a stable version of oneDNN, and it has to be built in debug mode. I have tried to compile oneDNN v3.2 and v3.4 in debug mode on the NVIDIA SYCL platform, but it failed, similar to issue-5980.

It doesn't have to be the latest and greatest version of the compiler. Could you try an older one, please? I'd expect one from half a year ago to still be able to compile the latest oneDNN.