Open skyline75489 opened 4 days ago
Seems like an iOS-only issue. The same model works on macOS.
Update: this seems to be ARM64 only. On x86_64 iOS simulator, there's no such crash.
Update: I'm also hitting this on win-x64, with Phi3.5-vision model:
Exception thrown at 0x00007FFC186E53F9 (onnxruntime.dll) in phi3v.exe: 0xC0000005: Access violation reading location 0x000001E3AC256080.
> onnxruntime.dll!onnxruntime::contrib::RunRotaryEmbedding::__l2::<lambda>(__int64 begin, __int64 end) Line 90 C++
[External Code]
onnxruntime.dll!onnxruntime::concurrency::ThreadPool::ParallelForFixedBlockSizeScheduling::__l10::<lambda_1>::operator()(unsigned int idx) Line 433 C++
[External Code]
onnxruntime.dll!onnxruntime::concurrency::ThreadPoolTempl<onnxruntime::Env>::RunInParallel(std::function<void __cdecl(unsigned int)> fn, unsigned int n, __int64 block_size) Line 1310 C++
onnxruntime.dll!onnxruntime::concurrency::ThreadPool::RunInParallel(std::function<void __cdecl(unsigned int)> fn, unsigned int n, __int64 block_size) Line 525 C++
onnxruntime.dll!onnxruntime::concurrency::ThreadPool::ParallelForFixedBlockSizeScheduling(__int64 total, __int64 block_size, const std::function<void __cdecl(__int64,__int64)> & fn) Line 440 C++
onnxruntime.dll!onnxruntime::concurrency::ThreadPool::ParallelFor(__int64 n, const onnxruntime::TensorOpCost & c, const std::function<void __cdecl(__int64,__int64)> & f) Line 626 C++
onnxruntime.dll!onnxruntime::concurrency::ThreadPool::TryParallelFor(onnxruntime::concurrency::ThreadPool * tp, __int64 total, const onnxruntime::TensorOpCost & cost_per_unit, const std::function<void __cdecl(__int64,__int64)> & fn) Line 703 C++
onnxruntime.dll!onnxruntime::concurrency::ThreadPool::TryParallelFor(onnxruntime::concurrency::ThreadPool * tp, __int64 total, double cost_per_unit, const std::function<void __cdecl(__int64,__int64)> & fn) Line 251 C++
onnxruntime.dll!onnxruntime::contrib::RunRotaryEmbedding<float>(onnxruntime::concurrency::ThreadPool * tp, onnxruntime::contrib::rotary_embedding_helper::RotaryParameters parameters, const float * input, const __int64 * position_ids, const float * cos_cache, const float * sin_cache, float * output, bool interleaved) Line 58 C++
onnxruntime.dll!onnxruntime::contrib::GroupQueryAttention<float>::Compute(onnxruntime::OpKernelContext * context) Line 152 C++
onnxruntime.dll!onnxruntime::ExecuteKernel(onnxruntime::StreamExecutionContext & ctx, unsigned __int64 idx, unsigned __int64 stream_idx, const bool & terminate_flag, onnxruntime::SessionScope & session_scope) Line 495 C++
onnxruntime.dll!onnxruntime::LaunchKernelStep::Execute(onnxruntime::StreamExecutionContext & ctx, unsigned __int64 stream_idx, onnxruntime::SessionScope & session_scope, const bool & terminate_flag, bool & continue_flag) Line 73 C++
onnxruntime.dll!onnxruntime::RunSince(unsigned __int64 stream_idx, onnxruntime::StreamExecutionContext & ctx, onnxruntime::SessionScope & session_scope, const bool & terminate_flag, unsigned __int64 since) Line 222 C++
onnxruntime.dll!onnxruntime::ExecuteThePlan::__l23::<lambda>() Line 589 C++
[External Code]
onnxruntime.dll!onnxruntime::concurrency::ThreadPool::Schedule(onnxruntime::concurrency::ThreadPool * tp, std::function<void __cdecl(void)> fn) Line 233 C++
onnxruntime.dll!onnxruntime::ExecuteThePlan(const onnxruntime::SessionState & session_state, gsl::span<int const ,-1> feed_mlvalue_idxs, gsl::span<OrtValue const ,-1> feeds, gsl::span<int const ,-1> fetch_mlvalue_idxs, std::vector<OrtValue,std::allocator<OrtValue>> & fetches, const std::unordered_map<unsigned __int64,std::function<onnxruntime::common::Status __cdecl(onnxruntime::TensorShape const &,OrtDevice const &,OrtValue &,bool &)>,std::hash<unsigned __int64>,std::equal_to<unsigned __int64>,std::allocator<std::pair<unsigned __int64 const ,std::function<onnxruntime::common::Status __cdecl(onnxruntime::TensorShape const &,OrtDevice const &,OrtValue &,bool &)>>>> & fetch_allocators, const onnxruntime::logging::Logger & logger, const onnxruntime::DeviceStreamCollection * device_streams, const bool & terminate_flag, const bool only_execute_path_to_fetches, bool single_thread_mode) Line 588 C++
onnxruntime.dll!onnxruntime::utils::ExecuteGraphImpl(const onnxruntime::SessionState & session_state, const onnxruntime::FeedsFetchesManager & feeds_fetches_manager, gsl::span<OrtValue const ,-1> feeds, std::vector<OrtValue,std::allocator<OrtValue>> & fetches, const std::unordered_map<unsigned __int64,std::function<onnxruntime::common::Status __cdecl(onnxruntime::TensorShape const &,OrtDevice const &,OrtValue &,bool &)>,std::hash<unsigned __int64>,std::equal_to<unsigned __int64>,std::allocator<std::pair<unsigned __int64 const ,std::function<onnxruntime::common::Status __cdecl(onnxruntime::TensorShape const &,OrtDevice const &,OrtValue &,bool &)>>>> & fetch_allocators, ExecutionMode execution_mode, const bool & terminate_flag, const onnxruntime::logging::Logger & logger, onnxruntime::DeviceStreamCollection * device_stream_collection, const bool only_execute_path_to_fetches, onnxruntime::Stream * parent_stream) Line 649 C++
onnxruntime.dll!onnxruntime::utils::ExecuteGraph(const onnxruntime::SessionState & session_state, onnxruntime::FeedsFetchesManager & feeds_fetches_manager, gsl::span<OrtValue const ,-1> feeds, std::vector<OrtValue,std::allocator<OrtValue>> & fetches, ExecutionMode execution_mode, const bool & terminate_flag, const onnxruntime::logging::Logger & logger, onnxruntime::DeviceStreamCollectionHolder & device_stream_collection_holder, bool only_execute_path_to_fetches, onnxruntime::Stream * parent_stream) Line 752 C++
onnxruntime.dll!onnxruntime::utils::ExecuteGraph(const onnxruntime::SessionState & session_state, onnxruntime::FeedsFetchesManager & feeds_fetches_manager, gsl::span<OrtValue const ,-1> feeds, std::vector<OrtValue,std::allocator<OrtValue>> & fetches, ExecutionMode execution_mode, const OrtRunOptions & run_options, onnxruntime::DeviceStreamCollectionHolder & device_stream_collection_holder, const onnxruntime::logging::Logger & logger) Line 774 C++
onnxruntime.dll!onnxruntime::InferenceSession::Run(const OrtRunOptions & run_options, gsl::span<std::string const ,-1> feed_names, gsl::span<OrtValue const ,-1> feeds, gsl::span<std::string const ,-1> output_names, std::vector<OrtValue,std::allocator<OrtValue>> * p_fetches, const std::vector<OrtDevice,std::allocator<OrtDevice>> * p_fetches_device_info) Line 2592 C++
onnxruntime.dll!onnxruntime::InferenceSession::Run(const OrtRunOptions & run_options, gsl::span<char const * const,-1> feed_names, gsl::span<OrtValue const * const,-1> feeds, gsl::span<char const * const,-1> fetch_names, gsl::span<OrtValue *,-1> fetches) Line 2720 C++
onnxruntime.dll!OrtApis::Run(OrtSession * sess, const OrtRunOptions * run_options, const char * const * input_names, const OrtValue * const * input, unsigned __int64 input_len, const char * const * output_names, unsigned __int64 output_names_len, OrtValue * * output) Line 831 C++
[Inline Frame] onnxruntime-genai.dll!OrtSession::Run(const OrtRunOptions *) Line 819 C++
onnxruntime-genai.dll!Generators::State::Run(OrtSession & session, OrtRunOptions & run_options, int new_batch_size) Line 69 C++
onnxruntime-genai.dll!Generators::DecoderState::Run(int current_length, Generators::RoamingArray<int> next_tokens, Generators::RoamingArray<int> next_indices) Line 131 C++
onnxruntime-genai.dll!Generators::MultiModalPipelineState::Run(int current_length, Generators::RoamingArray<int> next_tokens, Generators::RoamingArray<int> next_indices) Line 173 C++
onnxruntime-genai.dll!Generators::Generator::ComputeLogits() Line 161 C++
onnxruntime-genai.dll!OgaGenerator_ComputeLogits(OgaGenerator * generator) Line 257 C++
phi3v.exe!OgaGenerator::ComputeLogits() Line 240 C++
phi3v.exe!CXX_API(const char * model_path) Line 80 C++
phi3v.exe!main(int argc, char * * argv) Line 206 C++
[External Code]
Describe the bug
Introduced in #907: when there is no EP specified, `use_env_allocators` will be `true`. This crashes the inference on iOS with the stack trace shown above.

Additional context
The model is Phi3-mini-4k-instruct-onnx-cpu-int4-rtn-block-32-acc-level-4.
Tested it with ORT 1.19.0 and ORT 1.19.2. The crash happens with both versions.