Closed zxybazh closed 3 years ago
Hi, I was using the main branch code and running the integration test test_resnet_end_to_end_cuda.py. I changed the target from rtx-3080 to rtx-2080-ti for testing purposes and got the following error when running the test.
test_resnet_end_to_end_cuda.py
rtx-3080
rtx-2080-ti
One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details. Traceback (most recent call last): File "test_resnet_end_to_end_cuda.py", line 214, in <module> test_end_to_end_resnet("resnet_cuda.json") File "test_resnet_end_to_end_cuda.py", line 149, in test_end_to_end_resnet lib_std = relay.build_module.build(mod, target, params=params) File "/home/zxybazh/tvm-tensorir/python/tvm/relay/build_module.py", line 363, in build executor_config, runtime_mod, params = bld_mod.build( File "/home/zxybazh/tvm-tensorir/python/tvm/relay/build_module.py", line 174, in build self._build(mod, target, target_host, executor, mod_name) File "/home/zxybazh/tvm-tensorir/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__ raise get_last_ffi_error() AttributeError: Traceback (most recent call last): 36: TVMFuncCall 35: _ZNSt17_Function_handlerIFvN 34: tvm::relay::backend::RelayBuildModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#3}::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const 33: tvm::relay::backend::RelayBuildModule::BuildRelay(tvm::IRModule, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::runtime::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, tvm::runtime::NDArray> > > const&, tvm::runtime::String) 32: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::GraphExecutorCodegenModule::GetFunction(std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#2}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&) 31: tvm::relay::backend::GraphExecutorCodegen::Codegen(tvm::relay::Function, tvm::runtime::String) 30: tvm::relay::tec::LowerTE(tvm::IRModule const&, std::unordered_map<DLDeviceType, tvm::Target, tvm::relay::tec::EnumClassHash, std::equal_to<DLDeviceType>, std::allocator<std::pair<DLDeviceType const, tvm::Target> > >, std::unordered_map<tvm::RelayExpr, DLDevice, tvm::runtime::ObjectPtrHash, tvm::runtime::ObjectPtrEqual, std::allocator<std::pair<tvm::RelayExpr const, DLDevice> > >, tvm::relay::backend::StaticMemoryPlan, tvm::runtime::String const&, std::function<void (tvm::relay::Function)>) 29: tvm::transform::Pass::operator()(tvm::IRModule) const 28: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const 27: tvm::relay::transform::FunctionPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const 26: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::runtime::TypedPackedFunc<tvm::relay::Function (tvm::relay::Function, tvm::IRModule, tvm::transform::PassContext)>::AssignTypedLambda<tvm::relay::tec::LowerTE(tvm::IRModule const&, std::unordered_map<DLDeviceType, tvm::Target, tvm::relay::tec::EnumClassHash, std::equal_to<DLDeviceType>, std::allocator<std::pair<DLDeviceType const, tvm::Target> > >, std::unordered_map<tvm::RelayExpr, DLDevice, tvm::runtime::ObjectPtrHash, tvm::runtime::ObjectPtrEqual, std::allocator<std::pair<tvm::RelayExpr const, DLDevice> > >, tvm::relay::backend::StaticMemoryPlan, tvm::runtime::String const&, std::function<void (tvm::relay::Function)>)::{lambda(tvm::relay::Function, tvm::IRModule, tvm::transform::PassContext)#1}>(tvm::relay::tec::LowerTE(tvm::IRModule const&, 
std::unordered_map<DLDeviceType, tvm::Target, tvm::relay::tec::EnumClassHash, std::equal_to<DLDeviceType>, std::allocator<std::pair<DLDeviceType const, tvm::Target> > >, std::unordered_map<tvm::RelayExpr, DLDevice, tvm::runtime::ObjectPtrHash, tvm::runtime::ObjectPtrEqual, std::allocator<std::pair<tvm::RelayExpr const, DLDevice> > >, tvm::relay::backend::StaticMemoryPlan, tvm::runtime::String const&, std::function<void (tvm::relay::Function)>)::{lambda(tvm::relay::Function, tvm::IRModule, tvm::transform::PassContext)#1})::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&) 25: tvm::relay::ExprMutator::VisitExpr(tvm::RelayExpr const&) 24: tvm::relay::ExprFunctor<tvm::RelayExpr (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) 23: _ZZN3tvm5relay11ExprFunctorIFNS_9RelayExprERKS2_EE10InitVTableEvENUlR 22: tvm::relay::ExprMutator::VisitExpr_(tvm::relay::FunctionNode const*) 21: tvm::relay::ExprMutator::VisitExpr(tvm::RelayExpr const&) 20: tvm::relay::ExprFunctor<tvm::RelayExpr (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) 19: _ZZN3tvm5relay11ExprFunctorIFNS_9RelayExprERKS2_EE10InitVTableEvENUlR 18: tvm::relay::tec::LowerTensorExpr::VisitExpr_(tvm::relay::CallNode const*) 17: tvm::relay::ExprMutator::VisitExpr(tvm::RelayExpr const&) 16: tvm::relay::ExprFunctor<tvm::RelayExpr (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) 15: _ZZN3tvm5relay11ExprFunctorIFNS_9RelayExprERKS2_EE10InitVTableEvENUlR 14: tvm::relay::tec::LowerTensorExpr::VisitExpr_(tvm::relay::CallNode const*) 13: tvm::relay::tec::TECompilerImpl::Lower(tvm::relay::tec::CCacheKey const&, tvm::runtime::String) 12: tvm::relay::tec::TECompilerImpl::Lower(tvm::relay::tec::CCacheKey const&, std::function<tvm::runtime::String (tvm::runtime::String)>) 11: tvm::relay::tec::TECompilerImpl::LowerInternal(tvm::relay::tec::CCacheKey const&, std::function<tvm::runtime::String 
(tvm::runtime::String)>) 10: tvm::relay::tec::PrimFuncFor(tvm::relay::Function const&, tvm::Target const&, std::function<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > (std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)>) 9: tvm::relay::tec::ScheduleBuilder::Create(tvm::relay::Function const&, std::function<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > (std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)>) 8: tvm::relay::backend::MemoizedExprTranslator<tvm::runtime::Array<tvm::te::Tensor, void> >::VisitExpr(tvm::RelayExpr const&) 7: tvm::relay::ExprFunctor<tvm::runtime::Array<tvm::te::Tensor, void> (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) 6: _ZZN3tvm5relay11ExprFunctorIFNS_7runtime5ArrayINS_2te6TensorEvEERKNS_ 5: tvm::relay::tec::ScheduleBuilder::VisitExpr_(tvm::relay::CallNode const*) 4: tvm::relay::backend::MemoizedExprTranslator<tvm::runtime::Array<tvm::te::Tensor, void> >::VisitExpr(tvm::RelayExpr const&) 3: tvm::relay::ExprFunctor<tvm::runtime::Array<tvm::te::Tensor, void> (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) 2: _ZZN3tvm5relay11ExprFunctorIFNS_7runtime5ArrayINS_2te6TensorEvEERKNS_ 1: tvm::relay::tec::ScheduleBuilder::VisitExpr_(tvm::relay::CallNode const*) 0: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), TVMFuncCreateFromCFunc::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#2}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&) File "/home/zxybazh/tvm-tensorir/python/tvm/_ffi/_ctypes/packed_func.py", line 81, in cfun rv = local_pyfunc(*pyargs) File "/home/zxybazh/tvm-tensorir/python/tvm/relay/backend/compile_engine.py", line 320, in lower_call best_impl, outputs = select_implementation(op, call.attrs, inputs, ret_type, target) File "/home/zxybazh/tvm-tensorir/python/tvm/relay/backend/compile_engine.py", line 199, 
in select_implementation all_impls = get_valid_implementations(op, attrs, inputs, out_type, target) File "/home/zxybazh/tvm-tensorir/python/tvm/relay/backend/compile_engine.py", line 135, in get_valid_implementations strategy = fstrategy(attrs, inputs, out_type, target) File "/home/zxybazh/tvm-tensorir/python/tvm/target/generic_func.py", line 46, in __call__ return _ffi_api.GenericFuncCallFunc(self, *args) File "/home/zxybazh/tvm-tensorir/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__ raise get_last_ffi_error() 3: TVMFuncCall 2: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#6}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&) 1: tvm::GenericFunc::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const 0: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), TVMFuncCreateFromCFunc::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#2}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&) File "/home/zxybazh/tvm-tensorir/python/tvm/_ffi/_ctypes/packed_func.py", line 81, in cfun rv = local_pyfunc(*pyargs) File "/home/zxybazh/tvm-tensorir/python/tvm/relay/op/strategy/cuda.py", line 829, in dense_strategy_cuda wrap_compute_dense(topi.cuda.dense_small_batch), AttributeError: module 'tvm.topi.cuda' has no attribute 'dense_small_batch'
Won't fix
Hi, I was using the main branch code and running integration test on
test_resnet_end_to_end_cuda.py
. I changed the target from rtx-3080
to rtx-2080-ti
for testing usage and got the following error when running the test.