From 726ff4286bd2f92160f6156722aa5b790bbba02d Mon Sep 17 00:00:00 2001 From: Daming Feng Date: Sat, 24 Feb 2024 04:09:11 +0000 Subject: [PATCH 1/6] fix failed error bugs in 2d/3d conv backward weight solvers --- .../miopen/solver/implicitgemm_ck_util.hpp | 95 ++++++++++++++++--- 1 file changed, 80 insertions(+), 15 deletions(-) diff --git a/src/include/miopen/solver/implicitgemm_ck_util.hpp b/src/include/miopen/solver/implicitgemm_ck_util.hpp index 2199d88b01..fc1320ecf1 100644 --- a/src/include/miopen/solver/implicitgemm_ck_util.hpp +++ b/src/include/miopen/solver/implicitgemm_ck_util.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #if MIOPEN_USE_COMPOSABLEKERNEL #include @@ -96,6 +97,25 @@ bool IsCKApplicable(const ProblemDescriptionType& problem) ptrs.begin(), ptrs.end(), [&args](auto& ptr) { return args.IsSupportedBy(ptr); }); } +#if MIOPEN_BACKEND_HIP +inline void ProfilingRecordStart(const Handle& handle, HipEventPtr& start, HipEventPtr& stop) +{ + start = make_hip_event(); + stop = make_hip_event(); + hipEventRecord(start.get(), handle.GetStream()); +} + +inline void ProfilingRecordStop(const Handle& handle, HipEventPtr& start, HipEventPtr& stop) +{ + hipEventRecord(stop.get(), handle.GetStream()); + hipEventSynchronize(stop.get()); + float mS = 0.0f; + hipEventElapsedTime(&mS, start.get(), stop.get()); + handle.ResetKernelTime(); + handle.AccumKernelTime(mS); +} +#endif + template &) mutable { - return [ck_args = std::move(ck_args), sh_conv_ptr = std::move(sh_conv_ptr)]( - const Handle& handle, const AnyInvokeParams& primitive_parameters) { - const auto& data_ctx = primitive_parameters.CastTo(); - auto argument_ptr = ck_args.MakeArgPtr(sh_conv_ptr, data_ctx.tensors); - auto invoker_ptr = sh_conv_ptr->MakeInvokerPointer(); + result.invoker_factory = [ck_args = CKArgsType{problem}, + sh_conv_ptr = std::shared_ptr{std::move(*ptr_iter)}]( + const std::vector&) mutable { + return [ck_args = std::move(ck_args), sh_conv_ptr = std::move(sh_conv_ptr)]( + const Handle& handle, const AnyInvokeParams& primitive_parameters) { + const auto& data_ctx = primitive_parameters.CastTo(); + auto argument_ptr = ck_args.MakeArgPtr(sh_conv_ptr, data_ctx.tensors); + auto invoker_ptr = sh_conv_ptr->MakeInvokerPointer(); + if constexpr(std::is_same::value) + { + auto zero = 0.0f; + const auto& tensors = data_ctx.tensors; +#if MIOPEN_BACKEND_HIP + HipEventPtr start = nullptr; + HipEventPtr stop = nullptr; + if(handle.IsProfilingEnabled()) + { + ProfilingRecordStart(handle, start, stop); + } +#endif + SetTensor(handle, tensors.dwDesc, tensors.dw, &zero); + invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), false}); +#if MIOPEN_BACKEND_HIP + if(handle.IsProfilingEnabled()) + ProfilingRecordStop(handle, start, stop); +#endif + } + else + { const auto enable_profiling = handle.IsProfilingEnabled(); float elapsed_time = invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), enable_profiling}); @@ -131,8 +172,9 @@ ConvSolution InitInvokerFactoryNHWC(const ExecutionContext&, handle.ResetKernelTime(); handle.AccumKernelTime(elapsed_time); } - }; + } }; + }; return result; } @@ -583,6 +625,11 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, handle.ResetKernelTime(); const auto& data_ctx = primitive_parameters.CastTo(); + if constexpr(std::is_same::value) + { + auto zero = 0.0f; + SetTensor(handle, data_ctx.tensors.dwDesc, data_ctx.tensors.dw, &zero); + } if(!data_ctx.workSpace) { @@ -632,15 +679,33 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, tr_ptrs[0]->GetBufferPtr(), tr_ptrs[1]->GetBufferPtr(), tr_ptrs[2]->GetBufferPtr()); - float conv_time = 0; - conv_time += invoker_ptr->Run(argument_ptr.get(), - {handle.GetStream(), handle.IsProfilingEnabled()}); - - if(handle.IsProfilingEnabled()) + if constexpr(std::is_same::value) { - handle.AccumKernelTime(conv_time); +#if MIOPEN_BACKEND_HIP + HipEventPtr start = nullptr; + HipEventPtr stop = nullptr; + if(handle.IsProfilingEnabled()) + { + ProfilingRecordStart(handle, start, stop); + } +#endif + invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), false}); +#if MIOPEN_BACKEND_HIP + if(handle.IsProfilingEnabled()) + ProfilingRecordStop(handle, start, stop); +#endif } + else + { + float conv_time = 0; + conv_time += invoker_ptr->Run(argument_ptr.get(), + {handle.GetStream(), handle.IsProfilingEnabled()}); + if(handle.IsProfilingEnabled()) + { + handle.AccumKernelTime(conv_time); + } + } output_tr_inst.ConvertTo(handle, kernels, conv_tensors); }; }; From 81f29c00af6dcc3e9053390953373dc55dc45117 Mon Sep 17 00:00:00 2001 From: Daming Feng Date: Thu, 29 Feb 2024 03:53:24 +0000 Subject: [PATCH 2/6] fix time issue in NCHW layout invoker --- .../miopen/solver/implicitgemm_ck_util.hpp | 50 ++++--------------- 1 file changed, 11 insertions(+), 39 deletions(-) diff --git a/src/include/miopen/solver/implicitgemm_ck_util.hpp b/src/include/miopen/solver/implicitgemm_ck_util.hpp index fc1320ecf1..ba5baedb8e 100644 --- a/src/include/miopen/solver/implicitgemm_ck_util.hpp +++ b/src/include/miopen/solver/implicitgemm_ck_util.hpp @@ -97,7 +97,6 @@ bool IsCKApplicable(const ProblemDescriptionType& problem) ptrs.begin(), ptrs.end(), [&args](auto& ptr) { return args.IsSupportedBy(ptr); }); } -#if MIOPEN_BACKEND_HIP inline void ProfilingRecordStart(const Handle& handle, HipEventPtr& start, HipEventPtr& stop) { start = make_hip_event(); @@ -114,7 +113,6 @@ inline void ProfilingRecordStop(const Handle& handle, HipEventPtr& start, HipEve handle.ResetKernelTime(); handle.AccumKernelTime(mS); } -#endif template Run(argument_ptr.get(), {handle.GetStream(), false}); -#if MIOPEN_BACKEND_HIP if(handle.IsProfilingEnabled()) ProfilingRecordStop(handle, start, stop); -#endif } else { @@ -625,11 +620,6 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, handle.ResetKernelTime(); const auto& data_ctx = primitive_parameters.CastTo(); - if constexpr(std::is_same::value) - { - auto zero = 0.0f; - SetTensor(handle, data_ctx.tensors.dwDesc, data_ctx.tensors.dw, &zero); - } if(!data_ctx.workSpace) { @@ -653,6 +643,12 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, std::swap(conv_tensors.xDesc, conv_tensors.yDesc); } + HipEventPtr start = nullptr; + HipEventPtr stop = nullptr; + if(handle.IsProfilingEnabled()) + { + ProfilingRecordStart(handle, start, stop); + } input1_tr_inst.ConvertFrom(handle, kernels, conv_tensors); input2_tr_inst.ConvertFrom(handle, kernels, conv_tensors); @@ -679,34 +675,10 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, tr_ptrs[0]->GetBufferPtr(), tr_ptrs[1]->GetBufferPtr(), tr_ptrs[2]->GetBufferPtr()); - if constexpr(std::is_same::value) - { -#if MIOPEN_BACKEND_HIP - HipEventPtr start = nullptr; - HipEventPtr stop = nullptr; - if(handle.IsProfilingEnabled()) - { - ProfilingRecordStart(handle, start, stop); - } -#endif - invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), false}); -#if MIOPEN_BACKEND_HIP - if(handle.IsProfilingEnabled()) - ProfilingRecordStop(handle, start, stop); -#endif - } - else - { - float conv_time = 0; - conv_time += invoker_ptr->Run(argument_ptr.get(), - {handle.GetStream(), handle.IsProfilingEnabled()}); - - if(handle.IsProfilingEnabled()) - { - handle.AccumKernelTime(conv_time); - } - } + invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), false}); output_tr_inst.ConvertTo(handle, kernels, conv_tensors); + if(handle.IsProfilingEnabled()) + ProfilingRecordStop(handle, start, stop); }; }; From f6f67d53f2a5a9bb872c6aea0d9e6c4999761126 Mon Sep 17 00:00:00 2001 From: Daming Feng Date: Fri, 1 Mar 2024 20:05:02 +0000 Subject: [PATCH 3/6] code refactoring: define hip event profiler to reduce code duplicate --- .../miopen/solver/implicitgemm_ck_util.hpp | 103 ++++++++---------- 1 file changed, 45 insertions(+), 58 deletions(-) diff --git a/src/include/miopen/solver/implicitgemm_ck_util.hpp b/src/include/miopen/solver/implicitgemm_ck_util.hpp index ba5baedb8e..9777535bb2 100644 --- a/src/include/miopen/solver/implicitgemm_ck_util.hpp +++ b/src/include/miopen/solver/implicitgemm_ck_util.hpp @@ -97,22 +97,31 @@ bool IsCKApplicable(const ProblemDescriptionType& problem) ptrs.begin(), ptrs.end(), [&args](auto& ptr) { return args.IsSupportedBy(ptr); }); } -inline void ProfilingRecordStart(const Handle& handle, HipEventPtr& start, HipEventPtr& stop) +#define WORKAROUND_CK_ISSUE_1184 1 +#ifdef WORKAROUND_CK_ISSUE_1184 +struct HipEventProfiler { - start = make_hip_event(); - stop = make_hip_event(); - hipEventRecord(start.get(), handle.GetStream()); -} + const Handle& handle; + float event_time; + HipEventPtr start; + HipEventPtr stop; -inline void ProfilingRecordStop(const Handle& handle, HipEventPtr& start, HipEventPtr& stop) -{ - hipEventRecord(stop.get(), handle.GetStream()); - hipEventSynchronize(stop.get()); - float mS = 0.0f; - hipEventElapsedTime(&mS, start.get(), stop.get()); - handle.ResetKernelTime(); - handle.AccumKernelTime(mS); -} + HipEventProfiler(const Handle& handle_) : handle(std::move(handle_)), event_time(0.0f) + { + start = make_hip_event(); + stop = make_hip_event(); + hipEventRecord(start.get(), handle.GetStream()); + } + ~HipEventProfiler() + { + hipEventRecord(stop.get(), handle.GetStream()); + hipEventSynchronize(stop.get()); + hipEventElapsedTime(&event_time, start.get(), stop.get()); + handle.ResetKernelTime(); + handle.AccumKernelTime(event_time); + } +}; +#endif template &) mutable { - return [ck_args = std::move(ck_args), sh_conv_ptr = std::move(sh_conv_ptr)]( - const Handle& handle, const AnyInvokeParams& primitive_parameters) { - const auto& data_ctx = primitive_parameters.CastTo(); - auto argument_ptr = ck_args.MakeArgPtr(sh_conv_ptr, data_ctx.tensors); - auto invoker_ptr = sh_conv_ptr->MakeInvokerPointer(); - - if constexpr(std::is_same::value) - { - auto zero = 0.0f; - const auto& tensors = data_ctx.tensors; - SetTensor(handle, tensors.dwDesc, tensors.dw, &zero); - - HipEventPtr start = nullptr; - HipEventPtr stop = nullptr; - if(handle.IsProfilingEnabled()) - { - ProfilingRecordStart(handle, start, stop); - } - invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), false}); - if(handle.IsProfilingEnabled()) - ProfilingRecordStop(handle, start, stop); - } - else - { - const auto enable_profiling = handle.IsProfilingEnabled(); - float elapsed_time = - invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), enable_profiling}); - if(enable_profiling) + result.invoker_factory = + [ck_args = CKArgsType{problem}, + sh_conv_ptr = std::shared_ptr{std::move(*ptr_iter)}](const std::vector&) mutable { + return [ck_args = std::move(ck_args), sh_conv_ptr = std::move(sh_conv_ptr)]( + const Handle& handle, const AnyInvokeParams& primitive_parameters) { + const auto& data_ctx = primitive_parameters.CastTo(); + auto argument_ptr = ck_args.MakeArgPtr(sh_conv_ptr, data_ctx.tensors); + auto invoker_ptr = sh_conv_ptr->MakeInvokerPointer(); { - handle.ResetKernelTime(); - handle.AccumKernelTime(elapsed_time); +#ifdef WORKAROUND_CK_ISSUE_1184 + HipEventProfiler pfr(handle); +#endif + if constexpr(std::is_same::value) + { + auto zero = 0.0f; + const auto& tensors = data_ctx.tensors; + SetTensor(handle, tensors.dwDesc, tensors.dw, &zero); + } + invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), false}); } - } + }; }; - }; return result; } @@ -642,13 +635,9 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, std::swap(conv_tensors.x, conv_tensors.y); std::swap(conv_tensors.xDesc, conv_tensors.yDesc); } - - HipEventPtr start = nullptr; - HipEventPtr stop = nullptr; - if(handle.IsProfilingEnabled()) - { - ProfilingRecordStart(handle, start, stop); - } +#ifdef WORKAROUND_CK_ISSUE_1184 + HipEventProfiler pfr(handle); +#endif input1_tr_inst.ConvertFrom(handle, kernels, conv_tensors); input2_tr_inst.ConvertFrom(handle, kernels, conv_tensors); @@ -677,8 +666,6 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, tr_ptrs[2]->GetBufferPtr()); invoker_ptr->Run(argument_ptr.get(), {handle.GetStream(), false}); output_tr_inst.ConvertTo(handle, kernels, conv_tensors); - if(handle.IsProfilingEnabled()) - ProfilingRecordStop(handle, start, stop); }; }; From ec136ac578003c0255b544428ad34983f400b51f Mon Sep 17 00:00:00 2001 From: Daming Feng Date: Fri, 1 Mar 2024 20:10:38 +0000 Subject: [PATCH 4/6] delete comments --- src/include/miopen/solver/implicitgemm_ck_util.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/include/miopen/solver/implicitgemm_ck_util.hpp b/src/include/miopen/solver/implicitgemm_ck_util.hpp index 9777535bb2..2d0422e5a8 100644 --- a/src/include/miopen/solver/implicitgemm_ck_util.hpp +++ b/src/include/miopen/solver/implicitgemm_ck_util.hpp @@ -644,8 +644,6 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, output_init_tr_inst.ConvertFrom(handle, kernels, conv_tensors); - /// \todo: Fix NHWC Wrw invokers to also issue a zero-out kernel. Will - /// need SetTensor() to properly zero out non-packed tensors if(output_tr_inst.GetConvOperandTag() == internal::ConvOperandTag::Weights) { output_tr_inst.ZeroOutBuffer(); From 86549f6fff14aa754eb439bb9fd87f1001f0eb20 Mon Sep 17 00:00:00 2001 From: Daming Feng Date: Fri, 1 Mar 2024 22:26:39 +0000 Subject: [PATCH 5/6] fix tidy error --- src/include/miopen/solver/implicitgemm_ck_util.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/include/miopen/solver/implicitgemm_ck_util.hpp b/src/include/miopen/solver/implicitgemm_ck_util.hpp index 2d0422e5a8..1e261cd204 100644 --- a/src/include/miopen/solver/implicitgemm_ck_util.hpp +++ b/src/include/miopen/solver/implicitgemm_ck_util.hpp @@ -106,10 +106,12 @@ struct HipEventProfiler HipEventPtr start; HipEventPtr stop; - HipEventProfiler(const Handle& handle_) : handle(std::move(handle_)), event_time(0.0f) + HipEventProfiler(const Handle& handle_) + : handle(std::move(handle_)), + event_time(0.0f), + start(make_hip_event()), + stop(make_hip_event()) { - start = make_hip_event(); - stop = make_hip_event(); hipEventRecord(start.get(), handle.GetStream()); } ~HipEventProfiler() From 1fb3360254a276d79672a57dfc47f0998541f31a Mon Sep 17 00:00:00 2001 From: Daming Feng Date: Mon, 4 Mar 2024 13:35:40 +0000 Subject: [PATCH 6/6] address comments --- src/include/miopen/solver/implicitgemm_ck_util.hpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/include/miopen/solver/implicitgemm_ck_util.hpp b/src/include/miopen/solver/implicitgemm_ck_util.hpp index 1e261cd204..efb0d16b96 100644 --- a/src/include/miopen/solver/implicitgemm_ck_util.hpp +++ b/src/include/miopen/solver/implicitgemm_ck_util.hpp @@ -98,7 +98,7 @@ bool IsCKApplicable(const ProblemDescriptionType& problem) } #define WORKAROUND_CK_ISSUE_1184 1 -#ifdef WORKAROUND_CK_ISSUE_1184 +#if WORKAROUND_CK_ISSUE_1184 struct HipEventProfiler { const Handle& handle; @@ -107,10 +107,7 @@ struct HipEventProfiler HipEventPtr stop; HipEventProfiler(const Handle& handle_) - : handle(std::move(handle_)), - event_time(0.0f), - start(make_hip_event()), - stop(make_hip_event()) + : handle(handle_), event_time(0.0f), start(make_hip_event()), stop(make_hip_event()) { hipEventRecord(start.get(), handle.GetStream()); } @@ -152,9 +149,7 @@ ConvSolution InitInvokerFactoryNHWC(const ExecutionContext&, auto argument_ptr = ck_args.MakeArgPtr(sh_conv_ptr, data_ctx.tensors); auto invoker_ptr = sh_conv_ptr->MakeInvokerPointer(); { -#ifdef WORKAROUND_CK_ISSUE_1184 HipEventProfiler pfr(handle); -#endif if constexpr(std::is_same::value) { auto zero = 0.0f; @@ -637,15 +632,14 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, std::swap(conv_tensors.x, conv_tensors.y); std::swap(conv_tensors.xDesc, conv_tensors.yDesc); } -#ifdef WORKAROUND_CK_ISSUE_1184 HipEventProfiler pfr(handle); -#endif input1_tr_inst.ConvertFrom(handle, kernels, conv_tensors); input2_tr_inst.ConvertFrom(handle, kernels, conv_tensors); output_init_tr_inst.ConvertFrom(handle, kernels, conv_tensors); + /// \todo: Will need SetTensor() to properly zero out non-packed tensors if(output_tr_inst.GetConvOperandTag() == internal::ConvOperandTag::Weights) { output_tr_inst.ZeroOutBuffer();