From 567a3980bd9e5c448ce7af7babdfb1128787d15d Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Mon, 6 Jan 2025 16:27:32 +0800 Subject: [PATCH] [SYCLomatic] Fix macro and use rewriter to impl cudaFree/cublasFree (#2590) Signed-off-by: Jiang, Zhiwei --- .../DPCT/RuleInfra/CallExprRewriterCommon.h | 14 +-- clang/lib/DPCT/RulesLang/APINamesMemory.inc | 90 +++++++++++++++++++ .../DPCT/RulesLang/CallExprRewriterMemory.cpp | 3 +- clang/lib/DPCT/RulesLang/RulesLang.cpp | 45 +--------- clang/lib/DPCT/Utility.cpp | 4 +- clang/test/dpct/macro_test.cu | 68 ++++++++++++++ clang/test/dpct/texture_layered.cu | 2 +- 7 files changed, 176 insertions(+), 50 deletions(-) diff --git a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h index 429aee4bfbd1..2c5c713be43c 100644 --- a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h +++ b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h @@ -280,16 +280,20 @@ class MemArgExpr { MemArgExpr() = default; std::pair getMemAPIVarNameAndArrayOffset(const Expr *) const; - -public: const Expr *E = nullptr; + const CallExpr *CE = nullptr; +public: template void print(StreamT &Stream) const { auto P = getMemAPIVarNameAndArrayOffset(E); std::string VarName = P.first; std::string ArrayOffset = P.second; - clang::dpct::print(Stream, E); + ArgumentAnalysis AA; + AA.setCallSpelling(CE); + AA.analyze(E); + Stream << AA.getRewritePrefix() << AA.getRewriteString() + << AA.getRewritePostfix(); if (VarName.empty()) return; @@ -301,13 +305,13 @@ class MemArgExpr { Stream << " + " << ArrayOffset; } - static MemArgExpr create(const Expr *E); + static MemArgExpr create(const CallExpr *CE, const Expr *E); }; inline std::function makeMemArgCallArgCreator(unsigned Idx) { return [=](const CallExpr *C) -> MemArgExpr { - return MemArgExpr::create(C->getArg(Idx)); + return MemArgExpr::create(C, C->getArg(Idx)); }; } diff --git a/clang/lib/DPCT/RulesLang/APINamesMemory.inc b/clang/lib/DPCT/RulesLang/APINamesMemory.inc index 5569cd8e0639..d340070b1af7 100644 --- a/clang/lib/DPCT/RulesLang/APINamesMemory.inc +++ b/clang/lib/DPCT/RulesLang/APINamesMemory.inc @@ -932,3 +932,93 @@ CONDITIONAL_FACTORY_ENTRY( "cudaMemcpy", CALL(MemoryMigrationRule::getMemoryHelperFunctionName( "memcpy", false), MEM_ARG(0), MEM_ARG(1), ARG(2), ARG(3)))))) + +#define CUDA_FREE(NAME) \ + CONDITIONAL_FACTORY_ENTRY( \ + hasManagedAttr(0), \ + CONDITIONAL_FACTORY_ENTRY( \ + checkIsUSM(), \ + CONDITIONAL_FACTORY_ENTRY( \ + [](const CallExpr *) { \ + return DpctGlobalInfo::isOptimizeMigration(); \ + }, \ + ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + NAME, CALL(MapNames::getClNamespace() + "free", \ + makeCombinedArg( \ + makeCombinedArg(ARG("*("), ARG_WC(0)), \ + ARG(".get_ptr())")), \ + QUEUESTR)))), \ + CONDITIONAL_FACTORY_ENTRY( \ + [](const CallExpr *) { \ + return DpctGlobalInfo::useNoQueueDevice(); \ + }, \ + MULTI_STMTS_FACTORY_ENTRY( \ + NAME, true, true, true, true, \ + MEMBER_CALL(QUEUESTR, false, "wait_and_throw"), \ + CALL(MapNames::getClNamespace() + "free", \ + makeCombinedArg( \ + makeCombinedArg(ARG("*("), ARG_WC(0)), \ + ARG(".get_ptr())")), \ + QUEUESTR)), \ + ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + NAME, \ + CALL(MapNames::getDpctNamespace() + \ + (DpctGlobalInfo::useSYCLCompat() \ + ? "wait_and_free" \ + : "dpct_free"), \ + makeCombinedArg( \ + makeCombinedArg(ARG("*("), ARG_WC(0)), \ + ARG(".get_ptr())")), \ + QUEUESTR)))))), \ + ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + NAME, \ + CALL(MapNames::getDpctNamespace() + \ + (DpctGlobalInfo::useSYCLCompat() ? "wait_and_free" \ + : "dpct_free"), \ + makeCombinedArg(makeCombinedArg(ARG("*("), ARG_WC(0)), \ + ARG(".get_ptr())"))))))), \ + CONDITIONAL_FACTORY_ENTRY( \ + checkIsUSM(), \ + CONDITIONAL_FACTORY_ENTRY( \ + [](const CallExpr *) { \ + return DpctGlobalInfo::isOptimizeMigration(); \ + }, \ + ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY(NAME, \ + CALL(MapNames::getClNamespace() + "free", \ + ARG_WC(0), QUEUESTR)))), \ + CONDITIONAL_FACTORY_ENTRY( \ + [](const CallExpr *) { \ + return DpctGlobalInfo::useNoQueueDevice(); \ + }, \ + MULTI_STMTS_FACTORY_ENTRY( \ + NAME, true, true, true, true, \ + MEMBER_CALL(QUEUESTR, false, "wait_and_throw"), \ + CALL(MapNames::getClNamespace() + "free", ARG_WC(0), \ + QUEUESTR)), \ + ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + NAME, CALL(MapNames::getDpctNamespace() + \ + (DpctGlobalInfo::useSYCLCompat() \ + ? "wait_and_free" \ + : "dpct_free"), \ + ARG_WC(0), QUEUESTR)))))), \ + ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + NAME, \ + CALL(MapNames::getDpctNamespace() + \ + (DpctGlobalInfo::useSYCLCompat() ? "wait_and_free" \ + : "dpct_free"), \ + ARG_WC(0))))))) +CUDA_FREE("cudaFree") +CUDA_FREE("cublasFree") +#undef CUDA_FREE diff --git a/clang/lib/DPCT/RulesLang/CallExprRewriterMemory.cpp b/clang/lib/DPCT/RulesLang/CallExprRewriterMemory.cpp index 904be3bf9191..675190216b56 100644 --- a/clang/lib/DPCT/RulesLang/CallExprRewriterMemory.cpp +++ b/clang/lib/DPCT/RulesLang/CallExprRewriterMemory.cpp @@ -50,9 +50,10 @@ MemArgExpr::getMemAPIVarNameAndArrayOffset(const Expr *) const { return MemoryMigrationRule::getMemAPIVarNameAndArrayOffset(E); }; -MemArgExpr MemArgExpr::create(const Expr *E) { +MemArgExpr MemArgExpr::create(const CallExpr *CE, const Expr *E) { MemArgExpr MAE; MAE.E = E; + MAE.CE = CE; return MAE; } diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index ff40fd77763a..81c8001ac94c 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -6267,47 +6267,7 @@ void MemoryMigrationRule::freeMigration(const MatchFinder::MatchResult &Result, return; } int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc(); - if (Name == "cudaFree" || Name == "cublasFree") { - if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_Restricted) { - ArgumentAnalysis AA; - AA.setCallSpelling(C); - AA.analyze(C->getArg(0)); - auto ArgStr = AA.getRewritePrefix() + AA.getRewriteString() + - AA.getRewritePostfix(); - std::ostringstream Repl; - buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue); - if (hasManagedAttr(0)(C)) { - ArgStr = "*(" + ArgStr + ".get_ptr())"; - } - auto &SM = DpctGlobalInfo::getSourceManager(); - auto Indent = getIndent(SM.getExpansionLoc(C->getBeginLoc()), SM).str(); - if (DpctGlobalInfo::isOptimizeMigration()) { - Repl << MapNames::getClNamespace() << "free"; - } else { - if (DpctGlobalInfo::useNoQueueDevice()) { - Repl << Indent << "{{NEEDREPLACEQ" << std::to_string(Index) - << "}}.wait_and_throw();\n" - << Indent << MapNames::getClNamespace() << "free"; - } else { - requestFeature(HelperFeatureEnum::device_ext); - Repl << MapNames::getDpctNamespace(); - if (DpctGlobalInfo::useSYCLCompat()) - Repl << "wait_and_free"; - else - Repl << "dpct_free"; - } - } - Repl << "(" << ArgStr - << ", {{NEEDREPLACEQ" + std::to_string(Index) + "}})"; - emplaceTransformation(new ReplaceStmt(C, std::move(Repl.str()))); - } else { - requestFeature(HelperFeatureEnum::device_ext); - emplaceTransformation(new ReplaceCalleeName( - C, MapNames::getDpctNamespace() + (DpctGlobalInfo::useSYCLCompat() - ? "wait_and_free" - : "dpct_free"))); - } - } else if (Name == "cudaFreeHost" || Name == "cuMemFreeHost") { + if (Name == "cudaFreeHost" || Name == "cuMemFreeHost") { if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_Restricted) { CheckCanUseCLibraryMallocOrFree Checker(0, true); ExprAnalysis EA; @@ -6817,7 +6777,8 @@ void MemoryMigrationRule::runRule(const MatchFinder::MatchResult &Result) { Name.compare("cuMemAllocPitch_v2") && Name.compare("cuMemAlloc_v2") && Name.compare("cudaMallocMipmappedArray") && Name.compare("cudaGetMipmappedArrayLevel") && - Name.compare("cudaFreeMipmappedArray") && Name.compare("cudaMemcpy")) { + Name.compare("cudaFreeMipmappedArray") && Name.compare("cudaMemcpy") && + Name.compare("cudaFree") && Name.compare("cublasFree")) { requestFeature(HelperFeatureEnum::device_ext); insertAroundStmt(C, MapNames::getCheckErrorMacroName() + "(", ")"); } else if (IsAssigned && !Name.compare("cudaMemAdvise") && diff --git a/clang/lib/DPCT/Utility.cpp b/clang/lib/DPCT/Utility.cpp index 9d0f39922997..e9b2e30b6e17 100644 --- a/clang/lib/DPCT/Utility.cpp +++ b/clang/lib/DPCT/Utility.cpp @@ -2264,10 +2264,12 @@ getRangeInRange(SourceRange Range, SourceLocation SearchRangeBegin, // the behavior of immediateExpansion is different: // 1. string_literal created with "#" does not include the last token // 2. greatergreatergreater of template does include the last token + // 3. raw_identifier created with "##" is similar as (1) // We need to process the last token according to the token kind. if (IncludeLastToken && (!SM.isWrittenInScratchSpace(SM.getSpellingLoc(Range.getEnd())) || - Tok.getKind() == tok::TokenKind::string_literal)) { + Tok.getKind() == tok::TokenKind::string_literal || + Tok.getKind() == tok::TokenKind::raw_identifier)) { auto LastTokenLength = Lexer::MeasureTokenLength(ResultEnd, SM, Context.getLangOpts()); ResultEnd = ResultEnd.getLocWithOffset(LastTokenLength); diff --git a/clang/test/dpct/macro_test.cu b/clang/test/dpct/macro_test.cu index 8a8779734197..0aac0863e535 100644 --- a/clang/test/dpct/macro_test.cu +++ b/clang/test/dpct/macro_test.cu @@ -1444,4 +1444,72 @@ void foo43() { #undef TOO_SMALL #undef JUST_RIGHT +// CHECK: #define TODEV(A, s) \ +//CHECK-NEXT: A = (float *)malloc((s) * sizeof(float)); \ +//CHECK-NEXT: for (int i = 0; i < s; i++) A[i] = 0.001; \ +//CHECK-NEXT: float *A##_d; \ +//CHECK-NEXT: A##_d = sycl::malloc_device(((s)), dpct::get_in_order_queue()); \ +//CHECK-NEXT: dpct::get_in_order_queue().memcpy(A##_d, A, (s) * sizeof(float)).wait(); +# define TODEV(A,s) A = (float*) malloc ((s) * sizeof(float)); \ + for (int i = 0; i < s; i++) A[i] = 0.001; \ + float *A##_d;\ + cudaMalloc((void**)&A##_d,((s))*sizeof(float));\ + cudaMemcpy(A##_d, A, (s)*sizeof(float), cudaMemcpyHostToDevice); + +// CHECK: #define FROMDEV(A, s) \ +//CHECK-NEXT: dpct::get_in_order_queue().memcpy(A, A##_d, (s) * sizeof(float)).wait(); +# define FROMDEV(A,s) cudaMemcpy(A, A##_d, (s)*sizeof(float), cudaMemcpyDeviceToHost); + +// CHECK: #define FREE(A) \ +//CHECK-NEXT: free(A); \ +//CHECK-NEXT: dpct::dpct_free(A##_d, q_ct1) +# define FREE(A) free(A);\ + cudaFree(A##_d) + +// CHECK: # define TODEV3(A) TODEV(A,d3) +//CHECK-NEXT: # define TODEV2(A) TODEV(A,d2) +//CHECK-NEXT: # define FROMDEV3(A) FROMDEV(A,d3) +//CHECK-NEXT: # define FROMDEV2(A) FROMDEV(A,d2) +# define TODEV3(A) TODEV(A,d3) +# define TODEV2(A) TODEV(A,d2) +# define FROMDEV3(A) FROMDEV(A,d3) +# define FROMDEV2(A) FROMDEV(A,d2) + +// CHECK: void foo44(float *x, int size, int d3, int d2) { +//CHECK-NEXT: dpct::device_ext &dev_ct1 = dpct::get_current_device(); +//CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.in_order_queue(); +//CHECK-NEXT: TODEV(x, size) +//CHECK-NEXT: FROMDEV(x, size) +//CHECK-NEXT: FREE(x); +//CHECK-NEXT: { +//CHECK-NEXT: TODEV3(x) +//CHECK-NEXT: } +//CHECK-NEXT: { +//CHECK-NEXT: TODEV2(x) +//CHECK-NEXT: } +//CHECK-NEXT: FROMDEV3(x) +//CHECK-NEXT: FROMDEV2(x) +//CHECK-NEXT: } +void foo44(float *x, int size, int d3, int d2) { + TODEV(x, size) + FROMDEV(x, size) + FREE(x); + { + TODEV3(x) + } + { + TODEV2(x) + } + FROMDEV3(x) + FROMDEV2(x) +} + +#undef TODEV +#undef FROMDEV +#undef FREE +#undef TODEV3 +#undef TODEV2 +#undef FROMDEV3 +#undef FROMDEV2 + #endif diff --git a/clang/test/dpct/texture_layered.cu b/clang/test/dpct/texture_layered.cu index 30d928175b4e..86ff15e4b3c8 100644 --- a/clang/test/dpct/texture_layered.cu +++ b/clang/test/dpct/texture_layered.cu @@ -103,7 +103,7 @@ int main() { cudaMalloc(&d, sizeof(float4) * 4); // CHECK: tex42.create_image(); // CHECK: tex21.create_image(); - // CHECK: dpct::get_out_of_order_queue().submit( + // CHECK: dpct::get_out_of_order_queue().submit( // CHECK-NEXT: [&](sycl::handler &cgh) { // CHECK-NEXT: auto d_acc_ct0 = dpct::get_access(d, cgh); // CHECK-EMPTY: