Skip to content

Commit

Permalink
[SYCLomatic] Fix macro and use rewriter to impl cudaFree/cublasFree (#…
Browse files Browse the repository at this point in the history
…2590)


Signed-off-by: Jiang, Zhiwei <[email protected]>
  • Loading branch information
zhiweij1 authored Jan 6, 2025
1 parent 49dd16b commit 567a398
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 50 deletions.
14 changes: 9 additions & 5 deletions clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,16 +280,20 @@ class MemArgExpr {
MemArgExpr() = default;
std::pair<std::string, std::string>
getMemAPIVarNameAndArrayOffset(const Expr *) const;

public:
const Expr *E = nullptr;
const CallExpr *CE = nullptr;

public:
template <class StreamT> void print(StreamT &Stream) const {
auto P = getMemAPIVarNameAndArrayOffset(E);
std::string VarName = P.first;
std::string ArrayOffset = P.second;

clang::dpct::print(Stream, E);
ArgumentAnalysis AA;
AA.setCallSpelling(CE);
AA.analyze(E);
Stream << AA.getRewritePrefix() << AA.getRewriteString()
<< AA.getRewritePostfix();
if (VarName.empty())
return;

Expand All @@ -301,13 +305,13 @@ class MemArgExpr {
Stream << " + " << ArrayOffset;
}

static MemArgExpr create(const Expr *E);
static MemArgExpr create(const CallExpr *CE, const Expr *E);
};

inline std::function<MemArgExpr(const CallExpr *)>
makeMemArgCallArgCreator(unsigned Idx) {
return [=](const CallExpr *C) -> MemArgExpr {
return MemArgExpr::create(C->getArg(Idx));
return MemArgExpr::create(C, C->getArg(Idx));
};
}

Expand Down
90 changes: 90 additions & 0 deletions clang/lib/DPCT/RulesLang/APINamesMemory.inc
Original file line number Diff line number Diff line change
Expand Up @@ -932,3 +932,93 @@ CONDITIONAL_FACTORY_ENTRY(
"cudaMemcpy", CALL(MemoryMigrationRule::getMemoryHelperFunctionName(
"memcpy", false),
MEM_ARG(0), MEM_ARG(1), ARG(2), ARG(3))))))

#define CUDA_FREE(NAME) \
CONDITIONAL_FACTORY_ENTRY( \
hasManagedAttr(0), \
CONDITIONAL_FACTORY_ENTRY( \
checkIsUSM(), \
CONDITIONAL_FACTORY_ENTRY( \
[](const CallExpr *) { \
return DpctGlobalInfo::isOptimizeMigration(); \
}, \
ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \
HelperFeatureEnum::device_ext, \
CALL_FACTORY_ENTRY( \
NAME, CALL(MapNames::getClNamespace() + "free", \
makeCombinedArg( \
makeCombinedArg(ARG("*("), ARG_WC(0)), \
ARG(".get_ptr())")), \
QUEUESTR)))), \
CONDITIONAL_FACTORY_ENTRY( \
[](const CallExpr *) { \
return DpctGlobalInfo::useNoQueueDevice(); \
}, \
MULTI_STMTS_FACTORY_ENTRY( \
NAME, true, true, true, true, \
MEMBER_CALL(QUEUESTR, false, "wait_and_throw"), \
CALL(MapNames::getClNamespace() + "free", \
makeCombinedArg( \
makeCombinedArg(ARG("*("), ARG_WC(0)), \
ARG(".get_ptr())")), \
QUEUESTR)), \
ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \
HelperFeatureEnum::device_ext, \
CALL_FACTORY_ENTRY( \
NAME, \
CALL(MapNames::getDpctNamespace() + \
(DpctGlobalInfo::useSYCLCompat() \
? "wait_and_free" \
: "dpct_free"), \
makeCombinedArg( \
makeCombinedArg(ARG("*("), ARG_WC(0)), \
ARG(".get_ptr())")), \
QUEUESTR)))))), \
ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \
HelperFeatureEnum::device_ext, \
CALL_FACTORY_ENTRY( \
NAME, \
CALL(MapNames::getDpctNamespace() + \
(DpctGlobalInfo::useSYCLCompat() ? "wait_and_free" \
: "dpct_free"), \
makeCombinedArg(makeCombinedArg(ARG("*("), ARG_WC(0)), \
ARG(".get_ptr())"))))))), \
CONDITIONAL_FACTORY_ENTRY( \
checkIsUSM(), \
CONDITIONAL_FACTORY_ENTRY( \
[](const CallExpr *) { \
return DpctGlobalInfo::isOptimizeMigration(); \
}, \
ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \
HelperFeatureEnum::device_ext, \
CALL_FACTORY_ENTRY(NAME, \
CALL(MapNames::getClNamespace() + "free", \
ARG_WC(0), QUEUESTR)))), \
CONDITIONAL_FACTORY_ENTRY( \
[](const CallExpr *) { \
return DpctGlobalInfo::useNoQueueDevice(); \
}, \
MULTI_STMTS_FACTORY_ENTRY( \
NAME, true, true, true, true, \
MEMBER_CALL(QUEUESTR, false, "wait_and_throw"), \
CALL(MapNames::getClNamespace() + "free", ARG_WC(0), \
QUEUESTR)), \
ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \
HelperFeatureEnum::device_ext, \
CALL_FACTORY_ENTRY( \
NAME, CALL(MapNames::getDpctNamespace() + \
(DpctGlobalInfo::useSYCLCompat() \
? "wait_and_free" \
: "dpct_free"), \
ARG_WC(0), QUEUESTR)))))), \
ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( \
HelperFeatureEnum::device_ext, \
CALL_FACTORY_ENTRY( \
NAME, \
CALL(MapNames::getDpctNamespace() + \
(DpctGlobalInfo::useSYCLCompat() ? "wait_and_free" \
: "dpct_free"), \
ARG_WC(0)))))))
CUDA_FREE("cudaFree")
CUDA_FREE("cublasFree")
#undef CUDA_FREE
3 changes: 2 additions & 1 deletion clang/lib/DPCT/RulesLang/CallExprRewriterMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ MemArgExpr::getMemAPIVarNameAndArrayOffset(const Expr *) const {
return MemoryMigrationRule::getMemAPIVarNameAndArrayOffset(E);
};

MemArgExpr MemArgExpr::create(const Expr *E) {
MemArgExpr MemArgExpr::create(const CallExpr *CE, const Expr *E) {
MemArgExpr MAE;
MAE.E = E;
MAE.CE = CE;
return MAE;
}

Expand Down
45 changes: 3 additions & 42 deletions clang/lib/DPCT/RulesLang/RulesLang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6267,47 +6267,7 @@ void MemoryMigrationRule::freeMigration(const MatchFinder::MatchResult &Result,
return;
}
int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc();
if (Name == "cudaFree" || Name == "cublasFree") {
if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_Restricted) {
ArgumentAnalysis AA;
AA.setCallSpelling(C);
AA.analyze(C->getArg(0));
auto ArgStr = AA.getRewritePrefix() + AA.getRewriteString() +
AA.getRewritePostfix();
std::ostringstream Repl;
buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue);
if (hasManagedAttr(0)(C)) {
ArgStr = "*(" + ArgStr + ".get_ptr())";
}
auto &SM = DpctGlobalInfo::getSourceManager();
auto Indent = getIndent(SM.getExpansionLoc(C->getBeginLoc()), SM).str();
if (DpctGlobalInfo::isOptimizeMigration()) {
Repl << MapNames::getClNamespace() << "free";
} else {
if (DpctGlobalInfo::useNoQueueDevice()) {
Repl << Indent << "{{NEEDREPLACEQ" << std::to_string(Index)
<< "}}.wait_and_throw();\n"
<< Indent << MapNames::getClNamespace() << "free";
} else {
requestFeature(HelperFeatureEnum::device_ext);
Repl << MapNames::getDpctNamespace();
if (DpctGlobalInfo::useSYCLCompat())
Repl << "wait_and_free";
else
Repl << "dpct_free";
}
}
Repl << "(" << ArgStr
<< ", {{NEEDREPLACEQ" + std::to_string(Index) + "}})";
emplaceTransformation(new ReplaceStmt(C, std::move(Repl.str())));
} else {
requestFeature(HelperFeatureEnum::device_ext);
emplaceTransformation(new ReplaceCalleeName(
C, MapNames::getDpctNamespace() + (DpctGlobalInfo::useSYCLCompat()
? "wait_and_free"
: "dpct_free")));
}
} else if (Name == "cudaFreeHost" || Name == "cuMemFreeHost") {
if (Name == "cudaFreeHost" || Name == "cuMemFreeHost") {
if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_Restricted) {
CheckCanUseCLibraryMallocOrFree Checker(0, true);
ExprAnalysis EA;
Expand Down Expand Up @@ -6817,7 +6777,8 @@ void MemoryMigrationRule::runRule(const MatchFinder::MatchResult &Result) {
Name.compare("cuMemAllocPitch_v2") && Name.compare("cuMemAlloc_v2") &&
Name.compare("cudaMallocMipmappedArray") &&
Name.compare("cudaGetMipmappedArrayLevel") &&
Name.compare("cudaFreeMipmappedArray") && Name.compare("cudaMemcpy")) {
Name.compare("cudaFreeMipmappedArray") && Name.compare("cudaMemcpy") &&
Name.compare("cudaFree") && Name.compare("cublasFree")) {
requestFeature(HelperFeatureEnum::device_ext);
insertAroundStmt(C, MapNames::getCheckErrorMacroName() + "(", ")");
} else if (IsAssigned && !Name.compare("cudaMemAdvise") &&
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/DPCT/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2264,10 +2264,12 @@ getRangeInRange(SourceRange Range, SourceLocation SearchRangeBegin,
// the behavior of immediateExpansion is different:
// 1. string_literal created with "#" does not include the last token
// 2. greatergreatergreater of template does include the last token
// 3. raw_identifier created with "##" is similar as (1)
// We need to process the last token according to the token kind.
if (IncludeLastToken &&
(!SM.isWrittenInScratchSpace(SM.getSpellingLoc(Range.getEnd())) ||
Tok.getKind() == tok::TokenKind::string_literal)) {
Tok.getKind() == tok::TokenKind::string_literal ||
Tok.getKind() == tok::TokenKind::raw_identifier)) {
auto LastTokenLength =
Lexer::MeasureTokenLength(ResultEnd, SM, Context.getLangOpts());
ResultEnd = ResultEnd.getLocWithOffset(LastTokenLength);
Expand Down
68 changes: 68 additions & 0 deletions clang/test/dpct/macro_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1444,4 +1444,72 @@ void foo43() {
#undef TOO_SMALL
#undef JUST_RIGHT

// CHECK: #define TODEV(A, s) \
//CHECK-NEXT: A = (float *)malloc((s) * sizeof(float)); \
//CHECK-NEXT: for (int i = 0; i < s; i++) A[i] = 0.001; \
//CHECK-NEXT: float *A##_d; \
//CHECK-NEXT: A##_d = sycl::malloc_device<float>(((s)), dpct::get_in_order_queue()); \
//CHECK-NEXT: dpct::get_in_order_queue().memcpy(A##_d, A, (s) * sizeof(float)).wait();
# define TODEV(A,s) A = (float*) malloc ((s) * sizeof(float)); \
for (int i = 0; i < s; i++) A[i] = 0.001; \
float *A##_d;\
cudaMalloc((void**)&A##_d,((s))*sizeof(float));\
cudaMemcpy(A##_d, A, (s)*sizeof(float), cudaMemcpyHostToDevice);

// CHECK: #define FROMDEV(A, s) \
//CHECK-NEXT: dpct::get_in_order_queue().memcpy(A, A##_d, (s) * sizeof(float)).wait();
# define FROMDEV(A,s) cudaMemcpy(A, A##_d, (s)*sizeof(float), cudaMemcpyDeviceToHost);

// CHECK: #define FREE(A) \
//CHECK-NEXT: free(A); \
//CHECK-NEXT: dpct::dpct_free(A##_d, q_ct1)
# define FREE(A) free(A);\
cudaFree(A##_d)

// CHECK: # define TODEV3(A) TODEV(A,d3)
//CHECK-NEXT: # define TODEV2(A) TODEV(A,d2)
//CHECK-NEXT: # define FROMDEV3(A) FROMDEV(A,d3)
//CHECK-NEXT: # define FROMDEV2(A) FROMDEV(A,d2)
# define TODEV3(A) TODEV(A,d3)
# define TODEV2(A) TODEV(A,d2)
# define FROMDEV3(A) FROMDEV(A,d3)
# define FROMDEV2(A) FROMDEV(A,d2)

// CHECK: void foo44(float *x, int size, int d3, int d2) {
//CHECK-NEXT: dpct::device_ext &dev_ct1 = dpct::get_current_device();
//CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.in_order_queue();
//CHECK-NEXT: TODEV(x, size)
//CHECK-NEXT: FROMDEV(x, size)
//CHECK-NEXT: FREE(x);
//CHECK-NEXT: {
//CHECK-NEXT: TODEV3(x)
//CHECK-NEXT: }
//CHECK-NEXT: {
//CHECK-NEXT: TODEV2(x)
//CHECK-NEXT: }
//CHECK-NEXT: FROMDEV3(x)
//CHECK-NEXT: FROMDEV2(x)
//CHECK-NEXT: }
void foo44(float *x, int size, int d3, int d2) {
TODEV(x, size)
FROMDEV(x, size)
FREE(x);
{
TODEV3(x)
}
{
TODEV2(x)
}
FROMDEV3(x)
FROMDEV2(x)
}

#undef TODEV
#undef FROMDEV
#undef FREE
#undef TODEV3
#undef TODEV2
#undef FROMDEV3
#undef FROMDEV2

#endif
2 changes: 1 addition & 1 deletion clang/test/dpct/texture_layered.cu
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ int main() {
cudaMalloc(&d, sizeof(float4) * 4);
// CHECK: tex42.create_image();
// CHECK: tex21.create_image();
// CHECK: dpct::get_out_of_order_queue().submit(
// CHECK: dpct::get_out_of_order_queue().submit(
// CHECK-NEXT: [&](sycl::handler &cgh) {
// CHECK-NEXT: auto d_acc_ct0 = dpct::get_access(d, cgh);
// CHECK-EMPTY:
Expand Down

0 comments on commit 567a398

Please sign in to comment.