Skip to content

Commit

Permalink
Refine migration of red.relaxed.gpu.global
Browse files Browse the repository at this point in the history
Signed-off-by: chenwei.sun <[email protected]>
  • Loading branch information
tomflinda committed Jan 22, 2025
1 parent 7f5af18 commit c1988a8
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 54 deletions.
49 changes: 16 additions & 33 deletions clang/lib/DPCT/RulesAsm/AsmMigration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2730,7 +2730,8 @@ class SYCLGen : public SYCLGenBase {
const auto *Type = dyn_cast<InlineAsmBuiltinType>(Inst->getType(0));
if (!Type || (Type->getKind() != InlineAsmBuiltinType::s32 &&
Type->getKind() != InlineAsmBuiltinType::b32 &&
Type->getKind() != InlineAsmBuiltinType::u32))
Type->getKind() != InlineAsmBuiltinType::u32 &&
Type->getKind() != InlineAsmBuiltinType::f32))
return SYCLGenError();

if (emitStmt(Dst))
Expand All @@ -2743,45 +2744,27 @@ class SYCLGen : public SYCLGenBase {
if (tryEmitStmt(b, Src))
return SYCLGenError();

OS() << " = ";
OS() << MapNames::getClNamespace() + "reduce_over_group(";
OS() << DpctGlobalInfo::getItem(GAS) << ".get_group(), " << b << ",";

if (Inst->hasAttr(InstAttr::add))
OS() << " += ";
OS() << MapNames::getClNamespace() + "plus<>()";
else if (Inst->hasAttr(InstAttr::op_or))
OS() << " |= ";
OS() << MapNames::getClNamespace() + "bit_or<>()";
else if (Inst->hasAttr(InstAttr::op_xor))
OS() << " ^= ";
OS() << MapNames::getClNamespace() + "bit_xor<>()";
else if (Inst->hasAttr(InstAttr::op_and))
OS() << " &= ";
else if (Inst->hasAttr(InstAttr::dec)) {
OS() << " = ";
OS() << '(';
OS() << a << " == 0 || " << a << " > " << b << ") ? " << b << " : " << a
<< " - 1";
endstmt();
return SYCLGenSuccess();

} else if (Inst->hasAttr(InstAttr::inc)) {
OS() << " = ";
OS() << '(';
OS() << a << " >= " << b << ") ? 0 : " << a << " + 1";
endstmt();
return SYCLGenSuccess();
} else if (Inst->hasAttr(InstAttr::max)) {
OS() << " = " << MapNames::getClNamespace() + "max(" << a << ", " << b
<< ")";
endstmt();
return SYCLGenSuccess();
} else if (Inst->hasAttr(InstAttr::min)) {
OS() << " = " << MapNames::getClNamespace() + "min(" << a << ", " << b
<< ")";
endstmt();
return SYCLGenSuccess();
} else
OS() << MapNames::getClNamespace() + "bit_and<>()";
else if (Inst->hasAttr(InstAttr::min))
OS() << MapNames::getClNamespace() + "minimum<>()";
else if (Inst->hasAttr(InstAttr::max))
OS() << MapNames::getClNamespace() + "maximum<>()";
else
return SYCLGenError();

if (emitStmt(Src))
return SYCLGenError();
OS() << ")";
endstmt();

return SYCLGenSuccess();
}
};
Expand Down
39 changes: 18 additions & 21 deletions clang/test/dpct/asm/red.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,56 +8,53 @@
#include <cstdint>
#include <cuda_runtime.h>

// CHECK: void atomicAddKernel(int* lock, int val) {
// CHECK-NEXT: *lock += val;
// CHECK: void atomicAddKernel(int* lock, int val, const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::plus<>());
// CHECK-NEXT:}
__global__ void atomicAddKernel(int* lock, int val) {
asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock |= val;
// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_or<>());
// CHECK-NEXT:}
__global__ void atomicOrKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.or.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock ^= val;
// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_xor<>());
// CHECK-NEXT:}
__global__ void atomicXorKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.xor.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock &= val;
// CHECK-NEXT:}
// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_and<>());
// CHECK-NEXT: }
__global__ void atomicAndKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.and.b32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicDecKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = (*lock == 0 || *lock > val) ? val : *lock - 1;
// CHECK-NEXT: }
__global__ void atomicDecKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.dec.u32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::max(*lock, val);
// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::maximum<>());
// CHECK-NEXT: }
__global__ void atomicMaxKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.max.u32 [%0], %1;\n"
::"l"(lock),"r"(val):"memory");
}

// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::min(*lock, val);
// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val,
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>());
// CHECK-NEXT: }
__global__ void atomicMinKernel(uint32_t* lock, uint32_t val) {
asm volatile("red.relaxed.gpu.global.min.u32 [%0], %1;\n"
Expand Down

0 comments on commit c1988a8

Please sign in to comment.