Skip to content

Commit

Permalink
Adding support for torch.cuda and c10::cuda APIs
Browse files Browse the repository at this point in the history
  • Loading branch information
TejaX-Alaghari committed Dec 22, 2024
1 parent c0852a0 commit ccbecca
Show file tree
Hide file tree
Showing 6 changed files with 313 additions and 53 deletions.
46 changes: 45 additions & 1 deletion clang/test/dpct/python_migration/case_006/expected.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,58 @@
from torch import xpu

cuda_ver = torch.version.xpu

#init
torch.xpu.init()
xpu.init()
is_init = torch.xpu.is_initialized()
is_init = xpu.is_initialized()

# device APIs
devs = torch.xpu.device_count()
devs = xpu.device_count()

dev = torch.xpu.current_device()
dev = xpu.current_device()

torch.xpu.set_device(dev)
xpu.set_device(dev)

d_props = torch.xpu.get_device_properties(dev)
d_props = xpu.get_device_properties(dev)

curr_d_name = torch.xpu.get_device_name()
curr_d_name = xpu.get_device_name()
d_name = torch.xpu.get_device_name(dev)
d_name = xpu.get_device_name(dev)

d_cap = torch.xpu.get_device_capability()
d_cap = xpu.get_device_capability()
d0_cap = torch.xpu.get_device_capability(devs[0])
d0_cap = xpu.get_device_capability(devs[0])

dev_of_obj = torch.xpu.device_of(obj)
dev_of_obj = xpu.device_of(obj)

arch_list = ['']
arch_list = ['']

cuda_ver = torch.version.xpu
torch.xpu.synchronize()
xpu.synchronize()
torch.xpu.synchronize(dev)
xpu.synchronize(dev)

# stream APIs
curr_st = torch.xpu.current_stream()
curr_st = xpu.current_stream()
curr_d_st = torch.xpu.current_stream(dev)
curr_d_st = xpu.current_stream(dev)

st = torch.xpu.StreamContext(curr_st)
st = xpu.StreamContext(curr_st)

stS = torch.xpu.stream(st)
stS = xpu.stream(st)

torch.xpu.set_stream(st)
xpu.set_stream(st)
46 changes: 45 additions & 1 deletion clang/test/dpct/python_migration/case_006/input.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,58 @@
from torch import cuda

cuda_ver = torch.version.cuda

#init
torch.cuda.init()
cuda.init()
is_init = torch.cuda.is_initialized()
is_init = cuda.is_initialized()

# device APIs
devs = torch.cuda.device_count()
devs = cuda.device_count()

dev = torch.cuda.current_device()
dev = cuda.current_device()

torch.cuda.set_device(dev)
cuda.set_device(dev)

d_props = torch.cuda.get_device_properties(dev)
d_props = cuda.get_device_properties(dev)

curr_d_name = torch.cuda.get_device_name()
curr_d_name = cuda.get_device_name()
d_name = torch.cuda.get_device_name(dev)
d_name = cuda.get_device_name(dev)

d_cap = torch.cuda.get_device_capability()
d_cap = cuda.get_device_capability()
d0_cap = torch.cuda.get_device_capability(devs[0])
d0_cap = cuda.get_device_capability(devs[0])

dev_of_obj = torch.cuda.device_of(obj)
dev_of_obj = cuda.device_of(obj)

arch_list = torch.cuda.get_arch_list()
arch_list = cuda.get_arch_list()

cuda_ver = torch.version.cuda
torch.cuda.synchronize()
cuda.synchronize()
torch.cuda.synchronize(dev)
cuda.synchronize(dev)

# stream APIs
curr_st = torch.cuda.current_stream()
curr_st = cuda.current_stream()
curr_d_st = torch.cuda.current_stream(dev)
curr_d_st = cuda.current_stream(dev)

st = torch.cuda.StreamContext(curr_st)
st = cuda.StreamContext(curr_st)

stS = torch.cuda.stream(st)
stS = cuda.stream(st)

torch.cuda.set_stream(st)
cuda.set_stream(st)
29 changes: 26 additions & 3 deletions clang/test/dpct/pytorch/c10.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,36 @@

#ifndef NO_BUILD_TEST
#include <iostream>
// CHECK: #include <c10/xpu/XPUStream.h>
#include <c10/cuda/CUDAStream.h>
// CHECK: #include <c10/core/DeviceGuard.h>
#include <c10/cuda/CUDAGuard.h>
// CHECK: #include <c10/xpu/XPUStream.h>
#include <c10/cuda/CUDAStream.h>
// CHECK: #include <c10/xpu/XPUFunctions.h>
#include <c10/cuda/CUDAFunctions.h>

int main() {
std::optional<c10::Device> device;
// device APIs
// CHECK: c10::DeviceIndex num_devices = c10::xpu::device_count();
c10::DeviceIndex num_devices = c10::cuda::device_count();

// CHECK: c10::DeviceIndex num_devices_ensured =
// CHECK-NEXT: c10::xpu::device_count_ensure_non_zero();
c10::DeviceIndex num_devices_ensured = c10::cuda::device_count_ensure_non_zero();

// CHECK: c10::DeviceIndex current_device = c10::xpu::current_device();
c10::DeviceIndex current_device = c10::cuda::current_device();

c10::DeviceIndex new_device = 1;
// CHECK: c10::xpu::set_device(new_device);
c10::cuda::set_device(new_device);

// CHECK: c10::DeviceIndex exchanged_device = c10::xpu::exchange_device(0);
c10::DeviceIndex exchanged_device = c10::cuda::ExchangeDevice(0);

// CHECK: c10::DeviceIndex maybe_exchanged_device = c10::xpu::maybe_exchange_device(1);
c10::DeviceIndex maybe_exchanged_device = c10::cuda::MaybeExchangeDevice(1);

std::optional<c10::Device> device;
try {
// CHECK: c10::OptionalDeviceGuard device_guard(device);
c10::cuda::OptionalCUDAGuard device_guard(device);
Expand All @@ -27,6 +49,7 @@ int main() {
return -1;
}

// stream APIs
// CHECK: auto currentStream = c10::xpu::getCurrentXPUStream();
auto currentStream = c10::cuda::getCurrentCUDAStream();

Expand Down
14 changes: 14 additions & 0 deletions clang/test/dpct/pytorch/pytorch_cuda_inc/c10/cuda/CUDAFunctions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include <cmath>
#include <cstdint>

namespace c10 {
/// Device ordinal type used by the declarations below: an 8-bit signed
/// integer. Spelled `std::int8_t` so the alias depends only on what
/// <cstdint> is guaranteed to provide, not on a transitive global `int8_t`.
using DeviceIndex = std::int8_t;

// Declaration-only stubs of the c10::cuda device functions. No definitions
// are provided in this header; it only supplies the names and signatures.
// NOTE(review): behavioural notes below are inferred from the function names
// and presumably mirror the upstream PyTorch API -- confirm before relying on
// them.
namespace cuda {
/// Returns a DeviceIndex; presumably the number of available devices.
DeviceIndex device_count();

/// Returns a DeviceIndex; presumably like device_count() but expected to be
/// non-zero -- TODO confirm failure behaviour against upstream.
DeviceIndex device_count_ensure_non_zero();

/// Returns a DeviceIndex; presumably the currently selected device.
DeviceIndex current_device();

/// Takes the DeviceIndex to select; returns nothing.
void set_device(DeviceIndex device);

/// Takes a DeviceIndex and returns a DeviceIndex; presumably switches to
/// `device` and returns the previously selected one.
DeviceIndex ExchangeDevice(DeviceIndex device);

/// Takes a DeviceIndex and returns a DeviceIndex; presumably a conditional
/// variant of ExchangeDevice -- TODO confirm the exact condition.
DeviceIndex MaybeExchangeDevice(DeviceIndex to_device);
} // namespace cuda
} // namespace c10
65 changes: 54 additions & 11 deletions clang/test/dpct/pytorch/user_defined_rule_pytorch.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
---
# ATen header rules
- Rule: rule_ATen_core_Tensor_h
Kind: Header
Priority: Takeover
Expand All @@ -11,39 +12,81 @@
In: ATen/cuda/CUDAContext.h
Out: "<ATen/xpu/XPUContext.h>"

# stream rules
- Rule: rule_c10_cuda_CUDAStream_h
Kind: Header
Priority: Takeover
In: c10/cuda/CUDAStream.h
Out: "<c10/xpu/XPUStream.h>"

- Rule: rule_c10_cuda_CUDAGuard_h
Kind: Header
Priority: Takeover
In: c10/cuda/CUDAGuard.h
Out: "<c10/core/DeviceGuard.h>"

- Rule: rule_c10_cuda_CUDAStream
Kind: Class
Priority: Takeover
In: c10::cuda::CUDAStream
Out: c10::xpu::XPUStream
Includes: ["<c10/xpu/XPUStream.h>"]
Methods:
- In: stream
Out: "&($method_base queue())"

- Rule: rule_c10_cuda_getCurrentCUDAStream
Kind: API
Priority: Takeover
In: c10::cuda::getCurrentCUDAStream
Out: c10::xpu::getCurrentXPUStream($1)

# device rules
- Rule: rule_c10_cuda_CUDAGuard_h
Kind: Header
Priority: Takeover
In: c10/cuda/CUDAGuard.h
Out: "<c10/core/DeviceGuard.h>"

- Rule: rule_c10_cuda_OptionalCUDAGuard
Kind: Type
Priority: Takeover
In: c10::cuda::OptionalCUDAGuard
Out: c10::OptionalDeviceGuard
Includes: ["<c10/core/DeviceGuard.h>"]

- Rule: rule_c10_cuda_getCurrentCUDAStream
# CUDAFunctions rules
- Rule: rule_c10_cuda_CUDAFunctions_h
Kind: Header
Priority: Takeover
In: c10/cuda/CUDAFunctions.h
Out: "<c10/xpu/XPUFunctions.h>"

- Rule: rule_c10_cuda_device_count
Kind: API
Priority: Takeover
In: c10::cuda::getCurrentCUDAStream
Out: c10::xpu::getCurrentXPUStream($1)
Includes: ["<c10/xpu/XPUStream.h>"]
In: c10::cuda::device_count
Out: c10::xpu::device_count()

- Rule: rule_c10_cuda_device_count_ensure_non_zero
Kind: API
Priority: Takeover
In: c10::cuda::device_count_ensure_non_zero
Out: c10::xpu::device_count_ensure_non_zero()

- Rule: rule_c10_cuda_current_device
Kind: API
Priority: Takeover
In: c10::cuda::current_device
Out: c10::xpu::current_device()

- Rule: rule_c10_cuda_set_device
Kind: API
Priority: Takeover
In: c10::cuda::set_device
Out: c10::xpu::set_device($1)

- Rule: rule_c10_cuda_exchange_device
Kind: API
Priority: Takeover
In: c10::cuda::ExchangeDevice
Out: c10::xpu::exchange_device($1)

- Rule: rule_c10_cuda_maybe_exchange_device
Kind: API
Priority: Takeover
In: c10::cuda::MaybeExchangeDevice
Out: c10::xpu::maybe_exchange_device($1)
Loading

0 comments on commit ccbecca

Please sign in to comment.