Skip to content

Commit

Permalink
gpu - fix AtPoints transpose shift
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremylt committed Jan 9, 2025
1 parent 1a63be7 commit 47bab60
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,10 @@ inline __device__ void InterpTransposeAtPoints2d(SharedData_Cuda &data, const Ce
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = (j + p) % Q_1D;
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down Expand Up @@ -261,10 +261,10 @@ inline __device__ void GradTransposeAtPoints2d(SharedData_Cuda &data, const Ceed
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = (j + p) % Q_1D;
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down Expand Up @@ -343,10 +343,10 @@ inline __device__ void InterpTransposeAtPoints3d(SharedData_Cuda &data, const Ce
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = ((j + p) % Q_1D);
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down Expand Up @@ -430,10 +430,10 @@ inline __device__ void GradTransposeAtPoints3d(SharedData_Cuda &data, const Ceed
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = ((j + p) % Q_1D);
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,10 @@ inline __device__ void InterpTransposeAtPoints2d(SharedData_Hip &data, const Cee
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = (j + p) % Q_1D;
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down Expand Up @@ -261,10 +261,10 @@ inline __device__ void GradTransposeAtPoints2d(SharedData_Hip &data, const CeedI
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = (j + p) % Q_1D;
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down Expand Up @@ -343,10 +343,10 @@ inline __device__ void InterpTransposeAtPoints3d(SharedData_Hip &data, const Cee
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = ((j + p) % Q_1D);
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down Expand Up @@ -430,10 +430,10 @@ inline __device__ void GradTransposeAtPoints3d(SharedData_Hip &data, const CeedI
if (p < NUM_POINTS) {
for (CeedInt i = 0; i < Q_1D; i++) {
// Note: shifting to avoid atomic adds
const CeedInt ii = (i + (p / Q_1D)) % Q_1D;
const CeedInt ii = (i + data.t_id_x) % Q_1D;

for (CeedInt j = 0; j < Q_1D; j++) {
const CeedInt jj = ((j + p) % Q_1D);
const CeedInt jj = (j + data.t_id_y) % Q_1D;

atomicAdd(&data.slice[jj + ii * Q_1D], chebyshev_x[jj] * buffer[ii]);
}
Expand Down

0 comments on commit 47bab60

Please sign in to comment.