Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUDA: enable cuda support v1 - EAGER with GDR COPY #20

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 84 additions & 1 deletion config/m4/cuda.m4
Original file line number Diff line number Diff line change
@@ -1 +1,84 @@
AM_CONDITIONAL([HAVE_CUDA], [true])
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#

#
# Check for CUDA support
#
cuda_happy="no"

AC_ARG_WITH([cuda],
[AS_HELP_STRING([--with-cuda=(DIR)], [Enable the use of CUDA (default is no).])],
[], [with_cuda=no])

AS_IF([test "x$with_cuda" != "xno"],

[AS_IF([test "x$with_cuda" == "x" || test "x$with_cuda" == "xguess" || test "x$with_cuda" == "xyes"],
[
AC_MSG_NOTICE([CUDA path was not specified. Guessing ...])
with_cuda=/usr/local/cuda
],
[:])
AC_CHECK_HEADERS([$with_cuda/include/cuda.h $with_cuda/include/cuda_runtime.h],
[AC_CHECK_DECLS([cuPointerGetAttribute],
[cuda_happy="yes"],
[AC_MSG_WARN([CUDA runtime not detected. Disable.])
cuda_happy="no"],
[#include <$with_cuda/include/cuda.h>])
AS_IF([test "x$cuda_happy" == "xyes"],
[AC_DEFINE([HAVE_CUDA], 1, [Enable CUDA support])
AC_SUBST(CUDA_CPPFLAGS, "-I$with_cuda/include/ ")
AC_SUBST(CUDA_CFLAGS, "-I$with_cuda/include/ ")
AC_SUBST(CUDA_LDFLAGS, "-lcudart -lcuda -L$with_cuda/lib64")
CFLAGS="$CFLAGS $CUDA_CFLAGS"
CPPFLAGS="$CPPFLAGS $CUDA_CPPFLAGS"
LDFLAGS="$LDFLAGS $CUDA_LDFLAGS"],
[])],
[AC_MSG_WARN([CUDA not found])
AC_DEFINE([HAVE_CUDA], [0], [Disable the use of CUDA])])],
[AC_MSG_WARN([CUDA was explicitly disabled])
AC_DEFINE([HAVE_CUDA], [0], [Disable the use of CUDA])]
)


AM_CONDITIONAL([HAVE_CUDA], [test "x$cuda_happy" != xno])

AC_ARG_WITH([gdrcopy],
[AS_HELP_STRING([--with-gdrcopy=(DIR)], [Enable the use of GDR_COPY (default is no).])],
[], [with_gdrcopy=no])

AS_IF([test "x$with_gdrcopy" != "xno"],

[AS_IF([test "x$with_gdrcopy" == "x" || test "x$with_gdrcopy" == "xguess" || test "x$with_gdrcopy" == "xyes"],
[
AC_MSG_NOTICE([GDR_COPY path was not specified. Guessing ...])
with_gdrcopy=/usr/local/gdrcopy
],
[:])
AC_CHECK_HEADERS([$with_gdrcopy/include/gdrapi.h],
[AC_CHECK_DECLS([gdr_pin_buffer],
[gdrcopy_happy="yes"],
[AC_MSG_WARN([GDR_COPY runtime not detected. Disable.])
gdrcopy_happy="no"],
[#include <$with_gdrcopy/include/gdrapi.h>])
AS_IF([test "x$gdrcopy_happy" == "xyes"],
[AC_DEFINE([HAVE_GDR_COPY], 1, [Enable GDR_COPY support])
AC_SUBST(GDR_COPY_CPPFLAGS, "-I$with_gdrcopy/include/ ")
AC_SUBST(GDR_COPY_CFLAGS, "-I$with_gdrcopy/include/ ")
AC_SUBST(GDR_COPY_LDFLAGS, "-lgdrapi -L$with_gdrcopy/lib64")
CFLAGS="$CFLAGS $GDR_COPY_CFLAGS"
CPPFLAGS="$CPPFLAGS $GDR_COPY_CPPFLAGS"
LDFLAGS="$LDFLAGS $GDR_COPY_LDFLAGS"],
[])],
[AC_MSG_WARN([GDR_COPY not found])
AC_DEFINE([HAVE_GDR_COPY], [0], [Disable the use of GDR_COPY])])],
[AC_MSG_WARN([GDR_COPY was explicitly disabled])
AC_DEFINE([HAVE_GDR_COPY], [0], [Disable the use of GDR_COPY])]
)


AM_CONDITIONAL([HAVE_GDR_COPY], [test "x$gdrcopy_happy" != xno])

AC_DEFINE([HAVE_CUDA_GDR], [1], [Eanble GPU Direct RDMA])]
AM_CONDITIONAL([HAVE_CUDA_GDR], [1])
7 changes: 7 additions & 0 deletions src/ucm/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ libucm_la_SOURCES = \
util/reloc.c \
util/sys.c

if HAVE_CUDA
libucm_la_SOURCES += \
cuda/install.c \
cuda/replace.c

endif

if HAVE_UCM_PTMALLOC283
libucm_la_CPPFLAGS += \
-I$(srcdir)/ptmalloc283/sysdeps/pthread \
Expand Down
35 changes: 34 additions & 1 deletion src/ucm/api/ucm.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
#define UCM_H_

#include <ucs/sys/compiler_def.h>

#if HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda.h>
#endif
BEGIN_C_DECLS

#include <ucs/config/types.h>
Expand All @@ -32,6 +35,8 @@ typedef enum ucm_event_type {
UCM_EVENT_SHMAT = UCS_BIT(3),
UCM_EVENT_SHMDT = UCS_BIT(4),
UCM_EVENT_SBRK = UCS_BIT(5),
/* cuda events */
UCM_EVENT_CUDAFREE = UCS_BIT(6),

/* Aggregate events */
UCM_EVENT_VM_MAPPED = UCS_BIT(16),
Expand Down Expand Up @@ -113,6 +118,17 @@ typedef union ucm_event {
intptr_t increment;
} sbrk;

#if HAVE_CUDA
/*
* UCM_EVENT_CUDAFREE
* cudaFree() is called.
*/
struct {
int result;
void *address;
} cudaFree;
#endif

/*
* UCM_EVENT_VM_MAPPED, UCM_EVENT_VM_UNMAPPED
*
Expand Down Expand Up @@ -351,6 +367,23 @@ int ucm_shmdt(const void *shmaddr);
*/
void *ucm_sbrk(intptr_t increment);

#if HAVE_CUDA

/**
* @brief Call the original implementation of @ref cudaFree without triggering events.
*/
cudaError_t ucm_orig_cudaFree(void *address);


/**
* @brief Call the original implementation of @ref cudaFree and all handlers
* associated with it.
*/
cudaError_t ucm_cudaFree(void *address);

#endif



END_C_DECLS

Expand Down
22 changes: 22 additions & 0 deletions src/ucm/cuda/cudamem.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copyright year 2017?

*
* See file LICENSE for terms.
*/

#ifndef UCM_CUDAMEM_H_
#define UCM_CUDAMEM_H_

#include <ucm/api/ucm.h>
#include <cuda.h>
#include <cuda_runtime.h>

ucs_status_t ucm_cudamem_install(int events);

void ucm_cudamem_event_test_callback(ucm_event_type_t event_type,
ucm_event_t *event, void *arg);


cudaError_t ucm_override_cudaFree(void *addr);

#endif
154 changes: 154 additions & 0 deletions src/ucm/cuda/install.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include "cudamem.h"

#include <ucm/api/ucm.h>
#include <ucm/event/event.h>
#include <ucm/util/log.h>
#include <ucm/util/reloc.h>
#include <ucm/util/ucm_config.h>
#include <ucs/sys/math.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <unistd.h>
#include <pthread.h>


typedef struct ucm_cudamem_func {
ucm_reloc_patch_t patch;
ucm_event_type_t event_type;
} ucm_cudamem_func_t;

static ucm_cudamem_func_t ucm_cudamem_funcs[] = {
{ {"cudaFree", ucm_override_cudaFree}, UCM_EVENT_CUDAFREE},
{ {NULL, NULL}, 0}
};

void ucm_cudamem_event_test_callback(ucm_event_type_t event_type,
ucm_event_t *event, void *arg)
{
int *out_events = arg;
*out_events |= event_type;
}

/* Called with lock held */
static ucs_status_t ucm_cudamem_test(int events)
{
static int installed_events = 0;
ucm_event_handler_t handler;
int out_events = 0;
void *p;

if (ucs_test_all_flags(installed_events, events)) {
/* All requested events are already installed */
return UCS_OK;
}

/* Install a temporary event handler which will add the supported event
* type to out_events bitmap.
*/
handler.events = events;
handler.priority = -1;
handler.cb = ucm_cudamem_event_test_callback;
handler.arg = &out_events;
out_events = 0;

ucm_event_handler_add(&handler);

if (events & (UCM_EVENT_CUDAFREE)) {
if (cudaSuccess != cudaMalloc(&p, 64)) {
ucm_error("cudaMalloc failed");
return UCS_ERR_UNSUPPORTED;
}
cudaFree(p);
}


ucm_event_handler_remove(&handler);

/* TODO check address / stop all threads */
installed_events |= out_events;
ucm_debug("cudamem test: got 0x%x out of 0x%x, total: 0x%x", out_events, events,
installed_events);

/* Return success iff we caught all wanted events */

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iff->if

if (!ucs_test_all_flags(out_events, events)) {
return UCS_ERR_UNSUPPORTED;
}

return UCS_OK;
}

/* Called with lock held */
static ucs_status_t ucs_cudamem_install_reloc(int events)
{
static int installed_events = 0;
ucm_cudamem_func_t *entry;
ucs_status_t status;

if (!ucm_global_config.enable_cuda_hooks) {
ucm_debug("installing cudamem relocations is disabled by configuration");
return UCS_ERR_UNSUPPORTED;
}

for (entry = ucm_cudamem_funcs; entry->patch.symbol != NULL; ++entry) {
if (!(entry->event_type & events)) {
/* Not required */
continue;
}

if (entry->event_type & installed_events) {
/* Already installed */
continue;
}

ucm_debug("cudamem: installing relocation table entry for %s = %p for event 0x%x",
entry->patch.symbol, entry->patch.value, entry->event_type);

status = ucm_reloc_modify(&entry->patch);
if (status != UCS_OK) {
ucm_warn("failed to install relocation table entry for '%s'",
entry->patch.symbol);
return status;
}

installed_events |= entry->event_type;
}

return UCS_OK;
}

ucs_status_t ucm_cudamem_install(int events)
{
static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
ucs_status_t status;

pthread_mutex_lock(&install_mutex);

status = ucm_cudamem_test(events);
if (status == UCS_OK) {
goto out_unlock;
}

status = ucs_cudamem_install_reloc(events);
if (status != UCS_OK) {
ucm_debug("failed to install relocations for cudamem");
goto out_unlock;
}

status = ucm_cudamem_test(events);

out_unlock:
pthread_mutex_unlock(&install_mutex);
return status;
}
Loading