From 22f00e136e7b2c8165c312fa662ce03baaed0b11 Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Tue, 24 Dec 2024 08:58:12 -0800 Subject: [PATCH] !! fixup / testing --- pkgs/by-name/uc/ucx/package.nix | 4 +- .../python-modules/torch/default.nix | 39 +++++++++++----- .../rocm-modules/6/aotriton/default.nix | 23 ++++++---- .../6/composable_kernel/default.nix | 1 + pkgs/development/rocm-modules/6/default.nix | 44 ++++++++++++++----- .../rocm-modules/6/hipblas/default.nix | 2 - .../rocm-modules/6/hipblaslt/default.nix | 14 ++++-- .../rocm-modules/6/miopen/default.nix | 2 + .../rocm-modules/6/mscclpp/default.nix | 6 +-- .../rocm-modules/6/rccl/default.nix | 6 +-- .../rocm-modules/6/rocblas/default.nix | 27 +++--------- .../rocm-modules/6/rocfft/default.nix | 4 +- .../rocm-modules/6/rocm-core/default.nix | 2 +- .../6/rocm-device-libs/default.nix | 5 --- .../rocm-modules/6/rocsolver/default.nix | 1 - .../rocm-modules/6/rocsparse/default.nix | 3 +- pkgs/top-level/stage.nix | 9 ++++ 17 files changed, 114 insertions(+), 78 deletions(-) diff --git a/pkgs/by-name/uc/ucx/package.nix b/pkgs/by-name/uc/ucx/package.nix index 87fb2eaaf824eb..cc9c3f4461d577 100644 --- a/pkgs/by-name/uc/ucx/package.nix +++ b/pkgs/by-name/uc/ucx/package.nix @@ -33,8 +33,10 @@ let paths = rocmList; }; + # rocm build fails with gcc stdenv due to unrecognised arg parallel-jobs + stdenv' = if enableRocm then rocmPackages.llvm.rocmClangStdenv else stdenv; in -stdenv.mkDerivation rec { +stdenv'.mkDerivation rec { pname = "ucx"; version = "1.17.0"; diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index f85ccc9e85f40a..2703974880bac7 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -16,11 +16,10 @@ if cudaSupport then magma-cuda-static else if rocmSupport then - magma-hip + null else magma, magma, - magma-hip, magma-cuda-static, # Use the system NCCL as long as we're targeting CUDA on a supported platform. useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported || rocmSupport), @@ -36,6 +35,7 @@ symlinkJoin, which, pybind11, + pkg-config, removeReferencesTo, # Build inputs @@ -54,6 +54,7 @@ cffi, click, typing-extensions, + six, # ROCm build and `torch.compile` requires `triton` tritonSupport ? (!stdenv.hostPlatform.isDarwin), triton, @@ -66,7 +67,13 @@ # (dependencies without cuda support). # Instead we should rely on overlays and nixpkgsFun. # (@SomeoneSerge) - _tritonEffective ? if cudaSupport then triton-cuda else triton, + _tritonEffective ? + if cudaSupport then + triton-cuda + else if rocmSupport then + rocmPackages.triton + else + triton, triton-cuda, # Unit tests @@ -86,14 +93,13 @@ # dependencies for torch.utils.tensorboard pillow, - six, future, tensorboard, protobuf, # ROCm dependencies rocmSupport ? config.rocmSupport, - rocmPackages_5, + rocmPackages, gpuTargets ? [ ], vulkanSupport ? false, @@ -113,8 +119,6 @@ let triton = throw "python3Packages.torch: use _tritonEffective instead of triton to avoid divergence"; - rocmPackages = rocmPackages_5; - setBool = v: if v then "1" else "0"; # https://github.com/pytorch/pytorch/blob/v2.4.0/torch/utils/cpp_extension.py#L1953 @@ -180,7 +184,7 @@ let clr rccl miopen - miopengemm + aotriton rocrand rocblas rocsparse @@ -192,8 +196,11 @@ let rocfft rocsolver hipfft + hiprand hipsolver + hipblas-common hipblas + hipblaslt rocminfo rocm-thunk rocm-comgr @@ -212,6 +219,7 @@ let brokenConditions = attrsets.filterAttrs (_: cond: cond) { "CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport; "CUDA is not targeting Linux" = cudaSupport && !stdenv.hostPlatform.isLinux; + "ROCm 6 is currently not compatible with magma" = rocmSupport && effectiveMagma != null; "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ @@ -225,8 +233,6 @@ let # In particular, this triggered warnings from cuda's `aliases.nix` "Magma cudaPackages does not match cudaPackages" = cudaSupport && (effectiveMagma.cudaPackages.cudaVersion != cudaPackages.cudaVersion); - "Rocm support is currently broken because `rocmPackages.hipblaslt` is unpackaged. (2024-06-09)" = - rocmSupport; }; git-unroll = fetchFromGitea { @@ -388,6 +394,10 @@ buildPythonPackage rec { # We only do an imports check, so do not build tests either. BUILD_TEST = setBool false; + # ninja hook doesn't automatically turn on ninja + # because pytorch setup.py is responsible for this + CMAKE_GENERATOR = "Ninja"; + # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for # it by default. PyTorch currently uses its own vendored version # of oneDNN through Intel iDeep. @@ -406,6 +416,7 @@ buildPythonPackage rec { cmakeFlags = [ + (lib.cmakeFeature "PYTHON_SIX_SOURCE_DIR" "${six.src}") # (lib.cmakeBool "CMAKE_FIND_DEBUG_MODE" true) (lib.cmakeFeature "CUDAToolkit_VERSION" cudaPackages.cudaVersion) ] @@ -454,6 +465,8 @@ buildPythonPackage rec { env = { + # Builds faster without this and we don't have enough inputs that cmd length is an issue + NIX_CC_USE_RESPONSE_FILE = 0; # Suppress a weird warning in mkl-dnn, part of ideep in pytorch # (upstream seems to have fixed this in the wrong place?) # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc @@ -511,6 +524,9 @@ buildPythonPackage rec { } // lib.optionalAttrs vulkanSupport { VULKAN_SDK = shaderc.bin; + } + // lib.optionalAttrs rocmSupport { + AOTRITON_INSTALLED_PREFIX = "${rocmPackages.aotriton}"; }; nativeBuildInputs = @@ -519,6 +535,7 @@ buildPythonPackage rec { which ninja pybind11 + pkg-config removeReferencesTo ] ++ lib.optionals cudaSupport ( @@ -564,7 +581,7 @@ buildPythonPackage rec { ] ) ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ] - ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] + ++ lib.optionals (effectiveMagma != null && (cudaSupport || rocmSupport)) [ effectiveMagma ] ++ lib.optionals stdenv.hostPlatform.isLinux [ numactl ] ++ lib.optionals stdenv.hostPlatform.isDarwin [ apple-sdk_13 diff --git a/pkgs/development/rocm-modules/6/aotriton/default.nix b/pkgs/development/rocm-modules/6/aotriton/default.nix index 45d51afaf01e08..67935c3396b737 100644 --- a/pkgs/development/rocm-modules/6/aotriton/default.nix +++ b/pkgs/development/rocm-modules/6/aotriton/default.nix @@ -33,7 +33,13 @@ buildTests ? false, buildBenchmarks ? false, buildSamples ? false, - gpuTargets ? [ "gfx908" ], # [ ] + gpuTargets ? [ + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + ], # [ ] }: stdenv.mkDerivation ( @@ -60,7 +66,7 @@ stdenv.mkDerivation ( # cd $out # tar xf ${cudaPackages.cuda_cudart.src} --strip-components=1 # ''; - cudaRtIncludes = cudaPackages.cudatoolkit; + #cudaRtIncludes = cudaPackages.cuda_cudart; triton-llvm' = builtins.trace "aotriton: TODO: confirm using same triton-llvm pinned hash as triton 3.2.x is ok" triton-llvm; in # triton-llvm' = triton-llvm.overrideAttrs (_old: { @@ -86,8 +92,6 @@ stdenv.mkDerivation ( }; env.CXX = compiler; env.ROCM_PATH = "${clr}"; - env.NIX_CC_USE_RESPONSE_FILE = 0; - env.NIX_DISABLE_WRAPPER_INCLUDES = 1; requiredSystemFeatures = [ "big-parallel" ]; outputs = @@ -142,7 +146,7 @@ stdenv.mkDerivation ( xz nlohmann_json rocmlir - cudaRtIncludes + #cudaRtIncludes # Tensile deps - not optional, building without tensile isn't actually supported msgpack # FIXME: not included in cmake! @@ -164,15 +168,16 @@ stdenv.mkDerivation ( env.JSON_SYSPATH = nlohmann_json; env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir"; # build time dep for header only, only needs source. - env.TRITON_CUDACRT_PATH = cudaRtIncludes; - env.TRITON_CUDART_PATH = cudaRtIncludes; - env.CXXFLAGS = "-I/build/source/third_party/triton/third_party/nvidia/backend/include -I${cudaRtIncludes}/include"; + # env.TRITON_CUDACRT_PATH = cudaRtIncludes; + # env.TRITON_CUDART_PATH = cudaRtIncludes; + env.CXXFLAGS = "-I/build/source/third_party/triton/third_party/nvidia/backend/include"; # env.NOIMAGE_MODE = 1; # Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files preConfigure = '' mkdir third_party/triton/third_party/nvidia/backend/include/ - cp ${cudaRtIncludes}/include/*.h third_party/triton/third_party/nvidia/backend/include/ + touch third_party/triton/third_party/nvidia/backend/include/cuda.h + #cp ''${cudaRtIncludes}/include/*.h third_party/triton/third_party/nvidia/backend/include/ find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + grep -ir cuda.h third_party/triton diff --git a/pkgs/development/rocm-modules/6/composable_kernel/default.nix b/pkgs/development/rocm-modules/6/composable_kernel/default.nix index 06940d03ab2776..faf4450ff3a4ba 100644 --- a/pkgs/development/rocm-modules/6/composable_kernel/default.nix +++ b/pkgs/development/rocm-modules/6/composable_kernel/default.nix @@ -59,6 +59,7 @@ stdenv.mkDerivation (finalAttrs: { clr hipify ninja + zstd ]; buildInputs = [ diff --git a/pkgs/development/rocm-modules/6/default.nix b/pkgs/development/rocm-modules/6/default.nix index 300b3de636d328..35db8428e4692b 100644 --- a/pkgs/development/rocm-modules/6/default.nix +++ b/pkgs/development/rocm-modules/6/default.nix @@ -14,6 +14,7 @@ emptyDirectory, cudaPackages, triton-llvm, + openmpi, }: lib.makeScope newScope ( @@ -21,6 +22,7 @@ lib.makeScope newScope ( let pyPackages = python3Packages; libffiorig = libffi; + openmpi-orig = openmpi; in with self; { @@ -556,17 +558,29 @@ lib.makeScope newScope ( useCPU = true; }; + openmpi = openmpi-orig.override (prev: { + ucx = prev.ucx.override { + enableCuda = false; + enableRocm = true; + }; + }); + mpi = self.openmpi; + triton-llvm = - builtins.trace "FIXME: triton-rocm needs ANOTHER different LLVM build" triton-llvm.overrideAttrs + builtins.trace "FIXME: triton-rocm needs ANOTHER different LLVM build" + (triton-llvm.override { + buildTests = false; # FIXME: why are tests failing? + }).overrideAttrs { src = fetchFromGitHub { owner = "llvm"; repo = "llvm-project"; # make sure this matches triton llvm rel branch hash for now # https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt - rev = "b5cc222d7429fe6f18c787f633d5262fac2e676f"; - hash = "sha256-iH5OBwtmJLHao2PhxKT8w+vGlFE0D2R/ry8j9nZs+TQ="; + rev = "86b69c31642e98f8357df62c09d118ad1da4e16a"; + hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE="; }; + pname = "triton-llvm-rocm"; patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase }; @@ -579,30 +593,38 @@ lib.makeScope newScope ( llvm = self.triton-llvm; })).overridePythonAttrs (old: { - + doCheck = false; stdenv = self.llvm.rocmClangStdenv; version = "3.2.0"; src = fetchFromGitHub { owner = "triton-lang"; repo = "triton"; - rev = "release/3.2.x"; - hash = "sha256-cC2eARYcmZqLrzwlmMi92xkEqpGMn2d9IndZQBoGE7Q="; + rev = "64b80f0916b69e3c4d0682a2368fd126e57891ab"; # "release/3.2.x"; + hash = "sha256-xQOgMLHruVrI/9FtY3TvZKALitMOfqZ69uOyrYhXhu8="; }; buildInputs = old.buildInputs ++ [ self.clr ]; dontStrip = true; env = old.env // { - CXXFLAGS = "-gz -g1 -O3 -I${self.clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include -I${cudaPackages.cudatoolkit}/include"; + CXXFLAGS = "-gz -g1 -O3 -I${self.clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include"; + TRITON_OFFLINE_BUILD = 1; }; - # TRITON_BUILD_PROTON = "OFF"; # disable profiler, instead of --replace-fail 'packages += ["triton/profiler"]' ""\ patches = [ ]; postPatch = '' # Need an empty cuda.h to happily compile for ROCm + mkdir -p third_party/nvidia/include/ third_party/nvidia/include/backend/include/ echo "" > third_party/nvidia/include/cuda.h - - mkdir third_party/nvidia/backend/include/ - cp ${cudaPackages.cudatoolkit}/include/*.h third_party/nvidia/backend/include/ + touch third_party/nvidia/include/backend/include/{cuda,driver_types}.h + rm -rf third_party/nvidia + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(test)" "" + sed -i '/nvidia\|NVGPU\|registerConvertTritonGPUToLLVMPass\|mlir::test::/Id' bin/RegisterTritonDialects.h + sed -i '/TritonTestAnalysis/Id' bin/CMakeLists.txt + substituteInPlace python/setup.py \ + --replace-fail 'backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]' \ + 'backends = [*BackendInstaller.copy(["amd"]), *BackendInstaller.copy_externals()]' + #cp ''${cudaPackages.cuda_cudart}/include/*.h third_party/nvidia/backend/include/ find . -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + find . -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + diff --git a/pkgs/development/rocm-modules/6/hipblas/default.nix b/pkgs/development/rocm-modules/6/hipblas/default.nix index 3f986b6d04b91e..b244d4934fdc60 100644 --- a/pkgs/development/rocm-modules/6/hipblas/default.nix +++ b/pkgs/development/rocm-modules/6/hipblas/default.nix @@ -24,8 +24,6 @@ stdenv.mkDerivation (finalAttrs: { pname = "hipblas"; version = "6.3.1"; - env.NIX_DEBUG = 1; - env.NIX_DISABLE_WRAPPER_INCLUDES = 1; outputs = [ diff --git a/pkgs/development/rocm-modules/6/hipblaslt/default.nix b/pkgs/development/rocm-modules/6/hipblaslt/default.nix index a00ad3bb269ce7..978a8592a8b4dc 100644 --- a/pkgs/development/rocm-modules/6/hipblaslt/default.nix +++ b/pkgs/development/rocm-modules/6/hipblaslt/default.nix @@ -25,7 +25,16 @@ buildTests ? false, buildBenchmarks ? false, buildSamples ? false, - gpuTargets ? [ "gfx908" ], # [ ] + # hipblaslt supports only devices with MFMA or WMMA + # WMMA on gfx1100 may be broken + # MFMA on MI100 may be broken + # MI200/MI300 known to work + gpuTargets ? [ + "gfx908" + "gfx90a" + "gfx942" + "gfx1100" + ], }: stdenv.mkDerivation ( @@ -62,14 +71,11 @@ stdenv.mkDerivation ( env.CXXFLAGS = cFlags; env.ROCM_PATH = "${clr}"; env.TENSILE_ROCM_ASSEMBLER_PATH = "${clang-sysrooted}/bin/clang++"; - env.NIX_CC_USE_RESPONSE_FILE = 0; - env.NIX_DISABLE_WRAPPER_INCLUDES = 1; env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = "${clang-sysrooted}/bin/clang++"; requiredSystemFeatures = [ "big-parallel" ]; patches = [ ./ext-op-first.diff - # ./alpha_1_init_fix.patch # libcxx bug workaround - ]; outputs = diff --git a/pkgs/development/rocm-modules/6/miopen/default.nix b/pkgs/development/rocm-modules/6/miopen/default.nix index 7cf9455a423b7a..fe0ee8adadefdd 100644 --- a/pkgs/development/rocm-modules/6/miopen/default.nix +++ b/pkgs/development/rocm-modules/6/miopen/default.nix @@ -39,6 +39,8 @@ "gfx908" "gfx90a" "gfx942" + "gfx1030" + "gfx1100" ], # clr.gpuTargets buildDocs ? false, # Needs internet because of rocm-docs-core buildTests ? false, diff --git a/pkgs/development/rocm-modules/6/mscclpp/default.nix b/pkgs/development/rocm-modules/6/mscclpp/default.nix index f734fda8c4f426..e1ac56b7683923 100644 --- a/pkgs/development/rocm-modules/6/mscclpp/default.nix +++ b/pkgs/development/rocm-modules/6/mscclpp/default.nix @@ -15,26 +15,22 @@ stdenv.mkDerivation { buildInputs = [ clr numactl - #nlohmann_json - #python3Packages.nanobind ]; postPatch = '' substituteInPlace CMakeLists.txt \ --replace-fail "gfx90a gfx941 gfx942" "gfx908 gfx90a gfx942 gfx1030 gfx1100" ''; cmakeFlags = [ - #"--trace" "-DMSCCLPP_BYPASS_GPU_CHECK=ON" "-DMSCCLPP_USE_ROCM=ON" "-DMSCCLPP_BUILD_TESTS=OFF" + "-DGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" "-DAMDGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" "-DMSCCLPP_BUILD_APPS_NCCL=ON" "-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF" "-DFETCHCONTENT_QUIET=OFF" "-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS" - #"-DFETCHCONTENT_SOURCE_DIR_NANOBIND=${nanobind_src}" "-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}" - #"-DFETCHCONTENT_FULLY_DISCONNECTED=ON" ]; env.ROCM_PATH = clr; src = fetchFromGitHub { diff --git a/pkgs/development/rocm-modules/6/rccl/default.nix b/pkgs/development/rocm-modules/6/rccl/default.nix index f9b3e542aa81a4..f3087377d90249 100644 --- a/pkgs/development/rocm-modules/6/rccl/default.nix +++ b/pkgs/development/rocm-modules/6/rccl/default.nix @@ -84,7 +84,7 @@ stdenv.mkDerivation (finalAttrs: { "-DROCM_PATH=${clr}" "-DHIP_COMPILER=${clr}/bin/amdclang++" "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++" - "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" # FIXME: get from versin + "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h "-DENABLE_MSCCL_KERNEL=ON" @@ -107,8 +107,8 @@ stdenv.mkDerivation (finalAttrs: { makeFlags = [ "-l32" ]; env.CCC_OVERRIDE_OPTIONS = "+-parallel-jobs=6"; - env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing -gz -g1 ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer -DROCM_VERSION=60300"; - env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing -gz -g1 ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer -DROCM_VERSION=60300"; + env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing -gz -g1 ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; + env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing -gz -g1 ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; env.LDFLAGS = "${san}"; postPatch = '' patchShebangs src tools diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix index ee5fbc79b686cb..da2ba1c77d2ced 100644 --- a/pkgs/development/rocm-modules/6/rocblas/default.nix +++ b/pkgs/development/rocm-modules/6/rocblas/default.nix @@ -25,8 +25,6 @@ buildTensile ? true, buildTests ? true, buildBenchmarks ? true, - #, tensileLogic ? "asm_full" - tensileCOVersion ? "default", # https://github.com/ROCm/Tensile/issues/1757 # Allows gfx101* users to use rocBLAS normally. # Turn the below two values to `true` after the fix has been cherry-picked @@ -42,16 +40,18 @@ # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will # always try to use `gfx1010` code objects, hence building for `gfx1012` is # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 - # , gpuTargets ? [ "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" ] - #, gpuTargets ? [ "gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" ] - #, gpuTargets ? [ "gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1030" ] gpuTargets ? [ + "gfx900" + "gfx906" "gfx908" "gfx90a" "gfx942" + "gfx1010" "gfx1030" "gfx1100" - ], # "gfx1030" "gfx1100" ] + "gfx1101" + "gfx1102" + ], }: # FIXME: this derivation is ludicrously large, split into arch-specific derivations and symlink together? @@ -125,7 +125,6 @@ stdenv.mkDerivation (finalAttrs: { env.LDFLAGS = lib.optionalString ( buildTests || buildBenchmarks ) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas"; - env.NIX_DISABLE_WRAPPER_INCLUDES = 1; env.TENSILE_ROCM_ASSEMBLER_PATH = "${clang-sysrooted}/bin/clang++"; cmakeFlags = @@ -156,30 +155,18 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_LIBDIR=lib" ] ++ lib.optionals buildTensile [ - #" -DCMAKE_PREFIX_PATH="${DEPS_DIR};${ROCM_PATH}" \ "-DCPACK_SET_DESTDIR=OFF" "-DLINK_BLIS=ON" "-DTensile_CODE_OBJECT_VERSION=default" "-DTensile_LOGIC=asm_full" - # "-DTensile_LOGIC=hip_lite" - #"-DTensile_SEPARATE_ARCHITECTURES=ON" - #"-DTensile_LAZY_LIBRARY_LOADING=ON" "-DTensile_LIBRARY_FORMAT=msgpack" (lib.cmakeBool "BUILD_WITH_PIP" false) - # "-DTensile_COMPILER=hipcc" - # "-DTensile_CODE_OBJECT_VERSION=V4" - # "-DTensile_LOGIC=hip_lite" - #(lib.cmakeFeature "Tensile_LOGIC" tensileLogic) - #(lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion) (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) - #(lib.cmakeBool "Tensile_PRINT_DEBUG" true) - #"-DTENSILE_GPU_ARCHS=gfx908" - #"-DTensile_VERBOSE=2" ]; preConfigure = '' - makeFlagsArray+=("-l$((NIX_BUILD_CORES / 2))") + makeFlagsArray+=("-l$(nproc)") ''; passthru.amdgpu_targets = gpuTargets'; diff --git a/pkgs/development/rocm-modules/6/rocfft/default.nix b/pkgs/development/rocm-modules/6/rocfft/default.nix index bd5eddfd6c4739..9378430176e09d 100644 --- a/pkgs/development/rocm-modules/6/rocfft/default.nix +++ b/pkgs/development/rocm-modules/6/rocfft/default.nix @@ -14,9 +14,7 @@ gtest, openmp, rocrand, - gpuTargets ? [ - "gfx908;gfx1030;gfx1100" - ], + gpuTargets ? clr.gpuTargets, }: stdenv.mkDerivation (finalAttrs: { diff --git a/pkgs/development/rocm-modules/6/rocm-core/default.nix b/pkgs/development/rocm-modules/6/rocm-core/default.nix index 13edeaa29fc1d9..e71205db3abba3 100644 --- a/pkgs/development/rocm-modules/6/rocm-core/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-core/default.nix @@ -20,7 +20,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake ]; # FIXME: What's the correct way to set this? - env.ROCM_LIBPATCH_VERSION = "60300"; + env.ROCM_LIBPATCH_VERSION = "${lib.versions.major finalAttrs.version}0${lib.versions.minor finalAttrs.version}0${lib.versions.patch finalAttrs.version}"; env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}"; env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}"; cmakeFlags = [ diff --git a/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix b/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix index d0a7c40ea9f1f3..58e3d1c75a82f0 100644 --- a/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix @@ -49,11 +49,6 @@ stdenv.mkDerivation (finalAttrs: { dontStrip = true; env.CFLAGS = "-g1 -gz"; env.CXXFLAGS = "-g1 -gz"; - # env.NIX_DEBUG = 1; - # env.CFLAGS = "-g1 -fsanitize=undefined"; - # env.CXXFLAGS = "-g1 -fsanitize=undefined"; - # env.NIX_CFLAGS_COMPILE = "-g1"; - # env.NIX_CXXFLAGS_COMPILE = "-g1"; cmakeFlags = [ "-DCMAKE_RELEASE_TYPE=Release" diff --git a/pkgs/development/rocm-modules/6/rocsolver/default.nix b/pkgs/development/rocm-modules/6/rocsolver/default.nix index 0133d4bb40bdde..53a5d7b71c1101 100644 --- a/pkgs/development/rocm-modules/6/rocsolver/default.nix +++ b/pkgs/development/rocm-modules/6/rocsolver/default.nix @@ -15,7 +15,6 @@ lapack-reference, buildTests ? false, buildBenchmarks ? false, - #, gpuTargets ? ["gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101"] gpuTargets ? [ ], }: diff --git a/pkgs/development/rocm-modules/6/rocsparse/default.nix b/pkgs/development/rocm-modules/6/rocsparse/default.nix index cc62bcbac924b4..b27380a305fca0 100644 --- a/pkgs/development/rocm-modules/6/rocsparse/default.nix +++ b/pkgs/development/rocm-modules/6/rocsparse/default.nix @@ -15,8 +15,7 @@ python3Packages, buildTests ? false, buildBenchmarks ? false, # Seems to depend on tests - #, gpuTargets ? ["gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101"] - gpuTargets ? [ "gfx908;gfx1030;gfx1100" ], + gpuTargets ? clr.gpuTargets, }: stdenv.mkDerivation (finalAttrs: { diff --git a/pkgs/top-level/stage.nix b/pkgs/top-level/stage.nix index 1cedd8dd184587..6d8482e4f14058 100644 --- a/pkgs/top-level/stage.nix +++ b/pkgs/top-level/stage.nix @@ -321,6 +321,15 @@ let }; }); + # Full package set with rocm on cuda off + # Mostly useful for asserting pkgs.pkgsRocm.torchWithRocm == pkgs.torchWithRocm and similar + pkgsRocm = nixpkgsFun ({ + config = super.config // { + cudaSupport = false; + rocmSupport = true; + }; + }); + pkgsExtraHardening = nixpkgsFun { overlays = [ (self': super': {