From 2895755e7e5145a2cec04d308c40c6497581c25b Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Sat, 4 Jan 2025 10:24:14 -0800 Subject: [PATCH 01/11] rocmPackages_6: 6.0.2 -> 6.3.1 --- .../rocm-modules/6/amdsmi/amd_hsmp.h | 417 +++++++ .../rocm-modules/6/amdsmi/default.nix | 79 ++ .../rocm-modules/6/aotriton/default.nix | 253 ++++ .../6/aotriton/triton-remove-distutils.patch | 68 ++ .../rocm-modules/6/clang-ocl/default.nix | 45 - ...0001-handle-v1-of-compressed-fatbins.patch | 176 +++ .../6/clr/cmake-find-x11-libgl.patch | 40 + .../rocm-modules/6/clr/default.nix | 247 ++-- .../6/clr/fix-null-stream-sync-perf.patch | 101 ++ .../6/composable_kernel/ck4inductor.nix | 43 + .../6/composable_kernel/default.nix | 123 +- .../disable-amdgpu-inline.patch | 16 + .../6/composable_kernel/unpack.nix | 15 +- pkgs/development/rocm-modules/6/default.nix | 1051 +++++++++-------- .../rocm-modules/6/half/default.nix | 22 +- .../rocm-modules/6/hip-common/default.nix | 20 +- .../rocm-modules/6/hipblas-common/default.nix | 27 + .../rocm-modules/6/hipblas/default.nix | 36 +- .../rocm-modules/6/hipblaslt/default.nix | 221 ++++ .../6/hipblaslt/ext-op-first.diff | 22 + ...ra-definition-of-hipBinUtilPtr_-in-d.patch | 39 + .../rocm-modules/6/hipcc/default.nix | 37 +- .../rocm-modules/6/hipcub/default.nix | 124 +- .../rocm-modules/6/hipfft/default.nix | 11 +- .../rocm-modules/6/hipfort/default.nix | 24 +- .../rocm-modules/6/hipify/default.nix | 47 +- .../rocm-modules/6/hiprand/default.nix | 13 +- .../rocm-modules/6/hipsolver/default.nix | 17 +- .../rocm-modules/6/hipsparse/default.nix | 27 +- .../6/hsa-amd-aqlprofile-bin/default.nix | 19 +- pkgs/development/rocm-modules/6/llvm/base.nix | 216 ---- .../clang-at-least-16-LLVMgold-path.patch | 14 + .../clang-bodge-ignore-systemwide-incls.diff | 23 + .../rocm-modules/6/llvm/clang-log-jobs.diff | 40 + .../rocm-modules/6/llvm/default.nix | 602 ++++++++-- .../6/llvm/stage-1/clang-unwrapped.nix | 48 - .../rocm-modules/6/llvm/stage-1/lld.nix | 15 - 
.../rocm-modules/6/llvm/stage-1/llvm.nix | 11 - .../rocm-modules/6/llvm/stage-1/runtimes.nix | 32 - .../stage-2/1000-libcxx-failing-tests.list | 175 --- .../6/llvm/stage-2/bintools-unwrapped.nix | 29 - .../6/llvm/stage-2/compiler-rt.nix | 64 - .../rocm-modules/6/llvm/stage-2/libc.nix | 27 - .../rocm-modules/6/llvm/stage-2/libcxx.nix | 43 - .../rocm-modules/6/llvm/stage-2/libcxxabi.nix | 38 - .../rocm-modules/6/llvm/stage-2/libunwind.nix | 27 - .../rocm-modules/6/llvm/stage-2/rstdenv.nix | 37 - .../stage-3/1000-openmp-failing-tests.list | 122 -- .../llvm/stage-3/1001-mlir-failing-tests.list | 11 - .../6/llvm/stage-3/clang-tools-extra.nix | 43 - .../rocm-modules/6/llvm/stage-3/clang.nix | 77 -- .../rocm-modules/6/llvm/stage-3/flang.nix | 32 - .../rocm-modules/6/llvm/stage-3/libclc.nix | 38 - .../rocm-modules/6/llvm/stage-3/lldb.nix | 40 - .../rocm-modules/6/llvm/stage-3/mlir.nix | 61 - .../rocm-modules/6/llvm/stage-3/openmp.nix | 55 - .../rocm-modules/6/llvm/stage-3/polly.nix | 19 - .../rocm-modules/6/llvm/stage-3/pstl.nix | 16 - .../rocm-modules/6/migraphx/default.nix | 10 +- .../rocm-modules/6/miopen/default.nix | 135 ++- .../rocm-modules/6/miopen/fix-isnan.patch | 31 + .../6/miopen/skip-preexisting-dbs.patch | 22 + .../rocm-modules/6/mivisionx/default.nix | 11 +- .../rocm-modules/6/mscclpp/default.nix | 42 + .../rocm-modules/6/rccl/default.nix | 78 +- .../6/rccl/enable-mscclpp-on-all-gfx9.diff | 13 + .../6/rccl/fix-mainline-support-and-ub.diff | 178 +++ .../6/rccl/rccl-test-missing-iomanip.diff | 10 + .../rocm-modules/6/rdc/default.nix | 12 +- .../rocm-modules/6/rocalution/default.nix | 19 +- .../rocm-modules/6/rocblas/default.nix | 141 ++- .../6/rocblas/offload-compress.diff | 68 ++ .../6/rocblas/offload-compress.py | 85 ++ .../rocm-modules/6/rocdbgapi/default.nix | 137 ++- .../rocm-modules/6/rocfft/default.nix | 21 +- .../rocm-modules/6/rocgdb/default.nix | 9 +- .../rocm-modules/6/rocm-cmake/default.nix | 23 +- .../rocm-modules/6/rocm-comgr/default.nix | 40 +- 
.../rocm-modules/6/rocm-core/default.nix | 32 +- .../6/rocm-device-libs/cmake.patch | 4 +- .../6/rocm-device-libs/default.nix | 46 +- .../rocm-modules/6/rocm-docs-core/default.nix | 45 +- .../rocm-modules/6/rocm-path/default.nix | 25 + .../rocm-modules/6/rocm-runtime/default.nix | 72 +- .../remove-hsa-aqlprofile-dep.patch | 27 + .../rocm-modules/6/rocm-runtime/ub.patch | 66 ++ .../rocm-modules/6/rocm-smi/default.nix | 23 +- .../rocm-modules/6/rocm-tests/default.nix | 31 + .../rocm-modules/6/rocm-thunk/default.nix | 54 - .../rocm-modules/6/rocminfo/default.nix | 15 +- .../rocm-modules/6/rocmlir/default.nix | 49 +- .../6/rocmlir/initparamdata-sort-const.patch | 13 + .../rocm-modules/6/rocprim/default.nix | 127 +- .../6/rocprofiler-register/default.nix | 82 ++ .../rocm-modules/6/rocprofiler/default.nix | 56 +- .../6/rocprofiler/optional-aql-in-cmake.patch | 147 +++ .../6/rocr-debug-agent/default.nix | 11 +- .../rocm-modules/6/rocrand/default.nix | 124 +- .../rocm-modules/6/rocsolver/default.nix | 45 +- .../rocm-modules/6/rocsparse/default.nix | 18 +- .../rocm-modules/6/rocthrust/default.nix | 15 +- .../rocm-modules/6/roctracer/default.nix | 11 +- .../rocm-modules/6/rocwmma/default.nix | 17 +- .../rocm-modules/6/rpp/default.nix | 18 +- .../0001-solutionstructs-perf-fix.diff | 48 + .../6/tensile/0002-msgpack-zstd.diff | 56 + .../rocm-modules/6/tensile/Parallel.py | 146 +++ .../rocm-modules/6/tensile/default.nix | 89 +- .../gen_assembly-venv-err-handling.diff | 36 + .../rocm-modules/6/tensile/log-fallback.diff | 101 ++ ...-6.3.0-create-library-dont-copy-twice.diff | 20 + ...ensile-create-library-dont-copy-twice.diff | 37 + pkgs/development/rocm-modules/6/update.nix | 40 +- 113 files changed, 5260 insertions(+), 2906 deletions(-) create mode 100644 pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h create mode 100644 pkgs/development/rocm-modules/6/amdsmi/default.nix create mode 100644 pkgs/development/rocm-modules/6/aotriton/default.nix create mode 100644 
pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch delete mode 100644 pkgs/development/rocm-modules/6/clang-ocl/default.nix create mode 100644 pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch create mode 100644 pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch create mode 100644 pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch create mode 100644 pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix create mode 100644 pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch create mode 100644 pkgs/development/rocm-modules/6/hipblas-common/default.nix create mode 100644 pkgs/development/rocm-modules/6/hipblaslt/default.nix create mode 100644 pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff create mode 100644 pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch delete mode 100644 pkgs/development/rocm-modules/6/llvm/base.nix create mode 100644 pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch create mode 100644 pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff create mode 100644 pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix delete 
mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix create mode 100644 pkgs/development/rocm-modules/6/miopen/fix-isnan.patch create mode 100644 pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch create mode 100644 pkgs/development/rocm-modules/6/mscclpp/default.nix create mode 100644 pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff create mode 100644 pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff create mode 100644 pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff create mode 100644 pkgs/development/rocm-modules/6/rocblas/offload-compress.diff create mode 100644 pkgs/development/rocm-modules/6/rocblas/offload-compress.py create mode 100644 pkgs/development/rocm-modules/6/rocm-path/default.nix create mode 100644 pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch create mode 100644 pkgs/development/rocm-modules/6/rocm-runtime/ub.patch create mode 
100644 pkgs/development/rocm-modules/6/rocm-tests/default.nix delete mode 100644 pkgs/development/rocm-modules/6/rocm-thunk/default.nix create mode 100644 pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch create mode 100644 pkgs/development/rocm-modules/6/rocprofiler-register/default.nix create mode 100644 pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch create mode 100644 pkgs/development/rocm-modules/6/tensile/0001-solutionstructs-perf-fix.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/0002-msgpack-zstd.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/Parallel.py create mode 100644 pkgs/development/rocm-modules/6/tensile/gen_assembly-venv-err-handling.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/log-fallback.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/tensile-6.3.0-create-library-dont-copy-twice.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff diff --git a/pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h b/pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h new file mode 100644 index 0000000000000..b027cec2ade05 --- /dev/null +++ b/pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h @@ -0,0 +1,417 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_X86_AMD_HSMP_H_ +#define _UAPI_ASM_X86_AMD_HSMP_H_ + +#include + +#pragma pack(4) + +#define HSMP_MAX_MSG_LEN 8 + +/* + * HSMP Messages supported + */ +enum hsmp_message_ids { + HSMP_TEST = 1, /* 01h Increments input value by 1 */ + HSMP_GET_SMU_VER, /* 02h SMU FW version */ + HSMP_GET_PROTO_VER, /* 03h HSMP interface version */ + HSMP_GET_SOCKET_POWER, /* 04h average package power consumption */ + HSMP_SET_SOCKET_POWER_LIMIT, /* 05h Set the socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT, /* 06h Get current socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT_MAX,/* 07h Get maximum socket power value */ + 
HSMP_SET_BOOST_LIMIT, /* 08h Set a core maximum frequency limit */ + HSMP_SET_BOOST_LIMIT_SOCKET, /* 09h Set socket maximum frequency level */ + HSMP_GET_BOOST_LIMIT, /* 0Ah Get current frequency limit */ + HSMP_GET_PROC_HOT, /* 0Bh Get PROCHOT status */ + HSMP_SET_XGMI_LINK_WIDTH, /* 0Ch Set max and min width of xGMI Link */ + HSMP_SET_DF_PSTATE, /* 0Dh Alter APEnable/Disable messages behavior */ + HSMP_SET_AUTO_DF_PSTATE, /* 0Eh Enable DF P-State Performance Boost algorithm */ + HSMP_GET_FCLK_MCLK, /* 0Fh Get FCLK and MEMCLK for current socket */ + HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */ + HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */ + HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */ + HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */ + HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */ + HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */ + HSMP_GET_DIMM_TEMP_RANGE, /* 16h Get per-DIMM temperature range and refresh rate */ + HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */ + HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */ + HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */ + HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */ + HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */ + HSMP_GET_SOCKET_FMAX_FMIN,/* 1Ch Get Fmax and Fmin per socket */ + HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */ + HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */ + HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */ + HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */ + HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */ + HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */ + HSMP_GET_METRIC_TABLE_VER,/* 23h Get metrics table version */ + 
HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */ + HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */ + HSMP_MSG_ID_MAX, +}; + +struct hsmp_message { + __u32 msg_id; /* Message ID */ + __u16 num_args; /* Number of input argument words in message */ + __u16 response_sz; /* Number of expected output/response words */ + __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */ + __u16 sock_ind; /* socket number */ +}; + +enum hsmp_msg_type { + HSMP_RSVD = -1, + HSMP_SET = 0, + HSMP_GET = 1, +}; + +enum hsmp_proto_versions { + HSMP_PROTO_VER2 = 2, + HSMP_PROTO_VER3, + HSMP_PROTO_VER4, + HSMP_PROTO_VER5, + HSMP_PROTO_VER6 +}; + +struct hsmp_msg_desc { + int num_args; + int response_sz; + enum hsmp_msg_type type; +}; + +/* + * User may use these comments as reference, please find the + * supported list of messages and message definition in the + * HSMP chapter of respective family/model PPR. + * + * Not supported messages would return -ENOMSG. + */ +static const struct hsmp_msg_desc hsmp_msg_desc_table[] + __attribute__((unused)) = { + /* RESERVED */ + {0, 0, HSMP_RSVD}, + + /* + * HSMP_TEST, num_args = 1, response_sz = 1 + * input: args[0] = xx + * output: args[0] = xx + 1 + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SMU_VER, num_args = 0, response_sz = 1 + * output: args[0] = smu fw ver + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1 + * output: args[0] = proto version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1 + * output: args[0] = socket power in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = power limit value in mWatts + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = socket power limit value in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1 + * output: 
args[0] = maximuam socket power limit in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = apic id[31:16] + boost limit value in MHz[15:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0 + * input: args[0] = boost limit value in MHz + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id + * output: args[0] = boost limit value in MHz + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1 + * output: args[0] = proc hot status + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0 + * input: args[0] = df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */ + {0, 0, HSMP_SET}, + + /* + * HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2 + * output: args[0] = fclk in MHz, args[1] = mclk in MHz + */ + {0, 2, HSMP_GET}, + + /* + * HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = core clock in MHz + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1 + * output: args[0] = average c0 residency + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0 + * input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1 + * input: args[0] = nbioid[23:16] + * output: args[0] = max dpm level[15:8] + min dpm level[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1 + * output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] + + * bw in percentage[7:0] + */ + {0, 1, HSMP_GET}, + + /* + * 
HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1 + * output: args[0] = temperature in degree celsius. [15:8] integer part + + * [7:5] fractional part + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = refresh rate[3] + temperature range[2:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = frequency in MHz[31:16] + frequency source[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id [31:0] + * output: args[0] = frequency in MHz[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1 + * output: args[0] = power in mW[31:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1 + * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = io bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = xgmi bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * 
HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1 + * input: args[0] = link rate control value + * output: args[0] = previous link rate control value + */ + {1, 1, HSMP_SET}, + + /* + * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0 + * input: args[0] = power efficiency mode[2:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0 + * input: args[0] = min df pstate[15:8] + max df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1 + * output: args[0] = metrics table version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0 + */ + {0, 0, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2 + * output: args[0] = lower 32 bits of the address + * output: args[1] = upper 32 bits of the address + */ + {0, 2, HSMP_GET}, +}; + +/* Metrics table (supported only with proto version 6) */ +struct hsmp_metric_table { + __u32 accumulation_counter; + + /* TEMPERATURE */ + __u32 max_socket_temperature; + __u32 max_vr_temperature; + __u32 max_hbm_temperature; + __u64 max_socket_temperature_acc; + __u64 max_vr_temperature_acc; + __u64 max_hbm_temperature_acc; + + /* POWER */ + __u32 socket_power_limit; + __u32 max_socket_power_limit; + __u32 socket_power; + + /* ENERGY */ + __u64 timestamp; + __u64 socket_energy_acc; + __u64 ccd_energy_acc; + __u64 xcd_energy_acc; + __u64 aid_energy_acc; + __u64 hbm_energy_acc; + + /* FREQUENCY */ + __u32 cclk_frequency_limit; + __u32 gfxclk_frequency_limit; + __u32 fclk_frequency; + __u32 uclk_frequency; + __u32 socclk_frequency[4]; + __u32 vclk_frequency[4]; + __u32 dclk_frequency[4]; + __u32 lclk_frequency[4]; + __u64 gfxclk_frequency_acc[8]; + __u64 cclk_frequency_acc[96]; + + /* FREQUENCY RANGE */ + __u32 max_cclk_frequency; + __u32 min_cclk_frequency; + __u32 max_gfxclk_frequency; + __u32 min_gfxclk_frequency; + __u32 fclk_frequency_table[4]; + __u32 uclk_frequency_table[4]; + 
__u32 socclk_frequency_table[4]; + __u32 vclk_frequency_table[4]; + __u32 dclk_frequency_table[4]; + __u32 lclk_frequency_table[4]; + __u32 max_lclk_dpm_range; + __u32 min_lclk_dpm_range; + + /* XGMI */ + __u32 xgmi_width; + __u32 xgmi_bitrate; + __u64 xgmi_read_bandwidth_acc[8]; + __u64 xgmi_write_bandwidth_acc[8]; + + /* ACTIVITY */ + __u32 socket_c0_residency; + __u32 socket_gfx_busy; + __u32 dram_bandwidth_utilization; + __u64 socket_c0_residency_acc; + __u64 socket_gfx_busy_acc; + __u64 dram_bandwidth_acc; + __u32 max_dram_bandwidth; + __u64 dram_bandwidth_utilization_acc; + __u64 pcie_bandwidth_acc[4]; + + /* THROTTLERS */ + __u32 prochot_residency_acc; + __u32 ppt_residency_acc; + __u32 socket_thm_residency_acc; + __u32 vr_thm_residency_acc; + __u32 hbm_thm_residency_acc; + __u32 spare; + + /* New items at the end to maintain driver compatibility */ + __u32 gfxclk_frequency[8]; +}; + +/* Reset to default packing */ +#pragma pack() + +/* Define unique ioctl command for hsmp msgs using generic _IOWR */ +#define HSMP_BASE_IOCTL_NR 0xF8 +#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message) + +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/pkgs/development/rocm-modules/6/amdsmi/default.nix b/pkgs/development/rocm-modules/6/amdsmi/default.nix new file mode 100644 index 0000000000000..24ea25b3171ce --- /dev/null +++ b/pkgs/development/rocm-modules/6/amdsmi/default.nix @@ -0,0 +1,79 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + pkg-config, + libdrm, + wrapPython, + autoPatchelfHook, +}: + +let + esmi_ib_src = fetchFromGitHub { + owner = "amd"; + repo = "esmi_ib_library"; + rev = "esmi_pkg_ver-3.0.3"; + hash = "sha256-q0w5c5c+CpXkklmSyfzc+sbkt4cHNxscGJA3AXwvHxQ="; + }; +in +stdenv.mkDerivation (finalAttrs: { + pname = "amdsmi"; + version = "6.3.1"; + src = fetchFromGitHub { + owner = "rocm"; + repo = "amdsmi"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-ZHr7G2/A4t3yH4S5urt1u8DZqGRcXpZUC/eavhkgPMY="; + }; + + 
postPatch = '' + substituteInPlace goamdsmi_shim/CMakeLists.txt \ + --replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \ + --replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#' + + cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library + mkdir -p ./esmi_ib_library/include/asm + cp ${./amd_hsmp.h} ./esmi_ib_library/include/asm/amd_hsmp.h + ''; + + patches = [ ]; + + nativeBuildInputs = [ + cmake + pkg-config + wrapPython + autoPatchelfHook + ]; + + buildInputs = [ + libdrm + ]; + + cmakeFlags = [ + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + postInstall = '' + wrapPythonProgramsIn $out + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "System management interface for AMD GPUs supported by ROCm"; + homepage = "https://github.com/ROCm/rocm_smi_lib"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; + platforms = [ "x86_64-linux" ]; + }; +}) diff --git a/pkgs/development/rocm-modules/6/aotriton/default.nix b/pkgs/development/rocm-modules/6/aotriton/default.nix new file mode 100644 index 0000000000000..9b50d435afdcc --- /dev/null +++ b/pkgs/development/rocm-modules/6/aotriton/default.nix @@ -0,0 +1,253 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + rocm-cmake, + clr, + rocblas, + rocsolver, + gtest, + msgpack, + libxml2, + python3, + python3Packages, + openmp, + hipblas-common, + hipblas, + cudaPackages, + nlohmann_json, + triton-llvm, + rocmlir, + lapack-reference, + ninja, + ncurses, + libffi, + zlib, + zstd, + xz, + pkg-config, + clang, + writeShellScriptBin, + rocmUpdateScript, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + gpuTargets ? 
[ + # aotriton GPU support list: + # https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py + "gfx90a" + "gfx942" + "gfx1100" + "gfx1101" + ], +}: + +stdenv.mkDerivation ( + finalAttrs: + let + py = python3.withPackages (ps: [ + ps.pyyaml + ps.distutils + ps.setuptools + ps.packaging + ps.numpy + ps.wheel + ps.filelock + ps.iniconfig + ps.pluggy + ps.pybind11 + ]); + gpuTargets' = lib.concatStringsSep ";" gpuTargets; + compiler = "amdclang++"; + cFlags = "-O3 -DNDEBUG"; + triton-llvm' = triton-llvm; + in + { + pname = "aotriton"; + version = "0.8.0b"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "aotriton"; + rev = "0.8b"; + hash = "sha256-C5Qr0EgV+pU6Hnmxqy76Nmryqr7qNkoE6iDcg9z35Hk="; + fetchSubmodules = true; + }; + env.CXX = compiler; + env.ROCM_PATH = "${clr}"; + requiredSystemFeatures = [ "big-parallel" ]; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + # ROCM doesn't include an empty cuda.h? + # It does in AMD's distribution + # :confused: + # https://github.com/pytorch/pytorch/issues/42430 + postPatch = '' + echo "" > third_party/triton/third_party/nvidia/include/cuda.h + ''; + + doCheck = false; + doInstallCheck = false; + + nativeBuildInputs = [ + cmake + rocm-cmake + pkg-config + py + clr + #git + #gfortran + ninja + (writeShellScriptBin "amdclang++" '' + exec clang++ "$@" + '') + ]; + + buildInputs = + [ + rocblas + rocsolver + hipblas-common + hipblas + openmp + libffi + ncurses + xz + nlohmann_json + rocmlir + #cudaRtIncludes + + # Tensile deps - not optional, building without tensile isn't actually supported + msgpack # FIXME: not included in cmake! 
+ libxml2 + python3Packages.msgpack + zlib + zstd + #python3Packages.joblib + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + env.TRITON_OFFLINE_BUILD = 1; + env.LLVM_SYSPATH = "${triton-llvm'}"; + env.JSON_SYSPATH = nlohmann_json; + env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir"; + # build time dep for header only, only needs source. + # env.TRITON_CUDACRT_PATH = cudaRtIncludes; + # env.TRITON_CUDART_PATH = cudaRtIncludes; + env.CXXFLAGS = "-I/build/source/third_party/triton/third_party/nvidia/backend/include"; + # env.NOIMAGE_MODE = 1; + + # Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files + preConfigure = '' + mkdir third_party/triton/third_party/nvidia/backend/include/ + touch third_party/triton/third_party/nvidia/backend/include/cuda.h + #cp ''${cudaRtIncludes}/include/*.h third_party/triton/third_party/nvidia/backend/include/ + find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + + find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + + grep -ir cuda.h third_party/triton + find third_party/triton -name 'cuda.h' + # echo $TRITON_CUDACRT_PATH + # ls $TRITON_CUDACRT_PATH + # exit 1 + sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt + sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt + sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt + sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt + substituteInPlace third_party/triton/python/setup.py \ + --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \ + --replace-fail 'system == "Linux"' 'False' + # sed -i 's|^download_and_copy|dict|g' third_party/triton/python/setup.py + cmakeFlagsArray+=( + 
'-DCMAKE_C_FLAGS_RELEASE=${cFlags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${cFlags}' + ) + prependToVar cmakeFlags "-GNinja" + mkdir -p /build/tmp-home + export HOME=/build/tmp-home + ''; + + # From README: + # Note: do not run ninja separately, due to the limit of the current build system, + # ninja install will run the whole build process unconditionally. + buildPhase = '' + echo "Skipping build phase due to aotriton bug" + ''; + + installPhase = '' + ninja -v install + ''; + + cmakeFlags = + [ + #"--debug" + #"--trace" + "-Wno-dev" + "-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + # "-DCMAKE_CXX_COMPILER=hipcc" # MUST be set because tensile uses this + # "-DCMAKE_C_COMPILER=${lib.getBin clr}/bin/hipcc" + "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}" + "-DCMAKE_CXX_COMPILER=${compiler}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DAMDGPU_TARGETS=${gpuTargets'}" + "-DGPU_TARGETS=${gpuTargets'}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipblas-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipblas-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/example-* $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rmdir $out/bin + ''; + meta = with lib; { + description = "ROCm BLAS marshalling library"; + homepage = "https://github.com/ROCm/hipBLAS"; + license = with licenses; [ mit ]; + maintainers = 
teams.rocm.members; + platforms = platforms.linux; + }; + } +) diff --git a/pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch b/pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch new file mode 100644 index 0000000000000..12ece274d4333 --- /dev/null +++ b/pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch @@ -0,0 +1,68 @@ +From https://github.com/triton-lang/triton/pull/1400/files +diff --git a/python/setup.py b/python/setup.py +index 1d5eb89c591d..9dfd5a62ad63 100644 +--- a/third_party/triton/python/setup.py ++++ b/third_party/triton/python/setup.py +@@ -1,14 +1,13 @@ +-import distutils + import os + import platform + import re + import shutil + import subprocess + import sys ++import sysconfig + import tarfile + import tempfile + import urllib.request +-from distutils.version import LooseVersion + from pathlib import Path + from typing import NamedTuple + +@@ -154,10 +153,10 @@ def run(self): + "CMake must be installed to build the following extensions: " + ", ".join(e.name for e in self.extensions) + ) + +- if platform.system() == "Windows": +- cmake_version = LooseVersion(re.search(r"version\s*([\d.]+)", out.decode()).group(1)) +- if cmake_version < "3.1.0": +- raise RuntimeError("CMake >= 3.1.0 is required on Windows") ++ match = re.search(r"version\s*(?P\d+)\.(?P\d+)([\d.]+)?", out.decode()) ++ cmake_major, cmake_minor = int(match.group("major")), int(match.group("minor")) ++ if (cmake_major, cmake_minor) < (3, 20): ++ raise RuntimeError("CMake >= 3.20.0 is required") + + for ext in self.extensions: + self.build_extension(ext) +@@ -176,7 +175,7 @@ def build_extension(self, ext): + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + # python directories +- python_include_dir = distutils.sysconfig.get_python_inc() ++ python_include_dir = sysconfig.get_path("platinclude") + cmake_args = [ + "-DLLVM_ENABLE_WERROR=ON", + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, +@@ -200,9 +199,8 
@@ def build_extension(self, ext): + cmake_args += ["-A", "x64"] + build_args += ["--", "/m"] + else: +- import multiprocessing + cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] +- build_args += ['-j' + str(2 * multiprocessing.cpu_count())] ++ build_args += ['-j' + str(2 * os.cpu_count())] + + env = os.environ.copy() + subprocess.check_call(["cmake", self.base_dir] + cmake_args, cwd=self.build_temp, env=env) +@@ -245,6 +243,11 @@ def build_extension(self, ext): + "Topic :: Software Development :: Build Tools", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.6", ++ "Programming Language :: Python :: 3.7", ++ "Programming Language :: Python :: 3.8", ++ "Programming Language :: Python :: 3.9", ++ "Programming Language :: Python :: 3.10", ++ "Programming Language :: Python :: 3.11", + ], + test_suite="tests", + extras_require={ diff --git a/pkgs/development/rocm-modules/6/clang-ocl/default.nix b/pkgs/development/rocm-modules/6/clang-ocl/default.nix deleted file mode 100644 index 0ef8a04a744b2..0000000000000 --- a/pkgs/development/rocm-modules/6/clang-ocl/default.nix +++ /dev/null @@ -1,45 +0,0 @@ -{ - lib, - stdenv, - fetchFromGitHub, - rocmUpdateScript, - cmake, - rocm-cmake, - rocm-device-libs, -}: - -stdenv.mkDerivation (finalAttrs: { - pname = "clang-ocl"; - version = "6.0.2"; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "clang-ocl"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-uMSvcVJj+me2E+7FsXZ4l4hTcK6uKEegXpkHGcuist0="; - }; - - nativeBuildInputs = [ - cmake - rocm-cmake - ]; - - buildInputs = [ rocm-device-libs ]; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - meta = with lib; { - description = "OpenCL compilation with clang compiler"; - homepage = "https://github.com/ROCm/clang-ocl"; - license = with licenses; [ mit ]; - maintainers = teams.rocm.members; - platforms = platforms.linux; - broken = - versions.minor 
finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; - }; -}) diff --git a/pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch b/pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch new file mode 100644 index 0000000000000..a48579d1d5381 --- /dev/null +++ b/pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch @@ -0,0 +1,176 @@ +From 26a65d37e388c25898a13b60a42ab606d63fda2e Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 25 Oct 2024 08:38:53 -0700 +Subject: [PATCH] handle v1 of compressed fatbins + +The size of the compressed modules is needed to uncompress them but +that information is only stored in the header in v2. + +Because the uncompressed size is known, the compressed size can be +greater than what it actually is. + +So if v1 is detected use the maximum possible size. + +Signed-off-by: Tom Rix +--- + hipamd/src/hip_code_object.cpp | 9 ++++++--- + hipamd/src/hip_code_object.hpp | 2 +- + hipamd/src/hip_fatbin.cpp | 8 +++++--- + hipamd/src/hip_fatbin.hpp | 2 +- + rocclr/os/os.hpp | 2 +- + rocclr/os/os_posix.cpp | 3 ++- + rocclr/os/os_win32.cpp | 2 +- + 7 files changed, 17 insertions(+), 11 deletions(-) + +diff --git a/hipamd/src/hip_code_object.cpp b/hipamd/src/hip_code_object.cpp +index 73b9e2603c98..15098612ebf7 100644 +--- a/hipamd/src/hip_code_object.cpp ++++ b/hipamd/src/hip_code_object.cpp +@@ -600,10 +600,13 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( + } + + // ================================================================================================ +-size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) { ++size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed, size_t maximum_possible_size) { + if (isCompressed) { + const auto obheader = reinterpret_cast(data); +- return obheader->totalSize; ++ if (obheader->versionNumber > 1) ++ return obheader->totalSize; 
++ else ++ return maximum_possible_size; + } else { + const auto obheader = reinterpret_cast(data); + const __ClangOffloadBundleInfo* desc = &obheader->desc[0]; +@@ -632,7 +635,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( + return hipErrorInvalidKernelFile; + } + +- if (size == 0) size = getFatbinSize(data, isCompressed); ++ size = getFatbinSize(data, isCompressed, size); + + amd_comgr_data_t dataCodeObj{0}; + amd_comgr_data_set_t dataSetBundled{0}; +diff --git a/hipamd/src/hip_code_object.hpp b/hipamd/src/hip_code_object.hpp +index f0407f7bd48f..1dbcc2ab44e1 100644 +--- a/hipamd/src/hip_code_object.hpp ++++ b/hipamd/src/hip_code_object.hpp +@@ -66,7 +66,7 @@ class CodeObject { + static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed); + + // Return size of fat bin +- static size_t getFatbinSize(const void* data, const bool isCompressed = false); ++ static size_t getFatbinSize(const void* data, const bool isCompressed = false, size_t maximum_possible_size = 0); + + /** + * @brief Extract code object from fatbin using comgr unbundling action +diff --git a/hipamd/src/hip_fatbin.cpp b/hipamd/src/hip_fatbin.cpp +index 8b52e9f32fc6..1d39fa18b636 100644 +--- a/hipamd/src/hip_fatbin.cpp ++++ b/hipamd/src/hip_fatbin.cpp +@@ -126,6 +126,7 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector= 2 && minor >= 8) { +- hip_status = ExtractFatBinaryUsingCOMGR(image_, devices); ++ hip_status = ExtractFatBinaryUsingCOMGR(image_, maximum_possible_size, devices); + break; + } else if (isCompressed) { + LogPrintfError( +@@ -467,6 +468,7 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) { + + // ================================================================================================ + hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void *data, ++ size_t maximum_possible_size, + const std::vector& devices) { + hipError_t hip_status = hipSuccess; + // At this line, image should be a valid 
ptr. +@@ -481,7 +483,7 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void *data, + device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); + } + +- hip_status = CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0, ++ hip_status = CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, maximum_possible_size, + device_names, code_objs); + if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) { + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { +diff --git a/hipamd/src/hip_fatbin.hpp b/hipamd/src/hip_fatbin.hpp +index 5c4ea29761af..afd3cb2a2061 100644 +--- a/hipamd/src/hip_fatbin.hpp ++++ b/hipamd/src/hip_fatbin.hpp +@@ -79,7 +79,7 @@ public: + * + * @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr() + */ +- hipError_t ExtractFatBinaryUsingCOMGR(const void* data, ++ hipError_t ExtractFatBinaryUsingCOMGR(const void* data, size_t maximum_possible_size, + const std::vector<hip::Device*>& devices); + hipError_t ExtractFatBinary(const std::vector<hip::Device*>& devices); + hipError_t AddDevProgram(const int device_id); +diff --git a/rocclr/os/os.hpp b/rocclr/os/os.hpp +index c9bd0b99e5cd..752f62f2f8c9 100644 +--- a/rocclr/os/os.hpp ++++ b/rocclr/os/os.hpp +@@ -117,7 +117,7 @@ class Os : AllStatic { + + // Returns the file name & file offset of mapped memory if the file is mapped.
+ static bool FindFileNameFromAddress(const void* image, std::string* fname_ptr, +- size_t* foffset_ptr); ++ size_t* foffset_ptr, size_t *max_possible_size); + + // Given a valid file descriptor returns mmaped memory for size and offset + static bool MemoryMapFileDesc(FileDesc fdesc, size_t fsize, size_t foffset, +diff --git a/rocclr/os/os_posix.cpp b/rocclr/os/os_posix.cpp +index 739795e0cb19..659bbb54a253 100644 +--- a/rocclr/os/os_posix.cpp ++++ b/rocclr/os/os_posix.cpp +@@ -789,7 +789,7 @@ bool Os::GetFileHandle(const char* fname, FileDesc* fd_ptr, size_t* sz_ptr) { + } + + bool amd::Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, +- size_t* foffset_ptr) { ++ size_t* foffset_ptr, size_t *maximum_possible_size) { + + // Get the list of mapped file list + bool ret_value = false; +@@ -831,6 +831,7 @@ bool amd::Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, + + *fname_ptr = uri_file_path; + *foffset_ptr = offset + address - low_address; ++ *maximum_possible_size = high_address - address + 1; + ret_value = true; + break; + } +diff --git a/rocclr/os/os_win32.cpp b/rocclr/os/os_win32.cpp +index 3923ec37dfc5..6fca0d9f4ccc 100644 +--- a/rocclr/os/os_win32.cpp ++++ b/rocclr/os/os_win32.cpp +@@ -936,7 +936,7 @@ bool Os::MemoryMapFileTruncated(const char* fname, const void** mmap_ptr, size_t + return true; + } + +-bool Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, size_t* foffset_ptr) { ++bool Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, size_t* foffset_ptr, size_t *maximum_possible_size) { + // TODO: Implementation on windows side pending. 
+ return false; + } +-- +2.47.0 + diff --git a/pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch b/pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch new file mode 100644 index 0000000000000..5a36a2c2c74d0 --- /dev/null +++ b/pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch @@ -0,0 +1,40 @@ +diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake +index 3f233b72f..67bdc62ee 100644 +--- a/rocclr/cmake/ROCclr.cmake ++++ b/rocclr/cmake/ROCclr.cmake +@@ -44,6 +44,19 @@ find_package(Threads REQUIRED) + + find_package(AMD_OPENCL) + ++# Find X11 package ++find_package(X11 REQUIRED) ++if(NOT X11_FOUND) ++ message(FATAL_ERROR "X11 libraries not found") ++endif() ++ ++# Find OpenGL package ++find_package(OpenGL REQUIRED) ++if(NOT OpenGL_FOUND) ++ message(FATAL_ERROR "OpenGL not found") ++endif() ++ ++ + add_library(rocclr STATIC) + + include(ROCclrCompilerOptions) +@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC + ${ROCCLR_SRC_DIR}/device + ${ROCCLR_SRC_DIR}/elf + ${ROCCLR_SRC_DIR}/include ++ ${X11_INCLUDE_DIR} ++ ${OPENGL_INCLUDE_DIR} + ${AMD_OPENCL_INCLUDE_DIRS}) + +-target_link_libraries(rocclr PUBLIC Threads::Threads) ++target_link_libraries(rocclr PUBLIC ++ Threads::Threads ++ ${X11_LIBRARIES} ++ ${OPENGL_LIBRARIES}) + # IPC on Windows is not supported + if(UNIX) + target_link_libraries(rocclr PUBLIC rt) diff --git a/pkgs/development/rocm-modules/6/clr/default.nix b/pkgs/development/rocm-modules/6/clr/default.nix index a3dc49695c50d..a939ad41c6274 100644 --- a/pkgs/development/rocm-modules/6/clr/default.nix +++ b/pkgs/development/rocm-modules/6/clr/default.nix @@ -3,53 +3,57 @@ stdenv, callPackage, fetchFromGitHub, - fetchpatch, - fetchurl, rocmUpdateScript, makeWrapper, cmake, perl, - clang, hip-common, hipcc, rocm-device-libs, rocm-comgr, rocm-runtime, + rocm-core, roctracer, rocminfo, rocm-smi, numactl, + libffi, + zstd, + zlib, libGL, libxml2, libX11, python3Packages, + rocm-merged-llvm, + 
khronos-ocl-icd-loader, + gcc-unwrapped, + writeShellScriptBin, + localGpuTargets ? null, }: let + hipClangPath = rocm-merged-llvm; wrapperArgs = [ "--prefix PATH : $out/bin" "--prefix LD_LIBRARY_PATH : ${rocm-runtime}" "--set HIP_PLATFORM amd" "--set HIP_PATH $out" - "--set HIP_CLANG_PATH ${clang}/bin" + "--set HIP_CLANG_PATH ${hipClangPath}/bin" "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode" "--set HSA_PATH ${rocm-runtime}" "--set ROCM_PATH $out" ]; - - # https://github.com/NixOS/nixpkgs/issues/305641 - # Not needed when 3.29.2 is in unstable - cmake' = cmake.overrideAttrs (old: rec { - version = "3.29.2"; - src = fetchurl { - url = "https://cmake.org/files/v${lib.versions.majorMinor version}/cmake-${version}.tar.gz"; - hash = "sha256-NttLaSaqt0G6bksuotmckZMiITIwi03IJNQSPLcwNS4="; - }; - }); + ROCM_LIBPATCH_VERSION = rocm-core.ROCM_LIBPATCH_VERSION; + amdclang = writeShellScriptBin "amdclang" '' + exec clang "$@" + ''; + amdclangxx = writeShellScriptBin "amdclang++" '' + exec clang++ "$@" + ''; in stdenv.mkDerivation (finalAttrs: { pname = "clr"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ "out" @@ -60,15 +64,17 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "clr"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-ZMpA7vCW2CcpGdBLZfPimMHcgjhN1PHuewJiYwZMgGY="; + hash = "sha256-wo3kwk6HQJsP+ycaVh2mmMjEgGlj/Z6KXNXOXbJ1KLs="; }; nativeBuildInputs = [ makeWrapper - cmake' + cmake perl python3Packages.python python3Packages.cppheaderparser + amdclang + amdclangxx ]; buildInputs = [ @@ -76,9 +82,15 @@ stdenv.mkDerivation (finalAttrs: { libGL libxml2 libX11 + khronos-ocl-icd-loader + hipClangPath + libffi + zstd + zlib ]; propagatedBuildInputs = [ + rocm-core rocm-device-libs rocm-comgr rocm-runtime @@ -86,6 +98,7 @@ stdenv.mkDerivation (finalAttrs: { ]; cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" "-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries "-DCLR_BUILD_HIP=ON" "-DCLR_BUILD_OCL=ON" @@ -94,6 +107,9 @@ 
stdenv.mkDerivation (finalAttrs: { "-DHIP_PLATFORM=amd" "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext" "-DROCM_PATH=${rocminfo}" + "-DBUILD_ICD=ON" + "-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds? + "-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}" # Temporarily set variables to work around upstream CMakeLists issue # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed @@ -102,27 +118,34 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_LIBDIR=lib" ]; + env.LLVM_DIR = ""; + + # TODO: rebase patches patches = [ - (fetchpatch { - name = "add-missing-operators.patch"; - url = "https://github.com/ROCm/clr/commit/86bd518981b364c138f9901b28a529899d8654f3.patch"; - hash = "sha256-lbswri+zKLxif0hPp4aeJDeVfadhWZz4z+m+G2XcCPI="; - }) - (fetchpatch { - name = "static-functions.patch"; - url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch"; - hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs="; - }) - (fetchpatch { - name = "extend-hip-isa-compatibility-check.patch"; - url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch"; - hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI="; - }) - (fetchpatch { - name = "improve-rocclr-isa-compatibility-check.patch"; - url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch"; - hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y="; - }) + ./cmake-find-x11-libgl.patch + ./0001-handle-v1-of-compressed-fatbins.patch # https://github.com/ROCm/clr/issues/99 + # ./fix-null-stream-sync-perf.patch # https://github.com/ROCm/clr/issues/78 + # (fetchpatch { + # name = "add-missing-operators.patch"; + # url = "https://github.com/ROCm/clr/commit/86bd518981b364c138f9901b28a529899d8654f3.patch"; + # hash 
= "sha256-lbswri+zKLxif0hPp4aeJDeVfadhWZz4z+m+G2XcCPI="; + # }) + # (fetchpatch { + # name = "static-functions.patch"; + # url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch"; + # hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs="; + # }) + + # (fetchpatch { + # name = "extend-hip-isa-compatibility-check.patch"; + # url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch"; + # hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI="; + # }) + # (fetchpatch { + # name = "improve-rocclr-isa-compatibility-check.patch"; + # url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch"; + # hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y="; + # }) ]; postPatch = '' @@ -135,79 +158,134 @@ stdenv.mkDerivation (finalAttrs: { --replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" "" substituteInPlace hipamd/src/hip_embed_pch.sh \ - --replace "\''$LLVM_DIR/bin/clang" "${clang}/bin/clang" - - # https://lists.debian.org/debian-ai/2024/02/msg00178.html - substituteInPlace rocclr/utils/flags.hpp \ - --replace-fail "HIP_USE_RUNTIME_UNBUNDLER, false" "HIP_USE_RUNTIME_UNBUNDLER, true" + --replace "\''$LLVM_DIR/bin/clang" "${hipClangPath}/bin/clang" substituteInPlace opencl/khronos/icd/loader/icd_platform.h \ --replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \ '#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";' + + # new unbundler has better error messages, defaulting it on + substituteInPlace rocclr/utils/flags.hpp \ + --replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true" ''; postInstall = '' + chmod +x $out/bin/* patchShebangs $out/bin - # hipcc.bin and hipconfig.bin is mysteriously never 
installed - cp -a ${hipcc}/bin/{hipcc.bin,hipconfig.bin} $out/bin + cp ${amdclang}/bin/* $out/bin/ + cp ${amdclangxx}/bin/* $out/bin/ - wrapProgram $out/bin/hipcc.bin ${lib.concatStringsSep " " wrapperArgs} - wrapProgram $out/bin/hipconfig.bin ${lib.concatStringsSep " " wrapperArgs} + wrapProgram $out/bin/hipcc ${lib.concatStringsSep " " wrapperArgs} + wrapProgram $out/bin/hipconfig ${lib.concatStringsSep " " wrapperArgs} wrapProgram $out/bin/hipcc.pl ${lib.concatStringsSep " " wrapperArgs} wrapProgram $out/bin/hipconfig.pl ${lib.concatStringsSep " " wrapperArgs} + mkdir -p $out/nix-support/ + echo ' + CORE_LIM="''${NIX_BUILD_CORES:-1}" + if ((CORE_LIM <= 0)); then + guess=$(nproc 2>/dev/null || true) + ((CORE_LIM = guess <= 1 ? 1 : guess)) + ((CORE_LIM = CORE_LIM >= 3 ? 3 : CORE_LIM)) + fi + CORE_LIM=$(( ''${NIX_LOAD_LIMIT:-''${CORE_LIM:-$(nproc)}} / 2 )) + # Set HIPCC_JOBS with min and max constraints + export HIPCC_JOBS=$CORE_LIM + export HIPCC_JOBS_LINK=$CORE_LIM + export CFLAGS="''${CFLAGS:-} -parallel-jobs=$CORE_LIM" + export CXXFLAGS="''${CXXFLAGS:-} -parallel-jobs=$CORE_LIM" + #export HIPCC_COMPILE_FLAGS_APPEND="-O3 -Wno-format-nonliteral -parallel-jobs=$HIPCC_JOBS" + export HIP_PATH="${placeholder "out"}" + export HIP_PLATFORM=amd + export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode" + export NIX_CC_USE_RESPONSE_FILE=0 + export HIP_CLANG_PATH="${hipClangPath}/bin" + export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}" + export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook + # Just link rocminfo, it's easier ln -s ${rocminfo}/bin/* $out/bin + ln -s ${rocm-core}/include/* $out/include/ # Replace rocm-opencl-icd functionality mkdir -p $icd/etc/OpenCL/vendors echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd # add version info to output (downstream rocmPackages look for this) - mkdir $out/.info - echo "${finalAttrs.version}" > $out/.info/version + ln -s ${rocm-core}/.info/ $out/.info + + ln -s ${hipClangPath} 
$out/llvm ''; - passthru = { - # All known and valid general GPU targets - # We cannot use this for each ROCm library, as each defines their own supported targets - # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix - gpuTargets = lib.forEach [ - "803" - "900" - "906" - "908" - "90a" - "940" - "941" - "942" - "1010" - "1012" - "1030" - "1100" - "1101" - "1102" - ] (target: "gfx${target}"); - - updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - page = "tags?per_page=1"; - filter = ".[0].name | split(\"-\") | .[1]"; - }; + disallowedRequisites = [ + gcc-unwrapped + ]; + # postFixup = '' + # objdump --syms $out/lib/libamdhip64.so.6 + # readelf --debug-dump=line $out/lib/libamdhip64.so.6 + # exit 1 + # ''; - impureTests = { - rocm-smi = callPackage ./test-rocm-smi.nix { - inherit rocm-smi; - clr = finalAttrs.finalPackage; + passthru = + { + # All known and valid general GPU targets + # We cannot use this for each ROCm library, as each defines their own supported targets + # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix + # Generic targets are not yet available in rocm-6.3.1 llvm + gpuTargets = lib.forEach [ + # "9-generic" + "900" # MI25, Vega 56/64 + "906" # MI50/60, Radeon VII + "908" # MI100 + "90a" # MI210 / MI250 + # "9-4-generic" + # 940/1 - never released publicly, maybe HPE cray specific MI3xx? 
+ "942" # MI300 + # "10-1-generic" + "1010" + "1012" + # "10-3-generic" + "1030" # W6800, various Radeon cards + # "11-generic" + "1100" + "1101" + "1102" + ] (target: "gfx${target}"); + + updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; }; - opencl-example = callPackage ./test-opencl-example.nix { - clr = finalAttrs.finalPackage; + + impureTests = { + rocm-smi = callPackage ./test-rocm-smi.nix { + inherit rocm-smi; + clr = finalAttrs.finalPackage; + }; + opencl-example = callPackage ./test-opencl-example.nix { + clr = finalAttrs.finalPackage; + }; }; + + selectGpuTargets = + { + supported ? [ ], + }: + supported; + gpuArchSuffix = ""; + } + // lib.optionalAttrs (localGpuTargets != null) { + inherit localGpuTargets; + gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets); + selectGpuTargets = + { + supported ? [ ], + }: + if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported; }; - }; meta = with lib; { description = "AMD Common Language Runtime for hipamd, opencl, and rocclr"; @@ -215,8 +293,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch b/pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch new file mode 100644 index 0000000000000..49c789d172788 --- /dev/null +++ b/pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch @@ -0,0 +1,101 @@ +From 17e7b7c2ef6023be77b22ae83162e78de0a5a936 Mon Sep 17 00:00:00 2001 +From: Anusha GodavarthySurya +Date: Fri, 11 Oct 2024 17:10:12 +0000 +Subject: [PATCH] SWDEV-472840 SWDEV-461980 - Fix null 
stream sync performance + +=> If null stream is not created during sync skip nullstrm creation +=> Do cpu wait on blocking & null stream if it exists + +Change-Id: I90d6ced6a2dd1782ba58f3fed4e3608fc0efa55a +--- + hipamd/src/hip_device.cpp | 23 +++++++++++++++++++---- + hipamd/src/hip_internal.hpp | 2 +- + hipamd/src/hip_stream.cpp | 22 ++++++++++++++++------ + 3 files changed, 36 insertions(+), 11 deletions(-) + +diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp +index 9f6a8e3d0..20889b0fe 100644 +--- a/hipamd/src/hip_device.cpp ++++ b/hipamd/src/hip_device.cpp +@@ -257,15 +257,30 @@ void Device::destroyAllStreams() { + } + + // ================================================================================================ +-void Device::SyncAllStreams( bool cpu_wait) { ++void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) { + // Make a local copy to avoid stalls for GPU finish with multiple threads + std::vector streams; + streams.reserve(streamSet.size()); + { + amd::ScopedLock lock(streamSetLock); +- for (auto it : streamSet) { +- streams.push_back(it); +- it->retain(); ++ if (wait_blocking_streams_only) { ++ auto null_stream = GetNullStream(); ++ for (auto it : streamSet) { ++ if (it != null_stream && (it->Flags() & hipStreamNonBlocking) == 0) { ++ streams.push_back(it); ++ it->retain(); ++ } ++ } ++ // Add null stream to the end of the list so that wait happens after all blocking streams. 
++ if (null_stream != nullptr) { ++ streams.push_back(null_stream); ++ null_stream->retain(); ++ } ++ } else { ++ for (auto it : streamSet) { ++ streams.push_back(it); ++ it->retain(); ++ } + } + } + for (auto it : streams) { +diff --git a/hipamd/src/hip_internal.hpp b/hipamd/src/hip_internal.hpp +index d0a6dca57..47749c012 100644 +--- a/hipamd/src/hip_internal.hpp ++++ b/hipamd/src/hip_internal.hpp +@@ -595,7 +595,7 @@ class stream_per_thread { + + void destroyAllStreams(); + +- void SyncAllStreams( bool cpu_wait = true); ++ void SyncAllStreams( bool cpu_wait = true, bool wait_blocking_streams_only = false); + + bool StreamCaptureBlocking(); + +diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp +index 937374977..76a732acd 100644 +--- a/hipamd/src/hip_stream.cpp ++++ b/hipamd/src/hip_stream.cpp +@@ -357,13 +357,23 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) { + HIP_RETURN(hipErrorStreamCaptureUnsupported); + } + } +- bool wait = (stream == nullptr || stream == hipStreamLegacy) ? 
true : false; +- auto hip_stream = hip::getStream(stream, wait); + +- // Wait for the current host queue +- hip_stream->finish(); +- // Release freed memory for all memory pools on the device +- hip_stream->GetDevice()->ReleaseFreedMemory(); ++ if (stream == nullptr) { ++ // Do cpu wait on null stream and only on blocking streams ++ constexpr bool WaitblockingStreamOnly = true; ++ getCurrentDevice()->SyncAllStreams(true, WaitblockingStreamOnly); ++ ++ // Release freed memory for all memory pools on the device ++ getCurrentDevice()->ReleaseFreedMemory(); ++ } else { ++ constexpr bool wait = false; ++ auto hip_stream = hip::getStream(stream, wait); ++ ++ // Wait for the current host queue ++ hip_stream->finish(); ++ // Release freed memory for all memory pools on the device ++ hip_stream->GetDevice()->ReleaseFreedMemory(); ++ } + return hipSuccess; + } + diff --git a/pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix b/pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix new file mode 100644 index 0000000000000..cdc27a2808568 --- /dev/null +++ b/pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix @@ -0,0 +1,43 @@ +{ + buildPythonPackage, + python, + composable_kernel_build, + lib, + setuptools, + setuptools-scm, + rocm-merged-llvm, +}: +buildPythonPackage { + pyproject = true; + pname = "ck4inductor"; + build-system = [ + setuptools + setuptools-scm + ]; + version = "6.4.0"; + inherit (composable_kernel_build) src; + pythonImportsCheck = [ + "ck4inductor" + "ck4inductor.universal_gemm.gen_instances" + "ck4inductor.universal_gemm.gen_instances" + "ck4inductor.universal_gemm.op" + ]; + propagatedBuildInputs = [ + # At runtime will fail to compile anything with ck4inductor without this + # can't easily use in checks phase because most of the compiler machinery is in torch + rocm-merged-llvm + ]; + checkPhase = '' + if [ ! 
-d "$out/${python.sitePackages}/ck4inductor" ]; then + echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor" + exit 1 + fi + ''; + meta = with lib; { + description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations"; + homepage = "https://github.com/ROCm/composable_kernel"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; +} diff --git a/pkgs/development/rocm-modules/6/composable_kernel/default.nix b/pkgs/development/rocm-modules/6/composable_kernel/default.nix index 16f3f05f75098..6ae723952e704 100644 --- a/pkgs/development/rocm-modules/6/composable_kernel/default.nix +++ b/pkgs/development/rocm-modules/6/composable_kernel/default.nix @@ -5,20 +5,38 @@ rocmUpdateScript, cmake, rocm-cmake, + rocm-merged-llvm, clr, - openmp, - clang-tools-extra, + rocm-device-libs, + rocminfo, + hipify, git, gtest, zstd, + ninja, buildTests ? false, buildExamples ? false, - gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx1030" ... ] + # FIXME: This arch list needs to grow, had build issues and will need to test + # but testing is very slow + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ] + ), }: stdenv.mkDerivation (finalAttrs: { - pname = "composable_kernel"; - version = "6.0.2"; + pname = "composable_kernel${clr.gpuArchSuffix}"; + # This version must be PEP 440 compatible because it's the version of the ck4inductor python package too + version = "6.4.0-unstable-20241220"; outputs = [ @@ -31,32 +49,68 @@ stdenv.mkDerivation (finalAttrs: { "example" ]; + patches = [ + # for Gentoo this gives a significant speedup in build times + # not observing speedup. possibly because our LLVM has been patched to fix amdgpu-early-inline-all issues? 
+ # ./disable-amdgpu-inline.patch + ]; + src = fetchFromGitHub { owner = "ROCm"; repo = "composable_kernel"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-NCqMganmNyQfz3X+KQOrfrimnrgd3HbAGK5DeC4+J+o="; + rev = "07339c738396ebeae57374771ded4dcf11bddf1e"; + hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs="; }; nativeBuildInputs = [ git cmake + rocminfo + clr + hipify + ninja + zstd + ]; + + buildInputs = [ rocm-cmake clr - clang-tools-extra zstd ]; - buildInputs = [ openmp ]; + strictDeps = true; + enableParallelBuilding = true; + requiredSystemFeatures = [ "big-parallel" ]; + env.ROCM_PATH = clr; + env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + # "-DDL_KERNELS=ON" + # Not turned on because don't think deps require it, slightly speeds up build + # "-DCK_USE_CODEGEN=ON" + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DBUILD_DEV=OFF" + "-DROCM_PATH=${clr}" + "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" + + # FP8 can build for 908/90a but very slow build + # and produces unusably slow kernels that are huge + "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF" ] ++ lib.optionals (gpuTargets != [ ]) [ - "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS + # per readme this is required if archs are dissimilar + # In rocm-6.3.x not setting any arch flag worked + # but setting dissimilar arches always failed + "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names @@ -64,22 +118,45 @@ stdenv.mkDerivation (finalAttrs: { # No flags to build 
selectively it seems... postPatch = - lib.optionalString (!buildTests) '' + '' + export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode + '' + + lib.optionalString (!buildTests) '' substituteInPlace CMakeLists.txt \ - --replace "add_subdirectory(test)" "" + --replace-fail "add_subdirectory(test)" "" + substituteInPlace codegen/CMakeLists.txt \ + --replace-fail "include(ROCMTest)" "" '' + lib.optionalString (!buildExamples) '' substituteInPlace CMakeLists.txt \ - --replace "add_subdirectory(example)" "" + --replace-fail "add_subdirectory(example)" "" '' + '' substituteInPlace CMakeLists.txt \ - --replace "add_subdirectory(profiler)" "" + --replace-fail "add_subdirectory(profiler)" "" ''; + # Clamp parallelism based on free memory at build start to avoid OOM + preConfigure = '' + export NINJA_SUMMARIZE_BUILD=1 + export NINJA_STATUS="[%r jobs | %P %f/%t @ %o/s | %w | ETA %W ] " + MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo) + MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo) + APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE)) + MAX_CORES=$((1 + APPX_GB / 2)) + MAX_CORES_LINK=$((1 + APPX_GB / 8)) + MAX_CORES_LINK=$((MAX_CORES_LINK > NIX_BUILD_CORES ? NIX_BUILD_CORES : MAX_CORES_LINK)) + export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? 
MAX_CORES : NIX_BUILD_CORES))" + echo "Picked new core limits NIX_BUILD_CORES=$NIX_BUILD_CORES MAX_CORES_LINK=$MAX_CORES_LINK based on available mem: $APPX_GB GB" + cmakeFlagsArray+=( + "-DCK_PARALLEL_LINK_JOBS=$MAX_CORES_LINK" + "-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES" + ) + ''; + postInstall = '' - zstd --rm $out/lib/libdevice_operations.a + zstd --rm $out/lib/libdevice_*_operations.a '' + lib.optionalString buildTests '' mkdir -p $test/bin @@ -92,21 +169,15 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; - # Times out otherwise - requiredSystemFeatures = [ "big-parallel" ]; - meta = with lib; { description = "Performance portable programming model for machine learning tensor operators"; homepage = "https://github.com/ROCm/composable_kernel"; license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch b/pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch new file mode 100644 index 0000000000000..5ccf5239cb1e2 --- /dev/null +++ b/pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch @@ -0,0 +1,16 @@ +Flag -amdgpu-early-inline-all explodes memory consumption, so that build does not fit 64GB of RAM. 
+LLVM bug: https://github.com/llvm/llvm-project/issues/86332 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -213,11 +213,6 @@ if(NOT WIN32 AND check-coerce AND ${hip_VERSION_FLAT} GREATER 600241132 AND ${hi + message("Adding the amdgpu-coerce-illegal-types=1") + add_compile_options("SHELL: -mllvm -amdgpu-coerce-illegal-types=1") + endif() +-if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132) +- message("Adding -amdgpu-early-inline-all=true and -amdgpu-function-calls=false") +- add_compile_options("SHELL: -mllvm -amdgpu-early-inline-all=true") +- add_compile_options("SHELL: -mllvm -amdgpu-function-calls=false") +-endif() + # + # Seperate linking jobs from compiling + # Too many concurrent linking jobs can break the build diff --git a/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix b/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix index ff98df6eff3b0..c0e40e16965fc 100644 --- a/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix +++ b/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix @@ -1,20 +1,29 @@ { runCommandLocal, composable_kernel_build, + ck4inductor, zstd, }: let ck = composable_kernel_build; in -runCommandLocal "unpack-${ck.name}" +runCommandLocal "unpack-${ck.pname}" { nativeBuildInputs = [ zstd ]; - meta = ck.meta; + inherit (ck) meta; } '' mkdir -p $out cp -r --no-preserve=mode ${ck}/* $out - zstd -dv --rm $out/lib/libdevice_operations.a.zst -o $out/lib/libdevice_operations.a + for zs in $out/lib/libdevice_*_operations.a.zst; do + zstd -dv --rm "$zs" -o "''${zs/.zst}" + done substituteInPlace $out/lib/cmake/composable_kernel/*.cmake \ --replace "${ck}" "$out" + cp -r --no-preserve=mode ${ck4inductor}/* $out/ + + if [ ! 
-e $out/lib/python3.12/site-packages/ck4inductor/library/src/tensor_operation_instance/gpu/gemm_universal ]; then + echo "Missing gemm_universal at expected path for pytorch CK backend" + exit 1 + fi '' diff --git a/pkgs/development/rocm-modules/6/default.nix b/pkgs/development/rocm-modules/6/default.nix index 6cd85b0a575a1..a0fce5e563979 100644 --- a/pkgs/development/rocm-modules/6/default.nix +++ b/pkgs/development/rocm-modules/6/default.nix @@ -1,530 +1,535 @@ -{ stdenv -, lib -, config -, callPackage -, recurseIntoAttrs -, symlinkJoin -, fetchFromGitHub -, cudaPackages -, python3Packages -, elfutils -, boost179 -, opencv -, ffmpeg_4 -, libjpeg_turbo +{ + lib, + callPackage, + newScope, + recurseIntoAttrs, + symlinkJoin, + fetchFromGitHub, + boost179, + opencv, + ffmpeg_4, + libjpeg_turbo, + python3Packages, + triton-llvm, + openmpi, + rocmGpuArches ? [ ], }: let - rocmUpdateScript = callPackage ./update.nix { }; -in rec { - ## ROCm ## - llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript rocm-device-libs rocm-runtime rocm-thunk clr; }); - - rocm-core = callPackage ./rocm-core { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-cmake = callPackage ./rocm-cmake { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-thunk = callPackage ./rocm-thunk { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-smi = python3Packages.callPackage ./rocm-smi { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - # Eventually will be in the LLVM repo - rocm-device-libs = callPackage ./rocm-device-libs { - inherit rocmUpdateScript rocm-cmake; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-runtime = callPackage ./rocm-runtime { - inherit rocmUpdateScript rocm-device-libs rocm-thunk; - stdenv = llvm.rocmClangStdenv; - }; - - # Eventually will be in the LLVM repo - rocm-comgr = callPackage ./rocm-comgr { - inherit rocmUpdateScript rocm-cmake rocm-device-libs; - 
stdenv = llvm.rocmClangStdenv; - }; - - rocminfo = callPackage ./rocminfo { - inherit rocmUpdateScript rocm-cmake rocm-runtime; - stdenv = llvm.rocmClangStdenv; - }; - - clang-ocl = callPackage ./clang-ocl { - inherit rocmUpdateScript rocm-cmake rocm-device-libs; - stdenv = llvm.rocmClangStdenv; - }; - - # Unfree - hsa-amd-aqlprofile-bin = callPackage ./hsa-amd-aqlprofile-bin { - stdenv = llvm.rocmClangStdenv; - }; - - # Broken, too many errors - rdc = callPackage ./rdc { - inherit rocmUpdateScript rocm-smi rocm-runtime stdenv; - # stdenv = llvm.rocmClangStdenv; - }; - - rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { inherit stdenv; }; - - hip-common = callPackage ./hip-common { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - # Eventually will be in the LLVM repo - hipcc = callPackage ./hipcc { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - # Replaces hip, opencl-runtime, and rocclr - clr = callPackage ./clr { - inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo rocm-smi; - inherit (llvm) clang; - stdenv = llvm.rocmClangStdenv; - }; - - hipify = callPackage ./hipify { - inherit rocmUpdateScript; - inherit (llvm) clang; - stdenv = llvm.rocmClangStdenv; - }; - - # Needs GCC - rocprofiler = callPackage ./rocprofiler { - inherit rocmUpdateScript clr rocm-core rocm-thunk rocm-device-libs roctracer rocdbgapi rocm-smi hsa-amd-aqlprofile-bin stdenv; - inherit (llvm) clang; - }; - - # Needs GCC - roctracer = callPackage ./roctracer { - inherit rocmUpdateScript rocm-device-libs rocm-runtime clr stdenv; - }; - - rocgdb = callPackage ./rocgdb { - inherit rocmUpdateScript rocdbgapi; - stdenv = llvm.rocmClangStdenv; - }; - - rocdbgapi = callPackage ./rocdbgapi { - inherit rocmUpdateScript rocm-cmake rocm-comgr rocm-runtime; - stdenv = llvm.rocmClangStdenv; - }; - - rocr-debug-agent = callPackage ./rocr-debug-agent { - inherit rocmUpdateScript clr rocdbgapi; - stdenv = 
llvm.rocmClangStdenv; - }; - - rocprim = callPackage ./rocprim { - inherit rocmUpdateScript rocm-cmake clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocsparse = callPackage ./rocsparse { - inherit rocmUpdateScript rocm-cmake rocprim clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocthrust = callPackage ./rocthrust { - inherit rocmUpdateScript rocm-cmake rocprim clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocrand = callPackage ./rocrand { - inherit rocmUpdateScript rocm-cmake clr; - stdenv = llvm.rocmClangStdenv; - }; - - hiprand = callPackage ./hiprand { - inherit rocmUpdateScript rocm-cmake clr rocrand; - stdenv = llvm.rocmClangStdenv; - }; - - rocfft = callPackage ./rocfft { - inherit rocmUpdateScript rocm-cmake rocrand rocfft clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rccl = callPackage ./rccl { - inherit rocmUpdateScript rocm-cmake rocm-smi clr hipify; - stdenv = llvm.rocmClangStdenv; - }; - - hipcub = callPackage ./hipcub { - inherit rocmUpdateScript rocm-cmake rocprim clr; - stdenv = llvm.rocmClangStdenv; - }; - - hipsparse = callPackage ./hipsparse { - inherit rocmUpdateScript rocm-cmake rocsparse clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - hipfort = callPackage ./hipfort { - inherit rocmUpdateScript rocm-cmake; - stdenv = llvm.rocmClangStdenv; - }; - - hipfft = callPackage ./hipfft { - inherit rocmUpdateScript rocm-cmake rocfft clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - tensile = python3Packages.callPackage ./tensile { - inherit rocmUpdateScript rocminfo; - stdenv = llvm.rocmClangStdenv; - }; - - rocblas = callPackage ./rocblas { - inherit rocmUpdateScript rocm-cmake clr tensile; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocsolver = callPackage ./rocsolver { - inherit rocmUpdateScript rocm-cmake rocblas rocsparse clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocwmma = callPackage ./rocwmma { - inherit rocmUpdateScript rocm-cmake rocm-smi 
rocblas clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocalution = callPackage ./rocalution { - inherit rocmUpdateScript rocm-cmake rocprim rocsparse rocrand rocblas clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocmlir = callPackage ./rocmlir { - inherit rocmUpdateScript rocm-cmake rocminfo clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocmlir-rock = rocmlir.override { - buildRockCompiler = true; - }; - - hipsolver = callPackage ./hipsolver { - inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr; - stdenv = llvm.rocmClangStdenv; - }; - - hipblas = callPackage ./hipblas { - inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr; - stdenv = llvm.rocmClangStdenv; - }; - - # hipBlasLt - Very broken with Tensile at the moment, only supports GFX9 - # hipTensor - Only supports GFX9 - - composable_kernel = callPackage ./composable_kernel/unpack.nix { - composable_kernel_build = callPackage ./composable_kernel { - inherit rocmUpdateScript rocm-cmake clr; - inherit (llvm) openmp clang-tools-extra; + outer = lib.makeScope newScope ( + self: + let + pyPackages = python3Packages; + openmpi-orig = openmpi; + llvm = self.llvm; + in + { + inherit rocmGpuArches; + buildTests = false; + buildBenchmarks = false; stdenv = llvm.rocmClangStdenv; - }; - }; - - half = callPackage ./half { - inherit rocmUpdateScript rocm-cmake; - stdenv = llvm.rocmClangStdenv; - }; - - miopen = callPackage ./miopen { - inherit rocmUpdateScript rocm-cmake rocblas clang-ocl composable_kernel rocm-comgr clr rocm-docs-core half roctracer; - inherit (llvm) clang-tools-extra; - stdenv = llvm.rocmClangStdenv; - rocmlir = rocmlir-rock; - boost = boost179.override { enableStatic = true; }; - }; - - miopen-hip = miopen; - - migraphx = callPackage ./migraphx { - inherit rocmUpdateScript rocm-cmake rocblas composable_kernel miopen clr half rocm-device-libs; - inherit (llvm) openmp clang-tools-extra; - stdenv = llvm.rocmClangStdenv; - rocmlir = 
rocmlir-rock; - }; - - rpp = callPackage ./rpp { - inherit rocmUpdateScript rocm-cmake rocm-docs-core clr half; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rpp-hip = rpp.override { - useOpenCL = false; - useCPU = false; - }; - - rpp-opencl = rpp.override { - useOpenCL = true; - useCPU = false; - }; - - rpp-cpu = rpp.override { - useOpenCL = false; - useCPU = true; - }; - - mivisionx = callPackage ./mivisionx { - inherit rocmUpdateScript rocm-cmake rocm-device-libs clr rpp rocblas miopen migraphx half rocm-docs-core; - inherit (llvm) clang openmp; - opencv = opencv.override { enablePython = true; }; - ffmpeg = ffmpeg_4; - stdenv = llvm.rocmClangStdenv; - - # Unfortunately, rocAL needs a custom libjpeg-turbo until further notice - # See: https://github.com/ROCm/MIVisionX/issues/1051 - libjpeg_turbo = libjpeg_turbo.overrideAttrs { - version = "2.0.6.1"; - - src = fetchFromGitHub { - owner = "rrawther"; - repo = "libjpeg-turbo"; - rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb"; - sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY="; + + rocmPath = self.callPackage ./rocm-path { }; + rocmUpdateScript = self.callPackage ./update.nix { }; + + ## ROCm ## + llvm = recurseIntoAttrs ( + callPackage ./llvm/default.nix { + inherit (self) rocm-device-libs rocm-runtime; + } + ); + inherit (self.llvm) rocm-merged-llvm clang openmp; + + rocm-core = self.callPackage ./rocm-core { }; + amdsmi = pyPackages.callPackage ./amdsmi { + inherit (self) rocmUpdateScript; + }; + + rocm-cmake = self.callPackage ./rocm-cmake { }; + + rocm-smi = pyPackages.callPackage ./rocm-smi { + inherit (self) rocmUpdateScript; + }; + + rocm-device-libs = self.callPackage ./rocm-device-libs { + inherit (llvm) rocm-merged-llvm; + }; + + rocm-runtime = self.callPackage ./rocm-runtime { + inherit (llvm) rocm-merged-llvm; + }; + + rocm-comgr = self.callPackage ./rocm-comgr { + inherit (llvm) rocm-merged-llvm; + }; + + rocminfo = self.callPackage ./rocminfo { }; + + # Unfree + 
hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { }; + + rdc = self.callPackage ./rdc { }; + + rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { }; + + hip-common = self.callPackage ./hip-common { }; + + # Eventually will be in the LLVM repo + hipcc = self.callPackage ./hipcc { + inherit (llvm) rocm-merged-llvm; + }; + + # Replaces hip, opencl-runtime, and rocclr + clr = self.callPackage ./clr { }; + + aotriton = self.callPackage ./aotriton { }; + + hipify = self.callPackage ./hipify { + inherit (llvm) + clang + rocm-merged-llvm + ; + }; + + # hsakmt was merged into rocm-runtime + hsakmt = self.rocm-runtime; + + rocprofiler = self.callPackage ./rocprofiler { + inherit (llvm) clang; + }; + rocprofiler-register = self.callPackage ./rocprofiler-register { + inherit (llvm) clang; + }; + + # Needs GCC + roctracer = self.callPackage ./roctracer { }; + + rocgdb = self.callPackage ./rocgdb { }; + + rocdbgapi = self.callPackage ./rocdbgapi { }; + + rocr-debug-agent = self.callPackage ./rocr-debug-agent { }; + + rocprim = self.callPackage ./rocprim { }; + + rocsparse = self.callPackage ./rocsparse { }; + + rocthrust = self.callPackage ./rocthrust { }; + + rocrand = self.callPackage ./rocrand { }; + + hiprand = self.callPackage ./hiprand { }; + + rocfft = self.callPackage ./rocfft { }; + + mscclpp = self.callPackage ./mscclpp { }; + + rccl = self.callPackage ./rccl { }; + + # RCCL with sanitizers and tests + # Can't have with sanitizer build as dep of other packages without + # runtime crashes due to ASAN not loading first + rccl-tests = self.callPackage ./rccl { + buildTests = true; + }; + + hipcub = self.callPackage ./hipcub { }; + + hipsparse = self.callPackage ./hipsparse { }; + + hipfort = self.callPackage ./hipfort { }; + + hipfft = self.callPackage ./hipfft { }; + + tensile = pyPackages.callPackage ./tensile { + inherit (self) + rocmUpdateScript + rocminfo + ; + }; + + rocblas = self.callPackage ./rocblas { + buildTests = true; + 
buildBenchmarks = true; + }; + + rocsolver = self.callPackage ./rocsolver { }; + + rocwmma = self.callPackage ./rocwmma { }; + + rocalution = self.callPackage ./rocalution { }; + + rocmlir = self.callPackage ./rocmlir { + buildRockCompiler = true; + }; + + hipsolver = self.callPackage ./hipsolver { }; + + hipblas-common = self.callPackage ./hipblas-common { }; + + hipblas = self.callPackage ./hipblas { }; + + hipblaslt = self.callPackage ./hipblaslt { }; + + # hipTensor - Only supports GFX9 + + composable_kernel_build = self.callPackage ./composable_kernel { }; + + # FIXME: we have compressed code objects now, may be able to skip two stages? + composable_kernel = self.callPackage ./composable_kernel/unpack.nix { }; + ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix { + inherit (self) composable_kernel_build; + inherit (llvm) rocm-merged-llvm; + }; + + half = self.callPackage ./half { }; + + miopen = self.callPackage ./miopen { + boost = boost179.override { enableStatic = true; }; + }; + + miopen-hip = self.miopen; + + migraphx = self.callPackage ./migraphx { }; + + rpp = self.callPackage ./rpp { }; + + rpp-hip = self.rpp.override { + useOpenCL = false; + useCPU = false; + }; + + rpp-opencl = self.rpp.override { + useOpenCL = true; + useCPU = false; + }; + + rpp-cpu = self.rpp.override { + useOpenCL = false; + useCPU = true; }; - # overwrite all patches, since patches for newer version do not apply - patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ]; - }; - }; - - mivisionx-hip = mivisionx.override { - rpp = rpp-hip; - useOpenCL = false; - useCPU = false; - }; - - mivisionx-cpu = mivisionx.override { - rpp = rpp-cpu; - useOpenCL = false; - useCPU = true; - }; - - ## Meta ## - # Emulate common ROCm meta layout - # These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations - # Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues! 
- # See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png - # See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html - meta = rec { - rocm-developer-tools = symlinkJoin { - name = "rocm-developer-tools-meta"; - - paths = [ - hsa-amd-aqlprofile-bin - rocm-core - rocr-debug-agent - roctracer - rocdbgapi - rocprofiler - rocgdb - rocm-language-runtime - ]; - }; - - rocm-ml-sdk = symlinkJoin { - name = "rocm-ml-sdk-meta"; - - paths = [ - rocm-core - miopen-hip - rocm-hip-sdk - rocm-ml-libraries - ]; - }; - - rocm-ml-libraries = symlinkJoin { - name = "rocm-ml-libraries-meta"; - - paths = [ - llvm.clang - llvm.mlir - llvm.openmp - rocm-core - miopen-hip - rocm-hip-libraries - ]; - }; - - rocm-hip-sdk = symlinkJoin { - name = "rocm-hip-sdk-meta"; - - paths = [ - rocprim - rocalution - hipfft - rocm-core - hipcub - hipblas - rocrand - rocfft - rocsparse - rccl - rocthrust - rocblas - hipsparse - hipfort - rocwmma - hipsolver - rocsolver - rocm-hip-libraries - rocm-hip-runtime-devel - ]; - }; - - rocm-hip-libraries = symlinkJoin { - name = "rocm-hip-libraries-meta"; - - paths = [ - rocblas - hipfort - rocm-core - rocsolver - rocalution - rocrand - hipblas - rocfft - hipfft - rccl - rocsparse - hipsparse - hipsolver - rocm-hip-runtime - ]; - }; - - rocm-openmp-sdk = symlinkJoin { - name = "rocm-openmp-sdk-meta"; - - paths = [ - rocm-core - llvm.clang - llvm.mlir - llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp) - rocm-language-runtime - ]; - }; - - rocm-opencl-sdk = symlinkJoin { - name = "rocm-opencl-sdk-meta"; - - paths = [ - rocm-core - rocm-runtime - clr - clr.icd - rocm-thunk - rocm-opencl-runtime - ]; - }; - - rocm-opencl-runtime = symlinkJoin { - name = "rocm-opencl-runtime-meta"; - - paths = [ - rocm-core - clr - clr.icd - rocm-language-runtime - ]; - }; - - rocm-hip-runtime-devel = symlinkJoin { - name = "rocm-hip-runtime-devel-meta"; - - paths = [ - clr - rocm-core - hipify - rocm-cmake - 
llvm.clang - llvm.mlir - llvm.openmp - rocm-thunk - rocm-runtime - rocm-hip-runtime - ]; - }; - - rocm-hip-runtime = symlinkJoin { - name = "rocm-hip-runtime-meta"; - - paths = [ - rocm-core - rocminfo - clr - rocm-language-runtime - ]; - }; - - rocm-language-runtime = symlinkJoin { - name = "rocm-language-runtime-meta"; - - paths = [ - rocm-runtime - rocm-core - rocm-comgr - llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp) - ]; - }; - - rocm-all = symlinkJoin { - name = "rocm-all-meta"; - - paths = [ - rocm-developer-tools - rocm-ml-sdk - rocm-ml-libraries - rocm-hip-sdk - rocm-hip-libraries - rocm-openmp-sdk - rocm-opencl-sdk - rocm-opencl-runtime - rocm-hip-runtime-devel - rocm-hip-runtime - rocm-language-runtime - ]; - }; - }; -} // lib.optionalAttrs config.allowAliases { - miopengemm= throw '' - 'miopengemm' has been deprecated. - It is still available for some time as part of rocmPackages_5. - ''; # Added 2024-3-3 - - miopen-opencl= throw '' - 'miopen-opencl' has been deprecated. - It is still available for some time as part of rocmPackages_5. - ''; # Added 2024-3-3 - - mivisionx-opencl = throw '' - 'mivisionx-opencl' has been deprecated. - Other versions of mivisionx are still available. - It is also still available for some time as part of rocmPackages_5. 
- ''; # Added 2024-3-24 + mivisionx = self.callPackage ./mivisionx { + inherit (llvm) clang; + opencv = opencv.override { enablePython = true; }; + ffmpeg = ffmpeg_4; + # Unfortunately, rocAL needs a custom libjpeg-turbo until further notice + # See: https://github.com/ROCm/MIVisionX/issues/1051 + libjpeg_turbo = libjpeg_turbo.overrideAttrs { + version = "2.0.6.1"; + src = fetchFromGitHub { + owner = "rrawther"; + repo = "libjpeg-turbo"; + rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb"; + sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY="; + }; + # overwrite all patches, since patches for newer version do not apply + patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ]; + }; + }; + + mivisionx-hip = self.mivisionx.override { + rpp = self.rpp-hip; + useOpenCL = false; + useCPU = false; + }; + + # mivisionx-opencl = throw '' + # 'mivisionx-opencl' has been deprecated. + # Other versions of mivisionx are still available. + # It is also still available for some time as part of rocmPackages_5. 
+ # ''; # Added 2024-3-24 + + mivisionx-cpu = self.mivisionx.override { + rpp = self.rpp-cpu; + useOpenCL = false; + useCPU = true; + }; + + openmpi = openmpi-orig.override (prev: { + ucx = prev.ucx.override { + enableCuda = false; + enableRocm = true; + }; + }); + mpi = self.openmpi; + + triton-llvm = + (triton-llvm.override { + # Workaround https://github.com/NixOS/nixpkgs/issues/363965 so we can test + # not root caused + buildTests = false; + }).overrideAttrs + { + src = fetchFromGitHub { + owner = "llvm"; + repo = "llvm-project"; + # make sure this matches triton llvm rel branch hash for now + # https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt + rev = "86b69c31642e98f8357df62c09d118ad1da4e16a"; + hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE="; + }; + pname = "triton-llvm-rocm"; + patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase + }; + + triton = + (pyPackages.triton-no-cuda.override (_old: { + rocmPackages = self; + rocmSupport = true; + # buildPythonPackage = x: old.buildPythonPackage (x // { stdenv = llvmPackagesRocm.rocmClangStdenv;}); + stdenv = self.llvm.rocmClangStdenv; + llvm = self.triton-llvm; + })).overridePythonAttrs + (old: { + doCheck = false; + stdenv = self.llvm.rocmClangStdenv; + version = "3.2.0"; + src = fetchFromGitHub { + owner = "triton-lang"; + repo = "triton"; + rev = "64b80f0916b69e3c4d0682a2368fd126e57891ab"; # "release/3.2.x"; + hash = "sha256-xQOgMLHruVrI/9FtY3TvZKALitMOfqZ69uOyrYhXhu8="; + }; + buildInputs = old.buildInputs ++ [ + self.clr + ]; + dontStrip = true; + env = old.env // { + CXXFLAGS = "-O3 -I${self.clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include"; + TRITON_OFFLINE_BUILD = 1; + }; + patches = [ ]; + postPatch = '' + # Need an empty cuda.h to happily compile for ROCm + mkdir -p third_party/nvidia/include/ 
third_party/nvidia/include/backend/include/ + echo "" > third_party/nvidia/include/cuda.h + touch third_party/nvidia/include/backend/include/{cuda,driver_types}.h + rm -rf third_party/nvidia + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(test)" "" + sed -i '/nvidia\|NVGPU\|registerConvertTritonGPUToLLVMPass\|mlir::test::/Id' bin/RegisterTritonDialects.h + sed -i '/TritonTestAnalysis/Id' bin/CMakeLists.txt + substituteInPlace python/setup.py \ + --replace-fail 'backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]' \ + 'backends = [*BackendInstaller.copy(["amd"]), *BackendInstaller.copy_externals()]' + #cp ''${cudaPackages.cuda_cudart}/include/*.h third_party/nvidia/backend/include/ + find . -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + + find . -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + + # remove any downloads + substituteInPlace python/setup.py \ + --replace-fail "[get_json_package_info()]" "[]"\ + --replace-fail "[get_llvm_package_info()]" "[]"\ + --replace-fail "curr_version != version" "False" + # Don't fetch googletest + substituteInPlace cmake/AddTritonUnitTest.cmake \ + --replace-fail 'include(''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)' "" \ + --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)" + substituteInPlace third_party/amd/backend/compiler.py \ + --replace-fail '"/opt/rocm/llvm/bin/ld.lld"' "os.environ['ROCM_PATH']"' + "/llvm/bin/ld.lld"' + ''; + }); + + ## Meta ## + # Emulate common ROCm meta layout + # These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations + # Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues! 
+ # See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png + # See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html + meta = with self; rec { + rocm-developer-tools = symlinkJoin { + name = "rocm-developer-tools-meta"; + paths = [ + hsa-amd-aqlprofile-bin + rocm-core + rocr-debug-agent + roctracer + rocdbgapi + rocprofiler + rocgdb + rocm-language-runtime + ]; + }; + rocm-ml-sdk = symlinkJoin { + name = "rocm-ml-sdk-meta"; + paths = [ + rocm-core + miopen-hip + rocm-hip-sdk + rocm-ml-libraries + ]; + }; + rocm-ml-libraries = symlinkJoin { + name = "rocm-ml-libraries-meta"; + paths = [ + llvm.clang + llvm.mlir + llvm.openmp + rocm-core + miopen-hip + rocm-hip-libraries + ]; + }; + rocm-hip-sdk = symlinkJoin { + name = "rocm-hip-sdk-meta"; + paths = [ + rocprim + rocalution + hipfft + rocm-core + hipcub + hipblas + hipblaslt + rocrand + rocfft + rocsparse + rccl + rocthrust + rocblas + hipsparse + hipfort + rocwmma + hipsolver + rocsolver + rocm-hip-libraries + rocm-hip-runtime-devel + ]; + }; + rocm-hip-libraries = symlinkJoin { + name = "rocm-hip-libraries-meta"; + paths = [ + rocblas + hipfort + rocm-core + rocsolver + rocalution + rocrand + hipblas + hipblaslt + rocfft + hipfft + rccl + rocsparse + hipsparse + hipsolver + rocm-hip-runtime + ]; + }; + rocm-openmp-sdk = symlinkJoin { + name = "rocm-openmp-sdk-meta"; + paths = [ + rocm-core + llvm.clang + llvm.mlir + llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp) + rocm-language-runtime + ]; + }; + rocm-opencl-sdk = symlinkJoin { + name = "rocm-opencl-sdk-meta"; + paths = [ + rocm-core + rocm-runtime + clr + clr.icd + rocm-opencl-runtime + ]; + }; + rocm-opencl-runtime = symlinkJoin { + name = "rocm-opencl-runtime-meta"; + paths = [ + rocm-core + clr + clr.icd + rocm-language-runtime + ]; + }; + rocm-hip-runtime-devel = symlinkJoin { + name = "rocm-hip-runtime-devel-meta"; + paths = [ + clr + rocm-core + hipify + rocm-cmake + llvm.clang + 
llvm.mlir + llvm.openmp + rocm-runtime + rocm-hip-runtime + ]; + }; + rocm-hip-runtime = symlinkJoin { + name = "rocm-hip-runtime-meta"; + paths = [ + rocm-core + rocminfo + clr + rocm-language-runtime + ]; + }; + rocm-language-runtime = symlinkJoin { + name = "rocm-language-runtime-meta"; + paths = [ + rocm-runtime + rocm-core + rocm-comgr + llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp) + ]; + }; + rocm-all = symlinkJoin { + name = "rocm-all-meta"; + paths = [ + rocm-developer-tools + rocm-ml-sdk + rocm-ml-libraries + rocm-hip-sdk + rocm-hip-libraries + rocm-openmp-sdk + rocm-opencl-sdk + rocm-opencl-runtime + rocm-hip-runtime-devel + rocm-hip-runtime + rocm-language-runtime + ]; + }; + }; + + rocm-tests = self.callPackage ./rocm-tests { + rocmPackages = self; + }; + } + ); + scopeForArches = + arches: + outer.overrideScope ( + final: prev: { + clr = prev.clr.override { + localGpuTargets = arches; + }; + } + ); +in +outer +// builtins.listToAttrs ( + builtins.map (arch: { + name = arch; + value = scopeForArches [ arch ]; + }) outer.clr.gpuTargets +) +// { + gfx9 = scopeForArches [ + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + ]; + gfx10 = scopeForArches [ + "gfx1010" + "gfx1030" + ]; + gfx11 = scopeForArches [ + "gfx1100" + "gfx1101" + "gfx1102" + ]; } diff --git a/pkgs/development/rocm-modules/6/half/default.nix b/pkgs/development/rocm-modules/6/half/default.nix index 77f37790a8903..303f9b957fc83 100644 --- a/pkgs/development/rocm-modules/6/half/default.nix +++ b/pkgs/development/rocm-modules/6/half/default.nix @@ -1,20 +1,21 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, }: stdenv.mkDerivation (finalAttrs: { pname = "half"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "half"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-wvl8ny7pbY9hUGGtJ70R7/4YIsahgI7qcVzUnxmUfZM="; + hash = 
"sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc="; }; nativeBuildInputs = [ @@ -24,8 +25,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -34,6 +35,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.unix; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hip-common/default.nix b/pkgs/development/rocm-modules/6/hip-common/default.nix index 8b0a4fc5add29..b9292936099bf 100644 --- a/pkgs/development/rocm-modules/6/hip-common/default.nix +++ b/pkgs/development/rocm-modules/6/hip-common/default.nix @@ -1,18 +1,21 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, }: stdenv.mkDerivation (finalAttrs: { pname = "hip-common"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "HIP"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-51u3By0R4LKoWiklNacFP6HILL845jxpN6FD7rQB+zQ="; + hash = "sha256-y85S2fULvbQfwxZukIsMLuQAqWEv1kHL8fdozK4kj5I="; + # rev = "5f2d2d109c34e749d7947b48834098eec26a5e67"; + # hash = "sha256-Lws65mzRJZP/JE9UiHHfX4Y3zOYA6FPxgbAea48D9Gk="; }; dontConfigure = true; @@ -29,8 +32,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -39,6 +42,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = 
versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipblas-common/default.nix b/pkgs/development/rocm-modules/6/hipblas-common/default.nix new file mode 100644 index 0000000000000..6107be282af2c --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipblas-common/default.nix @@ -0,0 +1,27 @@ +{ + stdenv, + cmake, + fetchFromGitHub, + rocm-cmake, + rocmUpdateScript, +}: +stdenv.mkDerivation (finalAttrs: { + pname = "hipblas-common"; + version = "6.3.1"; + nativeBuildInputs = [ + cmake + rocm-cmake + ]; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipBLAS-common"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-tvNz4ymQ1y3YSUQxAtNu2who79QzSKR+3JEevr+GDWo="; + }; +}) diff --git a/pkgs/development/rocm-modules/6/hipblas/default.nix b/pkgs/development/rocm-modules/6/hipblas/default.nix index 3e4436b7d3f72..809093b428d6b 100644 --- a/pkgs/development/rocm-modules/6/hipblas/default.nix +++ b/pkgs/development/rocm-modules/6/hipblas/default.nix @@ -7,10 +7,14 @@ rocm-cmake, clr, gfortran, + hipblas-common, rocblas, rocsolver, + rocsparse, + rocprim, gtest, lapack-reference, + writeShellScriptBin, buildTests ? false, buildBenchmarks ? false, buildSamples ? 
false, @@ -19,7 +23,7 @@ # Can also use cuBLAS stdenv.mkDerivation (finalAttrs: { pname = "hipblas"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -39,20 +43,34 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipBLAS"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-Fq7o2sMmHlHIv9UKJw+u/h9K/ZhKVJWwosYTdYIsscA="; + #rev = "a4b23dec749d9d623f0e7699045f381ec3eddfab"; + hash = "sha256-Rz1KAhBUbvErHTF2PM1AkVhqo4OHldfSNMSpp5Tx9yk="; }; + postPatch = '' + substituteInPlace library/CMakeLists.txt \ + --replace-fail "find_package(Git REQUIRED)" "" + ''; + nativeBuildInputs = [ cmake + #ninja rocm-cmake clr gfortran + (writeShellScriptBin "amdclang++" '' + exec clang++ "$@" + '') ]; buildInputs = [ rocblas + rocprim + rocsparse rocsolver + # hipblaslt + hipblas-common ] ++ lib.optionals buildTests [ gtest @@ -63,13 +81,16 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/hipcc" + # Upstream is migrating to amdclang++, it is likely this will be correct in next version bump + #"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}" # FIXME: ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" @@ -100,8 +121,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -110,8 +131,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor 
stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipblaslt/default.nix b/pkgs/development/rocm-modules/6/hipblaslt/default.nix new file mode 100644 index 0000000000000..407cbbb9b9b9a --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipblaslt/default.nix @@ -0,0 +1,221 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + rocm-cmake, + clr, + gfortran, + gtest, + msgpack, + libxml2, + python3, + python3Packages, + openmp, + hipblas-common, + tensile, + lapack-reference, + ncurses, + libffi, + zlib, + zstd, + writeShellScriptBin, + rocmUpdateScript, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + # hipblaslt supports only devices with MFMA or WMMA + # WMMA on gfx1100 may be broken + # MFMA on MI100 may be broken + # MI200/MI300 known to work + gpuTargets ? ( + clr.localGpuTargets or [ + # "gfx908" FIXME: confirm MFMA on MI100 works + "gfx90a" + "gfx942" + # "gfx1100" FIXME: confirm WMMA targets work + ] + ), +}: + +stdenv.mkDerivation ( + finalAttrs: + let + tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs { + inherit (finalAttrs) src; + sourceRoot = "${finalAttrs.src.name}/tensilelite"; + env.ROCM_PATH = "${clr}"; + }; + py = python3.withPackages (ps: [ + ps.pyyaml + ps.setuptools + ps.packaging + ]); + gpuTargets' = lib.concatStringsSep ";" gpuTargets; + compiler = "hipcc"; # FIXME: amdclang++ in future + cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need patched to include this properly + in + { + # build will fail with llvm libcxx, must use gnu libstdcxx + # https://github.com/llvm/llvm-project/issues/98734 + pname = "hipblaslt${clr.gpuArchSuffix}"; + version = "6.3.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipBLASLt"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-ozfHwsxcczzYXN9SIkyfRvdtaCqlDN4bh3UHZNS2oVQ="; + }; + env.CXX = compiler; + env.CFLAGS = cFlags; + env.CXXFLAGS = cFlags; + env.ROCM_PATH = 
"${clr}"; + env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++"; + env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = "${stdenv.cc}/bin/clang++"; + requiredSystemFeatures = [ "big-parallel" ]; + + patches = [ + ./ext-op-first.diff + ]; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + postPatch = '' + rm -rf tensilelite + # sed -i '1i variable_watch(__CMAKE_C_COMPILER_OUTPUT)' CMakeLists.txt + # sed -i '1i variable_watch(__CMAKE_CXX_COMPILER_OUTPUT)' CMakeLists.txt + # sed -i '1i variable_watch(OUTPUT)' CMakeLists.txt + mkdir -p build/Tensile/library + # substituteInPlace tensilelite/Tensile/Ops/gen_assembly.sh \ + # --replace-fail '. ''${venv}/bin/activate' 'set -x; . ''${venv}/bin/activate' + # git isn't needed and we have no .git + substituteInPlace cmake/Dependencies.cmake \ + --replace-fail "find_package(Git REQUIRED)" "" + substituteInPlace CMakeLists.txt \ + --replace-fail "include(virtualenv)" "" \ + --replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \ + --replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \ + --replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" + if [ -f library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh ]; then + substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \ + --replace-fail '${"\${rocm_path}"}/bin/' "" + fi + ''; + + doCheck = false; + doInstallCheck = false; + + nativeBuildInputs = [ + cmake + rocm-cmake + py + clr + gfortran + # need make to get streaming console output so nix knows build is still running + # so deliberately not using ninja + # ninja + (writeShellScriptBin "amdclang++" '' + exec clang++ "$@" + '') + ]; + + buildInputs = + [ + hipblas-common + tensile' + openmp + libffi + ncurses + + # Tensile deps - not 
optional, building without tensile isn't actually supported + msgpack # FIXME: not included in cmake! + libxml2 + python3Packages.msgpack + python3Packages.joblib + zlib + zstd + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + cmakeFlags = + [ + "-Wno-dev" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}" + "-DTENSILE_USE_HIP=ON" + "-DTENSILE_BUILD_CLIENT=OFF" + "-DTENSILE_USE_FLOAT16_BUILTIN=ON" + "-DCMAKE_CXX_COMPILER=${compiler}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DHIPBLASLT_ENABLE_MARKER=Off" + # FIXME what are the implications of hardcoding this? + "-DTensile_CODE_OBJECT_VERSION=V5" + "-DTensile_COMPILER=${compiler}" # amdclang++ in future + "-DAMDGPU_TARGETS=${gpuTargets'}" + "-DGPU_TARGETS=${gpuTargets'}" + "-DTensile_LIBRARY_FORMAT=msgpack" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipblaslt-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipblaslt-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/example-* $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rmdir $out/bin + ''; + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + passthru.tensilelite = tensile'; + meta = with lib; { + description = "Library providing general matrix-matrix operations with a flexible API beyond traditional BLAS"; + homepage =
"https://github.com/ROCm/hipBLASLt"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; + } +) diff --git a/pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff b/pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff new file mode 100644 index 0000000000000..87438bf6a5e54 --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff @@ -0,0 +1,22 @@ +diff --git a/library/src/amd_detail/rocblaslt/src/CMakeLists.txt b/library/src/amd_detail/rocblaslt/src/CMakeLists.txt +index 3d5ace35..8c5a3841 100644 +--- a/library/src/amd_detail/rocblaslt/src/CMakeLists.txt ++++ b/library/src/amd_detail/rocblaslt/src/CMakeLists.txt +@@ -58,6 +58,8 @@ if( BUILD_WITH_TENSILE ) + set(Tensile_Options ${Tensile_Options} LAZY_LIBRARY_LOADING) + endif() + ++ #TensileCreateExtOpLibraries("${PROJECT_BINARY_DIR}/Tensile/library" "${Tensile_ARCHITECTURE}") ++ + # Add a build target for Tensile kernel library + # Runtime language is HIP by default + # warning our Tensile_ variables may shadow variable in TensileCreateLibraryFiles +@@ -86,8 +88,6 @@ if( BUILD_WITH_TENSILE ) + ) + endif() + +- TensileCreateExtOpLibraries("${PROJECT_BINARY_DIR}/Tensile/library" "${Tensile_ARCHITECTURE}") +- + # Create a unique name for TensileHost compiled for rocBLAS + set_target_properties( TensileHost PROPERTIES OUTPUT_NAME rocblaslt-tensile CXX_EXTENSIONS NO ) + diff --git a/pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch b/pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch new file mode 100644 index 0000000000000..bfc386c620cb3 --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch @@ -0,0 +1,39 @@ +From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001 +From: Luna Nova +Date: Fri, 11 Oct 2024 02:56:22 -0700 +Subject: [PATCH] [hipcc]
Remove extra definition of hipBinUtilPtr_ in derived + platforms + +Fixes UB when hipBinUtilPtr_ is used. +--- + amd/hipcc/src/hipBin_amd.h | 1 - + amd/hipcc/src/hipBin_nvidia.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h +index 0a782d1beab9..36cd625ae8bc 100644 +--- a/src/hipBin_amd.h ++++ b/src/hipBin_amd.h +@@ -42,7 +42,6 @@ THE SOFTWARE. + + class HipBinAmd : public HipBinBase { + private: +- HipBinUtil* hipBinUtilPtr_; + string hipClangPath_ = ""; + string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_; + PlatformInfo platformInfoAMD_; +diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h +index ff142cc1cea2..09b7b80979c7 100644 +--- a/src/hipBin_nvidia.h ++++ b/src/hipBin_nvidia.h +@@ -31,7 +31,6 @@ THE SOFTWARE. + + class HipBinNvidia : public HipBinBase { + private: +- HipBinUtil* hipBinUtilPtr_; + string cudaPath_ = ""; + PlatformInfo platformInfoNV_; + string hipCFlags_, hipCXXFlags_, hipLdFlags_; +-- +2.46.0 + diff --git a/pkgs/development/rocm-modules/6/hipcc/default.nix b/pkgs/development/rocm-modules/6/hipcc/default.nix index 5f2ac080cb50d..9eaa072b931d1 100644 --- a/pkgs/development/rocm-modules/6/hipcc/default.nix +++ b/pkgs/development/rocm-modules/6/hipcc/default.nix @@ -1,49 +1,46 @@ { lib, stdenv, - fetchFromGitHub, - rocmUpdateScript, + rocm-merged-llvm, cmake, lsb-release, }: stdenv.mkDerivation (finalAttrs: { pname = "hipcc"; - version = "6.0.2"; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "HIPCC"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-/LRQN+RSMBPk2jS/tdp3psUL/B0RJZQhRri7e67KsG4="; - }; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; + sourceRoot = "${finalAttrs.src.name}/amd/hipcc"; nativeBuildInputs = [ cmake ]; + buildInputs = [ rocm-merged-llvm ]; + + patches = [ + # https://github.com/ROCm/llvm-project/pull/183 + # Fixes always-invoked UB in hipcc + 
./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch + ]; + postPatch = '' substituteInPlace src/hipBin_amd.h \ - --replace "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release" + --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release" ''; + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + ]; postInstall = '' rm -r $out/hip/bin ln -s $out/bin $out/hip/bin ''; - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - meta = with lib; { description = "Compiler driver utility that calls clang or nvcc"; homepage = "https://github.com/ROCm/HIPCC"; license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipcub/default.nix b/pkgs/development/rocm-modules/6/hipcub/default.nix index 8f6f97ed5f20c..f191cc1d01348 100644 --- a/pkgs/development/rocm-modules/6/hipcub/default.nix +++ b/pkgs/development/rocm-modules/6/hipcub/default.nix @@ -1,36 +1,40 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, rocprim -, clr -, gtest -, gbenchmark -, buildTests ? false -, buildBenchmarks ? false -, gpuTargets ? [ ] +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocprim, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? [ ], }: # CUB can also be used as a backend instead of rocPRIM. 
stdenv.mkDerivation (finalAttrs: { pname = "hipcub"; - version = "6.0.2"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "hipCUB"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-8QzVgj0JSb86zEG3sj5AAt9pG3frw+xrjEOTo7xCIrc="; + hash = "sha256-uECOQWG9C64tg5YZdm9/3+fZXaZVGslu8vElK3m23GY="; }; nativeBuildInputs = [ @@ -39,44 +43,53 @@ stdenv.mkDerivation (finalAttrs: { clr ]; - buildInputs = [ - rocprim - ] ++ lib.optionals buildTests [ - gtest - ] ++ lib.optionals buildBenchmarks [ - gbenchmark - ]; + buildInputs = + [ + rocprim + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; - cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" - "-DHIP_ROOT_DIR=${clr}" - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] ++ lib.optionals (gpuTargets != [ ]) [ - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] ++ lib.optionals buildTests [ - "-DBUILD_TEST=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_BENCHMARK=ON" - ]; + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/test_* $test/bin - '' + lib.optionalString 
buildBenchmarks '' - mkdir -p $benchmark/bin - mv $out/bin/benchmark_* $benchmark/bin - '' + lib.optionalString (buildTests || buildBenchmarks) '' - rmdir $out/bin - ''; + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -85,6 +98,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ bsd3 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipfft/default.nix b/pkgs/development/rocm-modules/6/hipfft/default.nix index bc9e26bd9f113..76cbf83883ccc 100644 --- a/pkgs/development/rocm-modules/6/hipfft/default.nix +++ b/pkgs/development/rocm-modules/6/hipfft/default.nix @@ -22,7 +22,7 @@ # Can also use cuFFT stdenv.mkDerivation (finalAttrs: { pname = "hipfft"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -42,7 +42,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipFFT"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-DjjNQryJdl7RmaMQRQPWkleweEWMIwH/xXU84GGjoC0="; + hash = "sha256-Jq/YHEtOo7a0/Ki7gxZATKmSqPU6cyLf5gx3A4MAZNw="; fetchSubmodules = true; }; @@ -111,8 +111,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -121,8 +121,5 @@ stdenv.mkDerivation (finalAttrs: 
{ license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipfort/default.nix b/pkgs/development/rocm-modules/6/hipfort/default.nix index 73f583f1fe1d3..2e0b1769cb51c 100644 --- a/pkgs/development/rocm-modules/6/hipfort/default.nix +++ b/pkgs/development/rocm-modules/6/hipfort/default.nix @@ -1,21 +1,22 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, gfortran +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + gfortran, }: stdenv.mkDerivation (finalAttrs: { pname = "hipfort"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "hipfort"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-3PIqSDyDlY0oVSEx20EPlKGYNkc9xPZtIG3Sbw69esE="; + hash = "sha256-cokHxyb4NDMHeq7RIVz7PBuUKRIHyGdZgDgF6Za4fHM="; }; nativeBuildInputs = [ @@ -51,8 +52,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -61,6 +62,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipify/default.nix b/pkgs/development/rocm-modules/6/hipify/default.nix index 29109a701856b..edc05361a50e5 100644 --- a/pkgs/development/rocm-modules/6/hipify/default.nix +++ b/pkgs/development/rocm-modules/6/hipify/default.nix @@ -1,39 +1,57 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, clang -, 
libxml2 +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clang, + libxml2, + rocm-merged-llvm, + zlib, + zstd, + perl, }: stdenv.mkDerivation (finalAttrs: { pname = "hipify"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "HIPIFY"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-nNyWrPPhUwT7FyASzc3kf5NCTzeqvHybVOc+6hBzkA4="; + hash = "sha256-o/1LNsNtAyQcSug1gf7ujGNRRbvC33kwldrJKZi2LA0="; }; - nativeBuildInputs = [ cmake ]; - buildInputs = [ libxml2 ]; + nativeBuildInputs = [ + cmake + ]; + + buildInputs = [ + libxml2 + rocm-merged-llvm + zlib + zstd + perl + ]; postPatch = '' substituteInPlace CMakeLists.txt \ --replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang" + chmod +x bin/* ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; postInstall = '' - patchShebangs $out/bin + chmod +x $out/bin/* + chmod +x $out/libexec/* + patchShebangs $out/bin/ + patchShebangs $out/libexec/ ''; meta = with lib; { @@ -42,6 +60,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hiprand/default.nix b/pkgs/development/rocm-modules/6/hiprand/default.nix index 796e73c4e9e22..13446523c5ef0 100644 --- a/pkgs/development/rocm-modules/6/hiprand/default.nix +++ b/pkgs/development/rocm-modules/6/hiprand/default.nix @@ -14,7 +14,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "hiprand"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -28,7 +28,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipRAND"; rev = "rocm-${finalAttrs.version}"; - hash = 
"sha256-uGHzOhUX5JEknVFwhHhWFdPmwLS/TuaXYMeItS7tXIg="; + hash = "sha256-TVc+qFwRiS5tAo1OKI1Wu5hadlwPZmSVZ9SvVvH1w7Y="; }; nativeBuildInputs = [ @@ -41,8 +41,6 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" "-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 @@ -67,8 +65,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -77,8 +75,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipsolver/default.nix b/pkgs/development/rocm-modules/6/hipsolver/default.nix index de2e3a049b652..4f7bc2e46af58 100644 --- a/pkgs/development/rocm-modules/6/hipsolver/default.nix +++ b/pkgs/development/rocm-modules/6/hipsolver/default.nix @@ -9,6 +9,8 @@ gfortran, rocblas, rocsolver, + rocsparse, + suitesparse, gtest, lapack-reference, buildTests ? 
false, @@ -19,7 +21,7 @@ # Can also use cuSOLVER stdenv.mkDerivation (finalAttrs: { pname = "hipsolver"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -39,7 +41,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipSOLVER"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-iMfaOv4TdTkmaRHCZOuqUfjO081J6on71+s8nIwwV00="; + hash = "sha256-ZQUKU3L4DgZ5zM7pCYEix0ulRkl78x/5wJnyCndTAwk="; }; nativeBuildInputs = [ @@ -53,6 +55,8 @@ stdenv.mkDerivation (finalAttrs: { [ rocblas rocsolver + rocsparse + suitesparse ] ++ lib.optionals buildTests [ gtest @@ -63,13 +67,13 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" @@ -101,8 +105,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -111,8 +115,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipsparse/default.nix b/pkgs/development/rocm-modules/6/hipsparse/default.nix index 20257d629a00b..cc2f78e67b5d6 100644 --- a/pkgs/development/rocm-modules/6/hipsparse/default.nix +++ b/pkgs/development/rocm-modules/6/hipsparse/default.nix @@ -12,6 +12,7 @@ gtest, openmp, buildTests ? false, + buildBenchmarks ? false, buildSamples ? 
false, gpuTargets ? [ ], }: @@ -19,7 +20,7 @@ # This can also use cuSPARSE as a backend instead of rocSPARSE stdenv.mkDerivation (finalAttrs: { pname = "hipsparse"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -36,7 +37,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipSPARSE"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-fi5b0IF++OiezpM3JuUkhwpmW2apeFH4r5g6CcFseNY="; + hash = "sha256-3a7fKpYyiqG3aGOg7YrTHmKoH4rgTVLD16DvrZ3YY1g="; }; nativeBuildInputs = [ @@ -51,7 +52,7 @@ stdenv.mkDerivation (finalAttrs: { rocsparse git ] - ++ lib.optionals buildTests [ + ++ lib.optionals (buildTests || buildBenchmarks) [ gtest ] ++ lib.optionals (buildTests || buildSamples) [ @@ -60,20 +61,21 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - "-DBUILD_CLIENTS_SAMPLES=${if buildSamples then "ON" else "OFF"}" + # FIXME: #define __noinline__ gets hit in https://github.com/ROCm/clr/blame/3f3f3d0f1c01b6ac592dc2bf5c69cf60e18095cf/hipamd/include/hip/amd_detail/host_defines.h#L175 + # if we don't use hipcc + # "-DCMAKE_C_COMPILER=hipcc" + # "-DCMAKE_CXX_COMPILER=hipcc" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) + (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) + (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples) ] ++ lib.optionals (gpuTargets != [ ]) [ "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] - ++ lib.optionals buildTests [ - "-DBUILD_CLIENTS_TESTS=ON" ]; # We have to manually generate the matrices @@ -140,8 +142,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ 
-150,8 +152,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix b/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix index fb2f2c84379b5..1dcd91b3fc406 100644 --- a/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix +++ b/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix @@ -4,22 +4,24 @@ fetchurl, callPackage, dpkg, + rocm-core, }: stdenv.mkDerivation (finalAttrs: { pname = "hsa-amd-aqlprofile-bin"; - version = "6.0.2"; + version = "6.3.0"; src = let - version = finalAttrs.version; - dotless = builtins.replaceStrings [ "." ] [ "0" ] version; - incremental = "115"; + inherit (finalAttrs) version; + patch = rocm-core.ROCM_LIBPATCH_VERSION; + majorMinor = lib.versions.major version + "." 
+ lib.versions.minor version; + incremental = "39"; osRelease = "22.04"; in fetchurl { - url = "https://repo.radeon.com/rocm/apt/${version}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${dotless}.${dotless}-${incremental}~${osRelease}_amd64.deb"; - hash = "sha256-0XeKUKaof5pSMS/UgLwumBDBYgyH/pCex9jViUKENXY="; + url = "https://repo.radeon.com/rocm/apt/${majorMinor}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${patch}-${incremental}~${osRelease}_amd64.deb"; + hash = "sha256-ghgz5ZgWopgLJcK4Vbwm6zlny3IwxzWz9V0Fuwu35R0="; }; nativeBuildInputs = [ dpkg ]; @@ -31,7 +33,7 @@ stdenv.mkDerivation (finalAttrs: { runHook preInstall mkdir -p $out - cp -a opt/rocm-${finalAttrs.version}/* $out + cp -a opt/rocm-${finalAttrs.version}*/* $out chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.* chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so @@ -46,8 +48,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ unfree ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/llvm/base.nix b/pkgs/development/rocm-modules/6/llvm/base.nix deleted file mode 100644 index 8b85ae53bcaa1..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/base.nix +++ /dev/null @@ -1,216 +0,0 @@ -{ - lib, - stdenv, - gcc12Stdenv, - fetchFromGitHub, - rocmUpdateScript, - pkg-config, - cmake, - ninja, - git, - doxygen, - sphinx, - lit, - libxml2, - libxcrypt, - libedit, - libffi, - mpfr, - zlib, - ncurses, - python3Packages, - buildDocs ? true, - buildMan ? true, - buildTests ? true, - targetName ? "llvm", - targetDir ? "llvm", - targetProjects ? [ ], - targetRuntimes ? [ ], - llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv - extraPatches ? [ ], - extraNativeBuildInputs ? [ ], - extraBuildInputs ? [ ], - extraCMakeFlags ? 
[ ], - extraPostPatch ? "", - checkTargets ? [ - (lib.optionalString buildTests (if targetDir == "runtimes" then "check-runtimes" else "check-all")) - ], - extraPostInstall ? "", - hardeningDisable ? [ ], - requiredSystemFeatures ? [ ], - extraLicenses ? [ ], - isBroken ? false, -}: - -let - stdenv' = stdenv; -in -let - stdenv = - if stdenv'.cc.cc.isGNU or false && lib.versionAtLeast stdenv'.cc.cc.version "13.0" then - gcc12Stdenv - else - stdenv'; -in - -let - llvmNativeTarget = - if stdenv.hostPlatform.isx86_64 then - "X86" - else if stdenv.hostPlatform.isAarch64 then - "AArch64" - else - throw "Unsupported ROCm LLVM platform"; - inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t; - llvmTargetsToBuild' = [ "AMDGPU" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild; -in -stdenv.mkDerivation (finalAttrs: { - pname = "rocm-llvm-${targetName}"; - version = "6.0.2"; - - outputs = - [ - "out" - ] - ++ lib.optionals buildDocs [ - "doc" - ] - ++ lib.optionals buildMan [ - "man" - "info" # Avoid `attribute 'info' missing` when using with wrapCC - ]; - - patches = extraPatches; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "llvm-project"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-uGxalrwMNCOSqSFVrYUBi3ijkMEFFTrzFImmvZKQf6I="; - }; - - nativeBuildInputs = - [ - pkg-config - cmake - ninja - git - (python3Packages.python.withPackages (p: [ p.setuptools ])) - ] - ++ lib.optionals (buildDocs || buildMan) [ - doxygen - sphinx - python3Packages.recommonmark - ] - ++ lib.optionals (buildTests && !finalAttrs.passthru.isLLVM) [ - lit - ] - ++ extraNativeBuildInputs; - - buildInputs = [ - libxml2 - libxcrypt - libedit - libffi - mpfr - ] ++ extraBuildInputs; - - propagatedBuildInputs = lib.optionals finalAttrs.passthru.isLLVM [ - zlib - ncurses - ]; - - sourceRoot = "${finalAttrs.src.name}/${targetDir}"; - - cmakeFlags = - [ - "-DLLVM_TARGETS_TO_BUILD=${builtins.concatStringsSep ";" llvmTargetsToBuild'}" - ] - ++ lib.optionals 
(finalAttrs.passthru.isLLVM && targetProjects != [ ]) [ - "-DLLVM_ENABLE_PROJECTS=${lib.concatStringsSep ";" targetProjects}" - ] - ++ - lib.optionals ((finalAttrs.passthru.isLLVM || targetDir == "runtimes") && targetRuntimes != [ ]) - [ - "-DLLVM_ENABLE_RUNTIMES=${lib.concatStringsSep ";" targetRuntimes}" - ] - ++ lib.optionals finalAttrs.passthru.isLLVM [ - "-DLLVM_INSTALL_UTILS=ON" - "-DLLVM_INSTALL_GTEST=ON" - ] - ++ lib.optionals (buildDocs || buildMan) [ - "-DLLVM_INCLUDE_DOCS=ON" - "-DLLVM_BUILD_DOCS=ON" - # "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core - "-DLLVM_ENABLE_SPHINX=ON" - "-DSPHINX_OUTPUT_HTML=ON" - "-DSPHINX_OUTPUT_MAN=ON" - "-DSPHINX_WARNINGS_AS_ERRORS=OFF" - ] - ++ lib.optionals buildTests [ - "-DLLVM_INCLUDE_TESTS=ON" - "-DLLVM_BUILD_TESTS=ON" - "-DLLVM_EXTERNAL_LIT=${lit}/bin/.lit-wrapped" - ] - ++ extraCMakeFlags; - - prePatch = '' - cd ../ - chmod -R u+w . - ''; - - postPatch = - '' - cd ${targetDir} - '' - + lib.optionalString finalAttrs.passthru.isLLVM '' - patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh - '' - + lib.optionalString (buildTests && finalAttrs.passthru.isLLVM) '' - # FileSystem permissions tests fail with various special bits - rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test - rm unittests/Support/Path.cpp - - substituteInPlace unittests/Support/CMakeLists.txt \ - --replace-fail "Path.cpp" "" - '' - + extraPostPatch; - - doCheck = buildTests; - checkTarget = lib.concatStringsSep " " checkTargets; - - postInstall = - lib.optionalString buildMan '' - mkdir -p $info - '' - + extraPostInstall; - - passthru = { - isLLVM = targetDir == "llvm"; - isClang = targetDir == "clang" || builtins.elem "clang" targetProjects; - isROCm = true; - - updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - }; - - inherit hardeningDisable requiredSystemFeatures; - - meta = with lib; { - description = "ROCm fork of the LLVM compiler 
infrastructure"; - homepage = "https://github.com/ROCm/llvm-project"; - license = with licenses; [ ncsa ] ++ extraLicenses; - maintainers = - with maintainers; - [ - acowley - lovesegfault - ] - ++ teams.rocm.members; - platforms = platforms.linux; - broken = isBroken || versionAtLeast finalAttrs.version "7.0.0"; - }; -}) diff --git a/pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch b/pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch new file mode 100644 index 0000000000000..74cdbacc8a717 --- /dev/null +++ b/pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch @@ -0,0 +1,14 @@ +diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp +index 34640b3c450d..93c4a4f4ec5c 100644 +--- a/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/lib/Driver/ToolChains/CommonArgs.cpp +@@ -589,8 +589,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, + #endif + + SmallString<1024> Plugin; +- llvm::sys::path::native(Twine(D.Dir) + +- "/../" CLANG_INSTALL_LIBDIR_BASENAME + ++ llvm::sys::path::native(Twine("@libllvmLibdir@") + + PluginName + Suffix, + Plugin); + CmdArgs.push_back(Args.MakeArgString(Twine(PluginPrefix) + Plugin)); diff --git a/pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff b/pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff new file mode 100644 index 0000000000000..5f44dbe15655c --- /dev/null +++ b/pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff @@ -0,0 +1,23 @@ +diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp +index 57368104c914..71c57f72078e 100644 +--- a/lib/Driver/ToolChains/Linux.cpp ++++ b/lib/Driver/ToolChains/Linux.cpp +@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + return; + + // LOCAL_INCLUDE_DIR ++ if (!SysRoot.empty()) + addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, 
"/usr/local/include")); + // TOOL_INCLUDE_DIR + AddMultilibIncludeArgs(DriverArgs, CC1Args); +@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + // Add an include of '/include' directly. This isn't provided by default by + // system GCCs, but is often used with cross-compiling GCCs, and harmless to + // add even when Clang is acting as-if it were a system compiler. ++ if (!SysRoot.empty()) + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include")); + ++ if (!SysRoot.empty()) + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include")); + + if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl()) diff --git a/pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff b/pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff new file mode 100644 index 0000000000000..3a3a712c8bbaf --- /dev/null +++ b/pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff @@ -0,0 +1,40 @@ +diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp +index 06f5e7e7e335..8407d664886a 100644 +--- a/lib/Driver/Compilation.cpp ++++ b/lib/Driver/Compilation.cpp +@@ -340,6 +340,9 @@ private: + void Compilation::ExecuteJobs(const JobList &Jobs, + FailingCommandList &FailingCommands, + bool LogOnly) const { ++ // If >1 job, log as each job finishes so can see progress while building many offloads ++ const bool logJobs = Jobs.size() > 1; ++ auto start_time = std::chrono::steady_clock::now(); + // According to UNIX standard, driver need to continue compiling all the + // inputs on the command line even one of them failed. 
+ // In all but CLMode, execute all the jobs unless the necessary inputs for the +@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs, + + JS.setJobState(Next, JobScheduler::JS_RUN); + auto Work = [&, Next]() { ++ auto job_start_time = std::chrono::steady_clock::now(); + const Command *FailingCommand = nullptr; + if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) { + FailingCommands.push_back(std::make_pair(Res, FailingCommand)); + JS.setJobState(Next, JobScheduler::JS_FAIL); + } else { ++ if (logJobs && Next) { ++ auto now = std::chrono::steady_clock::now(); ++ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count(); ++ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count(); ++ if (duration > 10 && job_duration > 0) { ++ if (Next->getOutputFilenames().empty()) ++ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n"; ++ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true); ++ else ++ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n"; ++ } ++ } ++ + JS.setJobState(Next, JobScheduler::JS_DONE); + } + }; diff --git a/pkgs/development/rocm-modules/6/llvm/default.nix b/pkgs/development/rocm-modules/6/llvm/default.nix index f68d29e37a19c..d162118d0a973 100644 --- a/pkgs/development/rocm-modules/6/llvm/default.nix +++ b/pkgs/development/rocm-modules/6/llvm/default.nix @@ -1,142 +1,500 @@ { - # stdenv FIXME: Try changing back to this with a new ROCm release https://github.com/NixOS/nixpkgs/issues/271943 - gcc12Stdenv, - callPackage, - rocmUpdateScript, - wrapBintoolsWith, + lib, + stdenv, + llvmPackages_18, overrideCC, rocm-device-libs, rocm-runtime, - rocm-thunk, - clr, + fetchFromGitHub, + runCommand, + symlinkJoin, + rdfind, + wrapBintoolsWith, + emptyDirectory, + zstd, + zlib, + gcc-unwrapped, + glibc, + substituteAll, + libffi, + libxml2, + removeReferencesTo, + # Build compilers and stdenv suitable
for profiling + # compressed line tables (-g1 -gz) and + # frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer) + # TODO: Should also apply to downstream packages which use rocmClangStdenv + profilableStdenv ? false, }: let - ## Stage 1 ## - # Projects - llvm = callPackage ./stage-1/llvm.nix { - inherit rocmUpdateScript; - stdenv = gcc12Stdenv; - }; - clang-unwrapped = callPackage ./stage-1/clang-unwrapped.nix { - inherit rocmUpdateScript llvm; - stdenv = gcc12Stdenv; - }; - lld = callPackage ./stage-1/lld.nix { - inherit rocmUpdateScript llvm; - stdenv = gcc12Stdenv; + llvmPackagesNoBintools = llvmPackages_18.override { + bootBintools = null; + bootBintoolsNoLibc = null; }; + useLibcxx = false; # whether rocm stdenv uses libcxx (clang c++ stdlib) instead of gcc stdlibc++ - # Runtimes - runtimes = callPackage ./stage-1/runtimes.nix { - inherit rocmUpdateScript llvm; - stdenv = gcc12Stdenv; - }; + llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM; + llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv ( + llvmPackagesNoBintools.libstdcxxClang.override { + inherit (llvmPackages_18) bintools; + } + ); + stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv; + gcc-include = runCommand "gcc-include" { } '' + mkdir -p $out + ln -s ${gcc-unwrapped}/include/ $out/ + ln -s ${gcc-unwrapped}/lib/ $out/ + ''; + + # A prefix for use as the GCC prefix when building rocmcxx + disallowedRefsForToolchain = [ + stdenv.cc + stdenv.cc.cc + stdenv.cc.bintools + gcc-unwrapped + stdenvToBuildRocmLlvm + ]; + gcc-prefix = + let + gccPrefixPaths = [ + gcc-unwrapped + gcc-unwrapped.lib + glibc.dev + ]; + in + symlinkJoin { + name = "gcc-prefix"; + paths = gccPrefixPaths ++ [ + glibc + ]; + disallowedRequisites = gccPrefixPaths; + postBuild = '' + rm -rf $out/{bin,libexec,nix-support,lib64,share,etc} + rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h + + 
mkdir "$NIX_BUILD_TOP/tmpout" + mv $out/* "$NIX_BUILD_TOP/tmpout" + cp -Lr --no-preserve=mode "$NIX_BUILD_TOP"/tmpout/* $out/ + set -x + ls $out/include/c++ + versionedIncludePath="$(echo $out/include/c++/*/)" + mv $versionedIncludePath/* $out/include/c++/ + rm -rf $versionedIncludePath/ - ## Stage 2 ## - # Helpers - bintools-unwrapped = callPackage ./stage-2/bintools-unwrapped.nix { inherit llvm lld; }; - bintools = wrapBintoolsWith { bintools = bintools-unwrapped; }; - rStdenv = callPackage ./stage-2/rstdenv.nix { - inherit - llvm - clang-unwrapped - lld - runtimes - bintools - ; - stdenv = gcc12Stdenv; + find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} + + + ln -s $out $out/x86_64-unknown-linux-gnu + ''; + }; + version = "6.3.1"; + # major version of this should be the clang version ROCm forked from + rocmLlvmVersion = "18.0.0-${llvmSrc.rev}"; + usefulOutputs = + drv: + builtins.filter (x: x != null) [ + drv + (drv.lib or null) + (drv.dev or null) + ]; + listUsefulOutputs = builtins.concatMap usefulOutputs; + llvmSrc = fetchFromGitHub { + # owner = "ROCm"; + # repo = "llvm-project"; + # rev = "rocm-${version}"; + # hash = "sha256-ii4ErYxfwmis0PSovpG37ybaXmKX4neUjHXliaI2v6k="; + + # Performance improvements cherry-picked on top of rocm-6.3.x + # most importantly, amdgpu-early-alwaysinline memory usage fix + owner = "LunNova"; + repo = "llvm-project-rocm"; + rev = "4182046534deb851753f0d962146e5176f648893"; + hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c="; }; + llvmSrcFixed = llvmSrc; + llvmMajorVersion = lib.versions.major rocmLlvmVersion; + # An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree + # optionally using LLVM libcxx + llvmPackagesRocm = llvmPackages_18.override (_old: { + stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv; + + # not setting gitRelease = because that causes patch selection logic to use git patches + # ROCm LLVM is closer to 18 official
+ # gitRelease = {}; officialRelease = null; + officialRelease = { }; # Set but empty because we're overriding everything from it. + version = rocmLlvmVersion; + src = llvmSrcFixed; + monorepoSrc = llvmSrcFixed; + doCheck = false; + }); + sysrootCompiler = + cc: name: paths: + let + linked = symlinkJoin { inherit name paths; }; + in + runCommand name { } '' + set -x + mkdir -p $out/ + cp --reflink=auto -rL ${linked}/* $out/ + chmod -R +rw $out + mkdir -p $out/usr + ln -s $out/ $out/usr/local + mkdir -p $out/nix-support/ + rm -rf $out/lib64 # we don't need mixed 32 bit + echo 'export CC=clang' >> $out/nix-support/setup-hook + echo 'export CXX=clang++' >> $out/nix-support/setup-hook + mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/ + ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/ + + find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} + + find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} + + + # our /include now has more than clang expects, so this specific dir still needs to point to cc.dev + # FIXME: could copy into a different subdir? 
+ sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake + ${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space + ''; + findClangNostdlibincPatch = + x: + ( + (lib.strings.hasSuffix "add-nostdlibinc-flag.patch" (builtins.baseNameOf x)) + || (lib.strings.hasSuffix "clang-at-least-16-LLVMgold-path.patch" (builtins.baseNameOf x)) + ); + llvmTargetsFlag = "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${ + { + "x86_64" = "X86"; + "aarch64" = "AArch64"; + } + .${llvmStdenv.targetPlatform.parsed.cpu.name} + }"; + # -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too + # FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code, + # instead of containing object code for multiple target architectures. This allows users to defer the choice of whether to + # use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time. 
+ + addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang; + llvmExtraCflags = + "-O3 -DNDEBUG -march=skylake -mtune=znver3" + + (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto") + + (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects") + + (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1"); in rec { - inherit - llvm - clang-unwrapped - lld - bintools - ; - - # Runtimes - libc = callPackage ./stage-2/libc.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; - }; - libunwind = callPackage ./stage-2/libunwind.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; + inherit (llvmPackagesRocm) libunwind; + inherit (llvmPackagesRocm) libcxx; + llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: { + dontStrip = profilableStdenv; + nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + env.NIX_BUILD_ID_STYLE = "fast"; + postPatch = '' + ${old.postPatch or ""} + patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh + ''; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + cmakeFlags = + old.cmakeFlags + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + "-DLLVM_ENABLE_LIBCXX=ON" + ]; + preConfigure = '' + ${old.preConfigure or ""} 
+ cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + postFixup = '' + ${old.postFixup or ""} + remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a" + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + }); + lld = + (llvmPackagesRocm.lld.override { + libllvm = llvm; + ninja = emptyDirectory; + }).overrideAttrs + (old: { + patches = builtins.filter ( + x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x)) + ) old.patches; + dontStrip = profilableStdenv; + nativeBuildInputs = old.nativeBuildInputs ++ [ + llvmPackagesNoBintools.lld + removeReferencesTo + ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + env.NIX_BUILD_ID_STYLE = "fast"; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + cmakeFlags = + old.cmakeFlags + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LIBCXX=ON" + ]; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + postFixup 
= '' + ${old.postFixup or ""} + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + preConfigure = '' + ${old.preConfigure or ""} + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + }); + clang-unwrapped = + ( + (llvmPackagesRocm.clang-unwrapped.override { + libllvm = llvm; + ninja = emptyDirectory; + }).overrideAttrs + ( + old: + let + filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches; + in + { + meta.platforms = [ + "x86_64-linux" + ]; + pname = "${old.pname}-rocm"; + patches = filteredPatches ++ [ + ./clang-bodge-ignore-systemwide-incls.diff + ./clang-log-jobs.diff # FIXME: rebase for 20+? + # FIXME: if llvm was overrideable properly this wouldn't be needed + (substituteAll { + src = ./clang-at-least-16-LLVMgold-path.patch; + libllvmLibdir = "${llvm.lib}/lib"; + }) + ]; + nativeBuildInputs = old.nativeBuildInputs ++ [ + llvmPackagesNoBintools.lld + removeReferencesTo + ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + dontStrip = profilableStdenv; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + env = (old.env or { }) // { + NIX_BUILD_ID_STYLE = "fast"; + }; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ]; + # https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11 + cmakeFlags = + old.cmakeFlags + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool 
"LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_ENABLE_LIBCXX=ON" + "-DLLVM_USE_LINKER=lld" + "-DCLANG_DEFAULT_RTLIB=compiler-rt" + ] + ++ lib.optionals (!useLibcxx) [ + # FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX? + "-DGCC_INSTALL_PREFIX=${gcc-prefix}" + ]; + postFixup = + (old.postFixup or "") + + '' + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + preConfigure = + (old.preConfigure or "") + + '' + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + } + ) + ) + // { + libllvm = llvm; + }; + # A clang that understands standard include searching in a GNU sysroot and will put GPU libs in include path + # in the right order + # and expects its libc to be in the sysroot + rocmcxx = + (sysrootCompiler clang-unwrapped "rocmcxx" ( + listUsefulOutputs ( + [ + clang-unwrapped + bintools + compiler-rt + ] + ++ (lib.optionals useLibcxx [ + libcxx + ]) + ++ (lib.optionals (!useLibcxx) [ + gcc-include + glibc + glibc.dev + ]) + ) + )) + // { + version = llvmMajorVersion; + cc = rocmcxx; + libllvm = llvm; + isClang = true; + isGNU = false; + }; + clang-tools = llvmPackagesRocm.clang-tools.override { + inherit clang-unwrapped clang; }; - libcxxabi = callPackage ./stage-2/libcxxabi.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; - }; - libcxx = callPackage ./stage-2/libcxx.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; - }; - compiler-rt = callPackage ./stage-2/compiler-rt.nix { - inherit rocmUpdateScript llvm; - stdenv = rStdenv; + inherit (llvmPackagesRocm) 
compiler-rt compiler-rt-libc; + bintools = wrapBintoolsWith { + bintools = llvmPackagesRocm.bintools-unwrapped.override { + inherit lld llvm; + }; }; - ## Stage 3 ## - # Helpers - clang = callPackage ./stage-3/clang.nix { - inherit - llvm - lld - clang-unwrapped - bintools - libc - libunwind - libcxxabi - libcxx - compiler-rt - ; - stdenv = gcc12Stdenv; - }; - rocmClangStdenv = overrideCC gcc12Stdenv clang; + clang = rocmcxx; - # Projects - clang-tools-extra = callPackage ./stage-3/clang-tools-extra.nix { - inherit rocmUpdateScript llvm clang-unwrapped; - stdenv = rocmClangStdenv; - }; - libclc = callPackage ./stage-3/libclc.nix { - inherit rocmUpdateScript llvm clang; - stdenv = rocmClangStdenv; - }; - lldb = callPackage ./stage-3/lldb.nix { - inherit rocmUpdateScript clang; - stdenv = rocmClangStdenv; - }; - mlir = callPackage ./stage-3/mlir.nix { - inherit rocmUpdateScript clr; - stdenv = rocmClangStdenv; - }; - polly = callPackage ./stage-3/polly.nix { - inherit rocmUpdateScript; - stdenv = rocmClangStdenv; - }; - flang = callPackage ./stage-3/flang.nix { - inherit rocmUpdateScript clang-unwrapped mlir; - stdenv = rocmClangStdenv; - }; - openmp = callPackage ./stage-3/openmp.nix { - inherit - rocmUpdateScript - llvm - clang-unwrapped - clang - rocm-device-libs - rocm-runtime - rocm-thunk - ; - stdenv = rocmClangStdenv; - }; + # Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects + rocm-merged-llvm = symlinkJoin { + name = "rocm-llvm-merge"; + paths = + [ + llvm + llvm.dev + lld + lld.lib + lld.dev + libunwind + libunwind.dev + compiler-rt + compiler-rt.dev + rocmcxx + ] + ++ lib.optionals useLibcxx [ + libcxx + libcxx.out + libcxx.dev + ]; + postBuild = builtins.unsafeDiscardStringContext '' + found_files=$(find $out -name '*.cmake') + if [ -z "$found_files" ]; then + >&2 echo "Error: No CMake files found in $out" + exit 1 + fi - # Runtimes - pstl = callPackage ./stage-3/pstl.nix { - inherit rocmUpdateScript; - stdenv = 
rocmClangStdenv; + for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do + if grep "$target" $found_files; then + >&2 echo "Unexpected ref to $target (clang-unwrapped) found" + # exit 1 + # # FIXME: enable this to reduce closure size + fi + done + ''; + inherit version; + llvm-src = llvmSrc; }; + + rocmClangStdenv = overrideCC ( + if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv + ) clang; + + # Projects + openmp = + (llvmPackagesRocm.openmp.override { + stdenv = rocmClangStdenv; + # FIXME: this is wrong for cross builds + llvm = rocm-merged-llvm; + targetLlvm = rocm-merged-llvm; + clang-unwrapped = clang; + }).overrideAttrs + (old: { + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ]; + cmakeFlags = + old.cmakeFlags + ++ [ + "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + ]; + env.LLVM = "${rocm-merged-llvm}"; + env.LLVM_DIR = "${rocm-merged-llvm}"; + env.CCC_OVERRIDE_OPTIONS = "+-v"; + buildInputs = old.buildInputs ++ [ + rocm-device-libs + rocm-runtime + zlib + zstd + libxml2 + libffi + ]; + }); } diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix deleted file mode 100644 index dc9af79816c31..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix +++ /dev/null @@ -1,48 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - targetName = "clang-unwrapped"; - targetDir = "clang"; - extraBuildInputs = [ llvm ]; - - extraCMakeFlags = [ - "-DCLANG_INCLUDE_DOCS=ON" - "-DCLANG_INCLUDE_TESTS=ON" - ]; - - extraPostPatch = '' - # Looks like they forgot to add 
finding libedit to the standalone build - ln -s ../cmake/Modules/FindLibEdit.cmake cmake/modules - - substituteInPlace CMakeLists.txt \ - --replace-fail "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)" - - # `No such file or directory: '/build/source/clang/tools/scan-build/bin/scan-build'` - rm test/Analysis/scan-build/*.test - rm test/Analysis/scan-build/rebuild_index/rebuild_index.test - - # `does not depend on a module exporting 'baz.h'` - rm test/Modules/header-attribs.cpp - - # We do not have HIP or the ROCm stack available yet - rm test/Driver/hip-options.hip - - # ???? `ld: cannot find crti.o: No such file or directory` linker issue? - rm test/Interpreter/dynamic-library.cpp - - # `fatal error: 'stdio.h' file not found` - rm test/OpenMP/amdgcn_emit_llvm.c - ''; - - extraPostInstall = '' - mv bin/clang-tblgen $out/bin - ''; - - requiredSystemFeatures = [ "big-parallel" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix deleted file mode 100644 index 6a6226a221e08..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix +++ /dev/null @@ -1,15 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "lld"; - targetDir = targetName; - extraBuildInputs = [ llvm ]; - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix deleted file mode 100644 index a9464da16697a..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix +++ /dev/null @@ -1,11 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix { - inherit stdenv rocmUpdateScript; - requiredSystemFeatures = [ "big-parallel" ]; - isBroken = stdenv.hostPlatform.isAarch64; # 
https://github.com/ROCm/ROCm/issues/1831#issuecomment-1278205344 -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix deleted file mode 100644 index 268ad973b9134..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix +++ /dev/null @@ -1,32 +0,0 @@ -{ - lib, - stdenv, - callPackage, - rocmUpdateScript, - llvm, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; - buildMan = false; - buildTests = false; - targetName = "runtimes"; - targetDir = targetName; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - "libcxx" - "compiler-rt" - ]; - - extraBuildInputs = [ llvm ]; - - extraCMakeFlags = [ - "-DLIBCXX_INCLUDE_BENCHMARKS=OFF" - "-DLIBCXX_CXX_ABI=libcxxabi" - ]; - - extraLicenses = [ lib.licenses.mit ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list b/pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list deleted file mode 100644 index d5e1f675079fa..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list +++ /dev/null @@ -1,175 +0,0 @@ -../libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp -../libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp -../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp -../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/construct.cxx2a.pass.cpp -../libcxx/test/libcxx/input.output/filesystems/class.directory_entry/directory_entry.mods/last_write_time.pass.cpp -../libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp -../libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp -../libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/no_allocation.pass.cpp 
-../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_underaligned_buffer.pass.cpp -../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp -../libcxx/test/std/containers/associative/map/map.access/index_key.pass.cpp -../libcxx/test/std/containers/associative/map/map.access/index_rv_key.pass.cpp -../libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp -../libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp -../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp -../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp -../libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp -../libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp -../libcxx/test/std/containers/unord/unord.map/unord.map.elem/index.pass.cpp -../libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp -../libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp -../libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp 
-../libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp 
-../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/path.decompose.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_normal.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_relative_and_proximate.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/clear.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/make_preferred.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/remove_filename.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_extension.pass.cpp 
-../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_filename.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/swap.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.factory.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/swap.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_large.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp 
-../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_symlink/copy_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory_symlink/create_directory_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_symlink/create_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp 
-../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.proximate/proximate.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/remove_all.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove/remove.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.temp_dir_path/temp_directory_path.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp -../libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp -../libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp -../libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp -../libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp 
-../libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp -../libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/default.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp -../libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp -../libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.alg/swap.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_assign.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_assign.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp -../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp -../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp 
-../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_throw.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.ctor/without_buffer.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_deallocate.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_initial_buffer.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_zero_sized_buffer.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.ctor/ctor_does_not_allocate.pass.cpp 
-../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_overaligned_request.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_reuse_blocks.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate.pass.cpp -../libcxx/test/std/language.support/support.dynamic/hardware_inference_size.compile.pass.cpp -../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array14.pass.cpp -../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete14.pass.cpp -../libcxx/test/libcxx/selftest/sh.cpp/empty.sh.cpp diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix deleted file mode 100644 index e17a913d4bb77..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix +++ /dev/null @@ -1,29 +0,0 @@ -{ - runCommand, - llvm, - lld, -}: - -runCommand "rocm-llvm-binutils-${llvm.version}" { preferLocalBuild = true; } '' - mkdir -p $out/bin - - for prog in ${lld}/bin/*; do - ln -s $prog $out/bin/$(basename $prog) - done - - for prog in ${llvm}/bin/*; do - ln -sf $prog $out/bin/$(basename $prog) - done - - ln -s ${llvm}/bin/llvm-ar $out/bin/ar - ln -s ${llvm}/bin/llvm-as $out/bin/as - ln -s ${llvm}/bin/llvm-dwp $out/bin/dwp - ln -s ${llvm}/bin/llvm-nm $out/bin/nm - ln -s ${llvm}/bin/llvm-objcopy $out/bin/objcopy - ln -s ${llvm}/bin/llvm-objdump 
$out/bin/objdump - ln -s ${llvm}/bin/llvm-ranlib $out/bin/ranlib - ln -s ${llvm}/bin/llvm-readelf $out/bin/readelf - ln -s ${llvm}/bin/llvm-size $out/bin/size - ln -s ${llvm}/bin/llvm-strip $out/bin/strip - ln -s ${lld}/bin/lld $out/bin/ld -'' diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix deleted file mode 100644 index f3b8648e11045..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix +++ /dev/null @@ -1,64 +0,0 @@ -{ - lib, - stdenv, - callPackage, - rocmUpdateScript, - llvm, - glibc, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "compiler-rt"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - "libcxx" - targetName - ]; - - extraCMakeFlags = [ - "-DCOMPILER_RT_INCLUDE_TESTS=ON" - "-DCOMPILER_RT_USE_LLVM_UNWINDER=ON" - "-DCOMPILER_RT_CXX_LIBRARY=libcxx" - "-DCOMPILER_RT_CAN_EXECUTE_TESTS=OFF" # We can't run most of these - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXXABI_INCLUDE_TESTS=OFF" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - "-DLIBCXXABI_INSTALL_LIBRARY=OFF" - "-DLIBCXXABI_INSTALL_HEADERS=OFF" - "-DLIBCXX_INCLUDE_DOCS=OFF" - "-DLIBCXX_INCLUDE_TESTS=OFF" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - "-DLIBCXX_INSTALL_LIBRARY=OFF" - "-DLIBCXX_INSTALL_HEADERS=OFF" - ]; - - extraPostPatch = '' - # `No such file or directory: 'ldd'` - substituteInPlace ../compiler-rt/test/lit.common.cfg.py \ - --replace "'ldd'," "'${glibc.bin}/bin/ldd'," - - # We can run these - substituteInPlace ../compiler-rt/test/CMakeLists.txt \ - --replace "endfunction()" 
"endfunction()''\nadd_subdirectory(builtins)''\nadd_subdirectory(shadowcallstack)" - - # Could not launch llvm-config in /build/source/runtimes/build/bin - mkdir -p build/bin - ln -s ${llvm}/bin/llvm-config build/bin - ''; - - extraLicenses = [ lib.licenses.mit ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix deleted file mode 100644 index 26d33460d4338..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix +++ /dev/null @@ -1,27 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "libc"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - - extraPostPatch = '' - # `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:` - # We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working... 
- substituteInPlace ../libc/test/src/math/log10_test.cpp \ - --replace-fail "i < N" "i < 0" \ - --replace-fail "test(mpfr::RoundingMode::Nearest);" "" \ - --replace-fail "test(mpfr::RoundingMode::Downward);" "" \ - --replace-fail "test(mpfr::RoundingMode::Upward);" "" \ - --replace-fail "test(mpfr::RoundingMode::TowardZero);" "" - ''; - - checkTargets = [ "check-${targetName}" ]; - hardeningDisable = [ "fortify" ]; # Prevent `error: "Assumed value of MB_LEN_MAX wrong"` -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix deleted file mode 100644 index b9ed102d5408a..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix +++ /dev/null @@ -1,43 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "libcxx"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - targetName - ]; - - extraCMakeFlags = [ - "-DLIBCXX_INCLUDE_DOCS=ON" - "-DLIBCXX_INCLUDE_TESTS=ON" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXXABI_INCLUDE_TESTS=OFF" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - "-DLIBCXXABI_INSTALL_LIBRARY=OFF" - "-DLIBCXXABI_INSTALL_HEADERS=OFF" - ]; - - # Most of these can't find `bash` or `mkdir`, might just be hard-coded paths, or PATH is altered - extraPostPatch = '' - chmod +w -R ../libcxx/test/{libcxx,std} - cat ${./1000-libcxx-failing-tests.list} | xargs -d \\n rm - ''; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix deleted file mode 100644 index 
bc54e17be45fb..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix +++ /dev/null @@ -1,38 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "libcxxabi"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - targetName - "libcxx" - ]; - - extraCMakeFlags = [ - "-DLIBCXXABI_INCLUDE_TESTS=ON" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXX_INCLUDE_DOCS=OFF" - "-DLIBCXX_INCLUDE_TESTS=OFF" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - "-DLIBCXX_INSTALL_LIBRARY=OFF" - "-DLIBCXX_INSTALL_HEADERS=OFF" - ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix deleted file mode 100644 index fb5e7cb3b68ca..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix +++ /dev/null @@ -1,27 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "libunwind"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - - extraCMakeFlags = [ - "-DLIBUNWIND_INCLUDE_DOCS=ON" - "-DLIBUNWIND_INCLUDE_TESTS=ON" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - ]; - - extraPostPatch = '' - # `command had no output on stdout or stderr` (Says these unsupported tests) - chmod +w -R ../libunwind/test - rm ../libunwind/test/floatregister.pass.cpp - rm ../libunwind/test/unwind_leaffunction.pass.cpp - rm ../libunwind/test/libunwind_02.pass.cpp - ''; -} diff --git 
a/pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix deleted file mode 100644 index f83abe36cc2e2..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix +++ /dev/null @@ -1,37 +0,0 @@ -{ - stdenv, - overrideCC, - wrapCCWith, - llvm, - clang-unwrapped, - lld, - runtimes, - bintools, -}: - -overrideCC stdenv (wrapCCWith rec { - inherit bintools; - libcxx = runtimes; - cc = clang-unwrapped; - gccForLibs = stdenv.cc.cc; - - extraPackages = [ - llvm - lld - ]; - - nixSupport.cc-cflags = [ - "-resource-dir=$out/resource-root" - "-fuse-ld=lld" - "-rtlib=compiler-rt" - "-unwindlib=libunwind" - "-Wno-unused-command-line-argument" - ]; - - extraBuildCommands = '' - clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/resource-root - ln -s ${cc}/lib/clang/$clang_version/include $out/resource-root - ln -s ${runtimes}/lib $out/resource-root - ''; -}) diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list b/pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list deleted file mode 100644 index e53b21b3c5358..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list +++ /dev/null @@ -1,122 +0,0 @@ -runtime/test/tasking/hidden_helper_task/gtid.cpp -runtime/test/ompt/parallel/parallel_if0.c -runtime/test/ompt/parallel/serialized.c -runtime/test/ompt/teams/parallel_team.c -runtime/test/ompt/teams/serial_teams.c -runtime/test/ompt/teams/serialized.c -runtime/test/ompt/teams/team.c -libomptarget/test/api/assert.c -libomptarget/test/api/omp_device_managed_memory.c -libomptarget/test/api/omp_device_memory.c -libomptarget/test/api/omp_get_device_num.c -libomptarget/test/api/omp_host_pinned_memory.c -libomptarget/test/api/omp_host_pinned_memory_alloc.c -libomptarget/test/api/omp_target_memcpy_async1.c -libomptarget/test/api/omp_target_memcpy_async2.c 
-libomptarget/test/api/omp_target_memcpy_rect_async1.c -libomptarget/test/api/omp_target_memcpy_rect_async2.c -libomptarget/test/mapping/array_section_implicit_capture.c -libomptarget/test/mapping/data_absent_at_exit.c -libomptarget/test/mapping/data_member_ref.cpp -libomptarget/test/mapping/declare_mapper_api.cpp -libomptarget/test/mapping/declare_mapper_target.cpp -libomptarget/test/mapping/declare_mapper_target_data.cpp -libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp -libomptarget/test/mapping/firstprivate_aligned.cpp -libomptarget/test/mapping/has_device_addr.cpp -libomptarget/test/mapping/implicit_device_ptr.c -libomptarget/test/mapping/is_device_ptr.cpp -libomptarget/test/mapping/lambda_mapping.cpp -libomptarget/test/mapping/low_alignment.c -libomptarget/test/mapping/map_back_race.cpp -libomptarget/test/mapping/power_of_two_alignment.c -libomptarget/test/mapping/pr38704.c -libomptarget/test/mapping/prelock.cpp -libomptarget/test/mapping/present/target_data_at_exit.c -libomptarget/test/mapping/private_mapping.c -libomptarget/test/mapping/ptr_and_obj_motion.c -libomptarget/test/mapping/reduction_implicit_map.cpp -libomptarget/test/mapping/target_derefence_array_pointrs.cpp -libomptarget/test/mapping/target_map_for_member_data.cpp -libomptarget/test/mapping/target_update_array_extension.c -libomptarget/test/mapping/target_use_device_addr.c -libomptarget/test/offloading/atomic-compare-signedness.c -libomptarget/test/offloading/bug47654.cpp -libomptarget/test/offloading/bug49021.cpp -libomptarget/test/offloading/bug49779.cpp -libomptarget/test/offloading/bug50022.cpp -libomptarget/test/offloading/bug51781.c -libomptarget/test/offloading/bug51982.c -libomptarget/test/offloading/bug53727.cpp -libomptarget/test/offloading/complex_reduction.cpp -libomptarget/test/offloading/cuda_no_devices.c -libomptarget/test/offloading/d2d_memcpy.c -libomptarget/test/offloading/dynamic_module.c -libomptarget/test/offloading/dynamic_module_load.c 
-libomptarget/test/offloading/global_constructor.cpp -libomptarget/test/offloading/lone_target_exit_data.c -libomptarget/test/offloading/memory_manager.cpp -libomptarget/test/offloading/parallel_offloading_map.cpp -libomptarget/test/offloading/static_linking.c -libomptarget/test/offloading/std_complex_arithmetic.cpp -libomptarget/test/offloading/target-teams-atomic.c -libomptarget/test/offloading/target_constexpr_mapping.cpp -libomptarget/test/offloading/target_critical_region.cpp -libomptarget/test/offloading/target_depend_nowait.cpp -libomptarget/test/offloading/target_nowait_target.cpp -libomptarget/test/offloading/taskloop_offload_nowait.cpp -libomptarget/test/offloading/test_libc.cpp -libomptarget/test/ompt/veccopy.c -libomptarget/test/ompt/veccopy_disallow_both.c -libomptarget/test/ompt/veccopy_emi.c -libomptarget/test/ompt/veccopy_emi_map.c -libomptarget/test/ompt/veccopy_map.c -libomptarget/test/ompt/veccopy_no_device_init.c -libomptarget/test/ompt/veccopy_wrong_return.c -libomptarget/test/api/is_initial_device.c -libomptarget/test/mapping/declare_mapper_nested_default_mappers_array_subscript.cpp -libomptarget/test/mapping/declare_mapper_nested_default_mappers_ptr_subscript.cpp -libomptarget/test/mapping/declare_mapper_nested_default_mappers_var.cpp -libomptarget/test/mapping/target_pointers_members_map.cpp -libomptarget/test/api/omp_dynamic_shared_memory_mixed.c -libomptarget/test/api/omp_env_vars.c -libomptarget/test/api/omp_get_mapped_ptr.c -libomptarget/test/api/omp_get_num_devices.c -libomptarget/test/api/omp_get_num_devices_with_empty_target.c -libomptarget/test/mapping/alloc_fail.c -libomptarget/test/mapping/array_section_use_device_ptr.c -libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp -libomptarget/test/mapping/declare_mapper_nested_mappers.cpp -libomptarget/test/mapping/declare_mapper_target_update.cpp -libomptarget/test/mapping/delete_inf_refcount.c -libomptarget/test/mapping/lambda_by_value.cpp 
-libomptarget/test/mapping/ompx_hold/omp_target_disassociate_ptr.c -libomptarget/test/mapping/ompx_hold/struct.c -libomptarget/test/mapping/ompx_hold/target-data.c -libomptarget/test/mapping/ompx_hold/target.c -libomptarget/test/mapping/present/target.c -libomptarget/test/mapping/present/target_array_extension.c -libomptarget/test/mapping/present/target_data.c -libomptarget/test/mapping/present/target_data_array_extension.c -libomptarget/test/mapping/present/target_enter_data.c -libomptarget/test/mapping/present/target_exit_data_delete.c -libomptarget/test/mapping/present/target_exit_data_release.c -libomptarget/test/mapping/present/target_update.c -libomptarget/test/mapping/present/target_update_array_extension.c -libomptarget/test/mapping/present/zero_length_array_section.c -libomptarget/test/mapping/present/zero_length_array_section_exit.c -libomptarget/test/mapping/target_data_array_extension_at_exit.c -libomptarget/test/mapping/target_has_device_addr.c -libomptarget/test/mapping/target_implicit_partial_map.c -libomptarget/test/mapping/target_wrong_use_device_addr.c -libomptarget/test/offloading/host_as_target.c -libomptarget/test/offloading/info.c -libomptarget/test/offloading/offloading_success.c -libomptarget/test/offloading/offloading_success.cpp -libomptarget/test/offloading/wtime.c -libomptarget/test/unified_shared_memory/api.c -libomptarget/test/unified_shared_memory/associate_ptr.c -libomptarget/test/unified_shared_memory/close_enter_exit.c -libomptarget/test/unified_shared_memory/close_manual.c -libomptarget/test/unified_shared_memory/close_member.c -libomptarget/test/unified_shared_memory/close_modifier.c diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list b/pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list deleted file mode 100644 index 0b3d2d22592da..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list +++ /dev/null @@ -1,11 +0,0 @@ 
-./test/Target/LLVMIR/openmp-llvm.mlir -./test/mlir-spirv-cpu-runner/double.mlir -./test/mlir-spirv-cpu-runner/simple_add.mlir -./test/mlir-vulkan-runner/addf.mlir -./test/mlir-vulkan-runner/addi.mlir -./test/mlir-vulkan-runner/addi8.mlir -./test/mlir-vulkan-runner/mulf.mlir -./test/mlir-vulkan-runner/smul_extended.mlir -./test/mlir-vulkan-runner/subf.mlir -./test/mlir-vulkan-runner/time.mlir -./test/mlir-vulkan-runner/umul_extended.mlir diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix deleted file mode 100644 index 4351c4bc6629d..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix +++ /dev/null @@ -1,43 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, - clang-unwrapped, - gtest, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildTests = false; # `invalid operands to binary expression ('std::basic_stringstream' and 'const llvm::StringRef')` - targetName = "clang-tools-extra"; - - targetProjects = [ - "clang" - "clang-tools-extra" - ]; - - extraBuildInputs = [ gtest ]; - - extraCMakeFlags = [ - "-DLLVM_INCLUDE_DOCS=OFF" - "-DLLVM_INCLUDE_TESTS=OFF" - "-DCLANG_INCLUDE_DOCS=OFF" - "-DCLANG_INCLUDE_TESTS=ON" - "-DCLANG_TOOLS_EXTRA_INCLUDE_DOCS=ON" - ]; - - extraPostInstall = '' - # Remove LLVM and Clang - for path in `find ${llvm} ${clang-unwrapped}`; do - if [ $path != ${llvm} ] && [ $path != ${clang-unwrapped} ]; then - rm -f $out''${path#${llvm}} $out''${path#${clang-unwrapped}} || true - fi - done - - # Cleanup empty directories - find $out -type d -empty -delete - ''; - - requiredSystemFeatures = [ "big-parallel" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix deleted file mode 100644 index 4afaa726ad786..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix +++ /dev/null @@ -1,77 +0,0 
@@ -{ - stdenv, - wrapCCWith, - llvm, - lld, - clang-unwrapped, - bintools, - libc, - libunwind, - libcxxabi, - libcxx, - compiler-rt, -}: - -wrapCCWith rec { - inherit libcxx bintools; - - # We do this to avoid HIP pathing problems, and mimic a monolithic install - cc = stdenv.mkDerivation (finalAttrs: { - inherit (clang-unwrapped) version; - pname = "rocm-llvm-clang"; - dontUnpack = true; - - installPhase = '' - runHook preInstall - - clang_version=`${clang-unwrapped}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/{bin,include/c++/v1,lib/{cmake,clang/$clang_version/{include,lib}},libexec,share} - - for path in ${llvm} ${clang-unwrapped} ${lld} ${libc} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do - cp -as $path/* $out - chmod +w $out/{*,include/c++/v1,lib/{clang/$clang_version/include,cmake}} - rm -f $out/lib/libc++.so - done - - ln -s $out/lib/* $out/lib/clang/$clang_version/lib - ln -sf $out/include/* $out/lib/clang/$clang_version/include - - runHook postInstall - ''; - - passthru.isClang = true; - passthru.isROCm = true; - }); - - gccForLibs = stdenv.cc.cc; - - extraPackages = [ - llvm - lld - libc - libunwind - libcxxabi - compiler-rt - ]; - - nixSupport.cc-cflags = [ - "-resource-dir=$out/resource-root" - "-fuse-ld=lld" - "-rtlib=compiler-rt" - "-unwindlib=libunwind" - "-Wno-unused-command-line-argument" - ]; - - extraBuildCommands = '' - clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/resource-root - ln -s ${cc}/lib/clang/$clang_version/{include,lib} $out/resource-root - - # Not sure why, but hardening seems to make things break - echo "" > $out/nix-support/add-hardening.sh - - # GPU compilation uses builtin `lld` - substituteInPlace $out/bin/{clang,clang++} \ - --replace-fail "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;" - ''; -} diff --git 
a/pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix deleted file mode 100644 index c6e72d56ce566..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix +++ /dev/null @@ -1,32 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - clang-unwrapped, - mlir, - graphviz, - python3Packages, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - targetName = "flang"; - targetDir = targetName; - - extraNativeBuildInputs = [ - graphviz - python3Packages.sphinx-markdown-tables - ]; - - extraBuildInputs = [ mlir ]; - - extraCMakeFlags = [ - "-DCLANG_DIR=${clang-unwrapped}/lib/cmake/clang" - "-DMLIR_TABLEGEN_EXE=${mlir}/bin/mlir-tblgen" - "-DCLANG_TABLEGEN_EXE=${clang-unwrapped}/bin/clang-tblgen" - "-DFLANG_INCLUDE_TESTS=OFF" # `The dependency target "Bye" of target ...` - ]; - - # `flang/lib/Semantics/check-omp-structure.cpp:1905:1: error: no member named 'v' in 'Fortran::parser::OmpClause::OmpxDynCgroupMem'` - isBroken = true; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix deleted file mode 100644 index c8a6b98d91301..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix +++ /dev/null @@ -1,38 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, - clang, - spirv-llvm-translator, -}: - -let - spirv = (spirv-llvm-translator.override { inherit llvm; }); -in -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "libclc"; - targetDir = targetName; - extraBuildInputs = [ spirv ]; - - # `spirv-mesa3d` isn't compiling with LLVM 15.0.0, it does with LLVM 14.0.0 - # Try removing the `spirv-mesa3d` and `clspv` patches next update - # `clspv` tests fail, unresolved calls - extraPostPatch = '' - substituteInPlace CMakeLists.txt \ - --replace-fail 
"find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ - "find_program( LLVM_CLANG clang PATHS \"${clang}/bin\" NO_DEFAULT_PATH )" \ - --replace-fail "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ - "find_program( LLVM_SPIRV llvm-spirv PATHS \"${spirv}/bin\" NO_DEFAULT_PATH )" \ - --replace-fail " spirv-mesa3d-" "" \ - --replace-fail " spirv64-mesa3d-" "" \ - --replace-fail "NOT \''${t} MATCHES" \ - "NOT \''${ARCH} STREQUAL \"clspv\" AND NOT \''${ARCH} STREQUAL \"clspv64\" AND NOT \''${t} MATCHES" - ''; - - checkTargets = [ ]; - isBroken = true; # ROCm 5.7.0 doesn't have IR/AttributeMask.h yet...? -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix deleted file mode 100644 index 31694ce50113f..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix +++ /dev/null @@ -1,40 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - clang, - xz, - swig, - lua5_3, - graphviz, - gtest, - python3Packages, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildTests = false; # FIXME: Bad pathing for clang executable in tests, using relative path most likely - targetName = "lldb"; - targetDir = targetName; - extraNativeBuildInputs = [ python3Packages.sphinx-automodapi ]; - - extraBuildInputs = [ - xz - swig - lua5_3 - graphviz - gtest - ]; - - extraCMakeFlags = [ - "-DLLDB_EXTERNAL_CLANG_RESOURCE_DIR=${clang}/resource-root/lib/clang/$clang_version" - "-DLLDB_INCLUDE_TESTS=ON" - "-DLLDB_INCLUDE_UNITTESTS=ON" - ]; - - extraPostPatch = '' - export clang_version=`clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - ''; - - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix deleted file mode 100644 index 9b87769e3e8fe..0000000000000 --- 
a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix +++ /dev/null @@ -1,61 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - clr, - vulkan-headers, - vulkan-loader, - glslang, - shaderc, - fetchpatch, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No decent way to hack this to work - buildMan = false; # No man pages to build - targetName = "mlir"; - targetDir = targetName; - - # Fix `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'` - extraPatches = [ - (fetchpatch { - url = "https://github.com/ROCm/llvm-project/commit/f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74.patch"; - hash = "sha256-3c91A9InMKxm+JcnWxoUeOU68y5I6w1AAXx6T9UByqI="; - }) - ]; - extraNativeBuildInputs = [ clr ]; - - extraBuildInputs = [ - vulkan-headers - vulkan-loader - glslang - shaderc - ]; - - extraCMakeFlags = [ - "-DMLIR_INCLUDE_DOCS=ON" - "-DMLIR_INCLUDE_TESTS=ON" - "-DMLIR_ENABLE_ROCM_RUNNER=ON" - "-DMLIR_ENABLE_SPIRV_CPU_RUNNER=ON" - "-DMLIR_ENABLE_VULKAN_RUNNER=ON" - "-DROCM_TEST_CHIPSET=gfx000" # CPU runner - ]; - - extraPostPatch = '' - # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` - substituteInPlace CMakeLists.txt \ - --replace-fail "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE" - - # Mainly `No such file or directory` - cat ${./1001-mlir-failing-tests.list} | xargs -d \\n rm - ''; - - extraPostInstall = '' - mkdir -p $out/bin - mv bin/mlir-tblgen $out/bin - ''; - - checkTargets = [ "check-${targetName}" ]; - requiredSystemFeatures = [ "big-parallel" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix deleted file mode 100644 index 2581661057809..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix +++ /dev/null @@ -1,55 +0,0 @@ -{ - lib, - stdenv, - callPackage, - rocmUpdateScript, - llvm, - clang, - 
clang-unwrapped, - rocm-device-libs, - rocm-runtime, - rocm-thunk, - perl, - elfutils, - libdrm, - numactl, - lit, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - targetName = "openmp"; - targetDir = targetName; - extraNativeBuildInputs = [ perl ]; - - extraBuildInputs = [ - rocm-device-libs - rocm-runtime - rocm-thunk - elfutils - libdrm - numactl - ]; - - extraCMakeFlags = [ - "-DCMAKE_MODULE_PATH=/build/source/llvm/cmake/modules" # For docs - "-DCLANG_TOOL=${clang}/bin/clang" - "-DCLANG_OFFLOAD_BUNDLER_TOOL=${clang-unwrapped}/bin/clang-offload-bundler" - "-DPACKAGER_TOOL=${clang-unwrapped}/bin/clang-offload-packager" - "-DOPENMP_LLVM_TOOLS_DIR=${llvm}/bin" - "-DOPENMP_LLVM_LIT_EXECUTABLE=${lit}/bin/.lit-wrapped" - "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" - ]; - - extraPostPatch = '' - # We can't build this target at the moment - substituteInPlace libomptarget/DeviceRTL/CMakeLists.txt \ - --replace "gfx1010" "" - - # No idea what's going on here... - cat ${./1000-openmp-failing-tests.list} | xargs -d \\n rm - ''; - - checkTargets = [ "check-${targetName}" ]; - extraLicenses = [ lib.licenses.mit ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix deleted file mode 100644 index d70a353d3a8cf..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix +++ /dev/null @@ -1,19 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - targetName = "polly"; - targetDir = targetName; - - extraPostPatch = '' - # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` - substituteInPlace CMakeLists.txt \ - --replace-fail "NOT TARGET gtest" "FALSE" - ''; - - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix deleted 
file mode 100644 index 63fba911677cc..0000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix +++ /dev/null @@ -1,16 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - buildTests = false; # Too many errors - targetName = "pstl"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/migraphx/default.nix b/pkgs/development/rocm-modules/6/migraphx/default.nix index 137f3a2347087..3c6b7b68b39a2 100644 --- a/pkgs/development/rocm-modules/6/migraphx/default.nix +++ b/pkgs/development/rocm-modules/6/migraphx/default.nix @@ -7,7 +7,6 @@ cmake, rocm-cmake, clr, - clang-tools-extra, openmp, rocblas, rocmlir, @@ -54,7 +53,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "migraphx"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -71,7 +70,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "AMDMIGraphX"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-VDYUSpWYAdJ63SKVCO26DVAC3RtZM7otqN0sYUA6DBQ="; + hash = "sha256-h9cTbrMwHeRGVJS/uHQnCXplNcrBqxbhwz2AcAEso0M="; }; nativeBuildInputs = @@ -80,7 +79,6 @@ stdenv.mkDerivation (finalAttrs: { cmake rocm-cmake clr - clang-tools-extra python3Packages.python ] ++ lib.optionals buildDocs [ @@ -172,8 +170,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { diff --git a/pkgs/development/rocm-modules/6/miopen/default.nix b/pkgs/development/rocm-modules/6/miopen/default.nix index e815ca00b7f07..b8970729ca7a8 100644 --- a/pkgs/development/rocm-modules/6/miopen/default.nix +++ 
b/pkgs/development/rocm-modules/6/miopen/default.nix @@ -10,9 +10,13 @@ rocm-cmake, rocblas, rocmlir, + rocrand, + rocm-runtime, + rocm-merged-llvm, + hipblas-common, + hipblas, + hipblaslt, clr, - clang-tools-extra, - clang-ocl, composable_kernel, frugally-deep, rocm-docs-core, @@ -30,43 +34,48 @@ rocm-comgr, roctracer, python3Packages, + # FIXME: should be able to use all clr targets + gpuTargets ? [ + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + ], # clr.gpuTargets buildDocs ? false, # Needs internet because of rocm-docs-core buildTests ? false, }: let - version = "6.0.2"; + # FIXME: cmake files need patched to include this properly + cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "MIOpen"; rev = "rocm-${version}"; - hash = "sha256-mbOdlSb0ESKi9hMkq3amv70Xkp/YKnZYre24d/y5TD0="; + hash = "sha256-KV+tJPD4HQayY8zD4AdOFxxYRnyI47suxX5OgZ7mpdU="; fetchLFS = true; + fetchSubmodules = true; + # WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream leaveDotGit = true; - - # If you're reading this, it's gonna take a bit of time. - # fetchSubModules doesn't work with postFetch??? - # fetchLFS isn't actually fetching the LFS files... 
postFetch = '' export HOME=$(mktemp -d) cd $out - - # We need more history to fetch LFS files + set -x git remote add origin $url - git fetch origin + git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version} git clean -fdx - git checkout rocm-${version} - - # We need to do this manually since using leaveDotGit and fetchSubmodules errors - git submodule update --init - - # Fetch the LFS files + git switch -c rocm-${version} refs/tags/rocm-${version} + git config lfs.fetchexclude "none" + rm .lfsconfig git lfs install - git lfs fetch --all + git lfs track "*.kdb.bz2" + GIT_TRACE=1 git lfs fetch --include="src/kernels/**" + GIT_TRACE=1 git lfs pull --include="src/kernels/**" git lfs checkout - # Remove the defunct .git folder rm -rf .git ''; }; @@ -112,8 +121,16 @@ stdenv.mkDerivation (finalAttrs: { inherit version src; pname = "miopen"; + env.CFLAGS = cFlags; + env.CXXFLAGS = cFlags; + + preConfigure = '' + makeFlagsArray+=("-l$(nproc)") + ''; # Find zstd and add to target. Mainly for torch. 
patches = [ + ./skip-preexisting-dbs.patch + ./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448 (fetchpatch { url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch"; hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M="; @@ -122,11 +139,11 @@ stdenv.mkDerivation (finalAttrs: { url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch"; hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs="; }) - (fetchpatch { - name = "Extend-MIOpen-ISA-compatibility.patch"; - url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch"; - hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU="; - }) + # (fetchpatch { + # name = "Extend-MIOpen-ISA-compatibility.patch"; + # url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch"; + # hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU="; + # }) ]; outputs = @@ -139,20 +156,24 @@ stdenv.mkDerivation (finalAttrs: { ++ lib.optionals buildTests [ "test" ]; + enableParallelBuilding = true; + env.ROCM_PATH = clr; + env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ]; + env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; nativeBuildInputs = [ pkg-config cmake rocm-cmake clr - clang-tools-extra ]; buildInputs = [ + hipblas + hipblas-common rocblas rocmlir - clang-ocl composable_kernel half boost @@ -161,6 +182,8 @@ stdenv.mkDerivation (finalAttrs: { nlohmann_json frugally-deep roctracer + rocrand + hipblaslt ] ++ lib.optionals buildDocs [ latex @@ -178,15 +201,33 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_FLAGS=-Wno-#warnings" # -> - "-DUNZIPPER=${bzip2}/bin/bunzip2" + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}" + "-DMIOPEN_USE_SQLITE_PERFDB=ON" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + 
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + "-DCMAKE_BUILD_TYPE=Release" + + # needs to stream to stdout so bzcat rather than bunzip2 + "-DUNZIPPER=${bzip2}/bin/bzcat" + + # isnan not defined for float error, probably still needs hipcc? should try without hipcc again next bump + "-DCMAKE_C_COMPILER=amdclang" + "-DCMAKE_CXX_COMPILER=amdclang++" + "-DROCM_PATH=${clr}" + "-DHIP_ROOT_DIR=${clr}" + (lib.cmakeBool "MIOPEN_USE_ROCBLAS" true) + (lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true) + (lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" true) + (lib.cmakeBool "MIOPEN_USE_HIPRTC" true) + (lib.cmakeBool "MIOPEN_USE_COMGR" true) + "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" "-DMIOPEN_BACKEND=HIP" ] ++ lib.optionals buildTests [ @@ -195,24 +236,38 @@ stdenv.mkDerivation (finalAttrs: { ]; postPatch = '' + echo "HACK: disabling clang-tidy" + substituteInPlace cmake/ClangTidy.cmake \ + --replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy) + endmacro() + macro(enable_clang_tidy_unused)' \ + --replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET) + return()' + patchShebangs test src/composable_kernel fin utils install_deps.cmake - substituteInPlace CMakeLists.txt \ - --replace "unpack_db(\"\''${CMAKE_SOURCE_DIR}/src/kernels/\''${FILE_NAME}.kdb.bz2\")" "" \ - --replace "MIOPEN_HIP_COMPILER MATCHES \".*clang\\\\+\\\\+$\"" "true" \ - --replace "set(MIOPEN_TIDY_ERRORS ALL)" "" # error: missing required key 'key' + #--replace "unpack_db(\"\''${CMAKE_SOURCE_DIR}/src/kernels/\''${FILE_NAME}.kdb.bz2\")" "" \ + # substituteInPlace CMakeLists.txt \ + # --replace "MIOPEN_HIP_COMPILER MATCHES \".*clang\\\\+\\\\+$\"" "true" \ + # --replace "set(MIOPEN_TIDY_ERRORS ALL)" "" # error: missing required key 'key' substituteInPlace 
test/gtest/CMakeLists.txt \ --replace "include(googletest)" "" - substituteInPlace test/gtest/CMakeLists.txt \ - --replace-fail " gtest_main " " ${gtest}/lib/libgtest.so ${gtest}/lib/libgtest_main.so " + # substituteInPlace test/gtest/CMakeLists.txt \ + # --replace-fail " gtest_main " " ${gtest}/lib/libgtest.so ${gtest}/lib/libgtest_main.so " ln -sf ${gfx900} src/kernels/gfx900.kdb ln -sf ${gfx906} src/kernels/gfx906.kdb ln -sf ${gfx908} src/kernels/gfx908.kdb ln -sf ${gfx90a} src/kernels/gfx90a.kdb ln -sf ${gfx1030} src/kernels/gfx1030.kdb + mkdir -p build/share/miopen/db/ + ln -sf ${gfx900} build/share/miopen/db/gfx900.kdb + ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb + ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb + ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb + ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb ''; # Unfortunately, it seems like we have to call make on these manually @@ -249,13 +304,14 @@ stdenv.mkDerivation (finalAttrs: { ) } $test/bin/* ''; + # doCheck = false; # FIXME: clang-tidy really slow :( requiredSystemFeatures = [ "big-parallel" ]; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -264,8 +320,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/miopen/fix-isnan.patch b/pkgs/development/rocm-modules/6/miopen/fix-isnan.patch new file mode 100644 index 0000000000000..71c140d57c8ed --- /dev/null +++ b/pkgs/development/rocm-modules/6/miopen/fix-isnan.patch @@ -0,0 +1,31 @@ +From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001 +From: "Sv. 
Lockal" +Date: Tue, 24 Dec 2024 14:43:10 +0000 +Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems + +Closes #3441 +--- + driver/reducecalculation_driver.hpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp +index 8226b3c953..2001969509 100644 +--- a/driver/reducecalculation_driver.hpp ++++ b/driver/reducecalculation_driver.hpp +@@ -33,6 +33,7 @@ + #include "random.hpp" + #include + #include ++#include + #include + #include + #include +@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc, + for(size_t i = 0; i < reduce_size; ++i) + { + Tcheck val = static_cast(input[input_idx]); +- if(nanPropagation && isnan(val)) ++ if(nanPropagation && std::isnan(val)) + { + val = 0.0f; + } diff --git a/pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch b/pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch new file mode 100644 index 0000000000000..89be2f4076cb4 --- /dev/null +++ b/pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch @@ -0,0 +1,22 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index d0ffaf983..0b9ed0952 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -554,7 +554,7 @@ endif() + function(unpack_db db_bzip2_file) + get_filename_component(__fname ${db_bzip2_file} NAME_WLE) + add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname} +- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname}) ++ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname}) + string(REPLACE "." 
"_" __tname ${__fname}) + add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}) + +@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file) + if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db") + add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt + DEPENDS sqlite2txt generate_${__tname} +- COMMAND $ ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt ++ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $ ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt + ) + add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt) + add_dependencies(generate_kernels generate_${__tname}_txt) diff --git a/pkgs/development/rocm-modules/6/mivisionx/default.nix b/pkgs/development/rocm-modules/6/mivisionx/default.nix index 1b117ad3fc4d8..554ec6a3bd8a1 100644 --- a/pkgs/development/rocm-modules/6/mivisionx/default.nix +++ b/pkgs/development/rocm-modules/6/mivisionx/default.nix @@ -43,13 +43,13 @@ stdenv.mkDerivation (finalAttrs: { "cpu" ); - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "MIVisionX"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-d32lcJq24MXeIWbNbo6putWaol5kF2io6cz4ZuL+DbE="; + hash = "sha256-SisCbUDCAiWQ1Ue7qrtoT6vO/1ztzqji+3cJD6MXUNw="; }; patches = [ @@ -144,8 +144,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -154,8 +154,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/mscclpp/default.nix 
b/pkgs/development/rocm-modules/6/mscclpp/default.nix new file mode 100644 index 0000000000000..e1ac56b768392 --- /dev/null +++ b/pkgs/development/rocm-modules/6/mscclpp/default.nix @@ -0,0 +1,42 @@ +{ + fetchFromGitHub, + stdenv, + cmake, + clr, + numactl, + nlohmann_json, +}: +stdenv.mkDerivation { + pname = "mscclpp"; + version = "0.5.2"; + nativeBuildInputs = [ + cmake + ]; + buildInputs = [ + clr + numactl + ]; + postPatch = '' + substituteInPlace CMakeLists.txt \ + --replace-fail "gfx90a gfx941 gfx942" "gfx908 gfx90a gfx942 gfx1030 gfx1100" + ''; + cmakeFlags = [ + "-DMSCCLPP_BYPASS_GPU_CHECK=ON" + "-DMSCCLPP_USE_ROCM=ON" + "-DMSCCLPP_BUILD_TESTS=OFF" + "-DGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" + "-DAMDGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" + "-DMSCCLPP_BUILD_APPS_NCCL=ON" + "-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF" + "-DFETCHCONTENT_QUIET=OFF" + "-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS" + "-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}" + ]; + env.ROCM_PATH = clr; + src = fetchFromGitHub { + owner = "microsoft"; + repo = "mscclpp"; + rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37"; + hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig="; + }; +} diff --git a/pkgs/development/rocm-modules/6/rccl/default.nix b/pkgs/development/rocm-modules/6/rccl/default.nix index dc5fd0534b061..806316735abc9 100644 --- a/pkgs/development/rocm-modules/6/rccl/default.nix +++ b/pkgs/development/rocm-modules/6/rccl/default.nix @@ -6,18 +6,34 @@ cmake, rocm-cmake, rocm-smi, + rocm-core, clr, + mscclpp, perl, hipify, gtest, chrpath, + rocprofiler, + rocprofiler-register, + autoPatchelfHook, buildTests ? false, - gpuTargets ? [ ], + gpuTargets ? 
(clr.localGpuTargets or [ ]), }: +let + useAsan = buildTests; + useUbsan = buildTests; + san = lib.optionalString (useAsan || useUbsan) ( + "-fno-gpu-sanitize -fsanitize=undefined " + + (lib.optionalString useAsan "-fsanitize=address -shared-libsan ") + ); +in +# FIXME: infiniband support relies on: +# * kfd_peerdirect support which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver +# * ib_peer_mem support which is ??? and ubuntu has a patchset here https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/hirsute/commit/?id=e9eb90eb5e4a5aef6f516abbc720038fc0d1a139 stdenv.mkDerivation (finalAttrs: { - pname = "rccl"; - version = "6.0.2"; + pname = "rccl${clr.gpuArchSuffix}"; + version = "6.3.1"; outputs = [ @@ -27,11 +43,17 @@ stdenv.mkDerivation (finalAttrs: { "test" ]; + patches = [ + ./fix-mainline-support-and-ub.diff + ./enable-mscclpp-on-all-gfx9.diff + ./rccl-test-missing-iomanip.diff + ]; + src = fetchFromGitHub { owner = "ROCm"; repo = "rccl"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-Oyml47yGEB7fALxBcDjqFngS38cnI39sDj94/JV7wE0="; + hash = "sha256-61yvFqloOO6qtn0H6XsAPvJ6LKlOeXgTD/xbjCuB3zQ="; }; nativeBuildInputs = [ @@ -40,12 +62,16 @@ stdenv.mkDerivation (finalAttrs: { clr perl hipify + autoPatchelfHook # ASAN doesn't add rpath without this ]; buildInputs = [ rocm-smi gtest + rocprofiler + rocprofiler-register + mscclpp ] ++ lib.optionals buildTests [ chrpath @@ -53,8 +79,16 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_BUILD_TYPE=Release" + "-DROCM_PATH=${clr}" + "-DHIP_COMPILER=${clr}/bin/amdclang++" + "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++" + "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" + "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h + "-DENABLE_MSCCL_KERNEL=ON" + "-DENABLE_MSCCLPP=ON" + "-DMSCCLPP_ROOT=${mscclpp}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 
"-DCMAKE_INSTALL_BINDIR=bin" @@ -62,32 +96,43 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_INCLUDEDIR=include" ] ++ lib.optionals (gpuTargets != [ ]) [ + # AMD can't make up their minds and keep changing which one is used in different projects. "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_TESTS=ON" ]; + makeFlags = [ "-l32" ]; + env.CCC_OVERRIDE_OPTIONS = "+-parallel-jobs=6"; + # -O2 and -fno-strict-aliasing due to UB issues in RCCL :c + # Reported upstream + env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; + env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; + env.LDFLAGS = "${san}"; postPatch = '' patchShebangs src tools # Really strange behavior, `#!/usr/bin/env perl` should work... substituteInPlace CMakeLists.txt \ - --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" \ - --replace-warn "-parallel-jobs=12" "-parallel-jobs=1" \ - --replace-warn "-parallel-jobs=16" "-parallel-jobs=1" + --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" ''; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/* $test/bin - rmdir $out/bin - ''; + postInstall = + lib.optionalString useAsan '' + patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-x86_64.so $out/lib/librccl.so + '' + + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/* $test/bin + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -99,8 +144,5 @@ stdenv.mkDerivation (finalAttrs: { ]; maintainers = teams.rocm.members; platforms = platforms.linux; - 
broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff b/pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff new file mode 100644 index 0000000000000..6fc375921064b --- /dev/null +++ b/pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff @@ -0,0 +1,13 @@ +diff --git a/src/init.cc b/src/init.cc +index 738f756..1b0e4fc 100644 +--- a/src/init.cc ++++ b/src/init.cc +@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) { + if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) { + hipDeviceProp_t devProp; + CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev)); +- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94"); ++ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9"); + if (comm->mscclppCompatible) { + bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0); + auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId]; diff --git a/pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff b/pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff new file mode 100644 index 0000000000000..15efac82c5abd --- /dev/null +++ b/pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff @@ -0,0 +1,178 @@ +diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h +index 8c5f081..9922b79 100644 +--- a/src/include/bootstrap.h ++++ b/src/include/bootstrap.h +@@ -10,11 +10,13 @@ + #include "nccl.h" + #include "comm.h" + ++// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128] + struct ncclBootstrapHandle { + uint64_t magic; + union ncclSocketAddress addr; + }; + static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique 
ID"); ++static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB"); + + ncclResult_t bootstrapNetInit(); + ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv); +diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc +index b3063d5..464b80d 100644 +--- a/src/misc/rocmwrap.cc ++++ b/src/misc/rocmwrap.cc +@@ -131,9 +131,12 @@ static void initOnceFunc() { + //format and store the kernel conf file location + snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release); + fp = fopen(kernel_conf_file, "r"); +- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file"); ++ if (fp == NULL) { ++ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled"); ++ } + //look for kernel_opt1 and kernel_opt2 in the conf file and check +- while (fgets(buf, sizeof(buf), fp) != NULL) { ++ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary. 
++ while (fp && fgets(buf, sizeof(buf), fp) != NULL) { + if (strstr(buf, kernel_opt1) != NULL) { + found_opt1 = 1; + INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release); +@@ -143,11 +146,12 @@ static void initOnceFunc() { + INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release); + } + } +- if (!found_opt1 || !found_opt2) { ++ if (fp && (!found_opt1 || !found_opt2)) { + dmaBufSupport = 0; + INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release); + INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support"); + } ++ if (fp) fclose(fp); + + if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled"); + else goto error; +diff --git a/src/nccl.h.in b/src/nccl.h.in +index 1d127b0..6296073 100644 +--- a/src/nccl.h.in ++++ b/src/nccl.h.in +@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t; + #define NCCL_UNIQUE_ID_BYTES 128 + /*! @brief Opaque unique id used to initialize communicators + @details The ncclUniqueId must be passed to all participating ranks */ +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId; + + /*! 
@defgroup rccl_result_code Result Codes + @details The various result codes that RCCL API calls may return +diff --git a/src/proxy.cc b/src/proxy.cc +index 50e5437..51bb401 100644 +--- a/src/proxy.cc ++++ b/src/proxy.cc +@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool { + + static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) { + if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) { +- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1)); ++ if (pool->pools) { ++ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1)); ++ } else { ++ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1)); ++ } + NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE)); + pool->banks++; + pool->offset = 0; +diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc +index 6d77784..49762d3 100644 +--- a/src/transport/net_ib.cc ++++ b/src/transport/net_ib.cc +@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() { + // Requires support from NIC driver modules + // Use ONLY for debugging! 
+ moduleLoaded = 1; +- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1"); ++ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1"); + } + + if (moduleLoaded == -1) { +@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() { + // or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module) + const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version", + "/sys/kernel/memory_peers/amdkfd/version", +- "/sys/memory_peers/amdkfd/version"}; ++ "/sys/memory_peers/amdkfd/version", ++ NULL}; + int i = 0; + + while (memory_peers_paths[i]) { + if (access(memory_peers_paths[i], F_OK) == 0) { + moduleLoaded = 1; +- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]); + break; + } else { + moduleLoaded = 0; +@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() { + if (moduleLoaded == 0) { + // Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms` + // if your system uses native OS ib_peer module +- char buf[256]; +- FILE *fp = NULL; +- fp = fopen("/proc/kallsyms", "r"); ++ FILE *fp = fopen("/proc/kallsyms", "r"); ++ char *line = NULL; ++ size_t len = 0; + + if (fp == NULL) { +- INFO(NCCL_INIT,"Could not open /proc/kallsyms"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client"); + } else { +- while (fgets(buf, sizeof(buf), fp) != NULL) { +- if (strstr(buf, "t ib_register_peer_memory_client") != NULL || +- strstr(buf, "T ib_register_peer_memory_client") != NULL) { ++ while (getline(&line, &len, fp) > 0) { ++ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) { + moduleLoaded = 1; +- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms"); + break; + } + } + } 
++ if (line) free(line); ++ if (fp) fclose(fp); + } + #else + // Check for the nv_peer_mem module being loaded +@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() { + #endif + } + if (moduleLoaded == 0) { +- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol"); + return ncclSystemError; + } + return ncclSuccess; +diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h +index 2c86c33..5801c61 100755 +--- a/tools/ib-test/include/nccl.h ++++ b/tools/ib-test/include/nccl.h +@@ -31,7 +31,7 @@ extern "C" { + typedef struct ncclComm* ncclComm_t; + + #define NCCL_UNIQUE_ID_BYTES 128 +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; + + /* Error type */ + typedef enum { ncclSuccess = 0, +diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h +index 729561b..4e4bdd9 100644 +--- a/tools/topo_expl/include/nccl.h ++++ b/tools/topo_expl/include/nccl.h +@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t; + #define NCCL_COMM_NULL NULL + + #define NCCL_UNIQUE_ID_BYTES 128 +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; + + /*! 
@brief Error type */ + typedef enum { ncclSuccess = 0, diff --git a/pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff b/pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff new file mode 100644 index 0000000000000..3cc54a83c1429 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff @@ -0,0 +1,10 @@ +--- a/test/common/TestBed.cpp ++++ b/test/common/TestBed.cpp +@@ -4,6 +4,7 @@ + * See LICENSE.txt for license information + ************************************************************************/ + #include ++#include + #include "TestBed.hpp" + #include + diff --git a/pkgs/development/rocm-modules/6/rdc/default.nix b/pkgs/development/rocm-modules/6/rdc/default.nix index 2422ef71579c7..fc28202a11f1a 100644 --- a/pkgs/development/rocm-modules/6/rdc/default.nix +++ b/pkgs/development/rocm-modules/6/rdc/default.nix @@ -4,6 +4,7 @@ fetchFromGitHub, rocmUpdateScript, cmake, + amdsmi, rocm-smi, rocm-runtime, libcap, @@ -46,7 +47,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rdc"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -63,7 +64,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rdc"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-QugcajxILmDeQiWG5uAUO41Wut45irg2Ynufgn1bmps="; + hash = "sha256-sKsti7LeWsxvOmc9h/srsl0OmHkJIRNRiV+8mFVG3/M="; }; nativeBuildInputs = @@ -79,6 +80,7 @@ stdenv.mkDerivation (finalAttrs: { buildInputs = [ + amdsmi rocm-smi rocm-runtime libcap @@ -126,8 +128,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -136,7 +138,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - # broken = versions.minor finalAttrs.version != versions.minor 
rocm-smi.version || versionAtLeast finalAttrs.version "7.0.0"; - broken = true; # Too many errors, unsure how to fix }; }) diff --git a/pkgs/development/rocm-modules/6/rocalution/default.nix b/pkgs/development/rocm-modules/6/rocalution/default.nix index 48cc9ab3f8ccb..fc50702cabb2a 100644 --- a/pkgs/development/rocm-modules/6/rocalution/default.nix +++ b/pkgs/development/rocm-modules/6/rocalution/default.nix @@ -11,6 +11,7 @@ rocrand, clr, git, + pkg-config, openmp, openmpi, gtest, @@ -22,7 +23,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocalution"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -42,7 +43,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocALUTION"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-mrN+CI2mqaMi8oKxui7HAIE2qSn50aNaFipkWwYMtbc="; + hash = "sha256-xdZ3HUiRGsreHfJH8RgL/s3jGyC5ABmBKcEfgtqWg8Y="; }; nativeBuildInputs = [ @@ -50,6 +51,7 @@ stdenv.mkDerivation (finalAttrs: { rocm-cmake clr git + pkg-config ]; buildInputs = @@ -65,9 +67,12 @@ stdenv.mkDerivation (finalAttrs: { gtest ]; + CXXFLAGS = "-I${openmp.dev}/include"; cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" "-DROCM_PATH=${clr}" "-DHIP_ROOT_DIR=${clr}" "-DSUPPORT_HIP=ON" @@ -82,6 +87,7 @@ stdenv.mkDerivation (finalAttrs: { ] ++ lib.optionals (gpuTargets != [ ]) [ "-DAMDGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" @@ -115,8 +121,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -125,8 +131,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; 
maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix index 7c5fbe801b193..9ef135ca34769 100644 --- a/pkgs/development/rocm-modules/6/rocblas/default.nix +++ b/pkgs/development/rocm-modules/6/rocblas/default.nix @@ -2,7 +2,6 @@ lib, stdenv, fetchFromGitHub, - fetchpatch, rocmUpdateScript, cmake, rocm-cmake, @@ -14,21 +13,24 @@ gtest, gfortran, openmp, + git, amd-blis, + zstd, + hipblas-common, + hipblaslt, python3Packages, + rocm-smi, + writeShellScriptBin, buildTensile ? true, - buildTests ? false, - buildBenchmarks ? false, - tensileLogic ? "asm_full", - tensileCOVersion ? "default", + buildTests ? true, + buildBenchmarks ? true, # https://github.com/ROCm/Tensile/issues/1757 # Allows gfx101* users to use rocBLAS normally. # Turn the below two values to `true` after the fix has been cherry-picked # into a release. Just backporting that single fix is not enough because it # depends on some previous commits. - tensileSepArch ? false, - tensileLazyLib ? false, - tensileLibFormat ? "msgpack", + tensileSepArch ? true, + tensileLazyLib ? true, # `gfx940`, `gfx941` are not present in this list because they are early # engineering samples, and all final MI300 hardware are `gfx942`: # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 @@ -37,38 +39,51 @@ # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will # always try to use `gfx1010` code objects, hence building for `gfx1012` is # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 - gpuTargets ? [ - "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" - ], + gpuTargets ? 
( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1010" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ] + ), }: +# FIXME: this derivation is ludicrously large, can we do anything about this? +let + gpuTargets' = lib.concatStringsSep ";" gpuTargets; +in stdenv.mkDerivation (finalAttrs: { - pname = "rocblas"; - version = "6.0.2"; + pname = "rocblas${clr.gpuArchSuffix}"; + version = "6.3.1"; - outputs = - [ - "out" - ] - ++ lib.optionals buildTests [ - "test" - ] - ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = [ + "out" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocBLAS"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-G68d/gvBbTdNx8xR3xY+OkBm5Yxq1NFjxby9BbpOcUk="; + hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4="; }; nativeBuildInputs = [ cmake + # no ninja, it buffers console output and nix times out long periods of no output rocm-cmake clr + git + (writeShellScriptBin "amdclang++" '' + exec clang++ "$@" + '') ] ++ lib.optionals buildTensile [ tensile @@ -77,12 +92,15 @@ stdenv.mkDerivation (finalAttrs: { buildInputs = [ python3 + hipblas-common + hipblaslt ] ++ lib.optionals buildTensile [ + zstd msgpack libxml2 python3Packages.msgpack - python3Packages.joblib + python3Packages.zstandard ] ++ lib.optionals buildTests [ gtest @@ -91,58 +109,90 @@ stdenv.mkDerivation (finalAttrs: { gfortran openmp amd-blis + rocm-smi ] ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ python3Packages.pyyaml ]; + dontStrip = true; + env.CXXFLAGS = + "-O3 -DNDEBUG -I${hipblas-common}/include" + + lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis"; + # Fails to link tests if we don't add amd-blis libs + env.LDFLAGS = lib.optionalString ( + buildTests || buildBenchmarks + ) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas"; + env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++"; + cmakeFlags = [ - (lib.cmakeFeature "CMAKE_C_COMPILER" 
"hipcc") - (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + "-DCMAKE_BUILD_TYPE=Release" + # "-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR") + "-DCMAKE_Fortran_COMPILER=${lib.getBin gfortran}/bin/gfortran" + "-DCMAKE_Fortran_COMPILER_AR=${lib.getBin gfortran}/bin/ar" + "-DCMAKE_Fortran_COMPILER_RANLIB=${lib.getBin gfortran}/bin/ranlib" + # FIXME: AR and RANLIB might need passed `--plugin=$(gfortran --print-file-name=liblto_plugin.so)` (lib.cmakeFeature "python" "python3") - (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets)) + "-DSUPPORTED_TARGETS=${gpuTargets'}" + "-DAMDGPU_TARGETS=${gpuTargets'}" + "-DGPU_TARGETS=${gpuTargets'}" (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) (lib.cmakeBool "ROCM_SYMLINK_LIBS" false) (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) - # rocblas header files are not installed unless we set this - (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include") + (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks) + (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true) + # Temporarily set variables to work around upstream CMakeLists issue + # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DCMAKE_INSTALL_LIBDIR=lib" ] ++ lib.optionals buildTensile [ + "-DCPACK_SET_DESTDIR=OFF" + "-DLINK_BLIS=ON" + "-DTensile_CODE_OBJECT_VERSION=default" + "-DTensile_LOGIC=asm_full" + "-DTensile_LIBRARY_FORMAT=msgpack" (lib.cmakeBool "BUILD_WITH_PIP" false) - (lib.cmakeFeature "Tensile_LOGIC" tensileLogic) - (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion) (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) - (lib.cmakeFeature 
"Tensile_LIBRARY_FORMAT" tensileLibFormat) - (lib.cmakeBool "Tensile_PRINT_DEBUG" true) - ] - ++ lib.optionals (buildTests || buildBenchmarks) [ - (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis") ]; + preConfigure = '' + makeFlagsArray+=("-l$(nproc)") + ''; + + passthru.amdgpu_targets = gpuTargets'; + patches = [ - (fetchpatch { - name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; - url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; - hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; - }) + # (fetchpatch { + # name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; + # url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; + # hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; + # }) + # ./offload-compress.diff ]; # Pass $NIX_BUILD_CORES to Tensile postPatch = '' substituteInPlace cmake/build-options.cmake \ --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' + substituteInPlace CMakeLists.txt \ + --replace-fail "4.42.0" "4.43.0" ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; + enableParallelBuilding = true; requiredSystemFeatures = [ "big-parallel" ]; meta = with lib; { @@ -151,8 +201,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocblas/offload-compress.diff b/pkgs/development/rocm-modules/6/rocblas/offload-compress.diff new file mode 100644 index 0000000000000..c651eab199cd3 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocblas/offload-compress.diff @@ -0,0 +1,68 @@ 
+diff --git a/cmake/build-options.cmake b/cmake/build-options.cmake +index 379f8d889..e9de46914 100644 +--- a/cmake/build-options.cmake ++++ b/cmake/build-options.cmake +@@ -27,6 +27,8 @@ + # presented in the superbuild GUI, but then passed into the ExternalProject as -D + # parameters, which would already define them. + ++include(CheckCXXCompilerFlag) ++ + option( BUILD_VERBOSE "Output additional build information" OFF ) + + # BUILD_SHARED_LIBS is a cmake built-in; we make it an explicit option such that it shows in cmake-gui +@@ -46,6 +48,15 @@ endif() + # this file is intended to be loaded by toolchain or early as sets global compiler flags + # rocm-cmake checks will throw warnings if set later as cmake watchers installed + ++ ++option(BUILD_OFFLOAD_COMPRESS "Build rocBLAS with offload compression" ON) ++if (BUILD_OFFLOAD_COMPRESS) ++ check_cxx_compiler_flag("--offload-compress" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) ++ if (NOT CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) ++ message( STATUS "WARNING: BUILD_OFFLOAD_COMPRESS=ON but flag not supported by compiler. Ignoring option." 
) ++ endif() ++endif() ++ + # FOR OPTIONAL CODE COVERAGE + option(BUILD_CODE_COVERAGE "Build rocBLAS with code coverage enabled" OFF) + +diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt +index 2e94e19ea..161a443e0 100644 +--- a/library/CMakeLists.txt ++++ b/library/CMakeLists.txt +@@ -71,6 +71,10 @@ function( rocblas_library_settings lib_target_ ) + set_target_properties( ${lib_target_} PROPERTIES CXX_EXTENSIONS NO ) + set_target_properties( ${lib_target_} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) + ++ if(BUILD_OFFLOAD_COMPRESS AND CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) ++ set_target_properties( ${lib_target_} PROPERTIES COMPILE_FLAGS "--offload-compress" ) ++ endif() ++ + target_link_libraries( ${lib_target_} INTERFACE hip::host ) + if (WIN32) + target_link_libraries( ${lib_target_} PRIVATE hip::device ) +diff --git a/rmake.py b/rmake.py +index 45335278f..f1f8561b0 100755 +--- a/rmake.py ++++ b/rmake.py +@@ -133,6 +133,9 @@ def parse_args(): + experimental_opts.add_argument( '--no-msgpack', dest='tensile_msgpack_backend', required=False, default=True, action='store_false', + help='Build Tensile backend not to use MessagePack and so use YAML (optional)') + ++ general_opts.add_argument( '--no-offload-compress', dest='no_offload_compress', required=False, default=False, action='store_true', ++ help='Do not apply offload compression.') ++ + general_opts.add_argument( '-r', '--relocatable', required=False, default=False, action='store_true', + help='Linux only: Add RUNPATH (based on ROCM_RPATH) and remove ldconf entry.') + +@@ -399,6 +402,9 @@ def config_cmd(): + if args.address_sanitizer: + cmake_options.append(f"-DBUILD_ADDRESS_SANITIZER=ON") + ++ if args.no_offload_compress: ++ cmake_options.append(f"-DBUILD_OFFLOAD_COMPRESS=OFF") ++ + # clean + delete_dir(build_path) + diff --git a/pkgs/development/rocm-modules/6/rocblas/offload-compress.py b/pkgs/development/rocm-modules/6/rocblas/offload-compress.py new file mode 100644 
index 0000000000000..48a3bf2617780 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocblas/offload-compress.py @@ -0,0 +1,85 @@ +# Compress standalone hsaco/co files the way clang-offload-bundler does +# https://clang.llvm.org/docs/ClangOffloadBundler.html#compression-and-decompression +import zstandard as zstd +import struct +import hashlib +import os +import argparse +import glob + +# Constants +MAGIC_NUMBER = b'CCOB' +VERSION = 2 +COMPRESSION_METHOD_ZSTD = 1 # Assuming 1 represents zstd in the LLVM compression enumeration + +def calculate_md5(data): + return hashlib.md5(data).digest()[:8] # 64-bit truncated MD5 hash + +# struct __ClangOffloadBundleCompressedHeader { +# const char magic[kOffloadBundleCompressedMagicStrSize - 1]; +# uint16_t versionNumber; +# uint16_t compressionMethod; +# uint32_t totalSize; +# uint32_t uncompressedBinarySize; +# uint64_t Hash; +# const char compressedBinarydesc[1]; +# }; + +def compress_file(input_file): + # Read the input file + with open(input_file, 'rb') as f: + uncompressed_data = f.read() + + if uncompressed_data[0:len(MAGIC_NUMBER)] == MAGIC_NUMBER: + print(f"{input_file} already compressed, skipping") + return + + # Compress the data + cctx = zstd.ZstdCompressor() + compressed_data = cctx.compress(uncompressed_data) + + # Calculate hash + hash_value = calculate_md5(uncompressed_data) + + # Create header + header = struct.pack('@4sHHII8s', + MAGIC_NUMBER, + VERSION, + COMPRESSION_METHOD_ZSTD, + len(compressed_data) + 24, # Total file size (header + compressed data) + len(uncompressed_data), + hash_value) + + # Write compressed file + with open(input_file, 'wb') as f: + f.write(header) + f.write(compressed_data) + +def process_directory(directory): + # Get all .hsaco and .co files in the directory + files_to_compress = list(glob.glob(os.path.join(directory, '**', '*.hsaco'), recursive=True) + glob.glob(os.path.join(directory, '**', '*.co'), recursive=True)) + + successes = 0 + for file in files_to_compress: + try: + 
compress_file(file) + print(f"Compressed: {file}") + successes += 1 + except Exception as e: + print(f"Error compressing {file}: {str(e)}") + + print(f"Compression complete. Compressed {successes: 5d} / {len(files_to_compress): 5d}") + +def main(): + parser = argparse.ArgumentParser(description="Compress .hsaco and .co files in a directory using zstd.") + parser.add_argument("directory", help="Directory containing files to compress") + args = parser.parse_args() + + if not os.path.isdir(args.directory): + print(f"Error: {args.directory} is not a valid directory") + return + + process_directory(args.directory) + +if __name__ == '__main__': + main() diff --git a/pkgs/development/rocm-modules/6/rocdbgapi/default.nix b/pkgs/development/rocm-modules/6/rocdbgapi/default.nix index 9d1006c32945c..67541854c59de 100644 --- a/pkgs/development/rocm-modules/6/rocdbgapi/default.nix +++ b/pkgs/development/rocm-modules/6/rocdbgapi/default.nix @@ -1,66 +1,76 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, git -, rocm-comgr -, rocm-runtime -, hwdata -, texliveSmall -, doxygen -, graphviz -, buildDocs ? true +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + git, + rocm-comgr, + rocm-runtime, + hwdata, + texliveSmall, + doxygen, + graphviz, + buildDocs ? 
true, }: let - latex = lib.optionalAttrs buildDocs (texliveSmall.withPackages (ps: with ps; [ - changepage - latexmk - varwidth - multirow - hanging - adjustbox - collectbox - stackengine - enumitem - alphalph - wasysym - sectsty - tocloft - newunicodechar - etoc - helvetic - wasy - courier - ])); -in stdenv.mkDerivation (finalAttrs: { + latex = lib.optionalAttrs buildDocs ( + texliveSmall.withPackages ( + ps: with ps; [ + changepage + latexmk + varwidth + multirow + hanging + adjustbox + collectbox + stackengine + enumitem + alphalph + wasysym + sectsty + tocloft + newunicodechar + etoc + helvetic + wasy + courier + ] + ) + ); +in +stdenv.mkDerivation (finalAttrs: { pname = "rocdbgapi"; - version = "6.0.2"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildDocs [ - "doc" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildDocs [ + "doc" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "ROCdbgapi"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-+CxaTmxRt/RicqQddqIEHs8vvAPCMKXkWg7kbZvnUsQ="; + hash = "sha256-6itfBrWVspobU47aiJAOQoxT8chwrq9scRn0or3bXto="; }; - nativeBuildInputs = [ - cmake - rocm-cmake - git - ] ++ lib.optionals buildDocs [ - latex - doxygen - graphviz - ]; + nativeBuildInputs = + [ + cmake + rocm-cmake + git + ] + ++ lib.optionals buildDocs [ + latex + doxygen + graphviz + ]; buildInputs = [ rocm-comgr @@ -83,21 +93,23 @@ in stdenv.mkDerivation (finalAttrs: { make -j$NIX_BUILD_CORES doc ''; - postInstall = '' - substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-config.cmake \ - --replace "/build/source/build/" "" + postInstall = + '' + substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-config.cmake \ + --replace "/build/source/build/" "" - substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-targets.cmake \ - --replace "/build/source/build" "$out" - '' + lib.optionalString buildDocs '' - mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html - rmdir $out/share/html - ''; + substituteInPlace 
$out/lib/cmake/amd-dbgapi/amd-dbgapi-targets.cmake \ + --replace "/build/source/build" "$out" + '' + + lib.optionalString buildDocs '' + mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html + rmdir $out/share/html + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -106,6 +118,5 @@ in stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocfft/default.nix b/pkgs/development/rocm-modules/6/rocfft/default.nix index 92f51a161a48d..3a53ab635fa8a 100644 --- a/pkgs/development/rocm-modules/6/rocfft/default.nix +++ b/pkgs/development/rocm-modules/6/rocfft/default.nix @@ -1,5 +1,4 @@ { - rocfft, lib, stdenv, fetchFromGitHub, @@ -15,18 +14,18 @@ gtest, openmp, rocrand, - gpuTargets ? [ ], + gpuTargets ? clr.localGpuTargets or clr.gpuTargets, }: stdenv.mkDerivation (finalAttrs: { - pname = "rocfft"; - version = "6.0.2"; + pname = "rocfft${clr.gpuArchSuffix}"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocFFT"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-6Gjsy14GeR08VqnNmFhu8EyYDnQ+VZRlg+u9MAAWfHc="; + hash = "sha256-RrxdwZ64uC7lQzyJI1eGHX2dmRnW8TfNThnuvuz5XWo="; }; nativeBuildInputs = [ @@ -36,6 +35,7 @@ stdenv.mkDerivation (finalAttrs: { rocm-cmake ]; + # FIXME: rocfft_aot_helper times out build due to no logs!! 
buildInputs = [ sqlite ]; cmakeFlags = @@ -53,6 +53,10 @@ stdenv.mkDerivation (finalAttrs: { "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ]; + preConfigure = '' + makeFlagsArray+=("-l$(((NIX_BUILD_CORES * 2) / 3))") + ''; + passthru = { test = stdenv.mkDerivation { pname = "${finalAttrs.pname}-test"; @@ -156,8 +160,8 @@ stdenv.mkDerivation (finalAttrs: { updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; }; @@ -169,8 +173,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ kira-bruneau ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocgdb/default.nix b/pkgs/development/rocm-modules/6/rocgdb/default.nix index cf51f05f9ee48..43dc5445be998 100644 --- a/pkgs/development/rocm-modules/6/rocgdb/default.nix +++ b/pkgs/development/rocm-modules/6/rocgdb/default.nix @@ -21,13 +21,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocgdb"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "ROCgdb"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-XeX/k8gfo9HgcUSIjs35C7IqCmFhvBOqQJSOoPF6HK4="; + hash = "sha256-P9NbYMrCs0UpnaEIP+bJEM6yPiRHzl0lI0J4+A7/ePc="; }; nativeBuildInputs = [ @@ -91,8 +91,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -101,6 +101,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.gpl3Plus; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versionAtLeast finalAttrs.version "7.0.0"; 
}; }) diff --git a/pkgs/development/rocm-modules/6/rocm-cmake/default.nix b/pkgs/development/rocm-modules/6/rocm-cmake/default.nix index cd3fd9c035dab..16969b9cf9e59 100644 --- a/pkgs/development/rocm-modules/6/rocm-cmake/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-cmake/default.nix @@ -1,27 +1,31 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + rocm-core, + cmake, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-cmake"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocm-cmake"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-qSjWT0KOQ5oDV06tfnKN+H/JzdoOnR9KY0c+SjvDepM="; + hash = "sha256-8kEcwqHJF584AteuddP7Ai7n6ltVZJ8a6RsYIWGMs0U="; }; nativeBuildInputs = [ cmake ]; + buildInputs = [ rocm-core ]; + passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -30,6 +34,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.mit; maintainers = teams.rocm.members; platforms = platforms.unix; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-comgr/default.nix b/pkgs/development/rocm-modules/6/rocm-comgr/default.nix index fb85e6dd2a1b9..ca9be81e85eb6 100644 --- a/pkgs/development/rocm-modules/6/rocm-comgr/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-comgr/default.nix @@ -1,11 +1,12 @@ { lib, stdenv, - fetchFromGitHub, - rocmUpdateScript, cmake, - rocm-cmake, + python3, + rocm-merged-llvm, rocm-device-libs, + zlib, + zstd, libxml2, }: @@ -20,34 +21,30 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocm-comgr"; - version = "6.0.2"; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = 
rocm-merged-llvm.llvm-src; - src = fetchFromGitHub { - owner = "ROCm"; - repo = "ROCm-CompilerSupport"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-9HuNU/k+kPJMlzqOTM20gm6SAOWJe9tpAZXEj4erdmI="; - }; - - sourceRoot = "${finalAttrs.src.name}/lib/comgr"; + sourceRoot = "${finalAttrs.src.name}/amd/comgr"; nativeBuildInputs = [ cmake - rocm-cmake + python3 ]; buildInputs = [ rocm-device-libs libxml2 + zlib + zstd + rocm-merged-llvm ]; - cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;X86" ]; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; + cmakeFlags = [ + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + ]; meta = with lib; { description = "APIs for compiling and inspecting AMDGPU code objects"; @@ -55,8 +52,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.ncsa; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-core/default.nix b/pkgs/development/rocm-modules/6/rocm-core/default.nix index 31f47fb26b976..e71205db3abba 100644 --- a/pkgs/development/rocm-modules/6/rocm-core/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-core/default.nix @@ -4,28 +4,43 @@ fetchFromGitHub, rocmUpdateScript, cmake, + writeText, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-core"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocm-core"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-sgL1UMt3o01zA8v41dyCG1fAsK/PkTRsHQJOvlNatZ4="; + hash = "sha256-UDnPGvgwzwv49CzF+Kt0v95CsxS33BZeqNcKw1K6jRI="; }; nativeBuildInputs = [ cmake ]; - cmakeFlags = [ "-DROCM_VERSION=${finalAttrs.version}" ]; + # FIXME: What's the correct way 
to set this? + env.ROCM_LIBPATCH_VERSION = "${lib.versions.major finalAttrs.version}0${lib.versions.minor finalAttrs.version}0${lib.versions.patch finalAttrs.version}"; + env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}"; + env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}"; + cmakeFlags = [ + "-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}" + "-DROCM_VERSION=${finalAttrs.version}" + "-DBUILD_ID=${finalAttrs.env.BUILD_ID}" + ]; + setupHook = writeText "setupHook.sh" '' + export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}" + export BUILD_ID="${finalAttrs.env.BUILD_ID}" + export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}" + ''; + + passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - page = "tags?per_page=1"; - filter = ".[0].name | split(\"-\") | .[1]"; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; }; meta = with lib; { @@ -34,8 +49,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch b/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch index 500ff37a99052..de46bdb87258e 100644 --- a/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch +++ b/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch @@ -1,7 +1,7 @@ diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index 07c60eb..c736b3e 100644 ---- a/cmake/Packages.cmake -+++ b/cmake/Packages.cmake +--- a/amd/device-libs/cmake/Packages.cmake ++++ b/amd/device-libs/cmake/Packages.cmake @@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES 
IMPORTED_LOCATION \"${target_path}\")") endforeach() diff --git a/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix b/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix index a16eee74c3f17..81d2ab54e069a 100644 --- a/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix @@ -1,11 +1,14 @@ { lib, stdenv, - fetchFromGitHub, - rocmUpdateScript, cmake, - rocm-cmake, + ninja, libxml2, + zlib, + zstd, + ncurses, + rocm-merged-llvm, + python3, }: let @@ -19,30 +22,34 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocm-device-libs"; - version = "6.0.2"; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; - src = fetchFromGitHub { - owner = "ROCm"; - repo = "ROCm-Device-Libs"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-7XG7oSkJ3EPWTYGea0I50eB1/DPMD5agmjctxZYTbLQ="; - }; + postPatch = '' + cd amd/device-libs + ''; patches = [ ./cmake.patch ]; nativeBuildInputs = [ cmake - rocm-cmake + ninja + python3 ]; - buildInputs = [ libxml2 ]; - cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" ]; + buildInputs = [ + libxml2 + zlib + zstd + ncurses + rocm-merged-llvm + ]; - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + ]; meta = with lib; { description = "Set of AMD-specific device-side language runtime libraries"; @@ -50,8 +57,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.ncsa; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix 
b/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix index 06232b1575e34..5302f165e5d99 100644 --- a/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix @@ -1,23 +1,23 @@ -{ lib -, stdenv -, fetchFromGitHub -, gitUpdater -, buildPythonPackage -, setuptools -, beautifulsoup4 -, gitpython -, pydata-sphinx-theme -, pygithub -, sphinx -, breathe -, myst-parser -, sphinx-book-theme -, sphinx-copybutton -, sphinx-design -, sphinx-external-toc -, sphinx-notfound-page -, pyyaml -, fastjsonschema +{ + lib, + fetchFromGitHub, + gitUpdater, + buildPythonPackage, + setuptools, + beautifulsoup4, + gitpython, + pydata-sphinx-theme, + pygithub, + sphinx, + breathe, + myst-parser, + sphinx-book-theme, + sphinx-copybutton, + sphinx-design, + sphinx-external-toc, + sphinx-notfound-page, + pyyaml, + fastjsonschema, }: # FIXME: Move to rocmPackages_common @@ -59,7 +59,10 @@ buildPythonPackage rec { meta = with lib; { description = "ROCm Documentation Python package for ReadTheDocs build standardization"; homepage = "https://github.com/ROCm/rocm-docs-core"; - license = with licenses; [ mit cc-by-40 ]; + license = with licenses; [ + mit + cc-by-40 + ]; maintainers = teams.rocm.members; platforms = platforms.linux; }; diff --git a/pkgs/development/rocm-modules/6/rocm-path/default.nix b/pkgs/development/rocm-modules/6/rocm-path/default.nix new file mode 100644 index 0000000000000..374ebd647c4f5 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocm-path/default.nix @@ -0,0 +1,25 @@ +{ + symlinkJoin, + linkFarm, + clr, + hipblas, + rocblas, + rocsolver, + rocsparse, + rocm-device-libs, + rocm-smi, + llvm, +}: +symlinkJoin { + name = "rocm-path-${clr.version}"; + paths = [ + clr + hipblas + rocblas + rocsolver + rocsparse + rocm-device-libs + rocm-smi + (linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; }) + ]; +} diff --git a/pkgs/development/rocm-modules/6/rocm-runtime/default.nix 
b/pkgs/development/rocm-modules/6/rocm-runtime/default.nix index a24c751a44ef0..52635ed32e519 100644 --- a/pkgs/development/rocm-modules/6/rocm-runtime/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-runtime/default.nix @@ -2,41 +2,44 @@ lib, stdenv, fetchFromGitHub, fetchpatch, rocmUpdateScript, pkg-config, cmake, + ninja, xxd, rocm-device-libs, - rocm-thunk, elfutils, libdrm, numactl, valgrind, libxml2, + rocm-merged-llvm, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-runtime"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "ROCR-Runtime"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-xNMG954HI9SOfvYYB/62fhmm9mmR4I10uHP2nqn9EgI="; + hash = "sha256-btpiIPV9REMvrmRSUzBIpBO6ehVIMmEmG+H8hqHDxdE="; }; - sourceRoot = "${finalAttrs.src.name}/src"; + env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w"; + env.CXXFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w"; nativeBuildInputs = [ pkg-config cmake + ninja xxd + rocm-merged-llvm ]; buildInputs = [ - rocm-thunk elfutils libdrm numactl @@ -44,34 +47,65 @@ stdenv.mkDerivation (finalAttrs: { libxml2 ]; + cmakeFlags = [ + "-DBUILD_SHARED_LIBS=ON" + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + patches = [ + # Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272 + (fetchpatch { + # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int + url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch"; + hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE="; + }) + (fetchpatch { + # [PATCH] rocr: refactor of runtime.cpp based on Coverity + url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch"; + hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0="; + }) (fetchpatch { - name = "extend-isa-compatibility-check.patch"; - url 
= "https://salsa.debian.org/rocm-team/rocr-runtime/-/raw/076026d43bbee7f816b81fea72f984213a9ff961/debian/patches/0004-extend-isa-compatibility-check.patch"; - hash = "sha256-cC030zVGS4kNXwaztv5cwfXfVwOldpLGV9iYgEfPEnY="; - stripLen = 1; + # [PATCH] queues: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch"; + hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U="; }) + (fetchpatch { + # [PATCH] topology: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch"; + hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w="; + }) + (fetchpatch { + # [PATCH] kfd_ioctl: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch"; + hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns="; + }) + ./remove-hsa-aqlprofile-dep.patch + ./ub.patch ]; postPatch = '' - patchShebangs image/blit_src/create_hsaco_ascii_file.sh - patchShebangs core/runtime/trap_handler/create_trap_handler_header.sh - patchShebangs core/runtime/blit_shaders/create_blit_shader_header.sh + patchShebangs --host image + patchShebangs --host core + patchShebangs --host runtime substituteInPlace CMakeLists.txt \ --replace 'hsa/include/hsa' 'include/hsa' # We compile clang before rocm-device-libs, so patch it in afterwards # Replace object version: https://github.com/ROCm/ROCR-Runtime/issues/166 (TODO: Remove on LLVM update?) 
- substituteInPlace image/blit_src/CMakeLists.txt \ - --replace '-cl-denorms-are-zero' '-cl-denorms-are-zero --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode' \ - --replace '-mcode-object-version=4' '-mcode-object-version=5' + # substituteInPlace image/blit_src/CMakeLists.txt \ + # --replace '-cl-denorms-are-zero' '-cl-denorms-are-zero --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode' \ + # --replace '-mcode-object-version=4' '-mcode-object-version=5' + + export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode" ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -80,8 +114,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ ncsa ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch b/pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch new file mode 100644 index 0000000000000..b8ed57d049bc9 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch @@ -0,0 +1,27 @@ +libhsa-amd-aqlprofile64 library is unfree +Bug: https://github.com/ROCm/ROCm/issues/1781 +--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp ++++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const { + setFlag(HSA_EXTENSION_AMD_PC_SAMPLING); + } + +- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) { +- os::CloseLib(lib); +- setFlag(HSA_EXTENSION_AMD_AQLPROFILE); +- } +- + setFlag(HSA_EXTENSION_AMD_PROFILER); + + break; +--- 
a/runtime/hsa-runtime/core/runtime/hsa.cpp ++++ b/runtime/hsa-runtime/core/runtime/hsa.cpp +@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v + return HSA_STATUS_SUCCESS; + } + +- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) { ++ if (0) { + if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) { + debug_print("aqlprofile API incompatible ver %d, current ver %d\n", + version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR); diff --git a/pkgs/development/rocm-modules/6/rocm-runtime/ub.patch b/pkgs/development/rocm-modules/6/rocm-runtime/ub.patch new file mode 100644 index 0000000000000..9427c76f8eaec --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocm-runtime/ub.patch @@ -0,0 +1,66 @@ +commit 56ad93a08c185cd43f925488ee5295149cce4d9d +Author: Luna Nova +Date: Mon Dec 30 11:58:30 2024 -0800 + + kfd_ioctl: cast to unsigned before << 31 to avoid UB + +diff --git a/libhsakmt/include/hsakmt/linux/kfd_ioctl.h b/libhsakmt/include/hsakmt/linux/kfd_ioctl.h +index e132cedf..ab15e69f 100644 +--- a/libhsakmt/include/hsakmt/linux/kfd_ioctl.h ++++ b/libhsakmt/include/hsakmt/linux/kfd_ioctl.h +@@ -1026,7 +1026,7 @@ struct kfd_ioctl_acquire_vm_args { + #define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) + #define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) + /* Allocation flags: attributes/access options */ +-#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) ++#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1U << 31U) + #define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) + #define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) + #define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) +commit 967f8c11b702fa769e0f95052f7b37a357b215f2 +Author: Luna Nova +Date: Mon Dec 30 15:32:49 2024 -0800 + + topology: fix signed integer overflow due to << 31 + +diff --git a/libhsakmt/src/topology.c b/libhsakmt/src/topology.c +index 9990286a..d3b54d11 100644 +--- a/libhsakmt/src/topology.c ++++ b/libhsakmt/src/topology.c +@@ -460,7 +460,7 @@ static void 
cpumap_to_cpu_ci(char *shared_cpu_map, + struct proc_cpuinfo *cpuinfo, + HsaCacheProperties *this_cache) + { +- int num_hexs, bit; ++ unsigned int num_hexs, bit; + uint32_t proc, apicid, mask; + char *ch_ptr; + +@@ -473,7 +473,7 @@ static void cpumap_to_cpu_ci(char *shared_cpu_map, + while (num_hexs-- > 0) { + mask = strtol(ch_ptr, NULL, 16); /* each X */ + for (bit = 0; bit < 32; bit++) { +- if (!((1 << bit) & mask)) ++ if (!((1U << bit) & mask)) + continue; + proc = num_hexs * 32 + bit; + apicid = cpuinfo[proc].apicid; +commit 8524c5b6af087ba29999119245effc4dc94f1584 +Author: Luna Nova +Date: Mon Dec 30 15:34:05 2024 -0800 + + queues: fix signed integer overflow due to << 31 + +diff --git a/libhsakmt/src/queues.c b/libhsakmt/src/queues.c +index de37f8b9..d2aaa7b6 100644 +--- a/libhsakmt/src/queues.c ++++ b/libhsakmt/src/queues.c +@@ -663,7 +663,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId, + /* cu_mask_count counts bits. It must be multiple of 32 */ + q->cu_mask_count = ALIGN_UP_32(cu_num, 32); + for (i = 0; i < cu_num; i++) +- q->cu_mask[i/32] |= (1 << (i % 32)); ++ q->cu_mask[i/32] |= (1U << (i % 32)); + } + + struct kfd_ioctl_create_queue_args args = {0}; diff --git a/pkgs/development/rocm-modules/6/rocm-smi/default.nix b/pkgs/development/rocm-modules/6/rocm-smi/default.nix index aeb38e4ad8de7..c909764bc8e39 100644 --- a/pkgs/development/rocm-modules/6/rocm-smi/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-smi/default.nix @@ -1,20 +1,21 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, wrapPython +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + wrapPython, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-smi"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocm_smi_lib"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-fS52hpTv1WEycwkGZLXjz383WJWzyk8RvJRshEQSG/A="; + hash = "sha256-j9pkyUt+p6IkhawIhiTymqDBydxXZunxmdyCyRN0RxE="; }; patches 
= [ ./cmake.patch ]; @@ -34,13 +35,14 @@ stdenv.mkDerivation (finalAttrs: { postInstall = '' wrapPythonProgramsIn $out + mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -49,6 +51,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = [ "x86_64-linux" ]; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-tests/default.nix b/pkgs/development/rocm-modules/6/rocm-tests/default.nix new file mode 100644 index 0000000000000..7a9a961b45576 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocm-tests/default.nix @@ -0,0 +1,31 @@ +{ + clr, + ollama, + python3Packages, + rocmPackages, + emptyDirectory, + stdenv, +}: +# This package exists purely to have a bunch of passthru.tests attrs +stdenv.mkDerivation { + name = "rocm-tests"; + nativeBuildInputs = [ + clr + ]; + src = emptyDirectory; + postInstall = '' + mkdir $out + touch $out/empty + ''; + passthru.tests = { + ollama = ollama.override { + inherit rocmPackages; + acceleration = "rocm"; + }; + torch = python3Packages.torch.override { + inherit rocmPackages; + rocmSupport = true; + cudaSupport = false; + }; + }; +} diff --git a/pkgs/development/rocm-modules/6/rocm-thunk/default.nix b/pkgs/development/rocm-modules/6/rocm-thunk/default.nix deleted file mode 100644 index 99a1d3c542d16..0000000000000 --- a/pkgs/development/rocm-modules/6/rocm-thunk/default.nix +++ /dev/null @@ -1,54 +0,0 @@ -{ lib -, stdenv -, fetchFromGitHub -, 
rocmUpdateScript -, pkg-config -, cmake -, libdrm -, numactl -}: - -stdenv.mkDerivation (finalAttrs: { - pname = "rocm-thunk"; - version = "6.0.2"; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "ROCT-Thunk-Interface"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-F6Qi+A9DuSx2e4WSfp4cnniKr0CkCZcZqsKwQmmZHhk="; - }; - - nativeBuildInputs = [ - pkg-config - cmake - ]; - - buildInputs = [ - libdrm - numactl - ]; - - cmakeFlags = [ - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ]; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - meta = with lib; { - description = "Radeon open compute thunk interface"; - homepage = "https://github.com/ROCm/ROCT-Thunk-Interface"; - license = with licenses; [ bsd2 mit ]; - maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; - platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; - }; -}) diff --git a/pkgs/development/rocm-modules/6/rocminfo/default.nix b/pkgs/development/rocm-modules/6/rocminfo/default.nix index 9c88274d722f2..79780012a4059 100644 --- a/pkgs/development/rocm-modules/6/rocminfo/default.nix +++ b/pkgs/development/rocm-modules/6/rocminfo/default.nix @@ -9,24 +9,25 @@ busybox, python3, gnugrep, + clr, # Only for localGpuTargets # rocminfo requires that the calling user have a password and be in # the video group. If we let rocm_agent_enumerator rely upon # rocminfo's output, then it, too, has those requirements. Instead, # we can specify the GPU targets for this system (e.g. "gfx803" for # Polaris) such that no system call is needed for downstream # compilers to determine the desired target. - defaultTargets ? [ ], + defaultTargets ? 
(clr.localGpuTargets or [ ]), }: stdenv.mkDerivation (finalAttrs: { - version = "6.0.2"; + version = "6.3.1"; pname = "rocminfo"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocminfo"; rev = "rocm-${finalAttrs.version}"; - sha256 = "sha256-k0QeCyQcarGbAh4ft8Y7JBK6l2nWxDUc20XoYmtrMMs="; + sha256 = "sha256-TL57Mznq5qPorDON0EaINBCoEFMN4dcAmRfRgS//nok="; }; nativeBuildInputs = [ @@ -49,8 +50,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -59,9 +60,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.ncsa; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - stdenv.hostPlatform.isAarch64 - || versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocmlir/default.nix b/pkgs/development/rocm-modules/6/rocmlir/default.nix index b0063bf9f7446..408838e17f55d 100644 --- a/pkgs/development/rocm-modules/6/rocmlir/default.nix +++ b/pkgs/development/rocm-modules/6/rocmlir/default.nix @@ -6,7 +6,6 @@ cmake, rocm-cmake, rocminfo, - ninja, clr, git, libxml2, @@ -19,6 +18,13 @@ buildTests ? false, # `argument of type 'NoneType' is not iterable` }: +# FIXME: rocmlir has an entire separate LLVM build in a subdirectory this is silly +# It seems to be forked from AMD's own LLVM +# If possible reusing the rocmPackages.llvm build would be better +# Would have to confirm it is compatible with ROCm's tagged LLVM. 
+# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways +# in subdirs + # Theoretically, we could have our MLIR have an output # with the source and built objects so that we can just # use it as the external LLVM repo for this @@ -35,7 +41,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocmlir${suffix}"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -49,13 +55,16 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocMLIR"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-AypY0vL8Ij1zLycwpG2EPWWl4utp4ejXpAK0Jj/UvrA="; + hash = "sha256-0SQ6uLDRfVfdCX+8a7D6pu6dYlFvX0HFzCDEvlKYfak="; }; + patches = [ + ./initparamdata-sort-const.patch + ]; + nativeBuildInputs = [ cmake rocm-cmake - ninja clr python3Packages.python python3Packages.tomli @@ -76,8 +85,11 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" - "-DLLVM_ENABLE_ZSTD=ON" - "-DLLVM_ENABLE_ZLIB=ON" + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_USE_LINKER=lld" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_LIBCXX=ON" "-DLLVM_ENABLE_TERMINFO=ON" "-DROCM_PATH=${clr}" # Manually define CMAKE_INSTALL_ @@ -85,18 +97,27 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] - ++ lib.optionals buildRockCompiler [ - "-DBUILD_FAT_LIBROCKCOMPILER=ON" + (lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler) ] ++ lib.optionals (!buildRockCompiler) [ "-DROCM_TEST_CHIPSET=gfx000" ]; + preConfigure = '' + makeFlagsArray+=("-l$(((NIX_BUILD_CORES * 2) / 3))") + ''; + postPatch = '' patchShebangs mlir patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py + # rocmlir-rock> /build/source/mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read' + # 41 | enum EffectType { read, write, unknown }; + # 
/nix/store/aax0hx68i2ikhpf27hdm6a2a209d4s6p-glibc-2.40-36-dev/include/unistd.h:371:16: note: previous definition is here + # 371 | extern ssize_t read (int __fd, void *__buf, size_t __nbytes) __wur + substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \ + --replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };" + # remove when no longer required substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \ --replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin" @@ -136,10 +157,9 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - page = "tags?per_page=2"; - filter = ".[1].name | split(\"-\") | .[1]"; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; }; meta = with lib; { @@ -148,8 +168,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ asl20 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch b/pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch new file mode 100644 index 0000000000000..37f521f6e02d7 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch @@ -0,0 +1,13 @@ +diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h +index 3f5ee596819a..590d53788822 100644 +--- a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h ++++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h +@@ -209,7 +209,7 @@ private: + size_t original_pos; + int64_t padding_amount; + +- bool operator<(const InitParamData &rhs) { ++ bool 
operator<(const InitParamData &rhs) const { + if (this->padding_amount < rhs.padding_amount) { + return true; + } else if (this->padding_amount == rhs.padding_amount) { diff --git a/pkgs/development/rocm-modules/6/rocprim/default.nix b/pkgs/development/rocm-modules/6/rocprim/default.nix index 3e8525655141e..84868ffde0187 100644 --- a/pkgs/development/rocm-modules/6/rocprim/default.nix +++ b/pkgs/development/rocm-modules/6/rocprim/default.nix @@ -1,44 +1,41 @@ -{ lib -, fetchpatch -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, clr -, gtest -, gbenchmark -, buildTests ? false -, buildBenchmarks ? false -, gpuTargets ? [ ] +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? [ ], }: stdenv.mkDerivation (finalAttrs: { pname = "rocprim"; - version = "6.0.2"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocPRIM"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-nWvq26qRPZ6Au1rc5cR74TKArcdUFg7O9djFi8SvMeM="; + hash = "sha256-0aHxpBuYIYhI2UER45YhHHL5YcxA+XeXoihcUs2AmCo="; }; - patches = [ - (fetchpatch { - name = "arch-conversion-marco.patch"; - url = "https://salsa.debian.org/rocm-team/rocprim/-/raw/70c8aaee3cf545d92685f4ed9bf8f41e3d4d570c/debian/patches/arch-conversion-macro.patch"; - hash = "sha256-oXdmbCArOB5bKE8ozDFrSh4opbO+c4VI6PNhljeUSms="; - }) - ]; + patches = [ ]; nativeBuildInputs = [ cmake @@ -46,42 +43,51 @@ stdenv.mkDerivation (finalAttrs: { clr ]; - buildInputs = lib.optionals buildTests [ - gtest - ] ++ lib.optionals buildBenchmarks [ - gbenchmark - ]; + buildInputs = + lib.optionals buildTests [ + gtest + ] + ++ lib.optionals 
buildBenchmarks [ + gbenchmark + ]; - cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] ++ lib.optionals (gpuTargets != [ ]) [ - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] ++ lib.optionals buildTests [ - "-DBUILD_TEST=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_BENCHMARK=ON" - ]; + cmakeFlags = + [ + "-DCMAKE_BUILD_TYPE=Release" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/test_* $test/bin - mv $out/bin/rocprim $test/bin - '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - mv $out/bin/benchmark_* $benchmark/bin - '' + lib.optionalString (buildTests || buildBenchmarks) '' - rmdir $out/bin - ''; + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + mv $out/bin/rocprim $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -90,6 +96,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = 
platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocprofiler-register/default.nix b/pkgs/development/rocm-modules/6/rocprofiler-register/default.nix new file mode 100644 index 0000000000000..d7b48bd213f8a --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocprofiler-register/default.nix @@ -0,0 +1,82 @@ +{ + lib, + stdenv, + rocm-runtime, + rocprofiler, + numactl, + libpciaccess, + libxml2, + elfutils, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clang, + clr, + python3Packages, + gpuTargets ? clr.gpuTargets, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocprofiler-register"; + version = "6.3.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocprofiler-register"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-UZsCiGnudsbL1v5lKBx7Vz3/HRnGn4f86Pd+qu3ryh0="; + fetchSubmodules = true; + }; + + patches = [ + ]; + + nativeBuildInputs = [ + cmake + clang + clr + ]; + + buildInputs = [ + numactl + libpciaccess + libxml2 + elfutils + rocm-runtime + + rocprofiler.rocmtoolkit-merged + + python3Packages.lxml + python3Packages.cppheaderparser + python3Packages.pyyaml + python3Packages.barectf + python3Packages.pandas + ]; + cmakeFlags = [ + "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" + "-DHIP_ROOT_DIR=${clr}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DBUILD_TEST=OFF" + "-DROCPROFILER_BUILD_TESTS=0" + "-DROCPROFILER_BUILD_SAMPLES=0" + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Profiling with perf-counters and derived metrics"; + homepage = 
"https://github.com/ROCm/rocprofiler-register"; + license = with licenses; [ mit ]; # mitx11 + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/development/rocm-modules/6/rocprofiler/default.nix b/pkgs/development/rocm-modules/6/rocprofiler/default.nix index c3a0bf7112a62..62f49539d3149 100644 --- a/pkgs/development/rocm-modules/6/rocprofiler/default.nix +++ b/pkgs/development/rocm-modules/6/rocprofiler/default.nix @@ -4,17 +4,14 @@ fetchFromGitHub, rocmUpdateScript, symlinkJoin, - substituteAll, cmake, clang, clr, rocm-core, - rocm-thunk, + rocm-runtime, rocm-device-libs, roctracer, rocdbgapi, - rocm-smi, - hsa-amd-aqlprofile-bin, numactl, libpciaccess, libxml2, @@ -22,6 +19,7 @@ mpi, systemd, gtest, + git, python3Packages, gpuTargets ? clr.gpuTargets, }: @@ -32,12 +30,10 @@ let paths = [ rocm-core - rocm-thunk + rocm-runtime rocm-device-libs roctracer rocdbgapi - rocm-smi - hsa-amd-aqlprofile-bin clr ]; @@ -48,33 +44,36 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocprofiler"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocprofiler"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-yzgw9g5cHAZpdbU44+1ScZyUcZ2I4GGfjbm9GSqCClk="; + hash = "sha256-kLiqKwxpeAkziBq4FRWhJ3IAvxVRcxi3AEEWgcVOfw4="; + fetchSubmodules = true; }; patches = [ # These just simply won't build ./0000-dont-install-tests-hsaco.patch + ./optional-aql-in-cmake.patch # Fix bad paths - (substituteAll { - src = ./0001-fix-shell-scripts.patch; - rocmtoolkit_merged = rocmtoolkit-merged; - }) + # (substituteAll { + # src = ./0001-fix-shell-scripts.patch; + # rocmtoolkit_merged = rocmtoolkit-merged; + # }) - # Fix for missing uint32_t not defined - ./0002-include-stdint-in-version.patch + # # Fix for missing uint32_t not defined + # ./0002-include-stdint-in-version.patch ]; nativeBuildInputs = [ cmake clang clr + git python3Packages.lxml python3Packages.cppheaderparser python3Packages.pyyaml @@ -94,12 +93,20 @@ 
stdenv.mkDerivation (finalAttrs: { propagatedBuildInputs = [ rocmtoolkit-merged ]; + # HACK: allow building without aqlprofile, probably explodes at runtime if use profiling + env.LDFLAGS = "-z nodefs -Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all"; + #HACK: rocprofiler's cmake doesn't add these deps properly + env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w"; + cmakeFlags = [ "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" "-DHIP_ROOT_DIR=${clr}" "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DBUILD_TEST=OFF" + "-DROCPROFILER_BUILD_TESTS=0" + "-DROCPROFILER_BUILD_SAMPLES=0" "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" @@ -108,6 +115,13 @@ stdenv.mkDerivation (finalAttrs: { postPatch = '' patchShebangs . + substituteInPlace cmake_modules/rocprofiler_utils.cmake \ + --replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE) + return()' + + substituteInPlace CMakeLists.txt \ + --replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' "" + substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \ --replace "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" @@ -117,8 +131,8 @@ stdenv.mkDerivation (finalAttrs: { postInstall = '' # Why do these not already have the executable bit set? - chmod +x $out/lib/rocprofiler/librocprof-tool.so - chmod +x $out/share/rocprofiler/tests-v1/test/ocl/SimpleConvolution + # chmod +x $out/lib/rocprofiler/librocprof-tool.so + # chmod +x $out/share/rocprofiler/tests-v1/test/ocl/SimpleConvolution # Why do these have the executable bit set? 
chmod -x $out/libexec/rocprofiler/counters/basic_counters.xml @@ -127,9 +141,10 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; + passthru.rocmtoolkit-merged = rocmtoolkit-merged; meta = with lib; { description = "Profiling with perf-counters and derived metrics"; @@ -137,8 +152,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor clr.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch b/pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch new file mode 100644 index 0000000000000..83bf88fb1e7e8 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch @@ -0,0 +1,147 @@ +From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch +diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake +index 7b7c472..0aba3ed 100644 +--- a/cmake_modules/rocprofiler_env.cmake ++++ b/cmake_modules/rocprofiler_env.cmake +@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE) + target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1) + endif() + ++set(ROCPROFILER_LD_AQLPROFILE false) + # Enable direct loading of AQL-profile HSA extension + if(ROCPROFILER_LD_AQLPROFILE) + target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1) +@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "") + message(FATAL_ERROR "ROCM_ROOT_DIR is not found.") + endif() + +-find_library( +- HSA_AMD_AQLPROFILE_LIBRARY +- NAMES hsa-amd-aqlprofile64 +- HINTS ${CMAKE_PREFIX_PATH} +- PATHS ${ROCM_ROOT_DIR} +- PATH_SUFFIXES lib 
REQUIRED) +diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt +index 61782f0..16c83bf 100644 +--- a/src/api/CMakeLists.txt ++++ b/src/api/CMakeLists.txt +@@ -51,15 +51,6 @@ find_file( + NO_DEFAULT_PATH REQUIRED) + get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY) + +-find_library( +- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so" +- HINTS ${CMAKE_PREFIX_PATH} +- PATHS ${ROCM_PATH} +- PATH_SUFFIXES lib) +- +-if(NOT AQLPROFILE_LIB) +- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!") +-endif() + + # ######################################################################################## + # Adding Old Library Files +@@ -247,7 +238,7 @@ target_include_directories( + PUBLIC $ + PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler) + target_link_libraries( +- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++ ++ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++ + dl rocprofiler::build-flags rocprofiler::memcheck) + + get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME) +@@ -325,8 +316,7 @@ target_link_options( + -Wl,--no-undefined) + target_link_libraries( + rocprofiler-v2 +- PRIVATE ${AQLPROFILE_LIB} +- hsa-runtime64::hsa-runtime64 ++ PRIVATE hsa-runtime64::hsa-runtime64 + Threads::Threads + atomic + numa +diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp +index 2c47186..6b39634 100644 +--- a/src/util/hsa_rsrc_factory.cpp ++++ b/src/util/hsa_rsrc_factory.cpp +@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize + if (kern_arg_pool_ == nullptr) + CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); + +- // Get AqlProfile API table +- aqlprofile_api_ = {}; +-#ifdef ROCP_LD_AQLPROFILE +- status = LoadAqlProfileLib(&aqlprofile_api_); +-#else +- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, +- 
hsa_ven_amd_aqlprofile_VERSION_MAJOR, +- sizeof(aqlprofile_api_), &aqlprofile_api_); +-#endif +- CHECK_STATUS("aqlprofile API table load failed", status); +- + // Get Loader API table + loader_api_ = {}; + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, +diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp +index 0a44d18..fab5b75 100644 +--- a/test/util/hsa_rsrc_factory.cpp ++++ b/test/util/hsa_rsrc_factory.cpp +@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize + if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); + if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); + +- // Get AqlProfile API table +- aqlprofile_api_ = {0}; +-#ifdef ROCP_LD_AQLPROFILE +- status = LoadAqlProfileLib(&aqlprofile_api_); +-#else +- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, +- hsa_ven_amd_aqlprofile_VERSION_MAJOR, +- sizeof(aqlprofile_api_), &aqlprofile_api_); +-#endif +- CHECK_STATUS("aqlprofile API table load failed", status); +- + // Get Loader API table + loader_api_ = {0}; + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, +diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt +index 107cb51..0f6d4bf 100644 +--- a/tests-v2/unittests/core/CMakeLists.txt ++++ b/tests-v2/unittests/core/CMakeLists.txt +@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES + INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests") + target_link_libraries( + runCoreUnitTests +- PRIVATE ${AQLPROFILE_LIB} +- test_hsatool_library ++ PRIVATE test_hsatool_library + hsa-runtime64::hsa-runtime64 + Threads::Threads + GTest::gtest GTest::gtest_main +@@ -285,4 +284,4 @@ endif() + # for the *_FilePlugin tests + if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output") 
+ file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output") +-endif() +\ No newline at end of file ++endif() +diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt +index 53180d5..0c4d4a7 100644 +--- a/tests-v2/unittests/profiler/CMakeLists.txt ++++ b/tests-v2/unittests/profiler/CMakeLists.txt +@@ -122,7 +122,7 @@ target_compile_definitions( + PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) + + target_link_libraries( +- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 ++ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64 + GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl) + + add_dependencies(tests runUnitTests) +@@ -158,4 +158,4 @@ endif() + # for the *_FilePlugin tests + if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output") + file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output") +-endif() +\ No newline at end of file ++endif() diff --git a/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix b/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix index 6d9a91be00ef1..cc1a5f973d9f0 100644 --- a/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix +++ b/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix @@ -12,13 +12,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocr-debug-agent"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocr_debug_agent"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-8Q800T7mwBy8/rujVNyCQ0ZpZ9uPKKk+Sv9ibpWou/8="; + hash = "sha256-HYag5/E72hopDhS9EVcdyGgSvzbCMzKqLC+SIS28Y9M="; }; nativeBuildInputs = [ @@ -45,8 +45,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -55,8 +55,5 @@ stdenv.mkDerivation 
(finalAttrs: { license = with licenses; [ ncsa ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocrand/default.nix b/pkgs/development/rocm-modules/6/rocrand/default.nix index 7d0e80b5af4e6..02c96b5a8c18a 100644 --- a/pkgs/development/rocm-modules/6/rocrand/default.nix +++ b/pkgs/development/rocm-modules/6/rocrand/default.nix @@ -1,34 +1,38 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, clr -, gtest -, gbenchmark -, buildTests ? false -, buildBenchmarks ? false -, gpuTargets ? [ ] +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? clr.localGpuTargets or [ ], }: stdenv.mkDerivation (finalAttrs: { - pname = "rocrand"; - version = "6.0.2"; + pname = "rocrand${clr.gpuArchSuffix}"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocRAND"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-BBkcYOP+zh3OQTxuSkeiJizwnE9Gr5Jbhx0e8SU/mmU="; + hash = "sha256-rrRLPqEw39M+6dtPW8DcnQiSZNwxWNINJ1wjU098Vkk="; }; nativeBuildInputs = [ @@ -37,45 +41,52 @@ stdenv.mkDerivation (finalAttrs: { clr ]; - buildInputs = lib.optionals buildTests [ - gtest - ] ++ lib.optionals buildBenchmarks [ - gbenchmark - ]; + buildInputs = + lib.optionals buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; - cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - "-DHIP_ROOT_DIR=${clr}" - # Manually define CMAKE_INSTALL_ - # See: 
https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] ++ lib.optionals (gpuTargets != [ ]) [ - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] ++ lib.optionals buildTests [ - "-DBUILD_TEST=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_BENCHMARK=ON" - ]; + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/test_* $test/bin - '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - mv $out/bin/benchmark_* $benchmark/bin - '' + lib.optionalString (buildTests || buildBenchmarks) '' - rm -r $out/bin/rocRAND - # Fail if bin/ isn't actually empty - rmdir $out/bin - ''; + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rm -r $out/bin/rocRAND + # Fail if bin/ isn't actually empty + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -84,6 +95,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor 
stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocsolver/default.nix b/pkgs/development/rocm-modules/6/rocsolver/default.nix index 577ef73d25343..f9126fd188be2 100644 --- a/pkgs/development/rocm-modules/6/rocsolver/default.nix +++ b/pkgs/development/rocm-modules/6/rocsolver/default.nix @@ -6,6 +6,7 @@ cmake, rocm-cmake, rocblas, + rocprim, rocsparse, clr, fmt, @@ -14,12 +15,25 @@ lapack-reference, buildTests ? false, buildBenchmarks ? false, - gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx906:xnack-" ] + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1010" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ] + ), }: stdenv.mkDerivation (finalAttrs: { - pname = "rocsolver"; - version = "6.0.2"; + pname = "rocsolver${clr.gpuArchSuffix}"; + version = "6.3.1"; outputs = [ @@ -36,12 +50,13 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocSOLVER"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-tglQpwCSFABRuEDiJrzQVFIdx9p85E2MiUYN0aoTAXo="; + hash = "sha256-+sGU+0CB48iolJSyYo+xH36q5LCUp+nKtOYbguzMuhg="; }; nativeBuildInputs = [ cmake + # no ninja, it buffers console output and nix times out long periods of no output rocm-cmake clr ] @@ -51,7 +66,11 @@ stdenv.mkDerivation (finalAttrs: { buildInputs = [ + # FIXME: rocblas and rocsolver can't build in parallel + # but rocsolver doesn't need rocblas' offload builds at build time + # could we build against a rocblas-minimal? 
rocblas + rocprim rocsparse fmt ] @@ -62,10 +81,16 @@ stdenv.mkDerivation (finalAttrs: { lapack-reference ]; + # Reduce parallelism of build to account for internal parallelism from HIP_CLANG_NUM_PARALLEL_JOBS + preConfigure = '' + export NIX_BUILD_CORES=$((1 + NIX_BUILD_CORES/10)) + makeFlagsArray+=("-l$(nproc)") + ''; cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" - "-DCMAKE_CXX_FLAGS=-Wno-switch" # Way too many warnings + "-DHIP_CLANG_NUM_PARALLEL_JOBS=10" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -97,10 +122,11 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; + enableParallelBuilding = true; requiredSystemFeatures = [ "big-parallel" ]; meta = with lib; { @@ -111,8 +137,5 @@ stdenv.mkDerivation (finalAttrs: { platforms = platforms.linux; timeout = 14400; # 4 hours maxSilent = 14400; # 4 hours - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocsparse/default.nix b/pkgs/development/rocm-modules/6/rocsparse/default.nix index a4c29bc980c92..422e6ef9b9fef 100644 --- a/pkgs/development/rocm-modules/6/rocsparse/default.nix +++ b/pkgs/development/rocm-modules/6/rocsparse/default.nix @@ -15,12 +15,12 @@ python3Packages, buildTests ? false, buildBenchmarks ? false, # Seems to depend on tests - gpuTargets ? [ ], + gpuTargets ? 
clr.localGpuTargets or clr.gpuTargets, }: stdenv.mkDerivation (finalAttrs: { pname = "rocsparse"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -37,11 +37,14 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocSPARSE"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-nTYnEHkTtq0jBeMj4HXpqkJu8LQc+Z6mpjhMP7tJAHQ="; + hash = "sha256-vyLfXbnxPZlR6mfbLh1E7S7HdOSHjuhGQcfihAlvvwY="; }; + # env.CFLAGS = "-fsanitize=undefined"; + # env.CXXFLAGS = "-fsanitize=undefined"; nativeBuildInputs = [ cmake + # no ninja, it buffers console output and nix times out long periods of no output rocm-cmake clr gfortran @@ -61,7 +64,7 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_BUILD_TYPE=Release" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -145,8 +148,8 @@ stdenv.mkDerivation (finalAttrs: { updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; }; @@ -156,8 +159,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocthrust/default.nix b/pkgs/development/rocm-modules/6/rocthrust/default.nix index 2044968441ff8..05c12afcd4375 100644 --- a/pkgs/development/rocm-modules/6/rocthrust/default.nix +++ b/pkgs/development/rocm-modules/6/rocthrust/default.nix @@ -15,7 +15,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocthrust"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -32,7 +32,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocThrust"; rev = "rocm-${finalAttrs.version}"; - hash = 
"sha256-Zk7FxcedaDUbx9RCX8aWN0xZO/B5cOs/l5MDqZKQpJo="; + hash = "sha256-c1+hqP/LipaQ2/lPJo79YBd9H0n0Y7yHkxe0/INE14s="; }; nativeBuildInputs = [ @@ -48,7 +48,6 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" "-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 @@ -64,9 +63,6 @@ stdenv.mkDerivation (finalAttrs: { ] ++ lib.optionals buildBenchmarks [ "-DBUILD_BENCHMARKS=ON" - ] - ++ lib.optionals (buildTests || buildBenchmarks) [ - "-DCMAKE_CXX_FLAGS=-Wno-deprecated-builtins" # Too much spam ]; postInstall = @@ -84,8 +80,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -94,8 +90,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ asl20 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/roctracer/default.nix b/pkgs/development/rocm-modules/6/roctracer/default.nix index b5e411dd059d6..5ddd3ced37fc1 100644 --- a/pkgs/development/rocm-modules/6/roctracer/default.nix +++ b/pkgs/development/rocm-modules/6/roctracer/default.nix @@ -19,7 +19,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "roctracer"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -36,7 +36,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "roctracer"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-a6/N6W3JXVI0VZRGxlS3cVENC3VTP1w9UFnd0+EWAuo="; + hash = "sha256-GhnF7rqNLQLLB7nzIp0xNqyqBOwj9ZJ+hzzj1EAaXWU="; }; nativeBuildInputs = @@ -106,8 +106,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = 
finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -116,8 +116,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor clr.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocwmma/default.nix b/pkgs/development/rocm-modules/6/rocwmma/default.nix index 7f6669312fd13..d529bb8d241d4 100644 --- a/pkgs/development/rocm-modules/6/rocwmma/default.nix +++ b/pkgs/development/rocm-modules/6/rocwmma/default.nix @@ -14,12 +14,12 @@ buildExtendedTests ? false, buildBenchmarks ? false, buildSamples ? false, - gpuTargets ? [ ], # gpuTargets = [ "gfx908:xnack-" "gfx90a:xnack-" "gfx90a:xnack+" ... ] + gpuTargets ? [ ], }: stdenv.mkDerivation (finalAttrs: { pname = "rocwmma"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -39,7 +39,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocWMMA"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-vbC4OuCmEpD38lVq0uXNw86iS4KkL6isOVq6vmlu1oM="; + hash = "sha256-kih3hn6QhcMmyj9n8f8eO+RIgKQgWKIuzg8fb0eoRPE="; }; patches = lib.optionals (buildTests || buildBenchmarks) [ @@ -64,7 +64,9 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" "-DROCWMMA_BUILD_TESTS=${if buildTests || buildBenchmarks then "ON" else "OFF"}" "-DROCWMMA_BUILD_SAMPLES=${if buildSamples then "ON" else "OFF"}" # Manually define CMAKE_INSTALL_ @@ -105,8 +107,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit 
(finalAttrs.src) repo; }; meta = with lib; { @@ -115,8 +117,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rpp/default.nix b/pkgs/development/rocm-modules/6/rpp/default.nix index 31b67cc860576..cfde91f7d5237 100644 --- a/pkgs/development/rocm-modules/6/rpp/default.nix +++ b/pkgs/development/rocm-modules/6/rpp/default.nix @@ -29,13 +29,13 @@ stdenv.mkDerivation (finalAttrs: { "cpu" ); - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rpp"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-AquAVoEqlsBVxd41hG2sVo9UoSS+255eCQzIfGkC/Tk="; + hash = "sha256-METwagek17/DdZGaOTQqvyU6xGt7OBMLHk4YM4KmgtA="; }; nativeBuildInputs = @@ -55,16 +55,19 @@ stdenv.mkDerivation (finalAttrs: { boost ]; + CFLAGS = "-I${openmp.dev}/include"; + CXXFLAGS = "-I${openmp.dev}/include"; cmakeFlags = [ + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" "-DROCM_PATH=${clr}" ] ++ lib.optionals (gpuTargets != [ ]) [ "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals (!useOpenCL && !useCPU) [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" "-DBACKEND=HIP" ] ++ lib.optionals (useOpenCL && !useCPU) [ @@ -86,8 +89,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -96,8 +99,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != 
versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/tensile/0001-solutionstructs-perf-fix.diff b/pkgs/development/rocm-modules/6/tensile/0001-solutionstructs-perf-fix.diff new file mode 100644 index 0000000000000..7157238042ece --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/0001-solutionstructs-perf-fix.diff @@ -0,0 +1,48 @@ +diff --git a/Tensile/SolutionStructs.py b/Tensile/SolutionStructs.py +index f663c6f1..17bcf897 100644 +--- a/Tensile/SolutionStructs.py ++++ b/Tensile/SolutionStructs.py +@@ -4828,24 +4828,26 @@ class Solution(collections.abc.Mapping): + # create a dictionary of lists of parameter values + @staticmethod + def getSerialNaming(objs): ++ valid_params = sorted(validParameters.keys()) + data = {} +- for objIdx in range(0, len(objs)): +- obj = objs[objIdx] +- for paramName in sorted(obj.keys()): +- if paramName in list(validParameters.keys()): +- paramValue = obj[paramName] +- if paramName in data: +- if paramValue not in data[paramName]: +- data[paramName].append(paramValue) +- else: +- data[paramName] = [ paramValue ] +- maxObjs = 1 +- for paramName in data: +- if not isinstance(data[paramName][0],dict): +- data[paramName] = sorted(data[paramName]) +- maxObjs *= len(data[paramName]) +- numDigits = len(str(maxObjs)) +- return [ data, numDigits ] ++ ++ objs = [getattr(obj, "_state", obj) for obj in objs] ++ ++ for param in valid_params: ++ d = [] ++ for obj in objs: ++ if param in obj: ++ v = obj[param] ++ if v not in d: ++ d.append(v) ++ if len(d): ++ if not isinstance(d[0], dict): d.sort() ++ data[param] = d ++ ++ # Calculate max objects using prod() from math module ++ max_objs = math.prod(len(values) for values in data.values()) ++ num_digits = len(str(max_objs)) ++ return data, num_digits + + ######################################## + # Get Name Serial \ No newline at end of file diff --git 
a/pkgs/development/rocm-modules/6/tensile/0002-msgpack-zstd.diff b/pkgs/development/rocm-modules/6/tensile/0002-msgpack-zstd.diff new file mode 100644 index 0000000000000..fd1fcad2f8351 --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/0002-msgpack-zstd.diff @@ -0,0 +1,56 @@ +diff --git a/Tensile/Source/lib/source/msgpack/MessagePack.cpp b/Tensile/Source/lib/source/msgpack/MessagePack.cpp +index de97929c..dbc397e0 100644 +--- a/Tensile/Source/lib/source/msgpack/MessagePack.cpp ++++ b/Tensile/Source/lib/source/msgpack/MessagePack.cpp +@@ -28,6 +28,8 @@ + + #include + ++#include ++ + #include + + namespace Tensile +@@ -86,6 +88,34 @@ namespace Tensile + return nullptr; + } + ++ // Check if the file is zstd compressed ++ char magic[4]; ++ in.read(magic, 4); ++ bool isCompressed = (in.gcount() == 4 && magic[0] == '\x28' && magic[1] == '\xB5' && magic[2] == '\x2F' && magic[3] == '\xFD'); ++ // Reset file pointer to the beginning ++ in.seekg(0, std::ios::beg); ++ ++ if (isCompressed) { ++ // Decompress zstd file ++ std::vector compressedData((std::istreambuf_iterator(in)), std::istreambuf_iterator()); ++ ++ size_t decompressedSize = ZSTD_getFrameContentSize(compressedData.data(), compressedData.size()); ++ if (decompressedSize == ZSTD_CONTENTSIZE_ERROR || decompressedSize == ZSTD_CONTENTSIZE_UNKNOWN) { ++ if(Debug::Instance().printDataInit()) ++ std::cout << "Error: Unable to determine decompressed size for " << filename << std::endl; ++ return nullptr; ++ } ++ ++ std::vector decompressedData(decompressedSize); ++ size_t dSize = ZSTD_decompress(decompressedData.data(), decompressedSize, compressedData.data(), compressedData.size()); ++ if (ZSTD_isError(dSize)) { ++ if(Debug::Instance().printDataInit()) ++ std::cout << "Error: ZSTD decompression failed for " << filename << std::endl; ++ return nullptr; ++ } ++ ++ msgpack::unpack(result, decompressedData.data(), dSize); ++ } else { + msgpack::unpacker unp; + bool finished_parsing; + constexpr size_t buffer_size = 
1 << 19; +@@ -109,6 +139,7 @@ namespace Tensile + + return nullptr; + } ++ } + } + catch(std::runtime_error const& exc) + { diff --git a/pkgs/development/rocm-modules/6/tensile/Parallel.py b/pkgs/development/rocm-modules/6/tensile/Parallel.py new file mode 100644 index 0000000000000..943900b3b9a53 --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/Parallel.py @@ -0,0 +1,146 @@ +import os +import time +from typing import Any, Callable +import multiprocessing +from functools import partial + +load_average_supported = hasattr(os, 'getloadavg') +delays = 0 +nix_build_cores = int(os.environ.get('NIX_LOAD_LIMIT', os.environ.get('NIX_BUILD_CORES', "-1"))) + +def CPUThreadCount(enable=True): + if not enable: + return 1 + else: + from .Common import globalParameters + cpuThreads = nix_build_cores if nix_build_cores > 0 else globalParameters["CpuThreads"]  # unset env parses to -1 (truthy); only a positive value overrides CpuThreads + if cpuThreads < 1: + if os.name == "nt": + cpuThreads = os.cpu_count() + else: + cpuThreads = len(os.sched_getaffinity(0)) + return max(1, min(cpuThreads, 32)) + + +def OverwriteGlobalParameters(newGlobalParameters): + from . 
import Common + + Common.globalParameters.clear() + Common.globalParameters.update(newGlobalParameters) + + +def pcallWithGlobalParamsMultiArg(f, args, newGlobalParameters): + OverwriteGlobalParameters(newGlobalParameters) + return f(*args) + + +def pcallWithGlobalParamsSingleArg(f, arg, newGlobalParameters): + OverwriteGlobalParameters(newGlobalParameters) + return f(arg) + +def worker_function(args, function, multiArg, shared_dict): + if load_average_supported: + global delays + lim = CPUThreadCount() + while (os.getloadavg()[0] - delays) > lim: + time.sleep(1) + delays += 1 + OverwriteGlobalParameters(shared_dict) + if multiArg: + return function(*args) + else: + return function(args) + +def imap_with_progress(pool, func, iterable, total, message): + results = [] + idx = 0 + for result in pool.imap(func, iterable, chunksize=max(1, total // 2000)): + results.append(result) + idx += 1 + if idx % (1 + (total // 100)) == 0: + print(f"{message}\t{idx: 5d}/{total: 5d}") + print(f"\n{message} done!\t{idx: 5d}/{total: 5d}") + return results + +def _with_idx(func, parts): + idx, obj = parts + return idx, func(obj) + +def imap_with_progress2(pool, func, iterable, total, message): + results = [None] * total + + fn = partial(_with_idx, func) + for idx, result in enumerate(pool.imap_unordered(fn, enumerate(iterable), chunksize=max(1, total // 2500))): + orig_idx, item_result = result + results[orig_idx] = item_result + if idx % (1 + (total // 100)) == 0: + print(f"{message}\t{idx+1: 5d}/{total: 5d}") + print(f"\n{message} done!\t{idx+1: 5d}/{total: 5d}") + return results + +def ParallelMap( + function: Callable, + objects: Any, + message: str = "", + enable: bool = True, + multiArg: bool = True, + return_as: str = "list" +) -> list: + """Executes a function over a list of objects in parallel or sequentially. + + This function is generally equivalent to ``list(map(function, objects))``. 
However, it provides + additional functionality to run in parallel, depending on the 'enable' flag and available CPU + threads. + + Args: + function: The function to apply to each item in 'objects'. If 'multiArg' is True, 'function' + should accept multiple arguments. + objects: An iterable of objects to be processed by 'function'. If 'multiArg' is True, each + item in 'objects' should be an iterable of arguments for 'function'. + message: Optional; a message describing the operation. Default is an empty string. + enable: Optional; if False, disables parallel execution and runs sequentially. Default is True. + multiArg: Optional; if True, treats each item in 'objects' as multiple arguments for + 'function'. Default is True. + + Returns: + A list containing the results of applying **function** to each item in **objects**. + """ + if return_as != "list": + print(f"Ignoring unknown return_as {return_as} for {message}\n") + from .Common import globalParameters + + threadCount = CPUThreadCount(enable) + + if not hasattr(objects, "__len__"): + objects = list(objects) + + objLen = len(objects) + if objLen == 0: + return [] + + f = (lambda x: function(*x)) if multiArg else function + if objLen == 1: + print(f"{message}: (1 task)") + return [f(x) for x in objects] + + extra_message = ( + f": {threadCount} thread(s)" + f", {objLen} tasks" + if objLen + else "" + ) + + print(f"\nParallelMap {message}{extra_message}\n") + + if threadCount <= 1: + return [f(x) for x in objects] + + ctx = multiprocessing.get_context('forkserver') + multiprocessing.set_forkserver_preload(["tensile.Common", "tensile.Parallel", "tensile.TensileCreateLibrary"]) + with ctx.Pool(processes=threadCount, maxtasksperchild=1024) as pool: + worker = partial(worker_function, function=function, multiArg=multiArg, shared_dict=globalParameters) + return list(imap_with_progress(pool, worker, objects, objLen, message)) + +# Compat with tensilelite folder version of tensile that's +# in-tree with hipblaslt, not 
needed for tensile for rocm-6.2 +ParallelMap2 = ParallelMap +ParallelMapReturnAsGenerator = ParallelMap diff --git a/pkgs/development/rocm-modules/6/tensile/default.nix b/pkgs/development/rocm-modules/6/tensile/default.nix index 0a1f4d60ea5ea..8b0039a96d8c3 100644 --- a/pkgs/development/rocm-modules/6/tensile/default.nix +++ b/pkgs/development/rocm-modules/6/tensile/default.nix @@ -2,52 +2,89 @@ lib, stdenv, fetchFromGitHub, - fetchpatch, rocmUpdateScript, buildPythonPackage, pytestCheckHook, setuptools, pyyaml, msgpack, + simplejson, + ujson, + orjson, pandas, joblib, filelock, rocminfo, + zstd, + rich, + isTensileLite ? false, + altParallelImpl ? false, }: buildPythonPackage rec { - pname = "tensile"; - version = "6.0.2"; + pname = if isTensileLite then "tensilelite" else "tensile"; + version = "6.2.4"; format = "pyproject"; src = fetchFromGitHub { owner = "ROCm"; repo = "Tensile"; - rev = "rocm-${version}"; - hash = "sha256-B9/2Iw1chwDL6it1CKC8W8v4Qac/J2z9nwlpwjnllDc="; + # rev = "rocm-${version}"; + # hash = "sha256-E9UtdCLPUzRoNMzjD+A00faMx9eOxH5ouU04WNl2vjM="; + rev = "1752af518190500891a865379a4569b8abf6ba01"; # with code object compression! 
+ hash = "sha256-Wvz4PVs//3Ox7ykZHpjPzOVwlyATyc+MmVVenfTzWK4="; }; + # TODO: run asm caps test ONCE for all supported arches as part of this build + # We currently disable the test because it's slow and runs each time tensile launches (multiple times per build) + + postPatch = '' + ${lib.optionalString (!isTensileLite) '' + if grep -F .SafeLoader Tensile/LibraryIO.py; then + substituteInPlace Tensile/LibraryIO.py \ + --replace-fail "yaml.SafeLoader" "yaml.CSafeLoader" + fi + substituteInPlace Tensile/Common.py \ + --replace-fail 'globalParameters["PrintLevel"] = 1' 'globalParameters["PrintLevel"] = 2' + # See TODO above about asm caps test + substituteInPlace Tensile/Common.py \ + --replace-fail 'if globalParameters["AssemblerPath"] is not None:' "if False:" + ''} + ${lib.optionalString altParallelImpl '' + substituteInPlace requirements.txt \ + --replace-fail joblib "" + rm Tensile/Parallel.py + cp ${./Parallel.py} Tensile/Parallel.py + ''} + find . -type f -iname "*.sh" -exec chmod +x {} \; + patchShebangs Tensile + ''; + buildInputs = [ setuptools ]; - propagatedBuildInputs = [ - pyyaml - msgpack - pandas - joblib - ]; + propagatedBuildInputs = + [ + pyyaml + msgpack + pandas + # FIXME: make zstd optional; it is only needed when the msgpack-zstd patch is enabled + zstd # propagated because this *produces source that needs to link to zstd* when invoked in downstream builds + ] + ++ lib.optional (!altParallelImpl) joblib + ++ lib.optionals (!isTensileLite) [ + rich + ] + ++ lib.optionals isTensileLite [ + simplejson + ujson + orjson + ]; - patches = [ - (fetchpatch { - name = "Extend-Tensile-HIP-ISA-compatibility.patch"; - url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch"; - hash = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8="; - }) - (fetchpatch { - name = "Don-t-copy-file-twice-in-copyStaticFiles.patch"; - url = "https://github.com/GZGavinZhao/Tensile/commit/9e14d5a00a096bddac605910a0e4dfb4c35bb0d5.patch"; - hash = 
"sha256-gOzjJyD1K056OFQ+hK5nbUeBhxLTIgQLoT+0K12SypI="; - }) - ]; + patches = + (lib.optional (!isTensileLite) ./tensile-6.3.0-create-library-dont-copy-twice.diff) + ++ (lib.optional isTensileLite ./tensile-create-library-dont-copy-twice.diff) + ++ (lib.optional isTensileLite ./gen_assembly-venv-err-handling.diff) + ++ (lib.optional isTensileLite ./log-fallback.diff); doCheck = false; # Too many errors, not sure how to set this up properly @@ -65,8 +102,8 @@ buildPythonPackage rec { passthru.updateScript = rocmUpdateScript { name = pname; - owner = src.owner; - repo = src.repo; + inherit (src) owner; + inherit (src) repo; }; meta = with lib; { @@ -75,7 +112,5 @@ buildPythonPackage rec { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor version != versions.minor stdenv.cc.version || versionAtLeast version "7.0.0"; }; } diff --git a/pkgs/development/rocm-modules/6/tensile/gen_assembly-venv-err-handling.diff b/pkgs/development/rocm-modules/6/tensile/gen_assembly-venv-err-handling.diff new file mode 100644 index 0000000000000..0667599a58391 --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/gen_assembly-venv-err-handling.diff @@ -0,0 +1,36 @@ +diff --git a/Tensile/Ops/gen_assembly.sh b/Tensile/Ops/gen_assembly.sh +index 0b21b6c6..609f1dd1 100755 +--- a/Tensile/Ops/gen_assembly.sh ++++ b/Tensile/Ops/gen_assembly.sh +@@ -23,6 +23,8 @@ + # + ################################################################################ + ++set -x ++ + archStr=$1 + dst=$2 + venv=$3 +@@ -35,7 +37,13 @@ fi + + toolchain=${rocm_path}/llvm/bin/clang++ + +-. ${venv}/bin/activate ++if ! [ -z ${TENSILE_GEN_ASSEMBLY_TOOLCHAIN+x} ]; then ++ toolchain="${TENSILE_GEN_ASSEMBLY_TOOLCHAIN}" ++fi ++ ++if [ -f ${venv}/bin/activate ]; then ++ . 
${venv}/bin/activate ++fi + + IFS=';' read -r -a archs <<< "$archStr" + +@@ -77,4 +85,6 @@ for arch in "${archs[@]}"; do + python3 ./ExtOpCreateLibrary.py --src=$dst --co=$dst/extop_$arch.co --output=$dst --arch=$arch + done + +-deactivate ++if [ -f ${venv}/bin/activate ]; then ++ deactivate ++fi diff --git a/pkgs/development/rocm-modules/6/tensile/log-fallback.diff b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff new file mode 100644 index 0000000000000..7d5de6d68d0af --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff @@ -0,0 +1,101 @@ +diff --git a/Tensile/Ops/AMaxGenerator.py b/Tensile/Ops/AMaxGenerator.py +index 03325974..666c53e4 100644 +--- a/Tensile/Ops/AMaxGenerator.py ++++ b/Tensile/Ops/AMaxGenerator.py +@@ -125,6 +125,8 @@ class AMaxKernelGenerator: + self.op = 'AMax' + self.sgprs = collections.OrderedDict() + self.vgprs = collections.OrderedDict() ++ if any(value is None for value in self.__dict__.values()): ++ raise Exception(f"Some values in {self.__dict__} are None") + + @property + def lds_usage_byte(self) -> int: +@@ -841,6 +843,7 @@ if __name__ == '__main__': + isa = gfxArch(arch) + + if any([not i for i in (arch, toolchain_path, isa)]): ++ print("Fell back to defaults due to `not i` in AMaxGenerator! 
(arch, toolchain_path, isa)", (arch, toolchain_path, isa)) + restoreDefaultGlobalParameters() + assignGlobalParameters({}) + detectGlobalCurrentISA() +@@ -865,11 +868,15 @@ if __name__ == '__main__': + output_path_basename = os.path.splitext(output_path)[0] + + if debug_build: +- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] ++ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] + else: +- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] ++ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] + + ret = subprocess.run([toolchain_path] + build_args) ++ if ret.returncode > 0: ++ exit(ret.returncode) + ret = subprocess.run([toolchain_path, '-target', 'amdcgn-amdhsa', '-o', f'{output_path_basename}.co', f'{output_path_basename}.o']) ++ if ret.returncode > 0: ++ exit(ret.returncode) + amax.dump('yaml', f'{output_path_basename}.yaml') + +diff --git a/Tensile/Ops/LayerNormGenerator.py b/Tensile/Ops/LayerNormGenerator.py +index 9546d3c2..19a6735a 100644 +--- a/Tensile/Ops/LayerNormGenerator.py ++++ b/Tensile/Ops/LayerNormGenerator.py +@@ -123,6 +123,8 @@ class LayerNormKernelGenerator: + self.op = 'LayerNorm' + self.sgprs = collections.OrderedDict() + self.vgprs = collections.OrderedDict() ++ if any(value is None for value in self.__dict__.values()): ++ raise Exception(f"Some values in {self.__dict__} are None") + + @property + def lds_usage_byte(self) -> int: +@@ -922,6 +924,7 @@ if __name__ == '__main__': + isa = 
gfxArch(arch) + + if any([not i for i in (arch, toolchain_path, isa)]): ++ print("Fell back to defaults due to `not i` in LayerNormGenerator! (arch, toolchain_path, isa)", (arch, toolchain_path, isa)) + restoreDefaultGlobalParameters() + assignGlobalParameters({}) + detectGlobalCurrentISA() +@@ -946,11 +949,15 @@ if __name__ == '__main__': + output_path_basename = os.path.splitext(output_path)[0] + + if debug_build: +- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] ++ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] + else: +- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] ++ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] + + ret = subprocess.run([toolchain_path] + build_args) ++ if ret.returncode > 0: ++ exit(ret.returncode) + ret = subprocess.run([toolchain_path, '-target', 'amdcgn-amdhsa', '-o', f'{output_path_basename}.co', f'{output_path_basename}.o']) ++ if ret.returncode > 0: ++ exit(ret.returncode) + layernorm.dump('yaml', f'{output_path_basename}.yaml') + +diff --git a/Tensile/Ops/SoftmaxGenerator.py b/Tensile/Ops/SoftmaxGenerator.py +index dc4c53aa..57851859 100644 +--- a/Tensile/Ops/SoftmaxGenerator.py ++++ b/Tensile/Ops/SoftmaxGenerator.py +@@ -97,6 +97,8 @@ class SoftmaxKernelGenerator: + self.debug_label = True + self.arch = arch + self.op = 'Softmax' ++ if any(value is None for value in self.__dict__.values()): ++ raise Exception(f"Some values in {self.__dict__} are None") + + def 
_validate(self): + assert self.num_cols * self.num_rows == self.num_workitems +@@ -719,5 +721,9 @@ if __name__ == '__main__': + build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] + + ret = subprocess.run([toolchain_path] + build_args) ++ if ret.returncode > 0: ++ exit(ret.returncode) + ret = subprocess.run([toolchain_path, '-target', 'amdcgn-amdhsa', '-o', f'{output_path_basename}.co', f'{output_path_basename}.o']) ++ if ret.returncode > 0: ++ exit(ret.returncode) + softmax.dump('yaml', f'{output_path_basename}.yaml') diff --git a/pkgs/development/rocm-modules/6/tensile/tensile-6.3.0-create-library-dont-copy-twice.diff b/pkgs/development/rocm-modules/6/tensile/tensile-6.3.0-create-library-dont-copy-twice.diff new file mode 100644 index 0000000000000..2b680241d9e4e --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/tensile-6.3.0-create-library-dont-copy-twice.diff @@ -0,0 +1,20 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index a1644606..c6ca2882 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -852,9 +852,14 @@ def copyStaticFiles(outputPath=None): + "KernelHeader.h", + ] + ++ import filecmp + for fileName in libraryStaticFiles: + # copy file +- shutil.copy(os.path.join(globalParameters["SourcePath"], fileName), outputPath) ++ # no need to copy twice if it has already been copied ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + return libraryStaticFiles + diff --git a/pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff b/pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff new file mode 100644 index 0000000000000..c630803c191f4 --- /dev/null +++ 
b/pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff @@ -0,0 +1,37 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index 2b9da394..b001fa7c 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -808,10 +808,13 @@ def copyStaticFiles(outputPath=None): + "ReductionTemplate.h", + "memory_gfx.h" ] + ++ import filecmp + for fileName in libraryStaticFiles: +- # copy file +- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \ +- outputPath ) ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ # no need to copy twice if it has already been copied ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + return libraryStaticFiles + +@@ -1417,9 +1420,13 @@ def TensileCreateLibrary(): + writeCMake(outputPath, solutionFiles, sourceKernelFiles, staticFiles, masterLibraries) + + # Make sure to copy the library static files. ++ import filecmp + for fileName in staticFiles: +- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \ +- outputPath ) ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ # no need to copy twice if it has already been copied ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + # write solutions and kernels + codeObjectFiles = writeSolutionsAndKernels(outputPath, CxxCompiler, None, solutions, diff --git a/pkgs/development/rocm-modules/6/update.nix b/pkgs/development/rocm-modules/6/update.nix index ee638dc58c2f4..69a45f9d4cd40 100644 --- a/pkgs/development/rocm-modules/6/update.nix +++ b/pkgs/development/rocm-modules/6/update.nix @@ -7,8 +7,10 @@ name ? "", owner ? "", repo ? "", - page ? "releases/latest", - filter ? ".tag_name | split(\"-\") | .[1]", + page ? 
"releases", + # input: array of [ { tag_name: "rocm-6.x.x", }, ... ]. some entries may have bad names like rocm-test-date we want to skip + # output: first tag_name/name that's a proper version if any + filter ? "map(.tag_name // .name) | map(select(test(\"^rocm-[0-9]+\\\\.[0-9]+(\\\\.[0-9]+)?$\"))) | first | ltrimstr(\"rocm-\")", }: let @@ -18,13 +20,39 @@ let updateScript = writeScript "update.sh" '' #!/usr/bin/env nix-shell #!nix-shell -i bash -p curl jq common-updater-scripts - version="$(curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} \ - -sL "https://api.github.com/repos/${owner}/${repo}/${page}" | jq '${filter}' --raw-output)" + set -euo pipefail - IFS='.' read -a version_arr <<< "$version" + fetch_releases() { + local api_url="https://api.github.com/repos/${owner}/${repo}/${page}" + if [ "${page}" = "releases" ]; then + api_url="$api_url?per_page=4" + fi + >&2 echo $api_url + curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} -sL "$api_url" + } + + find_valid_version() { + local releases="$1" + >&2 echo "$releases" + # Wrap in array if not already an array to make handling specific release or tags page the same + >&2 echo jq -r 'if type == "array" then . else [.] end | ${filter}' + echo "$releases" | jq -r 'if type == "array" then . else [.] end | ${filter}' + } + + releases="$(fetch_releases)" + version="$(find_valid_version "$releases")" + + if [ -z "$version" ]; then + echo "No valid version found in the fetched release(s)." >&2 + exit 1 + fi + + IFS='.' read -ra version_arr <<< "$version" + + >&2 echo parsed version "$version_arr" from "$version" if (( ''${version_arr[0]} > 6 )); then - echo "'rocmPackages_6.${pname}' is already at it's maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." 1>&2 + echo "'rocmPackages_6.${pname}' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." 
>&2 exit 1 fi From a170449e90ad96466326f99cc9e2d879e4de3fc4 Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Sat, 4 Jan 2025 10:24:51 -0800 Subject: [PATCH 02/11] ucx: fix unsupported argument parallel-jobs error by using rocm stdenv --- pkgs/by-name/uc/ucx/package.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkgs/by-name/uc/ucx/package.nix b/pkgs/by-name/uc/ucx/package.nix index 87fb2eaaf824e..f38a5b0d14f08 100644 --- a/pkgs/by-name/uc/ucx/package.nix +++ b/pkgs/by-name/uc/ucx/package.nix @@ -33,8 +33,10 @@ let paths = rocmList; }; + # rocm build fails with gcc stdenv due to unrecognised arg parallel-jobs + stdenv' = if enableRocm then rocmPackages.stdenv else stdenv; in -stdenv.mkDerivation rec { +stdenv'.mkDerivation rec { pname = "ucx"; version = "1.17.0"; From bbd976e281ee234599b7204f8f4ef18667b94a84 Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Sat, 4 Jan 2025 10:25:49 -0800 Subject: [PATCH 03/11] pkgsRocm: add package set with cuda config off, rocm config on --- pkgs/top-level/stage.nix | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkgs/top-level/stage.nix b/pkgs/top-level/stage.nix index 1cedd8dd18458..6d8482e4f1405 100644 --- a/pkgs/top-level/stage.nix +++ b/pkgs/top-level/stage.nix @@ -321,6 +321,15 @@ let }; }); + # Full package set with rocm on cuda off + # Mostly useful for asserting pkgs.pkgsRocm.torchWithRocm == pkgs.torchWithRocm and similar + pkgsRocm = nixpkgsFun ({ + config = super.config // { + cudaSupport = false; + rocmSupport = true; + }; + }); + pkgsExtraHardening = nixpkgsFun { overlays = [ (self': super': { From 1cbf7f22453b7e98c3826020f74ee579777cf0ad Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Sat, 4 Jan 2025 10:26:30 -0800 Subject: [PATCH 04/11] torch: fix rocm build --- .../python-modules/torch/default.nix | 49 +++++++++++++------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix 
index 26c1e15207a59..f6ae2e4eb067b 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -16,11 +16,10 @@ if cudaSupport then magma-cuda-static else if rocmSupport then - magma-hip + null else magma, magma, - magma-hip, magma-cuda-static, # Use the system NCCL as long as we're targeting CUDA on a supported platform. useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported || rocmSupport), @@ -36,6 +35,7 @@ symlinkJoin, which, pybind11, + pkg-config, removeReferencesTo, # Build inputs @@ -54,6 +54,7 @@ cffi, click, typing-extensions, + six, # ROCm build and `torch.compile` requires `triton` tritonSupport ? (!stdenv.hostPlatform.isDarwin), triton, @@ -66,7 +67,13 @@ # (dependencies without cuda support). # Instead we should rely on overlays and nixpkgsFun. # (@SomeoneSerge) - _tritonEffective ? if cudaSupport then triton-cuda else triton, + _tritonEffective ? + if cudaSupport then + triton-cuda + else if rocmSupport then + rocmPackages.triton + else + triton, triton-cuda, # Unit tests @@ -86,13 +93,13 @@ # dependencies for torch.utils.tensorboard pillow, - six, + future, tensorboard, protobuf, # ROCm dependencies rocmSupport ? config.rocmSupport, - rocmPackages_5, + rocmPackages, gpuTargets ? [ ], vulkanSupport ? 
false, @@ -112,8 +119,6 @@ let triton = throw "python3Packages.torch: use _tritonEffective instead of triton to avoid divergence"; - rocmPackages = rocmPackages_5; - setBool = v: if v then "1" else "0"; # https://github.com/pytorch/pytorch/blob/v2.4.0/torch/utils/cpp_extension.py#L1953 @@ -179,7 +184,7 @@ let clr rccl miopen - miopengemm + aotriton rocrand rocblas rocsparse @@ -191,10 +196,12 @@ let rocfft rocsolver hipfft + hiprand hipsolver + hipblas-common hipblas + hipblaslt rocminfo - rocm-thunk rocm-comgr rocm-device-libs rocm-runtime @@ -211,6 +218,7 @@ let brokenConditions = attrsets.filterAttrs (_: cond: cond) { "CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport; "CUDA is not targeting Linux" = cudaSupport && !stdenv.hostPlatform.isLinux; + "ROCm 6 is currently not compatible with magma" = rocmSupport && effectiveMagma != null; "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ @@ -224,8 +232,6 @@ let # In particular, this triggered warnings from cuda's `aliases.nix` "Magma cudaPackages does not match cudaPackages" = cudaSupport && (effectiveMagma.cudaPackages.cudaVersion != cudaPackages.cudaVersion); - "Rocm support is currently broken because `rocmPackages.hipblaslt` is unpackaged. 
(2024-06-09)" = - rocmSupport; }; git-unroll = fetchFromGitea { @@ -300,6 +306,10 @@ buildPythonPackage rec { # annotations (3.7), print_function (3.0), with_statement (2.6) are all supported sed -i -e "/from __future__ import/d" **.py + substituteInPlace third_party/NNPACK/CMakeLists.txt --replace "PYTHONPATH=" 'PYTHONPATH=$ENV{PYTHONPATH}:' + # flag from cmakeFlags doesn't work, not clear why + # setting it at the top of NNPACK's own CMakeLists does + sed -i '2s;^;set(PYTHON_SIX_SOURCE_DIR ${six.src})\n;' third_party/NNPACK/CMakeLists.txt '' + lib.optionalString rocmSupport '' # https://github.com/facebookincubator/gloo/pull/297 @@ -372,6 +382,10 @@ buildPythonPackage rec { # We only do an imports check, so do not build tests either. BUILD_TEST = setBool false; + # ninja hook doesn't automatically turn on ninja + # because pytorch setup.py is responsible for this + CMAKE_GENERATOR = "Ninja"; + # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for # it by default. PyTorch currently uses its own vendored version # of oneDNN through Intel iDeep. @@ -382,14 +396,15 @@ buildPythonPackage rec { # Also avoids pytorch exporting the headers of pybind11 USE_SYSTEM_PYBIND11 = true; - # NB technical debt: building without NNPACK as workaround for missing `six` - USE_NNPACK = 0; + # Multicore CPU convnet support + USE_NNPACK = 1; # Explicitly enable MPS for Darwin USE_MPS = setBool stdenv.hostPlatform.isDarwin; cmakeFlags = [ + (lib.cmakeFeature "PYTHON_SIX_SOURCE_DIR" "${six.src}") # (lib.cmakeBool "CMAKE_FIND_DEBUG_MODE" true) (lib.cmakeFeature "CUDAToolkit_VERSION" cudaPackages.cudaVersion) ] @@ -438,6 +453,8 @@ buildPythonPackage rec { env = { + # Builds faster without this and we don't have enough inputs that cmd length is an issue + NIX_CC_USE_RESPONSE_FILE = 0; # disable warnings as errors as they break the build on every compiler # bump, among other things. 
# Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++: @@ -447,6 +464,9 @@ buildPythonPackage rec { } // lib.optionalAttrs vulkanSupport { VULKAN_SDK = shaderc.bin; + } + // lib.optionalAttrs rocmSupport { + AOTRITON_INSTALLED_PREFIX = "${rocmPackages.aotriton}"; }; nativeBuildInputs = @@ -455,6 +475,7 @@ buildPythonPackage rec { which ninja pybind11 + pkg-config removeReferencesTo ] ++ lib.optionals cudaSupport ( @@ -500,7 +521,7 @@ buildPythonPackage rec { ] ) ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ] - ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] + ++ lib.optionals (effectiveMagma != null && (cudaSupport || rocmSupport)) [ effectiveMagma ] ++ lib.optionals stdenv.hostPlatform.isLinux [ numactl ] ++ lib.optionals stdenv.hostPlatform.isDarwin [ apple-sdk_13 From 61eed297e0ec32e935fe5cf8caadb264fb9e469d Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Sat, 4 Jan 2025 10:26:45 -0800 Subject: [PATCH 05/11] torchaudio: fix rocm build --- pkgs/development/python-modules/torchaudio/default.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/pkgs/development/python-modules/torchaudio/default.nix b/pkgs/development/python-modules/torchaudio/default.nix index a56d4df7d0ccf..992ef77ab8f4a 100644 --- a/pkgs/development/python-modules/torchaudio/default.nix +++ b/pkgs/development/python-modules/torchaudio/default.nix @@ -50,7 +50,6 @@ let hipsolver hipblas rocminfo - rocm-thunk rocm-comgr rocm-device-libs rocm-runtime From 6fd4b1504905a05de4a9b545bdc923efebd1c354 Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Fri, 17 Jan 2025 08:15:41 -0800 Subject: [PATCH 06/11] ollama: fix rocm build --- pkgs/by-name/ol/ollama/package.nix | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/pkgs/by-name/ol/ollama/package.nix b/pkgs/by-name/ol/ollama/package.nix index 89e8e1ee22bdd..6da4905238b6f 100644 --- a/pkgs/by-name/ol/ollama/package.nix +++ b/pkgs/by-name/ol/ollama/package.nix @@ -70,6 
+70,7 @@ let rocmLibs = [ rocmPackages.clr + rocmPackages.hipblas-common rocmPackages.hipblas rocmPackages.rocblas rocmPackages.rocsolver @@ -77,10 +78,9 @@ let rocmPackages.rocm-device-libs rocmPackages.rocm-smi ]; - rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; }; rocmPath = buildEnv { name = "rocm-path"; - paths = rocmLibs ++ [ rocmClang ]; + paths = rocmLibs; }; cudaLibs = [ @@ -145,6 +145,13 @@ goBuild { ROCM_PATH = rocmPath; CLBlast_DIR = "${clblast}/lib/cmake/CLBlast"; HIP_PATH = rocmPath; + CFLAGS = "-Wno-c++17-extensions -I${rocmPath}/include"; + CXXFLAGS = "-Wno-c++17-extensions -I${rocmPath}/include"; + } + // lib.optionalAttrs (enableRocm && (rocmPackages.clr.localGpuTargets or false) != false) { + # If rocm CLR is set to build for an exact set of targets reuse that target list, + # otherwise let ollama use its builtin defaults + HIP_ARCHS = lib.concatStringsSep ";" rocmPackages.clr.localGpuTargets; } // lib.optionalAttrs enableCuda { CUDA_PATH = cudaPath; @@ -172,10 +179,16 @@ goBuild { ++ lib.optionals stdenv.hostPlatform.isDarwin metalFrameworks; # replace inaccurate version number with actual release version - postPatch = '' - substituteInPlace version/version.go \ - --replace-fail 0.0.0 '${version}' - ''; + postPatch = + '' + substituteInPlace version/version.go \ + --replace-fail 0.0.0 '${version}' + '' + + lib.optionalString enableRocm '' + substituteInPlace make/Makefile.rocm \ + --replace-fail '-I./llama/' '-I./llama/ -I${rocmPath}/include' \ + --replace-fail ' $(ROCBLAS_DIST_DEP_MANIFEST) ' ' ' + ''; overrideModAttrs = ( finalAttrs: prevAttrs: { From 3413fec19122c5252ca29ffd90a2a01a70cb7bde Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Mon, 20 Jan 2025 12:33:38 -0800 Subject: [PATCH 07/11] rocm-6/llvm: apply patch suggested by @shuni64 for testing --- pkgs/development/rocm-modules/6/llvm/default.nix | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git 
a/pkgs/development/rocm-modules/6/llvm/default.nix b/pkgs/development/rocm-modules/6/llvm/default.nix index d162118d0a973..68ee0bdffb113 100644 --- a/pkgs/development/rocm-modules/6/llvm/default.nix +++ b/pkgs/development/rocm-modules/6/llvm/default.nix @@ -19,6 +19,7 @@ libffi, libxml2, removeReferencesTo, + fetchpatch, # Build compilers and stdenv suitable for profiling # compressed line tables (-g1 -gz) and # frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer) @@ -411,7 +412,18 @@ rec { clang-tools = llvmPackagesRocm.clang-tools.override { inherit clang-unwrapped clang; }; - inherit (llvmPackagesRocm) compiler-rt compiler-rt-libc; + # inherit (llvmPackagesRocm) compiler-rt compiler-rt-libc; + compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: { + patches = old.patches ++ [ + (fetchpatch { + name = "Fix-missing-main-function-in-float16-bfloat16-support-checks.patch"; + url = "https://github.com/ROCm/llvm-project/commit/68d8b3846ab1e6550910f2a9a685690eee558af2.patch"; + hash = "sha256-Db+L1HFMWVj4CrofsGbn5lnMoCzEcU+7q12KKFb17/g="; + relative = "compiler-rt"; + }) + ]; + }); + compiler-rt = compiler-rt-libc; bintools = wrapBintoolsWith { bintools = llvmPackagesRocm.bintools-unwrapped.override { inherit lld llvm; From 7716bd18fd31822d5f62cb4aabeefb3377882569 Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Mon, 20 Jan 2025 18:07:56 -0800 Subject: [PATCH 08/11] rocm-6: reapply ISA compatibility and UB patches --- ...0001-handle-v1-of-compressed-fatbins.patch | 176 ------------------ .../rocm-modules/6/clr/default.nix | 49 ++--- .../rocm-modules/6/hipblaslt/default.nix | 1 + .../rocm-modules/6/llvm/default.nix | 8 +- .../rocm-modules/6/rocblas/default.nix | 12 +- .../6/rocblas/offload-compress.diff | 68 ------- .../6/rocblas/offload-compress.py | 85 --------- .../rocm-modules/6/rocm-comgr/default.nix | 16 ++ .../rocm-modules/6/rocm-runtime/default.nix | 2 +- .../rocm-modules/6/rocm-runtime/ub.patch | 66 ------- 
.../rocm-modules/6/tensile/default.nix | 6 + 11 files changed, 64 insertions(+), 425 deletions(-) delete mode 100644 pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch delete mode 100644 pkgs/development/rocm-modules/6/rocblas/offload-compress.diff delete mode 100644 pkgs/development/rocm-modules/6/rocblas/offload-compress.py delete mode 100644 pkgs/development/rocm-modules/6/rocm-runtime/ub.patch diff --git a/pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch b/pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch deleted file mode 100644 index a48579d1d5381..0000000000000 --- a/pkgs/development/rocm-modules/6/clr/0001-handle-v1-of-compressed-fatbins.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 26a65d37e388c25898a13b60a42ab606d63fda2e Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 25 Oct 2024 08:38:53 -0700 -Subject: [PATCH] handle v1 of compressed fatbins - -The size of the compressed modules is needed to uncompress them but -that information is only stored in the header in v2. - -Because the uncompressed size is known, the compressed size can be -greater than what it actually is. - -So if v1 is detected use the maximum possible size. 
- -Signed-off-by: Tom Rix ---- - hipamd/src/hip_code_object.cpp | 9 ++++++--- - hipamd/src/hip_code_object.hpp | 2 +- - hipamd/src/hip_fatbin.cpp | 8 +++++--- - hipamd/src/hip_fatbin.hpp | 2 +- - rocclr/os/os.hpp | 2 +- - rocclr/os/os_posix.cpp | 3 ++- - rocclr/os/os_win32.cpp | 2 +- - 7 files changed, 17 insertions(+), 11 deletions(-) - -diff --git a/hipamd/src/hip_code_object.cpp b/hipamd/src/hip_code_object.cpp -index 73b9e2603c98..15098612ebf7 100644 ---- a/hipamd/src/hip_code_object.cpp -+++ b/hipamd/src/hip_code_object.cpp -@@ -600,10 +600,13 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( - } - - // ================================================================================================ --size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) { -+size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed, size_t maximum_possible_size) { - if (isCompressed) { - const auto obheader = reinterpret_cast(data); -- return obheader->totalSize; -+ if (obheader->versionNumber > 1) -+ return obheader->totalSize; -+ else -+ return maximum_possible_size; - } else { - const auto obheader = reinterpret_cast(data); - const __ClangOffloadBundleInfo* desc = &obheader->desc[0]; -@@ -632,7 +635,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( - return hipErrorInvalidKernelFile; - } - -- if (size == 0) size = getFatbinSize(data, isCompressed); -+ size = getFatbinSize(data, isCompressed, size); - - amd_comgr_data_t dataCodeObj{0}; - amd_comgr_data_set_t dataSetBundled{0}; -diff --git a/hipamd/src/hip_code_object.hpp b/hipamd/src/hip_code_object.hpp -index f0407f7bd48f..1dbcc2ab44e1 100644 ---- a/hipamd/src/hip_code_object.hpp -+++ b/hipamd/src/hip_code_object.hpp -@@ -66,7 +66,7 @@ class CodeObject { - static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed); - - // Return size of fat bin -- static size_t getFatbinSize(const void* data, const bool isCompressed = false); -+ static size_t 
getFatbinSize(const void* data, const bool isCompressed = false, size_t maximum_possible_size = 0); - - /** - * @brief Extract code object from fatbin using comgr unbundling action -diff --git a/hipamd/src/hip_fatbin.cpp b/hipamd/src/hip_fatbin.cpp -index 8b52e9f32fc6..1d39fa18b636 100644 ---- a/hipamd/src/hip_fatbin.cpp -+++ b/hipamd/src/hip_fatbin.cpp -@@ -126,6 +126,7 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector= 2 && minor >= 8) { -- hip_status = ExtractFatBinaryUsingCOMGR(image_, devices); -+ hip_status = ExtractFatBinaryUsingCOMGR(image_, maximum_possible_size, devices); - break; - } else if (isCompressed) { - LogPrintfError( -@@ -467,6 +468,7 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) { - - // ================================================================================================ - hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void *data, -+ size_t maximum_possible_size, - const std::vector& devices) { - hipError_t hip_status = hipSuccess; - // At this line, image should be a valid ptr. 
-@@ -481,7 +483,7 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void *data, - device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); - } - -- hip_status = CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0, -+ hip_status = CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, maximum_possible_size, - device_names, code_objs); - if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) { - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { -diff --git a/hipamd/src/hip_fatbin.hpp b/hipamd/src/hip_fatbin.hpp -index 5c4ea29761af..afd3cb2a2061 100644 ---- a/hipamd/src/hip_fatbin.hpp -+++ b/hipamd/src/hip_fatbin.hpp -@@ -79,7 +79,7 @@ public: - * - * @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr() - */ -- hipError_t ExtractFatBinaryUsingCOMGR(const void* data, -+ hipError_t ExtractFatBinaryUsingCOMGR(const void* data, size_t maximum_possible_size, - const std::vector& devices); - hipError_t ExtractFatBinary(const std::vector& devices); - hipError_t AddDevProgram(const int device_id); -diff --git a/rocclr/os/os.hpp b/rocclr/os/os.hpp -index c9bd0b99e5cd..752f62f2f8c9 100644 ---- a/rocclr/os/os.hpp -+++ b/rocclr/os/os.hpp -@@ -117,7 +117,7 @@ class Os : AllStatic { - - // Returns the file name & file offset of mapped memory if the file is mapped. 
- static bool FindFileNameFromAddress(const void* image, std::string* fname_ptr, -- size_t* foffset_ptr); -+ size_t* foffset_ptr, size_t *max_possible_size); - - // Given a valid file descriptor returns mmaped memory for size and offset - static bool MemoryMapFileDesc(FileDesc fdesc, size_t fsize, size_t foffset, -diff --git a/rocclr/os/os_posix.cpp b/rocclr/os/os_posix.cpp -index 739795e0cb19..659bbb54a253 100644 ---- a/rocclr/os/os_posix.cpp -+++ b/rocclr/os/os_posix.cpp -@@ -789,7 +789,7 @@ bool Os::GetFileHandle(const char* fname, FileDesc* fd_ptr, size_t* sz_ptr) { - } - - bool amd::Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, -- size_t* foffset_ptr) { -+ size_t* foffset_ptr, size_t *maximum_possible_size) { - - // Get the list of mapped file list - bool ret_value = false; -@@ -831,6 +831,7 @@ bool amd::Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, - - *fname_ptr = uri_file_path; - *foffset_ptr = offset + address - low_address; -+ *maximum_possible_size = high_address - address + 1; - ret_value = true; - break; - } -diff --git a/rocclr/os/os_win32.cpp b/rocclr/os/os_win32.cpp -index 3923ec37dfc5..6fca0d9f4ccc 100644 ---- a/rocclr/os/os_win32.cpp -+++ b/rocclr/os/os_win32.cpp -@@ -936,7 +936,7 @@ bool Os::MemoryMapFileTruncated(const char* fname, const void** mmap_ptr, size_t - return true; - } - --bool Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, size_t* foffset_ptr) { -+bool Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr, size_t* foffset_ptr, size_t *maximum_possible_size) { - // TODO: Implementation on windows side pending. 
- return false; - } --- -2.47.0 - diff --git a/pkgs/development/rocm-modules/6/clr/default.nix b/pkgs/development/rocm-modules/6/clr/default.nix index a939ad41c6274..42222fc186c2d 100644 --- a/pkgs/development/rocm-modules/6/clr/default.nix +++ b/pkgs/development/rocm-modules/6/clr/default.nix @@ -3,6 +3,7 @@ stdenv, callPackage, fetchFromGitHub, + fetchpatch, rocmUpdateScript, makeWrapper, cmake, @@ -123,29 +124,33 @@ stdenv.mkDerivation (finalAttrs: { # TODO: rebase patches patches = [ ./cmake-find-x11-libgl.patch - ./0001-handle-v1-of-compressed-fatbins.patch # https://github.com/ROCm/clr/issues/99 - # ./fix-null-stream-sync-perf.patch # https://github.com/ROCm/clr/issues/78 - # (fetchpatch { - # name = "add-missing-operators.patch"; - # url = "https://github.com/ROCm/clr/commit/86bd518981b364c138f9901b28a529899d8654f3.patch"; - # hash = "sha256-lbswri+zKLxif0hPp4aeJDeVfadhWZz4z+m+G2XcCPI="; - # }) - # (fetchpatch { - # name = "static-functions.patch"; - # url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch"; - # hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs="; - # }) - # (fetchpatch { - # name = "extend-hip-isa-compatibility-check.patch"; - # url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch"; - # hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI="; - # }) - # (fetchpatch { - # name = "improve-rocclr-isa-compatibility-check.patch"; - # url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch"; - # hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y="; - # }) + (fetchpatch { + # Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99 + sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA="; + url = 
"https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch"; + }) + (fetchpatch { + # improve rocclr isa compatibility check + sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs="; + url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch"; + }) + (fetchpatch { + # [PATCH] Improve hipamd compat check + sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8="; + url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch"; + }) + (fetchpatch { + # [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition + # Fixes crash due to UB in KernelBlitManager::setArgument + sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I="; + url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch"; + }) + (fetchpatch { + # [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues + sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4="; + url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch"; + }) ]; postPatch = '' diff --git a/pkgs/development/rocm-modules/6/hipblaslt/default.nix b/pkgs/development/rocm-modules/6/hipblaslt/default.nix index 407cbbb9b9b9a..417c1a75d84ec 100644 --- a/pkgs/development/rocm-modules/6/hipblaslt/default.nix +++ b/pkgs/development/rocm-modules/6/hipblaslt/default.nix @@ -76,6 +76,7 @@ stdenv.mkDerivation ( requiredSystemFeatures = [ "big-parallel" ]; patches = [ + # TensileCreateExtOpLibraries build failure https://github.com/ROCm/hipBLASLt/issues/1571 ./ext-op-first.diff ]; diff --git a/pkgs/development/rocm-modules/6/llvm/default.nix b/pkgs/development/rocm-modules/6/llvm/default.nix index 68ee0bdffb113..efe1851518827 100644 --- a/pkgs/development/rocm-modules/6/llvm/default.nix +++ b/pkgs/development/rocm-modules/6/llvm/default.nix @@ -309,7 +309,13 @@ rec { pname = 
"${old.pname}-rocm"; patches = filteredPatches ++ [ ./clang-bodge-ignore-systemwide-incls.diff - ./clang-log-jobs.diff # FIXME: rebase for 20+? + ./clang-log-jobs.diff + (fetchpatch { + # [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler + sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8="; + url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch"; + relative = "clang"; + }) # FIXME: if llvm was overrideable properly this wouldn't be needed (substituteAll { src = ./clang-at-least-16-LLVMgold-path.patch; diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix index 9ef135ca34769..abe3928f9158b 100644 --- a/pkgs/development/rocm-modules/6/rocblas/default.nix +++ b/pkgs/development/rocm-modules/6/rocblas/default.nix @@ -2,6 +2,7 @@ lib, stdenv, fetchFromGitHub, + fetchpatch, rocmUpdateScript, cmake, rocm-cmake, @@ -170,12 +171,11 @@ stdenv.mkDerivation (finalAttrs: { passthru.amdgpu_targets = gpuTargets'; patches = [ - # (fetchpatch { - # name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; - # url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; - # hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; - # }) - # ./offload-compress.diff + (fetchpatch { + name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; + url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; + hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; + }) ]; # Pass $NIX_BUILD_CORES to Tensile diff --git a/pkgs/development/rocm-modules/6/rocblas/offload-compress.diff b/pkgs/development/rocm-modules/6/rocblas/offload-compress.diff deleted file mode 100644 index c651eab199cd3..0000000000000 --- a/pkgs/development/rocm-modules/6/rocblas/offload-compress.diff +++ /dev/null @@ -1,68 +0,0 @@ -diff --git a/cmake/build-options.cmake 
b/cmake/build-options.cmake -index 379f8d889..e9de46914 100644 ---- a/cmake/build-options.cmake -+++ b/cmake/build-options.cmake -@@ -27,6 +27,8 @@ - # presented in the superbuild GUI, but then passed into the ExternalProject as -D - # parameters, which would already define them. - -+include(CheckCXXCompilerFlag) -+ - option( BUILD_VERBOSE "Output additional build information" OFF ) - - # BUILD_SHARED_LIBS is a cmake built-in; we make it an explicit option such that it shows in cmake-gui -@@ -46,6 +48,15 @@ endif() - # this file is intended to be loaded by toolchain or early as sets global compiler flags - # rocm-cmake checks will throw warnings if set later as cmake watchers installed - -+ -+option(BUILD_OFFLOAD_COMPRESS "Build rocBLAS with offload compression" ON) -+if (BUILD_OFFLOAD_COMPRESS) -+ check_cxx_compiler_flag("--offload-compress" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) -+ if (NOT CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) -+ message( STATUS "WARNING: BUILD_OFFLOAD_COMPRESS=ON but flag not supported by compiler. Ignoring option." 
) -+ endif() -+endif() -+ - # FOR OPTIONAL CODE COVERAGE - option(BUILD_CODE_COVERAGE "Build rocBLAS with code coverage enabled" OFF) - -diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt -index 2e94e19ea..161a443e0 100644 ---- a/library/CMakeLists.txt -+++ b/library/CMakeLists.txt -@@ -71,6 +71,10 @@ function( rocblas_library_settings lib_target_ ) - set_target_properties( ${lib_target_} PROPERTIES CXX_EXTENSIONS NO ) - set_target_properties( ${lib_target_} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) - -+ if(BUILD_OFFLOAD_COMPRESS AND CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) -+ set_target_properties( ${lib_target_} PROPERTIES COMPILE_FLAGS "--offload-compress" ) -+ endif() -+ - target_link_libraries( ${lib_target_} INTERFACE hip::host ) - if (WIN32) - target_link_libraries( ${lib_target_} PRIVATE hip::device ) -diff --git a/rmake.py b/rmake.py -index 45335278f..f1f8561b0 100755 ---- a/rmake.py -+++ b/rmake.py -@@ -133,6 +133,9 @@ def parse_args(): - experimental_opts.add_argument( '--no-msgpack', dest='tensile_msgpack_backend', required=False, default=True, action='store_false', - help='Build Tensile backend not to use MessagePack and so use YAML (optional)') - -+ general_opts.add_argument( '--no-offload-compress', dest='no_offload_compress', required=False, default=False, action='store_true', -+ help='Do not apply offload compression.') -+ - general_opts.add_argument( '-r', '--relocatable', required=False, default=False, action='store_true', - help='Linux only: Add RUNPATH (based on ROCM_RPATH) and remove ldconf entry.') - -@@ -399,6 +402,9 @@ def config_cmd(): - if args.address_sanitizer: - cmake_options.append(f"-DBUILD_ADDRESS_SANITIZER=ON") - -+ if args.no_offload_compress: -+ cmake_options.append(f"-DBUILD_OFFLOAD_COMPRESS=OFF") -+ - # clean - delete_dir(build_path) - diff --git a/pkgs/development/rocm-modules/6/rocblas/offload-compress.py b/pkgs/development/rocm-modules/6/rocblas/offload-compress.py deleted file mode 
100644 index 48a3bf2617780..0000000000000 --- a/pkgs/development/rocm-modules/6/rocblas/offload-compress.py +++ /dev/null @@ -1,85 +0,0 @@ -# Compress standalone hsaco/co files the way clang-offload-bundler does -# https://clang.llvm.org/docs/ClangOffloadBundler.html#compression-and-decompression -import zstandard as zstd -import struct -import hashlib -import os -import argparse -import glob - -# Constants -MAGIC_NUMBER = b'CCOB' -VERSION = 2 -COMPRESSION_METHOD_ZSTD = 1 # Assuming 1 represents zstd in the LLVM compression enumeration - -def calculate_md5(data): - return hashlib.md5(data).digest()[:8] # 64-bit truncated MD5 hash - -# struct __ClangOffloadBundleCompressedHeader { -# const char magic[kOffloadBundleCompressedMagicStrSize - 1]; -# uint16_t versionNumber; -# uint16_t compressionMethod; -# uint32_t totalSize; -# uint32_t uncompressedBinarySize; -# uint64_t Hash; -# const char compressedBinarydesc[1]; -# }; - -def compress_file(input_file): - # Read the input file - with open(input_file, 'rb') as f: - uncompressed_data = f.read() - - if uncompressed_data[0:len(MAGIC_NUMBER)] == MAGIC_NUMBER: - print(f"{input_file} already compressed, skipping") - return - - # Compress the data - cctx = zstd.ZstdCompressor() - compressed_data = cctx.compress(uncompressed_data) - - # Calculate hash - hash_value = calculate_md5(uncompressed_data) - - # Create header - header = struct.pack('@4sHHII8s', - MAGIC_NUMBER, - VERSION, - COMPRESSION_METHOD_ZSTD, - len(compressed_data) + 24, # Total file size (header + compressed data) - len(uncompressed_data), - hash_value) - - # Write compressed file - with open(input_file, 'wb') as f: - f.write(header) - f.write(compressed_data) - -def process_directory(directory): - # Get all .hsaco and .co files in the directory - files_to_compress = list(glob.glob(os.path.join(directory, '**', '*.hsaco'), recursive=True) + glob.glob(os.path.join(directory, '**', '*.co'), recursive=True)) - - successes = 0 - for file in files_to_compress: - 
try: - compress_file(file) - print(f"Compressed: {file}") - successes += 1 - except Exception as e: - print(f"Error compressing {file}: {str(e)}") - - print(f"Compression complete. Compressed {successes: 5d} / {len(files_to_compress): 5d}") - -def main(): - parser = argparse.ArgumentParser(description="Compress .hsaco and .co files in a directory using zstd.") - parser.add_argument("directory", help="Directory containing files to compress") - args = parser.parse_args() - - if not os.path.isdir(args.directory): - print(f"Error: {args.directory} is not a valid directory") - return - - process_directory(args.directory) - -if __name__ == '__main__': - main() diff --git a/pkgs/development/rocm-modules/6/rocm-comgr/default.nix b/pkgs/development/rocm-modules/6/rocm-comgr/default.nix index ca9be81e85eb6..64f4f85053116 100644 --- a/pkgs/development/rocm-modules/6/rocm-comgr/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-comgr/default.nix @@ -1,6 +1,7 @@ { lib, stdenv, + fetchpatch, cmake, python3, rocm-merged-llvm, @@ -27,6 +28,21 @@ stdenv.mkDerivation (finalAttrs: { sourceRoot = "${finalAttrs.src.name}/amd/comgr"; + patches = [ + # [Comgr] Extend ISA compatibility + (fetchpatch { + sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k="; + url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch"; + relative = "amd/comgr"; + }) + #[Comgr] Extend ISA compatibility for CCOB + (fetchpatch { + sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY="; + url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch"; + relative = "amd/comgr"; + }) + ]; + nativeBuildInputs = [ cmake python3 diff --git a/pkgs/development/rocm-modules/6/rocm-runtime/default.nix b/pkgs/development/rocm-modules/6/rocm-runtime/default.nix index 52635ed32e519..bfccdd0c2a975 100644 --- a/pkgs/development/rocm-modules/6/rocm-runtime/default.nix +++ 
b/pkgs/development/rocm-modules/6/rocm-runtime/default.nix @@ -2,6 +2,7 @@ lib, stdenv, fetchFromGitHub, + fetchpatch, rocmUpdateScript, pkg-config, cmake, @@ -81,7 +82,6 @@ stdenv.mkDerivation (finalAttrs: { hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns="; }) ./remove-hsa-aqlprofile-dep.patch - ./ub.patch ]; postPatch = '' diff --git a/pkgs/development/rocm-modules/6/rocm-runtime/ub.patch b/pkgs/development/rocm-modules/6/rocm-runtime/ub.patch deleted file mode 100644 index 9427c76f8eaec..0000000000000 --- a/pkgs/development/rocm-modules/6/rocm-runtime/ub.patch +++ /dev/null @@ -1,66 +0,0 @@ -commit 56ad93a08c185cd43f925488ee5295149cce4d9d -Author: Luna Nova -Date: Mon Dec 30 11:58:30 2024 -0800 - - kfd_ioctl: cast to unsigned before << 31 to avoid UB - -diff --git a/libhsakmt/include/hsakmt/linux/kfd_ioctl.h b/libhsakmt/include/hsakmt/linux/kfd_ioctl.h -index e132cedf..ab15e69f 100644 ---- a/libhsakmt/include/hsakmt/linux/kfd_ioctl.h -+++ b/libhsakmt/include/hsakmt/linux/kfd_ioctl.h -@@ -1026,7 +1026,7 @@ struct kfd_ioctl_acquire_vm_args { - #define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) - #define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) - /* Allocation flags: attributes/access options */ --#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) -+#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1U << 31U) - #define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) - #define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) - #define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) -commit 967f8c11b702fa769e0f95052f7b37a357b215f2 -Author: Luna Nova -Date: Mon Dec 30 15:32:49 2024 -0800 - - topology: fix signed integer overflow due to << 31 - -diff --git a/libhsakmt/src/topology.c b/libhsakmt/src/topology.c -index 9990286a..d3b54d11 100644 ---- a/libhsakmt/src/topology.c -+++ b/libhsakmt/src/topology.c -@@ -460,7 +460,7 @@ static void cpumap_to_cpu_ci(char *shared_cpu_map, - struct proc_cpuinfo *cpuinfo, - HsaCacheProperties *this_cache) - { -- int num_hexs, bit; -+ 
unsigned int num_hexs, bit; - uint32_t proc, apicid, mask; - char *ch_ptr; - -@@ -473,7 +473,7 @@ static void cpumap_to_cpu_ci(char *shared_cpu_map, - while (num_hexs-- > 0) { - mask = strtol(ch_ptr, NULL, 16); /* each X */ - for (bit = 0; bit < 32; bit++) { -- if (!((1 << bit) & mask)) -+ if (!((1U << bit) & mask)) - continue; - proc = num_hexs * 32 + bit; - apicid = cpuinfo[proc].apicid; -commit 8524c5b6af087ba29999119245effc4dc94f1584 -Author: Luna Nova -Date: Mon Dec 30 15:34:05 2024 -0800 - - queues: fix signed integer overflow due to << 31 - -diff --git a/libhsakmt/src/queues.c b/libhsakmt/src/queues.c -index de37f8b9..d2aaa7b6 100644 ---- a/libhsakmt/src/queues.c -+++ b/libhsakmt/src/queues.c -@@ -663,7 +663,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId, - /* cu_mask_count counts bits. It must be multiple of 32 */ - q->cu_mask_count = ALIGN_UP_32(cu_num, 32); - for (i = 0; i < cu_num; i++) -- q->cu_mask[i/32] |= (1 << (i % 32)); -+ q->cu_mask[i/32] |= (1U << (i % 32)); - } - - struct kfd_ioctl_create_queue_args args = {0}; diff --git a/pkgs/development/rocm-modules/6/tensile/default.nix b/pkgs/development/rocm-modules/6/tensile/default.nix index 8b0039a96d8c3..ce616f06cc034 100644 --- a/pkgs/development/rocm-modules/6/tensile/default.nix +++ b/pkgs/development/rocm-modules/6/tensile/default.nix @@ -2,6 +2,7 @@ lib, stdenv, fetchFromGitHub, + fetchpatch, rocmUpdateScript, buildPythonPackage, pytestCheckHook, @@ -82,6 +83,11 @@ buildPythonPackage rec { patches = (lib.optional (!isTensileLite) ./tensile-6.3.0-create-library-dont-copy-twice.diff) + ++ (lib.optional (!isTensileLite) (fetchpatch { + # [PATCH] Extend Tensile HIP ISA compatibility + sha256 = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8="; + url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch"; + })) ++ (lib.optional isTensileLite ./tensile-create-library-dont-copy-twice.diff) ++ (lib.optional isTensileLite 
./gen_assembly-venv-err-handling.diff) ++ (lib.optional isTensileLite ./log-fallback.diff); From 816197df90829939c646fa54fe4b81efbe2e8f1b Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Tue, 21 Jan 2025 06:51:43 -0800 Subject: [PATCH 09/11] fixup! rocmPackages_6: 6.0.2 -> 6.3.1 --- .../rocm-modules/6/tensile/log-fallback.diff | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/pkgs/development/rocm-modules/6/tensile/log-fallback.diff b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff index 7d5de6d68d0af..86dbca7ccdbdf 100644 --- a/pkgs/development/rocm-modules/6/tensile/log-fallback.diff +++ b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff @@ -19,24 +19,7 @@ index 03325974..666c53e4 100644 restoreDefaultGlobalParameters() assignGlobalParameters({}) detectGlobalCurrentISA() -@@ -865,11 +868,15 @@ if __name__ == '__main__': - output_path_basename = os.path.splitext(output_path)[0] - - if debug_build: -- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] -+ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] - else: -- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] -+ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] - - ret = subprocess.run([toolchain_path] + build_args) -+ if ret > 0: -+ exit(ret) - ret = subprocess.run([toolchain_path, '-target', 'amdcgn-amdhsa', '-o', f'{output_path_basename}.co', 
f'{output_path_basename}.o']) -+ if ret > 0: -+ exit(ret) - amax.dump('yaml', f'{output_path_basename}.yaml') - + diff --git a/Tensile/Ops/LayerNormGenerator.py b/Tensile/Ops/LayerNormGenerator.py index 9546d3c2..19a6735a 100644 --- a/Tensile/Ops/LayerNormGenerator.py From 8bf6eb3092d97f057a54d4595e47ccbba41f96ab Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Tue, 21 Jan 2025 06:51:43 -0800 Subject: [PATCH 10/11] fixup! rocmPackages_6: 6.0.2 -> 6.3.1 --- .../rocm-modules/6/tensile/log-fallback.diff | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/pkgs/development/rocm-modules/6/tensile/log-fallback.diff b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff index 86dbca7ccdbdf..5e472d7fc1d8b 100644 --- a/pkgs/development/rocm-modules/6/tensile/log-fallback.diff +++ b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff @@ -41,24 +41,7 @@ index 9546d3c2..19a6735a 100644 restoreDefaultGlobalParameters() assignGlobalParameters({}) detectGlobalCurrentISA() -@@ -946,11 +949,15 @@ if __name__ == '__main__': - output_path_basename = os.path.splitext(output_path)[0] - - if debug_build: -- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] -+ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-g', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] - else: -- build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] -+ build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=5', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] - - ret = 
subprocess.run([toolchain_path] + build_args) -+ if ret > 0: -+ exit(ret) - ret = subprocess.run([toolchain_path, '-target', 'amdcgn-amdhsa', '-o', f'{output_path_basename}.co', f'{output_path_basename}.o']) -+ if ret > 0: -+ exit(ret) - layernorm.dump('yaml', f'{output_path_basename}.yaml') - + diff --git a/Tensile/Ops/SoftmaxGenerator.py b/Tensile/Ops/SoftmaxGenerator.py index dc4c53aa..57851859 100644 --- a/Tensile/Ops/SoftmaxGenerator.py From 1b3a8508ad46fcb56efaf28010c8336e57f0085f Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Tue, 21 Jan 2025 06:51:43 -0800 Subject: [PATCH 11/11] fixup! rocmPackages_6: 6.0.2 -> 6.3.1 --- .../rocm-modules/6/tensile/log-fallback.diff | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pkgs/development/rocm-modules/6/tensile/log-fallback.diff b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff index 5e472d7fc1d8b..d350017b18fbe 100644 --- a/pkgs/development/rocm-modules/6/tensile/log-fallback.diff +++ b/pkgs/development/rocm-modules/6/tensile/log-fallback.diff @@ -55,13 +55,3 @@ index dc4c53aa..57851859 100644 def _validate(self): assert self.num_cols * self.num_rows == self.num_workitems -@@ -719,5 +721,9 @@ if __name__ == '__main__': - build_args = ['-x', 'assembler', '-target', 'amdgcn-amd-amdhsa', '-mcode-object-version=4', f'-mcpu={arch}', '-mwavefrontsize64', '-c', '-o', f'{output_path_basename}.o', f'{output_path_basename}.s'] - - ret = subprocess.run([toolchain_path] + build_args) -+ if ret > 0: -+ exit(ret) - ret = subprocess.run([toolchain_path, '-target', 'amdcgn-amdhsa', '-o', f'{output_path_basename}.co', f'{output_path_basename}.o']) -+ if ret > 0: -+ exit(ret) - softmax.dump('yaml', f'{output_path_basename}.yaml')