diff --git a/README.md b/README.md
index 30f2dd25..eee6af39 100644
--- a/README.md
+++ b/README.md
@@ -1557,6 +1557,10 @@ input0 = pb_utils.Tensor.from_dlpack("INPUT0", pytorch_tensor)
 This method only supports contiguous Tensors that are in C-order. If the tensor
 is not C-order contiguous an exception will be raised.
 
+For Python models with input or output tensors of type BFloat16 (BF16), the
+`as_numpy()` method is not supported, and the `from_dlpack` and `to_dlpack`
+methods must be used instead.
+
 ## `pb_utils.Tensor.is_cpu() -> bool`
 
 This function can be used to check whether a tensor is placed in CPU or not.
diff --git a/src/pb_stub_utils.cc b/src/pb_stub_utils.cc
index c9ffd661..9e05feae 100644
--- a/src/pb_stub_utils.cc
+++ b/src/pb_stub_utils.cc
@@ -1,4 +1,4 @@
-// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -168,6 +168,8 @@ triton_to_pybind_dtype(TRITONSERVER_DataType data_type)
       dtype_numpy = py::dtype(py::format_descriptor::format());
       break;
     case TRITONSERVER_TYPE_BF16:
+      // NOTE: Currently skipping this call via `if (BF16)` check, but may
+      // want to better handle this or set some default/invalid dtype.
       throw PythonBackendException("TYPE_BF16 not currently supported.");
     case TRITONSERVER_TYPE_INVALID:
       throw PythonBackendException("Dtype is invalid.");
@@ -240,6 +242,10 @@ triton_to_dlpack_type(TRITONSERVER_DataType triton_dtype)
     case TRITONSERVER_TYPE_BYTES:
      throw PythonBackendException(
          "TYPE_BYTES tensors cannot be converted to DLPack.");
+    case TRITONSERVER_TYPE_BF16:
+      dl_code = DLDataTypeCode::kDLBfloat;
+      dt_size = 16;
+      break;
 
     default:
       throw PythonBackendException(
@@ -301,6 +307,15 @@
     }
   }
 
+  if (data_type.code == DLDataTypeCode::kDLBfloat) {
+    if (data_type.bits != 16) {
+      throw PythonBackendException(
+          "Expected BF16 tensor to have 16 bits, but had: " +
+          std::to_string(data_type.bits));
+    }
+    return TRITONSERVER_TYPE_BF16;
+  }
+
   return TRITONSERVER_TYPE_INVALID;
 }
 }}}  // namespace triton::backend::python
diff --git a/src/pb_tensor.cc b/src/pb_tensor.cc
index 0915c1d9..1ab95144 100644
--- a/src/pb_tensor.cc
+++ b/src/pb_tensor.cc
@@ -1,4 +1,4 @@
-// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -152,7 +152,10 @@ PbTensor::PbTensor(
 #ifdef TRITON_PB_STUB
   if (memory_type_ == TRITONSERVER_MEMORY_CPU ||
       memory_type_ == TRITONSERVER_MEMORY_CPU_PINNED) {
-    if (dtype != TRITONSERVER_TYPE_BYTES) {
+    if (dtype == TRITONSERVER_TYPE_BF16) {
+      // No native numpy representation for BF16. DLPack should be used instead.
+      numpy_array_ = py::none();
+    } else if (dtype != TRITONSERVER_TYPE_BYTES) {
       py::object numpy_array =
           py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_);
       numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_));
@@ -512,12 +515,18 @@
 const py::array*
 PbTensor::AsNumpy() const
 {
-  if (IsCPU()) {
-    return &numpy_array_;
-  } else {
+  if (!IsCPU()) {
     throw PythonBackendException(
         "Tensor is stored in GPU and cannot be converted to NumPy.");
   }
+
+  if (dtype_ == TRITONSERVER_TYPE_BF16) {
+    throw PythonBackendException(
+        "Tensor dtype is BF16 and cannot be converted to NumPy. Use "
+        "to_dlpack() and from_dlpack() instead.");
+  }
+
+  return &numpy_array_;
 }
 #endif  // TRITON_PB_STUB
 
@@ -643,7 +652,10 @@ PbTensor::PbTensor(
 #ifdef TRITON_PB_STUB
   if (memory_type_ == TRITONSERVER_MEMORY_CPU ||
       memory_type_ == TRITONSERVER_MEMORY_CPU_PINNED) {
-    if (dtype_ != TRITONSERVER_TYPE_BYTES) {
+    if (dtype_ == TRITONSERVER_TYPE_BF16) {
+      // No native numpy representation for BF16. DLPack should be used instead.
+      numpy_array_ = py::none();
+    } else if (dtype_ != TRITONSERVER_TYPE_BYTES) {
       py::object numpy_array =
           py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_);
       numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_));
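Usage note: the README hunk above is the user-visible contract of this change, since BF16 tensors must round-trip through DLPack rather than numpy. Below is a minimal sketch of a model `execute()` that follows it, assuming PyTorch (which has a native `bfloat16` dtype and supports DLPack) is available in the model's environment; the tensor names `INPUT0`/`OUTPUT0` and the doubling operation are hypothetical, not part of this patch.

```python
import torch
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # as_numpy() raises for BF16 tensors (numpy has no bfloat16
            # dtype), so bridge into a framework tensor via DLPack instead.
            in0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in0_torch = torch.from_dlpack(in0.to_dlpack())  # torch.bfloat16

            out0_torch = in0_torch * 2  # placeholder math; stays in bfloat16

            # Build the BF16 output from the DLPack-capable torch tensor;
            # constructing it from a numpy array is not an option for BF16.
            out0 = pb_utils.Tensor.from_dlpack("OUTPUT0", out0_torch)
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[out0]))
        return responses
```

Keeping the data in `torch.bfloat16` end to end avoids any intermediate numpy array, which is exactly the path `PbTensor::AsNumpy()` now rejects for BF16.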