Skip to content

Commit

Permalink
Merge branch 'OpenNMT:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
BBC-Esq authored Nov 29, 2024
2 parents 173f7cb + 2870fe3 commit 6936cba
Show file tree
Hide file tree
Showing 11 changed files with 258 additions and 99 deletions.
46 changes: 26 additions & 20 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
backend: [mkl, dnnl]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: recursive

Expand Down Expand Up @@ -82,7 +82,7 @@ jobs:
backend: [openblas]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: recursive

Expand Down Expand Up @@ -137,11 +137,11 @@ jobs:
include:
- os: ubuntu-20.04
arch: aarch64
- os: macos-12
- os: macos-13
arch: arm64

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: recursive

Expand All @@ -150,7 +150,7 @@ jobs:
name: Set up QEMU

- name: Build wheels
uses: pypa/cibuildwheel@v2.16.5
uses: pypa/cibuildwheel@v2.21.3
with:
package-dir: python
output-dir: python/wheelhouse
Expand All @@ -168,9 +168,9 @@ jobs:
CIBW_SKIP: pp* *-musllinux_*

- name: Upload Python wheels
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: python-wheels
name: python-wheels-${{ runner.os }}-${{ matrix.arch }}
path: python/wheelhouse


Expand All @@ -185,21 +185,23 @@ jobs:

steps:
- name: Set up Python 3.8
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.8

- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Prepare test environment
shell: bash
run: |
./python/tools/prepare_test_environment.sh
- name: Download Python wheels
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: python-wheels
pattern: python-wheels-${{ runner.os }}-*
merge-multiple: true
path: .

- name: Install wheel
if: startsWith(matrix.os, 'ubuntu')
Expand All @@ -222,10 +224,10 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python 3.8
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.8

Expand Down Expand Up @@ -257,9 +259,11 @@ jobs:

steps:
- name: Download Python wheels
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: python-wheels
pattern: python-wheels-*
merge-multiple: true
path: .

- name: Publish Python wheels to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
Expand All @@ -272,7 +276,7 @@ jobs:
build-and-push-docker-images:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: recursive

Expand All @@ -299,17 +303,19 @@ jobs:
needs: [check-python-style, build-python-wheels]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python 3.8
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.8

- name: Download CTranslate2 wheels
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: python-wheels
pattern: python-wheels-${{ runner.os }}-*
merge-multiple: true
path: .

- name: Install CTranslate2 wheel
run: |
Expand Down
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@

### Fixes and improvements

## [v4.5.0](https://github.com/OpenNMT/CTranslate2/releases/tag/v4.5.0) (2024-10-22)
Note: The CTranslate2 Python package now supports CUDNN 9 and is no longer compatible with CUDNN 8.

### New features
* Support Phi3 (#1800)
* Support Mistral Nemo (#1785)
* Support Wav2Vec2Bert ASR (#1778)

### Fixes and improvements
* Upgrade to CUDNN9 (#1803)
* Fix logits vocab (#1786 + #1791)
* Update doc AWQ (#1795)

## [v4.4.0](https://github.com/OpenNMT/CTranslate2/releases/tag/v4.4.0) (2024-09-09)
**Removed**: Flash Attention support in the Python package due to significant package size increase with minimal performance gain.
Note: Flash Attention remains supported in the C++ package with the `WITH_FLASH_ATTN` option.
Expand Down
62 changes: 31 additions & 31 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -551,39 +551,9 @@ if (WITH_CUDA)
else()
list(APPEND LIBRARIES ${CUDA_CUBLAS_LIBRARIES})
endif()
set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
cuda_add_library(${PROJECT_NAME}
${SOURCES}
src/cuda/allocator.cc
src/cuda/primitives.cu
src/cuda/random.cu
src/cuda/utils.cc
src/ops/alibi_add_gpu.cu
src/ops/bias_add_gpu.cu
src/ops/concat_split_slide_gpu.cu
src/ops/conv1d_gpu.cu
src/ops/dequantize_gpu.cu
src/ops/flash_attention_gpu.cu
src/ops/gather_gpu.cu
src/ops/gumbel_max_gpu.cu
src/ops/layer_norm_gpu.cu
src/ops/mean_gpu.cu
src/ops/multinomial_gpu.cu
src/ops/rms_norm_gpu.cu
src/ops/rotary_gpu.cu
src/ops/softmax_gpu.cu
src/ops/tile_gpu.cu
src/ops/topk_gpu.cu
src/ops/topp_mask_gpu.cu
src/ops/quantize_gpu.cu
src/ops/nccl_ops_gpu.cu
src/ops/awq/gemm_gpu.cu
src/ops/awq/gemv_gpu.cu
src/ops/awq/dequantize_gpu.cu
)
if (WITH_FLASH_ATTN)
add_definitions(-DCT2_WITH_FLASH_ATTN)
cuda_add_library(${PROJECT_NAME}
list(APPEND SOURCES
src/ops/flash-attention/flash_fwd_hdim32_bf16_sm80.cu
src/ops/flash-attention/flash_fwd_hdim32_fp16_sm80.cu
src/ops/flash-attention/flash_fwd_hdim64_bf16_sm80.cu
Expand Down Expand Up @@ -653,6 +623,36 @@ if (WITH_CUDA)
src/ops/flash-attention/flash_fwd_split_hdim256_fp16_sm80.cu
PROPERTIES COMPILE_FLAGS "--use_fast_math")
endif()
set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
cuda_add_library(${PROJECT_NAME}
${SOURCES}
src/cuda/allocator.cc
src/cuda/primitives.cu
src/cuda/random.cu
src/cuda/utils.cc
src/ops/alibi_add_gpu.cu
src/ops/bias_add_gpu.cu
src/ops/concat_split_slide_gpu.cu
src/ops/conv1d_gpu.cu
src/ops/dequantize_gpu.cu
src/ops/flash_attention_gpu.cu
src/ops/gather_gpu.cu
src/ops/gumbel_max_gpu.cu
src/ops/layer_norm_gpu.cu
src/ops/mean_gpu.cu
src/ops/multinomial_gpu.cu
src/ops/rms_norm_gpu.cu
src/ops/rotary_gpu.cu
src/ops/softmax_gpu.cu
src/ops/tile_gpu.cu
src/ops/topk_gpu.cu
src/ops/topp_mask_gpu.cu
src/ops/quantize_gpu.cu
src/ops/nccl_ops_gpu.cu
src/ops/awq/gemm_gpu.cu
src/ops/awq/gemv_gpu.cu
src/ops/awq/dequantize_gpu.cu
)


elseif(WITH_CUDNN)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The project implements a custom runtime that applies many performance optimizati
The following model types are currently supported:

* Encoder-decoder models: Transformer base/big, M2M-100, NLLB, BART, mBART, Pegasus, T5, Whisper
* Decoder-only models: GPT-2, GPT-J, GPT-NeoX, OPT, BLOOM, MPT, Llama, Mistral, Gemma, CodeGen, GPTBigCode, Falcon
* Decoder-only models: GPT-2, GPT-J, GPT-NeoX, OPT, BLOOM, MPT, Llama, Mistral, Gemma, CodeGen, GPTBigCode, Falcon, Qwen2
* Encoder-only models: BERT, DistilBERT, XLM-RoBERTa

Compatible models should be first converted into an optimized model format. The library includes converters for multiple frameworks:
Expand Down
14 changes: 8 additions & 6 deletions include/ctranslate2/layers/wav2vec2.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <optional>
#include "ctranslate2/layers/transformer.h"

namespace ctranslate2 {
Expand Down Expand Up @@ -81,17 +82,18 @@ namespace ctranslate2 {
}

private:
const Wav2Vec2LayerNormConvLayer _feat_layer0;
const std::vector<std::unique_ptr<const Wav2Vec2LayerNormConvLayer>> _feat_layers;
const LayerNorm _fp_norm;
const Dense _fp_ff;
const Wav2Vec2PosConvLayer _pos_conv_embed;
const StorageView* _upgraded_model;
std::optional<Wav2Vec2LayerNormConvLayer> _feat_layer0;
std::optional<std::vector<std::unique_ptr<const Wav2Vec2LayerNormConvLayer>>> _feat_layers;
std::optional<LayerNorm> _fp_norm;
std::optional<Dense> _fp_ff;
std::optional<Wav2Vec2PosConvLayer> _pos_conv_embed;
const ops::Transpose _transpose;
const ops::GELU _gelu;
const dim_t _num_heads;
const std::vector<std::unique_ptr<const TransformerEncoderLayer>> _layers;
const LayerNorm _output_norm;
const Dense _lm_head;
std::optional<Dense> _lm_head;
};

}
Expand Down
5 changes: 3 additions & 2 deletions python/cpp/wav2vec2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@ namespace ctranslate2 {
Encodes the input features.
Arguments:
features: Mel spectrogram of the audio, as a float array with shape
``[batch_size, 80, 3000]``.
features: Either hidden_states (up to v4.3.1, see https://github.com/OpenNMT/CTranslate2/blob/59c7dda738892df7a064aa360d0e45a4c3840b07/python/tests/test_transformers.py#L1028) or
raw audio (followed by VAD), as a float array with shape
``[batch_size, 409, 1024]`` or ``[batch_size, 1, 131200]``, respectively.
to_cpu: Copy the encoder output to the CPU before returning the value.
Returns:
Expand Down
Loading

0 comments on commit 6936cba

Please sign in to comment.