From 232f6b081f384ad4acba3a0fa566339d2cf2fa50 Mon Sep 17 00:00:00 2001
From: Zhanwen Chen
Date: Wed, 5 Jun 2024 23:16:13 -0700
Subject: [PATCH 1/5] Create install_env_pllava.md

---
 install_env_pllava.md | 59 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 install_env_pllava.md

diff --git a/install_env_pllava.md b/install_env_pllava.md
new file mode 100644
index 0000000..53e0a88
--- /dev/null
+++ b/install_env_pllava.md
@@ -0,0 +1,59 @@
+# install_env_pllava.md
+
+## 1. Clone base env
+
+```bash
+conda create -n pllava --clone clean_pytorch_ffmpeg_build
+rm ${CONDA_PREFIX}/lib/libffi.7.so ${CONDA_PREFIX}/lib/libffi.so.7 # Fixes ImportError: /lib/x86_64-linux-gnu/libp11-kit.so.0: undefined symbol: ffi_type_pointer, version LIBFFI_BASE_7.0
+ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_PREFIX}/lib/libstdc++.so.6 # Fixes ImportError: ${CONDA_PREFIX}/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by ${CONDA_PREFIX}/lib/python3.12/site-packages/torch/lib/libtorch_python.so)
+export IMAGEIO_FFMPEG_EXE=ffmpeg
+# export IMAGEIO_FREEIMAGE_LIB=
+
+# ImageIO without ffmpeg binary (use system ffmpeg)
+pip install imageio imageio-ffmpeg --no-binary imageio-ffmpeg
+
+# OpenCV with CUDA support and system ffmpeg
+cd && git clone --recursive https://github.com/opencv/opencv-python.git && cd opencv-python
+git submodule sync
+git submodule update --init --recursive
+export CMAKE_ARGS="-DCMAKE_BUILD_TYPE=RELEASE -DWITH_CUBLAS=1 -DWITH_CUDA=ON -DWITH_NVCUVID=ON -DWITH_CUBLAS=1 -DWITH_CUDNN=ON -DOPENCV_DNN_CUDA=ON -DCUDA_ARCH_BIN=7.0 -DOPENCV_ENABLE_NONFREE=ON -DENABLE_FAST_MATH=1 -DCUDA_FAST_MATH=1 -DOPENCV_EXTRA_MODULES_PATH=${HOME}/opencv-python/opencv_contrib/modules -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so -DCUDA_nvidia-encode_LIBRARY=/usr/local/cuda/lib64/stubs/libnvidia-encode.so" #-DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" #-DCUDA_nvidia-encode_LIBRARY="
+
+sudo ln -s ${CONDA_PREFIX}/lib/python3.12/site-packages/numpy/core/include/numpy /usr/include/numpy
+
+export ENABLE_HEADLESS=1
+export ENABLE_CONTRIB=1
+scp ${HOME}/Downloads/Video_Codec_SDK_12.0.16/{Interface/nvEncodeAPI.h,Lib/linux/stubs/x86_64/libnvcuvid.so,Lib/linux/stubs/x86_64/libnvidia-encode.so} my_server:~ # NOTE: on laptop
+sudo mv ~/nvEncodeAPI.h /usr/local/cuda/include
+sudo mv ~/{libnvcuvid.so,libnvidia-encode.so} /usr/local/cuda/lib64/stubs
+pip wheel . --verbose | tee install_opencv.log
+pip install opencv_contrib_python_headless-4.10.0.82-cp312-cp312-linux_x86_64.whl
+
+# PyAV without FFMPEG binary (use system ffmpeg)
+pip install av --no-binary av
+
+pip install transformers accelerate safetensors peft
+# is imageio already installed?
+pip install einops gradio moviepy
+
+# Install decord
+
+cd && git clone --recursive https://github.com/zhanwenchen/decord && cd decord
+git submodule sync
+git submodule update --init --recursive
+mkdir build && cd build
+
+cd python
+cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_BUILD_TYPE=Release
+make -j
+
+cd ../python
+pip install .
+
+```
+
+
+## Run
+
+```bash
+bash scripts/demo.sh
+```

From c6b5149588fab3c16d4e7deda2130a267686d2ee Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 6 Jun 2024 21:37:40 +0000
Subject: [PATCH 2/5] #1 (Fix) Check ampere or above for using flash-attn

---
 models/pllava/configuration_pllava.py | 3 +++
 models/pllava/modeling_pllava.py      | 9 +++++++--
 tasks/eval/model_utils.py             | 3 +++
 utils/basic_utils.py                  | 5 +++++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/models/pllava/configuration_pllava.py b/models/pllava/configuration_pllava.py
index 6c429ce..53b58ab 100644
--- a/models/pllava/configuration_pllava.py
+++ b/models/pllava/configuration_pllava.py
@@ -16,6 +16,7 @@
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 from transformers.models.auto import CONFIG_MAPPING
+from utils.basic_utils import is_gpu_ampere_or_later
 
 logger = logging.get_logger(__name__)
 
@@ -141,6 +142,8 @@ def __init__(
         elif text_config is None:
             tmp_config = {"_attn_implementation":"flash_attention_2",
                           "gradient_checkpointing": self.gradient_checkpointing}
+            if not is_gpu_ampere_or_later():
+                del tmp_config['_attn_implementation']
             self.text_config = CONFIG_MAPPING["llama"](**tmp_config)
             self.text_config.gradient_checkpointing = self.gradient_checkpointing
             # self.text_config["_attn_implementation"]="flash_attention_2" # xl: temporal hard code
diff --git a/models/pllava/modeling_pllava.py b/models/pllava/modeling_pllava.py
index 04d64cf..6a8e9b1 100644
--- a/models/pllava/modeling_pllava.py
+++ b/models/pllava/modeling_pllava.py
@@ -36,6 +36,8 @@
 from .configuration_pllava import PllavaConfig
 import pickle
 
+from model_utils import is_gpu_ampere_or_later
+
 logger = logging.get_logger(__name__)
 
 
@@ -175,7 +177,7 @@ class PllavaPreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _no_split_modules = ["LlavaVisionAttention"]
     _skip_keys_device_placement = "past_key_values"
-    _supports_flash_attn_2 = True
+    _supports_flash_attn_2 = is_gpu_ampere_or_later()
 
     def _init_weights(self, module):
         # important: this ported version of Llava isn't meant for training from scratch - only
@@ -291,7 +293,10 @@ def __init__(self, config: PllavaConfig):
         self.vision_tower = AutoModel.from_config(config.vision_config)
         self.multi_modal_projector = PllavaMultiModalProjector(config)
         self.vocab_size = config.vocab_size
-        self.language_model = AutoModelForCausalLM.from_config(config.text_config, torch_dtype=config.torch_dtype, attn_implementation="flash_attention_2")
+        if is_gpu_ampere_or_later():
+            self.language_model = AutoModelForCausalLM.from_config(config.text_config, torch_dtype=config.torch_dtype, attn_implementation="flash_attention_2")
+        else:
+            self.language_model = AutoModelForCausalLM.from_config(config.text_config, torch_dtype=config.torch_dtype)
         self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else self.config.text_config.pad_token_id
         assert self.pad_token_id is not None, 'provide the model with pad_token_id, this would be used to arranging new embedings'
         self.post_init()
diff --git a/tasks/eval/model_utils.py b/tasks/eval/model_utils.py
index f1a700f..4651801 100644
--- a/tasks/eval/model_utils.py
+++ b/tasks/eval/model_utils.py
@@ -10,6 +10,8 @@
 from accelerate.utils import get_balanced_memory
 from transformers import StoppingCriteria
+from utils.basic_utils import is_gpu_ampere_or_later
+
 
 class KeywordsStoppingCriteria(StoppingCriteria):
     def __init__(self, keywords, tokenizer, input_ids):
         self.keywords = keywords
@@ -45,6 +47,7 @@ def load_pllava(repo_id, num_frames, use_lora=False, weight_dir=None, lora_alpha
         kwargs.update(pooling_shape=(0,12,12)) # produce a bug if ever usen the pooling projector
     config = PllavaConfig.from_pretrained(
         repo_id if not use_lora else weight_dir,
+        use_flash_attention_2=is_gpu_ampere_or_later(),
         pooling_shape=pooling_shape,
         **kwargs,
     )
diff --git a/utils/basic_utils.py b/utils/basic_utils.py
index fb453d3..ba6bb66 100644
--- a/utils/basic_utils.py
+++ b/utils/basic_utils.py
@@ -12,12 +12,17 @@
 import torch
 import torch.distributed as dist
+from torch.cuda import get_device_properties
 
 from .distributed import is_dist_avail_and_initialized
 
 
 logger = logging.getLogger(__name__)
 
 
+def is_gpu_ampere_or_later():
+    return get_device_properties(0).major >= 8
+
+
 class SmoothedValue(object):
     """Track a series of values and provide access to smoothed values over a
     window or the global series average.

From 4e4666018431ab1a79b2443562b0a3092684bcf8 Mon Sep 17 00:00:00 2001
From: Zhanwen Chen
Date: Tue, 16 Jul 2024 12:59:29 -0700
Subject: [PATCH 3/5] Bug Fix: Correct Import Package for from utils.basic_utils for is_gpu_ampere_or_later

---
 models/pllava/modeling_pllava.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/pllava/modeling_pllava.py b/models/pllava/modeling_pllava.py
index 6a8e9b1..da01f01 100644
--- a/models/pllava/modeling_pllava.py
+++ b/models/pllava/modeling_pllava.py
@@ -36,7 +36,7 @@
 from .configuration_pllava import PllavaConfig
 import pickle
 
-from model_utils import is_gpu_ampere_or_later
+from utils.basic_utils import is_gpu_ampere_or_later
 
 logger = logging.get_logger(__name__)
 

From e16d084b5c7089376b0c1d9bb6978a652913123f Mon Sep 17 00:00:00 2001
From: Zhanwen Chen
Date: Sun, 28 Jul 2024 11:46:27 -0700
Subject: [PATCH 4/5] Delete install_env_pllava.md

---
 install_env_pllava.md | 59 -------------------------------------------
 1 file changed, 59 deletions(-)
 delete mode 100644 install_env_pllava.md

diff --git a/install_env_pllava.md b/install_env_pllava.md
deleted file mode 100644
index 53e0a88..0000000
--- a/install_env_pllava.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# install_env_pllava.md
-
-## 1. Clone base env
-
-```bash
-conda create -n pllava --clone clean_pytorch_ffmpeg_build
-rm ${CONDA_PREFIX}/lib/libffi.7.so ${CONDA_PREFIX}/lib/libffi.so.7 # Fixes ImportError: /lib/x86_64-linux-gnu/libp11-kit.so.0: undefined symbol: ffi_type_pointer, version LIBFFI_BASE_7.0
-ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_PREFIX}/lib/libstdc++.so.6 # Fixes ImportError: ${CONDA_PREFIX}/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by ${CONDA_PREFIX}/lib/python3.12/site-packages/torch/lib/libtorch_python.so)
-export IMAGEIO_FFMPEG_EXE=ffmpeg
-# export IMAGEIO_FREEIMAGE_LIB=
-
-# ImageIO without ffmpeg binary (use system ffmpeg)
-pip install imageio imageio-ffmpeg --no-binary imageio-ffmpeg
-
-# OpenCV with CUDA support and system ffmpeg
-cd && git clone --recursive https://github.com/opencv/opencv-python.git && cd opencv-python
-git submodule sync
-git submodule update --init --recursive
-export CMAKE_ARGS="-DCMAKE_BUILD_TYPE=RELEASE -DWITH_CUBLAS=1 -DWITH_CUDA=ON -DWITH_NVCUVID=ON -DWITH_CUBLAS=1 -DWITH_CUDNN=ON -DOPENCV_DNN_CUDA=ON -DCUDA_ARCH_BIN=7.0 -DOPENCV_ENABLE_NONFREE=ON -DENABLE_FAST_MATH=1 -DCUDA_FAST_MATH=1 -DOPENCV_EXTRA_MODULES_PATH=${HOME}/opencv-python/opencv_contrib/modules -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so -DCUDA_nvidia-encode_LIBRARY=/usr/local/cuda/lib64/stubs/libnvidia-encode.so" #-DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" #-DCUDA_nvidia-encode_LIBRARY="
-
-sudo ln -s ${CONDA_PREFIX}/lib/python3.12/site-packages/numpy/core/include/numpy /usr/include/numpy
-
-export ENABLE_HEADLESS=1
-export ENABLE_CONTRIB=1
-scp ${HOME}/Downloads/Video_Codec_SDK_12.0.16/{Interface/nvEncodeAPI.h,Lib/linux/stubs/x86_64/libnvcuvid.so,Lib/linux/stubs/x86_64/libnvidia-encode.so} my_server:~ # NOTE: on laptop
-sudo mv ~/nvEncodeAPI.h /usr/local/cuda/include
-sudo mv ~/{libnvcuvid.so,libnvidia-encode.so} /usr/local/cuda/lib64/stubs
-pip wheel . --verbose | tee install_opencv.log
-pip install opencv_contrib_python_headless-4.10.0.82-cp312-cp312-linux_x86_64.whl
-
-# PyAV without FFMPEG binary (use system ffmpeg)
-pip install av --no-binary av
-
-pip install transformers accelerate safetensors peft
-# is imageio already installed?
-pip install einops gradio moviepy
-
-# Install decord
-
-cd && git clone --recursive https://github.com/zhanwenchen/decord && cd decord
-git submodule sync
-git submodule update --init --recursive
-mkdir build && cd build
-
-cd python
-cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_BUILD_TYPE=Release
-make -j
-
-cd ../python
-pip install .
-
-```
-
-
-## Run
-
-```bash
-bash scripts/demo.sh
-```

From ec293c604b323173473bdeb352d13da0f1e7be6c Mon Sep 17 00:00:00 2001
From: ermu2001 <55656210+ermu2001@users.noreply.github.com>
Date: Wed, 12 Jun 2024 21:44:29 -0700
Subject: [PATCH 5/5] Update pllava_demo.py

update default system prompt in demo
---
 tasks/eval/demo/pllava_demo.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tasks/eval/demo/pllava_demo.py b/tasks/eval/demo/pllava_demo.py
index 935734b..f3f78b8 100644
--- a/tasks/eval/demo/pllava_demo.py
+++ b/tasks/eval/demo/pllava_demo.py
@@ -13,10 +13,14 @@
 )
 from tasks.eval.demo import pllava_theme
 
-SYSTEM="""You are Pllava, a large vision-language assistant.
-You are able to understand the video content that the user provides, and assist the user with a variety of tasks using natural language.
-Follow the instructions carefully and explain your answers in detail based on the provided video.
+SYSTEM="""You are a powerful Video Magic ChatBot, a large vision-language assistant.
+You are able to understand the video content that the user provides and assist the user in a video-language related task.
+The user might provide you with the video and maybe some extra noisy information to help you out or ask you a question. Make use of the information in a proper way to be competent for the job.
+### INSTRUCTIONS:
+1. Follow the user's instruction.
+2. Be critical yet believe in yourself.
 """
+
 INIT_CONVERSATION: Conversation = conv_plain_v1.copy()
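
Patch 1 (later removed by patch 4) builds imageio-ffmpeg, OpenCV, PyAV, and decord against the system ffmpeg and the local CUDA toolkit. A rough smoke test along the following lines can confirm the CUDA-enabled builds are actually the ones being imported; it is a sketch, not part of the patches, and `sample.mp4` is a hypothetical placeholder path.

```python
# Post-install smoke test for the environment described in install_env_pllava.md.
# Assumptions: the wheels built above are installed in the active env, and a
# readable video file exists at the (hypothetical) path passed in.
import av
import cv2
import decord
import imageio_ffmpeg


def check_environment(sample_video: str = "sample.mp4") -> None:
    # OpenCV built with -DWITH_CUDA=ON should report at least one CUDA device.
    print("OpenCV:", cv2.__version__, "| CUDA devices:", cv2.cuda.getCudaEnabledDeviceCount())
    # With IMAGEIO_FFMPEG_EXE set and --no-binary, this should resolve to the system ffmpeg.
    print("ffmpeg binary:", imageio_ffmpeg.get_ffmpeg_exe())
    # PyAV built from source against the system ffmpeg libraries.
    print("PyAV:", av.__version__)
    # decord built with -DUSE_CUDA=ON should accept a GPU decoding context.
    reader = decord.VideoReader(sample_video, ctx=decord.gpu(0))
    print("decord frames decoded on GPU:", len(reader))


if __name__ == "__main__":
    check_environment()
```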
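Patches 2 and 3 gate flash-attention on the GPU's compute capability: `is_gpu_ampere_or_later()` checks `get_device_properties(0).major >= 8`, and the model code only passes `attn_implementation="flash_attention_2"` when that check passes. The standalone sketch below restates that gate; the `torch.cuda.is_available()` guard, the `device_index` parameter, and the explicit `"eager"` fallback string are illustrative additions, not part of the patches (the patches simply omit the kwarg on pre-Ampere GPUs).

```python
# Sketch of the capability gate used by patches 2-3 to decide whether
# flash_attention_2 can be requested from transformers.
import torch


def is_gpu_ampere_or_later(device_index: int = 0) -> bool:
    """True when the CUDA device has compute capability >= 8.0 (Ampere or newer)."""
    if not torch.cuda.is_available():
        return False  # no CUDA device at all, so flash-attention is unavailable
    return torch.cuda.get_device_properties(device_index).major >= 8


def pick_attn_implementation() -> str:
    # flash_attention_2 kernels require sm_80 or newer; "eager" is a portable
    # stand-in for the default implementation transformers would otherwise pick.
    return "flash_attention_2" if is_gpu_ampere_or_later() else "eager"


if __name__ == "__main__":
    print(pick_attn_implementation())
```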