From 232f6b081f384ad4acba3a0fa566339d2cf2fa50 Mon Sep 17 00:00:00 2001
From: Zhanwen Chen
Date: Wed, 5 Jun 2024 23:16:13 -0700
Subject: [PATCH 1/5] Create install_env_pllava.md

---
 install_env_pllava.md | 59 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 install_env_pllava.md

diff --git a/install_env_pllava.md b/install_env_pllava.md
new file mode 100644
index 0000000..53e0a88
--- /dev/null
+++ b/install_env_pllava.md
@@ -0,0 +1,59 @@
+# install_env_pllava.md
+
+## 1. Clone base env
+
+```bash
+conda create -n pllava --clone clean_pytorch_ffmpeg_build
+rm ${CONDA_PREFIX}/lib/libffi.7.so ${CONDA_PREFIX}/lib/libffi.so.7 # Fixes ImportError: /lib/x86_64-linux-gnu/libp11-kit.so.0: undefined symbol: ffi_type_pointer, version LIBFFI_BASE_7.0
+ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_PREFIX}/lib/libstdc++.so.6 # Fixes ImportError: ${CONDA_PREFIX}/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by ${CONDA_PREFIX}/lib/python3.12/site-packages/torch/lib/libtorch_python.so)
+export IMAGEIO_FFMPEG_EXE=ffmpeg
+# export IMAGEIO_FREEIMAGE_LIB=
+
+# ImageIO without ffmpeg binary (use system ffmpeg)
+pip install imageio imageio-ffmpeg --no-binary imageio-ffmpeg
+
+# OpenCV with CUDA support and system ffmpeg
+cd && git clone --recursive https://github.com/opencv/opencv-python.git && cd opencv-python
+git submodule sync
+git submodule update --init --recursive
+export CMAKE_ARGS="-DCMAKE_BUILD_TYPE=RELEASE -DWITH_CUBLAS=1 -DWITH_CUDA=ON -DWITH_NVCUVID=ON -DWITH_CUBLAS=1 -DWITH_CUDNN=ON -DOPENCV_DNN_CUDA=ON -DCUDA_ARCH_BIN=7.0 -DOPENCV_ENABLE_NONFREE=ON -DENABLE_FAST_MATH=1 -DCUDA_FAST_MATH=1 -DOPENCV_EXTRA_MODULES_PATH=${HOME}/opencv-python/opencv_contrib/modules -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so -DCUDA_nvidia-encode_LIBRARY=/usr/local/cuda/lib64/stubs/libnvidia-encode.so" #-DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" #-DCUDA_nvidia-encode_LIBRARY="
+
+sudo ln -s ${CONDA_PREFIX}/lib/python3.12/site-packages/numpy/core/include/numpy /usr/include/numpy
+
+export ENABLE_HEADLESS=1
+export ENABLE_CONTRIB=1
+scp ${HOME}/Downloads/Video_Codec_SDK_12.0.16/{Interface/nvEncodeAPI.h,Lib/linux/stubs/x86_64/libnvcuvid.so,Lib/linux/stubs/x86_64/libnvidia-encode.so} my_server:~ # NOTE: on laptop
+sudo mv ~/nvEncodeAPI.h /usr/local/cuda/include
+sudo mv ~/{libnvcuvid.so,libnvidia-encode.so} /usr/local/cuda/lib64/stubs
+pip wheel . --verbose | tee install_opencv.log
+pip install opencv_contrib_python_headless-4.10.0.82-cp312-cp312-linux_x86_64.whl
+
+# PyAV without FFMPEG binary (use system ffmpeg)
+pip install av --no-binary av
+
+pip install transformers accelerate safetensors peft
+# is imageio already installed?
+pip install einops gradio moviepy
+
+# Install decord
+
+cd && git clone --recursive https://github.com/zhanwenchen/decord && cd decord
+git submodule sync
+git submodule update --init --recursive
+mkdir build && cd build
+
+cd python
+cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_BUILD_TYPE=Release
+make -j
+
+cd ../python
+pip install .
+
+```
+
+
+## Run
+
+```bash
+bash scripts/demo.sh
+```

From c6b5149588fab3c16d4e7deda2130a267686d2ee Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 6 Jun 2024 21:37:40 +0000
Subject: [PATCH 2/5] #1 (Fix) Check ampere or above for using flash-attn

---
 models/pllava/configuration_pllava.py | 3 +++
 models/pllava/modeling_pllava.py      | 9 +++++++--
 tasks/eval/model_utils.py             | 3 +++
 utils/basic_utils.py                  | 5 +++++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/models/pllava/configuration_pllava.py b/models/pllava/configuration_pllava.py
index 6c429ce..53b58ab 100644
--- a/models/pllava/configuration_pllava.py
+++ b/models/pllava/configuration_pllava.py
@@ -16,6 +16,7 @@
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 from transformers.models.auto import CONFIG_MAPPING
+from utils.basic_utils import is_gpu_ampere_or_later
 
 logger = logging.get_logger(__name__)
 
@@ -141,6 +142,8 @@ def __init__(
         elif text_config is None:
             tmp_config = {"_attn_implementation":"flash_attention_2",
                           "gradient_checkpointing": self.gradient_checkpointing}
+            if not is_gpu_ampere_or_later():
+                del tmp_config['_attn_implementation']
             self.text_config = CONFIG_MAPPING["llama"](**tmp_config)
             self.text_config.gradient_checkpointing = self.gradient_checkpointing
             # self.text_config["_attn_implementation"]="flash_attention_2" # xl: temporal hard code
diff --git a/models/pllava/modeling_pllava.py b/models/pllava/modeling_pllava.py
index 04d64cf..6a8e9b1 100644
--- a/models/pllava/modeling_pllava.py
+++ b/models/pllava/modeling_pllava.py
@@ -36,6 +36,8 @@
 from .configuration_pllava import PllavaConfig
 import pickle
 
+from model_utils import is_gpu_ampere_or_later
+
 logger = logging.get_logger(__name__)
 
 
@@ -175,7 +177,7 @@ class PllavaPreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _no_split_modules = ["LlavaVisionAttention"]
     _skip_keys_device_placement = "past_key_values"
-    _supports_flash_attn_2 = True
+    _supports_flash_attn_2 = is_gpu_ampere_or_later()
 
     def _init_weights(self, module):
         # important: this ported version of Llava isn't meant for training from scratch - only
@@ -291,7 +293,10 @@ def __init__(self, config: PllavaConfig):
         self.vision_tower = AutoModel.from_config(config.vision_config)
         self.multi_modal_projector = PllavaMultiModalProjector(config)
         self.vocab_size = config.vocab_size
-        self.language_model = AutoModelForCausalLM.from_config(config.text_config, torch_dtype=config.torch_dtype, attn_implementation="flash_attention_2")
+        if is_gpu_ampere_or_later():
+            self.language_model = AutoModelForCausalLM.from_config(config.text_config, torch_dtype=config.torch_dtype, attn_implementation="flash_attention_2")
+        else:
+            self.language_model = AutoModelForCausalLM.from_config(config.text_config, torch_dtype=config.torch_dtype)
         self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else self.config.text_config.pad_token_id
         assert self.pad_token_id is not None, 'provide the model with pad_token_id, this would be used to arranging new embedings'
         self.post_init()
diff --git a/tasks/eval/model_utils.py b/tasks/eval/model_utils.py
index f1a700f..4651801 100644
--- a/tasks/eval/model_utils.py
+++ b/tasks/eval/model_utils.py
@@ -10,6 +10,8 @@
 from accelerate.utils import get_balanced_memory
 from transformers import StoppingCriteria
+from utils.basic_utils import is_gpu_ampere_or_later
+
 
 class KeywordsStoppingCriteria(StoppingCriteria):
     def __init__(self, keywords, tokenizer, input_ids):
         self.keywords = keywords
@@ -45,6 +47,7 @@ def load_pllava(repo_id, num_frames, use_lora=False, weight_dir=None, lora_alpha
         kwargs.update(pooling_shape=(0,12,12)) # produce a bug if ever usen the pooling projector
     config = PllavaConfig.from_pretrained(
         repo_id if not use_lora else weight_dir,
+        use_flash_attention_2=is_gpu_ampere_or_later(),
         pooling_shape=pooling_shape,
         **kwargs,
     )
diff --git a/utils/basic_utils.py b/utils/basic_utils.py
index fb453d3..ba6bb66 100644
--- a/utils/basic_utils.py
+++ b/utils/basic_utils.py
@@ -12,12 +12,17 @@
 import torch
 import torch.distributed as dist
+from torch.cuda import get_device_properties
 
 from .distributed import is_dist_avail_and_initialized
 
 
 logger = logging.getLogger(__name__)
 
 
+def is_gpu_ampere_or_later():
+    return get_device_properties(0).major >= 8
+
+
 class SmoothedValue(object):
     """Track a series of values and provide access to smoothed values over a
     window or the global series average.

From 4e4666018431ab1a79b2443562b0a3092684bcf8 Mon Sep 17 00:00:00 2001
From: Zhanwen Chen
Date: Tue, 16 Jul 2024 12:59:29 -0700
Subject: [PATCH 3/5] Bug Fix: Correct Import Package for from utils.basic_utils for is_gpu_ampere_or_later

---
 models/pllava/modeling_pllava.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/pllava/modeling_pllava.py b/models/pllava/modeling_pllava.py
index 6a8e9b1..da01f01 100644
--- a/models/pllava/modeling_pllava.py
+++ b/models/pllava/modeling_pllava.py
@@ -36,7 +36,7 @@
 from .configuration_pllava import PllavaConfig
 import pickle
 
-from model_utils import is_gpu_ampere_or_later
+from utils.basic_utils import is_gpu_ampere_or_later
 
 logger = logging.get_logger(__name__)
 

From e16d084b5c7089376b0c1d9bb6978a652913123f Mon Sep 17 00:00:00 2001
From: Zhanwen Chen
Date: Sun, 28 Jul 2024 11:46:27 -0700
Subject: [PATCH 4/5] Delete install_env_pllava.md

---
 install_env_pllava.md | 59 -------------------------------------------
 1 file changed, 59 deletions(-)
 delete mode 100644 install_env_pllava.md

diff --git a/install_env_pllava.md b/install_env_pllava.md
deleted file mode 100644
index 53e0a88..0000000
--- a/install_env_pllava.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# install_env_pllava.md
-
-## 1. Clone base env
-
-```bash
-conda create -n pllava --clone clean_pytorch_ffmpeg_build
-rm ${CONDA_PREFIX}/lib/libffi.7.so ${CONDA_PREFIX}/lib/libffi.so.7 # Fixes ImportError: /lib/x86_64-linux-gnu/libp11-kit.so.0: undefined symbol: ffi_type_pointer, version LIBFFI_BASE_7.0
-ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_PREFIX}/lib/libstdc++.so.6 # Fixes ImportError: ${CONDA_PREFIX}/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by ${CONDA_PREFIX}/lib/python3.12/site-packages/torch/lib/libtorch_python.so)
-export IMAGEIO_FFMPEG_EXE=ffmpeg
-# export IMAGEIO_FREEIMAGE_LIB=
-
-# ImageIO without ffmpeg binary (use system ffmpeg)
-pip install imageio imageio-ffmpeg --no-binary imageio-ffmpeg
-
-# OpenCV with CUDA support and system ffmpeg
-cd && git clone --recursive https://github.com/opencv/opencv-python.git && cd opencv-python
-git submodule sync
-git submodule update --init --recursive
-export CMAKE_ARGS="-DCMAKE_BUILD_TYPE=RELEASE -DWITH_CUBLAS=1 -DWITH_CUDA=ON -DWITH_NVCUVID=ON -DWITH_CUBLAS=1 -DWITH_CUDNN=ON -DOPENCV_DNN_CUDA=ON -DCUDA_ARCH_BIN=7.0 -DOPENCV_ENABLE_NONFREE=ON -DENABLE_FAST_MATH=1 -DCUDA_FAST_MATH=1 -DOPENCV_EXTRA_MODULES_PATH=${HOME}/opencv-python/opencv_contrib/modules -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so -DCUDA_nvidia-encode_LIBRARY=/usr/local/cuda/lib64/stubs/libnvidia-encode.so" #-DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" #-DCUDA_nvidia-encode_LIBRARY="
-
-sudo ln -s ${CONDA_PREFIX}/lib/python3.12/site-packages/numpy/core/include/numpy /usr/include/numpy
-
-export ENABLE_HEADLESS=1
-export ENABLE_CONTRIB=1
-scp ${HOME}/Downloads/Video_Codec_SDK_12.0.16/{Interface/nvEncodeAPI.h,Lib/linux/stubs/x86_64/libnvcuvid.so,Lib/linux/stubs/x86_64/libnvidia-encode.so} my_server:~ # NOTE: on laptop
-sudo mv ~/nvEncodeAPI.h /usr/local/cuda/include
-sudo mv ~/{libnvcuvid.so,libnvidia-encode.so} /usr/local/cuda/lib64/stubs
-pip wheel . --verbose | tee install_opencv.log
-pip install opencv_contrib_python_headless-4.10.0.82-cp312-cp312-linux_x86_64.whl
-
-# PyAV without FFMPEG binary (use system ffmpeg)
-pip install av --no-binary av
-
-pip install transformers accelerate safetensors peft
-# is imageio already installed?
-pip install einops gradio moviepy
-
-# Install decord
-
-cd && git clone --recursive https://github.com/zhanwenchen/decord && cd decord
-git submodule sync
-git submodule update --init --recursive
-mkdir build && cd build
-
-cd python
-cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_BUILD_TYPE=Release
-make -j
-
-cd ../python
-pip install .
-
-```
-
-
-## Run
-
-```bash
-bash scripts/demo.sh
-```

From ec293c604b323173473bdeb352d13da0f1e7be6c Mon Sep 17 00:00:00 2001
From: ermu2001 <55656210+ermu2001@users.noreply.github.com>
Date: Wed, 12 Jun 2024 21:44:29 -0700
Subject: [PATCH 5/5] Update pllava_demo.py

update default system prompt in demo
---
 tasks/eval/demo/pllava_demo.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tasks/eval/demo/pllava_demo.py b/tasks/eval/demo/pllava_demo.py
index 935734b..f3f78b8 100644
--- a/tasks/eval/demo/pllava_demo.py
+++ b/tasks/eval/demo/pllava_demo.py
@@ -13,10 +13,14 @@
 )
 from tasks.eval.demo import pllava_theme
 
-SYSTEM="""You are Pllava, a large vision-language assistant.
-You are able to understand the video content that the user provides, and assist the user with a variety of tasks using natural language.
-Follow the instructions carefully and explain your answers in detail based on the provided video.
+SYSTEM="""You are a powerful Video Magic ChatBot, a large vision-language assistant.
+You are able to understand the video content that the user provides and assist the user in a video-language related task.
+The user might provide you with the video and maybe some extra noisy information to help you out or ask you a question. Make use of the information in a proper way to be competent for the job.
+### INSTRUCTIONS:
+1. Follow the user's instruction.
+2. Be critical yet believe in yourself.
 """
+
 INIT_CONVERSATION: Conversation = conv_plain_v1.copy()
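
Patch 1 (later removed by patch 4) builds imageio-ffmpeg, OpenCV, PyAV, and decord against the system ffmpeg and the local CUDA toolkit. A rough smoke test along the following lines can confirm the CUDA-enabled builds are actually the ones being imported; it is a sketch, not part of the patches, and `sample.mp4` is a hypothetical placeholder path.

```python
# Post-install smoke test for the environment described in install_env_pllava.md.
# Assumptions: the wheels built above are installed in the active env, and a
# readable video file exists at the (hypothetical) path passed in.
import av
import cv2
import decord
import imageio_ffmpeg


def check_environment(sample_video: str = "sample.mp4") -> None:
    # OpenCV built with -DWITH_CUDA=ON should report at least one CUDA device.
    print("OpenCV:", cv2.__version__, "| CUDA devices:", cv2.cuda.getCudaEnabledDeviceCount())
    # With IMAGEIO_FFMPEG_EXE set and --no-binary, this should resolve to the system ffmpeg.
    print("ffmpeg binary:", imageio_ffmpeg.get_ffmpeg_exe())
    # PyAV built from source against the system ffmpeg libraries.
    print("PyAV:", av.__version__)
    # decord built with -DUSE_CUDA=ON should accept a GPU decoding context.
    reader = decord.VideoReader(sample_video, ctx=decord.gpu(0))
    print("decord frames decoded on GPU:", len(reader))


if __name__ == "__main__":
    check_environment()
```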
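Patches 2 and 3 gate flash-attention on the GPU's compute capability: `is_gpu_ampere_or_later()` checks `get_device_properties(0).major >= 8`, and the model code only passes `attn_implementation="flash_attention_2"` when that check passes. The standalone sketch below restates that gate; the `torch.cuda.is_available()` guard, the `device_index` parameter, and the explicit `"eager"` fallback string are illustrative additions, not part of the patches (the patches simply omit the kwarg on pre-Ampere GPUs).

```python
# Sketch of the capability gate used by patches 2-3 to decide whether
# flash_attention_2 can be requested from transformers.
import torch


def is_gpu_ampere_or_later(device_index: int = 0) -> bool:
    """True when the CUDA device has compute capability >= 8.0 (Ampere or newer)."""
    if not torch.cuda.is_available():
        return False  # no CUDA device at all, so flash-attention is unavailable
    return torch.cuda.get_device_properties(device_index).major >= 8


def pick_attn_implementation() -> str:
    # flash_attention_2 kernels require sm_80 or newer; "eager" is a portable
    # stand-in for the default implementation transformers would otherwise pick.
    return "flash_attention_2" if is_gpu_ampere_or_later() else "eager"


if __name__ == "__main__":
    print(pick_attn_implementation())
```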