added segment anything 2 #333

Open · wants to merge 7 commits into main
9 changes: 4 additions & 5 deletions llama/llama-3_1-405b-instruct/model/model.py
@@ -3,9 +3,8 @@
 import subprocess
 import uuid
 
-from transformers import AutoTokenizer
-
 from model.sighelper import patch
+from transformers import AutoTokenizer
 
 patch()
@@ -14,9 +13,9 @@
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 
 os.environ["TOKENIZERS_PARALLELISM"] = "true"
-os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = (
-    "spawn"  # for multiprocessing to work with CUDA
-)
+os.environ[
+    "VLLM_WORKER_MULTIPROC_METHOD"
+] = "spawn"  # for multiprocessing to work with CUDA
 logger = logging.getLogger(__name__)


34 changes: 34 additions & 0 deletions segment-anything-2/config.yaml
@@ -0,0 +1,34 @@
build_commands: []
base_image:
image: alphatozeta/cuda-python:12.1.1-cudnn8-devel-ubuntu22.04
python_executable_path: /usr/bin/python3
model_name: segment-anything-2
environment_variables: {}
external_package_dirs: []
model_metadata:
example_model_input: {"image": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0c/Kiev_straatbeeld.jpg/640px-Kiev_straatbeeld.jpg", "points_per_side": 32, "pred_iou_thresh": 0.8, "stability_score_thresh": 0.95, "use_m2m": true}
python_version: py310
requirements:
- torch==2.4.0
- torchvision==0.19.0
- numpy==1.26.4
- tqdm==4.66.5
- hydra-core==1.3.2
- httpx==0.27.0
- iopath==0.1.10
- pillow==10.4.0
- matplotlib==3.9.1
- jupyter==1.0.0
- opencv-python-headless==4.8.0.74
- black==24.8.0
- usort==1.0.8.post1
- ufmt==2.7.0
resources:
accelerator: A10G
use_gpu: true
secrets: {}
system_packages:
- libgl1-mesa-glx
- libglib2.0-0
- ninja-build
- python3.10-venv
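
With this config, the packaged model exposes a predict endpoint that accepts the `example_model_input` shown above. Below is a minimal client sketch; the endpoint URL format and `Api-Key` header assume a Baseten-style Truss deployment, and `ENDPOINT`/`API_KEY` are hypothetical placeholders.

```python
# Minimal client sketch for the deployed model. ENDPOINT and API_KEY are
# hypothetical placeholders; the URL format assumes a Baseten deployment.
import base64

import httpx

ENDPOINT = "https://model-<model_id>.api.baseten.co/production/predict"
API_KEY = "<your_api_key>"

payload = {
    "image": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0c/Kiev_straatbeeld.jpg/640px-Kiev_straatbeeld.jpg",
    "points_per_side": 32,
    "pred_iou_thresh": 0.8,
    "stability_score_thresh": 0.95,
    "use_m2m": True,
}

resp = httpx.post(
    ENDPOINT,
    headers={"Authorization": f"Api-Key {API_KEY}"},
    json=payload,
    timeout=300.0,
)
resp.raise_for_status()
result = resp.json()

# The response contains base64-encoded JPEGs: the combined colored overlay
# first, followed by one black-and-white mask per detected segment.
for i, mask_b64 in enumerate(result["masks"]):
    with open(f"mask_{i}.jpg", "wb") as f:
        f.write(base64.b64decode(mask_b64))
```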
Empty file.
222 changes: 222 additions & 0 deletions segment-anything-2/model/model.py
@@ -0,0 +1,222 @@
# Prediction interface adapted from a Cog template (https://cog.run/python)

import base64
import os
import subprocess
import sys
from io import BytesIO

import cv2
import httpx
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image

# Make the bundled "data" directory importable so the sam2 package below resolves
sys.path.insert(1, "data")

from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.build_sam import build_sam2

DEVICE = "cuda"
MODEL_DIR = "checkpoints"
BASE_URL = "https://dl.fbaipublicfiles.com/segment_anything_2/072824/"
checkpoints = {
"sam2_hiera_tiny.pt": f"{BASE_URL}sam2_hiera_tiny.pt",
"sam2_hiera_small.pt": f"{BASE_URL}sam2_hiera_small.pt",
"sam2_hiera_base_plus.pt": f"{BASE_URL}sam2_hiera_base_plus.pt",
"sam2_hiera_large.pt": f"{BASE_URL}sam2_hiera_large.pt",
}


class Model:
def __init__(self, **kwargs):
"""Load the model into memory to make running multiple predictions efficient"""
self._data_dir = kwargs["data_dir"]
print(self._data_dir)
self.model_files = [
# "sam2_hiera_base_plus.pt",
# "sam2_hiera_large.pt",
"sam2_hiera_small.pt",
# "sam2_hiera_tiny.pt",
]
# models are built into the truss itself

self.model_configs = {
"tiny": (
"sam2_hiera_t.yaml",
f"{self._data_dir}/checkpoints/sam2_hiera_tiny.pt",
),
"small": (
"sam2_hiera_s.yaml",
f"{self._data_dir}/checkpoints/sam2_hiera_small.pt",
),
"base": (
"sam2_hiera_b+.yaml",
f"{self._data_dir}/checkpoints/sam2_hiera_base_plus.pt",
),
"large": (
"sam2_hiera_l.yaml",
f"{self._data_dir}/checkpoints/sam2_hiera_large.pt",
),
}

self.model_cfg, self.sam2_checkpoint = self.model_configs["small"]
functions = [self.load, self.predict]
kwargs_list = [
{},
{
"model_input": {
"image": "https://replicate.delivery/pbxt/LMbGi83qiV3QXR9fqDIzTl0P23ZWU560z1nVDtgl0paCcyYs/cars.jpg"
}
},
]

def download_file(self, url, filename):
try:
print(f"Downloading {filename} checkpoint...")
with httpx.stream("GET", url) as response:
response.raise_for_status() # Raise an error for unsuccessful status codes
                # Make sure the file is stored in the checkpoints directory
                filename = f"{self._data_dir}/checkpoints/{filename}"
                os.makedirs(os.path.dirname(filename), exist_ok=True)
with open(filename, "wb") as file:
for chunk in response.iter_bytes():
file.write(chunk)
print(f"Successfully downloaded {filename}.")
except httpx.HTTPStatusError as e:
print(f"Failed to download checkpoint from {url}: {e}")
            sys.exit(1)

    def load(self):
        # Log installed packages, then install the vendored SAM 2 sources
        os.system("pip freeze")
        os.system("pip install --no-build-isolation -e /packages")
        # Download the selected checkpoints
        for model in self.model_files:
            self.download_file(checkpoints.get(model), model)
        # Build the SAM 2 model and the automatic mask generator
self.sam2 = build_sam2(
self.model_cfg,
self.sam2_checkpoint,
device="cuda",
apply_postprocessing=False,
)
self.mask_generator = SAM2AutomaticMaskGenerator(self.sam2)

# Enable bfloat16 and TF32 for better performance
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
if torch.cuda.get_device_properties(0).major >= 8:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
try:
result = subprocess.run(
["nvidia-smi"], capture_output=True, text=True, check=True
)
print(result.stdout)
except subprocess.CalledProcessError as e:
print(f"Command failed with code {e.returncode}: {e.stderr}")

def predict(self, model_input):
# Run model inference here
image = model_input.get("image") # assuming image is a url
points_per_side = model_input.get("points_per_side", 32)
pred_iou_thresh = model_input.get("pred_iou_thresh", 0.88)
stability_score_thresh = model_input.get("stability_score_thresh", 0.95)
use_m2m = model_input.get("use_m2m", True)
        response = httpx.get(image, follow_redirects=True)
        response.raise_for_status()  # fail fast on a bad image URL
        input_image = Image.open(BytesIO(response.content))
        input_image = np.array(input_image.convert("RGB"))

# Configure the mask generator
self.mask_generator.points_per_side = points_per_side
self.mask_generator.pred_iou_thresh = pred_iou_thresh
self.mask_generator.stability_score_thresh = stability_score_thresh
self.mask_generator.use_m2m = use_m2m

# Generate masks
masks = self.mask_generator.generate(input_image)

        # Generate the combined colored mask as a base64-encoded JPEG
        b64_results = self.return_combined_mask(input_image, masks)

        # Generate individual black-and-white masks as base64 strings
        individual_mask_strings = self.return_individual_masks(masks)
        # Prepend the combined mask to the list of individual masks
        b64_results = [b64_results] + individual_mask_strings
del masks
torch.cuda.empty_cache()
try:
result = subprocess.run(
["nvidia-smi"], capture_output=True, text=True, check=True
)
print(result.stdout)
except subprocess.CalledProcessError as e:
print(f"Command failed with code {e.returncode}: {e.stderr}")
return {"status": "success", "masks": b64_results}

    def return_combined_mask(self, input_image, masks):
        """
        Generate a combined mask overlay from the given input image and masks
        and return it as a base64-encoded JPEG.
        """
buffer = BytesIO()
plt.figure(figsize=(20, 20))
plt.imshow(input_image)
self.show_anns(masks)
plt.axis("off")
plt.savefig(buffer, format="jpeg", bbox_inches="tight", pad_inches=0)
plt.close()
buffer.seek(0)
img_base64 = base64.b64encode(buffer.read()).decode("utf-8")
return img_base64

    def return_individual_masks(self, masks):
        """Encode each segmentation mask as a base64-encoded black-and-white JPEG."""
        individual_mask_strings = []
        for mask in masks:
mask_image = mask["segmentation"].astype(np.uint8) * 255

buffer = BytesIO()
Image.fromarray(mask_image).save(buffer, format="JPEG")
base64_string = base64.b64encode(buffer.getvalue()).decode("utf-8")

individual_mask_strings.append(base64_string)

return individual_mask_strings

    def show_anns(self, anns):
        """Overlay randomly colored masks, with contours, on the current matplotlib axes."""
if len(anns) == 0:
return
sorted_anns = sorted(anns, key=(lambda x: x["area"]), reverse=True)
ax = plt.gca()
ax.set_autoscale_on(False)

img = np.ones(
(
sorted_anns[0]["segmentation"].shape[0],
sorted_anns[0]["segmentation"].shape[1],
4,
)
)
img[:, :, 3] = 0
for ann in sorted_anns:
m = ann["segmentation"]
color_mask = np.concatenate([np.random.random(3), [0.5]])
img[m] = color_mask
contours, _ = cv2.findContours(
m.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)
contours = [
cv2.approxPolyDP(contour, epsilon=0.01, closed=True)
for contour in contours
]
cv2.drawContours(img, contours, -1, (0, 0, 1, 0.4), thickness=1)

ax.imshow(img)
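
For a quick local smoke test of the class above, something like the following sketch could be used. It assumes a CUDA GPU, network access to the checkpoint URLs, and that a local `data` directory stands in for the truss `data_dir`; note that `load()` also attempts to `pip install` the vendored `/packages` sources.

```python
# Hypothetical local smoke test; "data" stands in for the truss data_dir,
# and load() will download sam2_hiera_small.pt into data/checkpoints.
import base64
from io import BytesIO

from PIL import Image

from model.model import Model

model = Model(data_dir="data")
model.load()  # downloads the checkpoint and builds the mask generator

output = model.predict(
    {
        "image": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0c/Kiev_straatbeeld.jpg/640px-Kiev_straatbeeld.jpg",
        "points_per_side": 32,
        "pred_iou_thresh": 0.8,
        "stability_score_thresh": 0.95,
        "use_m2m": True,
    }
)
print(output["status"], "-", len(output["masks"]), "masks returned")

# Decode the combined overlay (the first element) back into an image.
overlay = Image.open(BytesIO(base64.b64decode(output["masks"][0])))
overlay.save("combined_overlay.jpg")
```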
80 changes: 80 additions & 0 deletions segment-anything-2/packages/CODE_OF_CONDUCT.md
@@ -0,0 +1,80 @@
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

This Code of Conduct also applies outside the project spaces when there is a
reasonable belief that an individual's behavior may have a negative impact on
the project or its community.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <[email protected]>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
31 changes: 31 additions & 0 deletions segment-anything-2/packages/CONTRIBUTING.md
@@ -0,0 +1,31 @@
# Contributing to segment-anything
We want to make contributing to this project as easy and transparent as
possible.

## Pull Requests
We actively welcome your pull requests.

1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints, using the `ufmt format` command. Linting requires `black==24.2.0`, `usort==1.0.2`, and `ufmt==2.0.0b2`, which can be installed via `pip install -e ".[dev]"`.
6. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## License
By contributing to segment-anything, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.