# Born out of Issue 36.
# Allows the user to set up their own test files to run inference on.
# (Before running this script, create a folder `my_test` inside the
# `metric_depth` directory, with `input` and `output` subfolders.)
# Make sure you have the necessary libraries installed.
# Code by @1ssb

import argparse
import glob
import os

import numpy as np
import open3d as o3d
import torch
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm

from zoedepth.models.builder import build_model
from zoedepth.utils.config import get_config

# Global settings
FL = 715.0873
FY = 256 * 0.6
FX = 256 * 0.6
NYU_DATA = False
FINAL_HEIGHT = 256
FINAL_WIDTH = 256
INPUT_DIR = './my_test/input'
OUTPUT_DIR = './my_test/output'
DATASET = 'nyu'  # Let's not pick a fight with the model's dataloader
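# Note: FX/FY above are a rough default scaled to the 256-px output, and FL is
# the focal length applied only when NYU_DATA is True. If your images come from
# a calibrated camera, substituting its real intrinsics here should give
# metrically more faithful point clouds.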

def process_images(model):
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg'))
    for image_path in tqdm(image_paths, desc="Processing Images"):
        try:
            color_image = Image.open(image_path).convert('RGB')
            original_width, original_height = color_image.size
            image_tensor = transforms.ToTensor()(color_image).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')

            # Run inference; depending on the model, the prediction may come
            # back as a dict or a list/tuple, so unwrap it to a raw depth map.
            pred = model(image_tensor, dataset=DATASET)
            if isinstance(pred, dict):
                pred = pred.get('metric_depth', pred.get('out'))
            elif isinstance(pred, (list, tuple)):
                pred = pred[-1]
            pred = pred.squeeze().detach().cpu().numpy()

            # Resize color image and depth to final size
            resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS)
            resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST)

            focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL)
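            # Back-project through a simple pinhole model: a pixel (u, v) with
            # depth z maps to camera-space coordinates
            #   X = (u - cx) * z / fx,  Y = (v - cy) * z / fy,  Z = z,
            # with the principal point (cx, cy) assumed at the image center.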
            x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT))
            x = (x - FINAL_WIDTH / 2) / focal_length_x
            y = (y - FINAL_HEIGHT / 2) / focal_length_y
            z = np.array(resized_pred)
            points = np.stack((x * z, y * z, z), axis=-1).reshape(-1, 3)
            colors = np.array(resized_color_image).reshape(-1, 3) / 255.0

            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(points)
            pcd.colors = o3d.utility.Vector3dVector(colors)
            o3d.io.write_point_cloud(os.path.join(OUTPUT_DIR, os.path.splitext(os.path.basename(image_path))[0] + ".ply"), pcd)
        except Exception as e:
            print(f"Error processing {image_path}: {e}")

def main(model_name, pretrained_resource):
    config = get_config(model_name, "eval", DATASET)
    config.pretrained_resource = pretrained_resource
    model = build_model(config).to('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    process_images(model)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default='zoedepth', help="Name of the model to test")
    parser.add_argument("-p", "--pretrained_resource", type=str, default='local::./checkpoints/depth_anything_metric_depth_indoor.pt', help="Pretrained resource to use for fetching weights.")
    args = parser.parse_args()
    main(args.model, args.pretrained_resource)
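# Example usage (a minimal sketch; the checkpoint path must match what you
# actually downloaded, and 'example.ply' is a hypothetical output name):
#
#   python depth_to_pointcloud.py -m zoedepth \
#       -p local::./checkpoints/depth_anything_metric_depth_indoor.pt
#
# Each image in ./my_test/input then yields a colored .ply in ./my_test/output,
# which can be inspected with Open3D's built-in viewer:
#
#   import open3d as o3d
#   pcd = o3d.io.read_point_cloud('./my_test/output/example.ply')
#   o3d.visualization.draw_geometries([pcd])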