Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ results/
ckpts/
imgs_dev/
hf_gdino.py

data_robi/
# Byte-compiled / optimized / DLL files
__pycache__/
**/__pycache__/
Expand Down Expand Up @@ -170,3 +170,4 @@ cython_debug/
/dinov2/
/robokit/RoboKit.egg-info/
/robokit/RoboKit.egg-info/
/datasets/bop_challenge/datasets/*
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,14 @@ Display masks, object IDs, and scores using Detectron2.
python -m src.scripts.visualize_detectron2 dataset_name=$DATASET_NAME input_file=$INPUT_FILE output_dir=$OUTPUT_DIR
```

### Inference on the ROBI Dataset

1. Train the adapter.
2. Set NIDS-Net's adapter option to true.
3. Run inference.
4. Run evaluation.


## Real-World Robot Experiment
We test our NIDS-Net on YCBV objects using ROS with a Fetch robot. We use
1. template embeddings of synthetic images from CNOS: "ros/weight_obj_shuffle2_0501_bs32_epoch_500_adapter_descriptors_pbr.json"
Expand Down
9 changes: 6 additions & 3 deletions adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,16 @@ def forward(self, features, labels):
dataset_name = f'insDet_{adapter_type}_0523'
temperature = 0.05
ratio = 0.6
feature_dataset = FeatureDataset(data_json='./obj_FFA/object_features_vitl14_reg.json', num_object=100) # 100 objects in total
#feature_dataset = FeatureDataset(data_json='./obj_FFA/object_features_vitl14_reg.json', num_object=100) # 100 objects in total
# Assuming 'features' is your (N, 1024) tensor
batch_size = 1024

# robo_feature_dataset = FeatureDataset(data_json='./RoboTools_obj_feat/object_features.json', num_object=20) # 20 objects in total
# ycbv_feature_dataset = FeatureDataset(data_json='./BOP_obj_feat/ycbv_object_features.json', num_object=21) # 21 objects in total
ycbv_feature_dataset = FeatureDataset(data_json='./BOP_obj_feat/ycbv_object_features.json', num_object=21) # 21 objects in total
# lmo_feature_dataset = FeatureDataset(data_json='./BOP_obj_feat/lmo_object_features.json', num_object=8)


cur_feature_dataset = feature_dataset
cur_feature_dataset = ycbv_feature_dataset

# Example training loop
input_features = 1024 # Size of the input feature vector, 1024 for large, 768 for base, 384 for small
Expand All @@ -145,6 +145,7 @@ def forward(self, features, labels):
for epoch in range(epochs):

for inputs, labels in dataloader: # in dataloader: tqdm(dataloader)

inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
Expand Down Expand Up @@ -175,6 +176,7 @@ def forward(self, features, labels):
test_dataloader = DataLoader(cur_feature_dataset, batch_size=batch_size, shuffle=False)

adatped_features = []

for inputs, labels in test_dataloader:
inputs = inputs.to(device)
# labels = labels.to(device)
Expand All @@ -183,6 +185,7 @@ def forward(self, features, labels):
# Perform inference using the model
# Your inference code here
adatped_features.append(outputs)

adatped_features = torch.cat(adatped_features, dim=0)
print(adatped_features.size())
feat_dict = dict()
Expand Down
8 changes: 6 additions & 2 deletions configs/data/bop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ reference_dataloader:

query_dataloader:
_target_: src.dataloader.bop.BaseBOPTest
root_dir: ${machine.root_dir}/datasets/
root_dir: /home/panda/repos/SAM-6D/SAM-6D/Data/BOP/
split:
reset_metaData: True
processing_config:
Expand Down Expand Up @@ -92,4 +92,8 @@ datasets:
tyol:
cad: tyol_models.zip
test: tyol_test_bop19.zip
obj_names: [001_obj, 002_obj, 003_obj, 004_obj, 005_obj, 006_obj, 007_obj, 008_obj, 009_obj, 010_obj, 011_obj, 012_obj, 013_obj, 014_obj, 015_obj, 016_obj, 017_obj, 018_obj, 019_obj, 020_obj, 021_obj]
obj_names: [001_obj, 002_obj, 003_obj, 004_obj, 005_obj, 006_obj, 007_obj, 008_obj, 009_obj, 010_obj, 011_obj, 012_obj, 013_obj, 014_obj, 015_obj, 016_obj, 017_obj, 018_obj, 019_obj, 020_obj, 021_obj]
robi:
cad: robi_models.zip
test: robi_test_bop19.zip
obj_names: [001_obj, 002_obj, 003_obj, 004_obj, 005_obj, 006_obj, 007_obj]
2 changes: 1 addition & 1 deletion configs/model/segmentor_model/sam.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ segmentor_width_size: ${model.segmentor_width_size}
sam:
_target_: src.model.sam.load_sam
model_type: vit_h
checkpoint_dir: ${machine.root_dir}/pretrained/segment-anything/
checkpoint_dir: ckpts/sam_weights/
2 changes: 1 addition & 1 deletion configs/run_vis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ root_dir: ${machine.root_dir}/datasets/
dataset_name:
input_file:
output_dir:
conf_threshold: 0.7
conf_threshold: 0.2
3 changes: 2 additions & 1 deletion configs/user/default.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
local_root_dir: ./datasets/bop23_challenge
slurm_root_dir: ./datasets/bop23_challenge
slurm_root_dir: ./datasets/bop23_challenge
project_root_dir:
4 changes: 3 additions & 1 deletion get_object_features_via_FFA.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,6 @@ def get_object_features_via_dataloader(output_dir, json_filename, object_dataset
#obj_features = get_object_masked_FFA_features('./obj_FFA', 'object_features_l_reg_class.json', object_dataset, encoder, img_size=img_size)

# obj_features = get_object_features_via_dataloader('./obj_FFA', 'object_features_small.json', object_dataset, encoder, img_size=img_size)
# print(obj_features.shape)
# print(obj_features.shape)


38 changes: 19 additions & 19 deletions obj_adapter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from adapter_dataset import ObjectFeatureDataset
from utils.adapter_dataset import ObjectFeatureDataset
import torch

from torch.utils.data import DataLoader, ConcatDataset
Expand All @@ -23,22 +23,24 @@ def main(cfg : DictConfig):

### bop challenge datasets
print(os.getcwd())
lmo_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/lmo/descriptors_pbr.pth',
num_object=8)
tless_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/tless/descriptors_pbr.pth',
num_object=30, label_offset=8)
tudl_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/tudl/descriptors_pbr.pth',
num_object=3, label_offset=38)
icbin_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/icbin/descriptors_pbr.pth',
num_object=2, label_offset=41)
itodd_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/itodd/descriptors_pbr.pth',
num_object=28, label_offset=43)
hb_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/hb/descriptors_pbr.pth',
num_object=33, label_offset=71)
ycbv_bo23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/ycbv/descriptors_pbr.pth',
num_object=21, label_offset=104)
# lmo_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/lmo/descriptors_pbr.pth',
# num_object=8)
# tless_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/tless/descriptors_pbr.pth',
# num_object=30, label_offset=8)
# tudl_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/tudl/descriptors_pbr.pth',
# num_object=3, label_offset=38)
# icbin_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/icbin/descriptors_pbr.pth',
# num_object=2, label_offset=41)
# itodd_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/itodd/descriptors_pbr.pth',
# num_object=28, label_offset=43)
# hb_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/hb/descriptors_pbr.pth',
# num_object=33, label_offset=71)
# ycbv_bo23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/ycbv/descriptors_pbr.pth',
# num_object=21, label_offset=104)
robi_bop23_feature_dataset = ObjectFeatureDataset(data_json=f'{original_cwd}/datasets/bop23_challenge/datasets/templates_pyrender/robi/descriptors_pbr.pth',
num_object=7)

cur_feature_dataset = hb_bop23_feature_dataset
cur_feature_dataset = robi_bop23_feature_dataset

# Example training loop
input_features = cfg.params.input_features # Size of the input feature vector
Expand All @@ -54,9 +56,7 @@ def main(cfg : DictConfig):
epochs = cfg.params.epochs
if combine_dataset:
combined_dataset = ConcatDataset(
[lmo_bop23_feature_dataset, tless_bop23_feature_dataset, tudl_bop23_feature_dataset,
icbin_bop23_feature_dataset, itodd_bop23_feature_dataset, hb_bop23_feature_dataset,
ycbv_bo23_feature_dataset])
[robi_bop23_feature_dataset])
dataloader = DataLoader(combined_dataset, batch_size=batch_size, shuffle=shuffle)
else:
dataloader = DataLoader(cur_feature_dataset, batch_size=batch_size, shuffle=shuffle)
Expand Down
1 change: 1 addition & 0 deletions run_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def run_inference(cfg: DictConfig):
ref_dataloader_config._target_ = "src.dataloader.bop_pbr.BOPTemplatePBR"
ref_dataloader_config.root_dir = f"{query_dataloader_config.root_dir}"
ref_dataloader_config.template_dir += f"templates_pyrender/{cfg.dataset_name}"

ref_dataset = instantiate(ref_dataloader_config)
ref_dataset.load_processed_metaData(reset_metaData=True)
else:
Expand Down
4 changes: 2 additions & 2 deletions src/dataloader/bop_pbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,8 @@ def __getitem__(self, idx):
]
)
dataset = BOPTemplatePBR(
root_dir="/gpfsscratch/rech/tvi/uyb58rn/datasets/bop23_challenge/datasets/lmo",
template_dir="/gpfsscratch/rech/tvi/uyb58rn/datasets/bop23_challenge/datasets/templates_pyrender/lmo",
root_dir="../../../data_robi",
template_dir="../../../data_robi/robi_pyrender",
obj_ids=None,
level_templates=1,
pose_distribution="all",
Expand Down
7 changes: 5 additions & 2 deletions src/model/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,15 @@ def __init__(
self.gdino = GroundingDINOObjectPredictor()
self.SAM = SegmentAnythingPredictor(vit_model="vit_h")
logging.info("Initialize GDINO and SAM done!")
self.use_adapter = False
self.use_adapter = True
if self.use_adapter:
self.adapter_type = 'weight'
if self.adapter_type == 'clip':
weight_name = f"bop_obj_shuffle_0507_clip_temp_0.05_epoch_500_lr_0.0001_bs_32_weights.pth"
model_path = os.path.join("./adapter_weights/bop23", weight_name)
self.adapter = ModifiedClipAdapter(1024, reduction=4, ratio=0.6).to('cuda')
else:
weight_name = f"bop_obj_shuffle_weight_0430_temp_0.05_epoch_500_lr_0.001_bs_32_weights.pth"
weight_name = f"bop_obj_shuffle_0529_weight_temp_0.05_epoch_500_lr_0.001_bs_32_weights.pth"
model_path = os.path.join("./adapter_weights/bop23", weight_name)
self.adapter = WeightAdapter(1024, reduction=4).to('cuda')
self.adapter.load_state_dict(torch.load(model_path))
Expand All @@ -208,6 +208,8 @@ def set_reference_objects(self):
start_time = time.time()
self.ref_data = {"descriptors": BatchedData(None), "cls_descriptors": BatchedData(None), "appe_descriptors": BatchedData(None)}
descriptors_path = osp.join(self.ref_dataset.template_dir, "descriptors.pth")
print("-=-=-=-=-=-=-=-")
print(descriptors_path)
# cls_descriptors_path = osp.join(self.ref_dataset.template_dir, "descriptors_cls.pth") # for cls token
if self.onboarding_config.rendering_type == "pbr":
descriptors_path = descriptors_path.replace(".pth", "_pbr.pth")
Expand All @@ -225,6 +227,7 @@ def set_reference_objects(self):
# object_features = torch.Tensor(feat_dict['features']).cuda()
# self.ref_data["descriptors"] = object_features.view(-1, 42, 1024)
# print("using adapted object features")

else:
for idx in tqdm(
range(len(self.ref_dataset)),
Expand Down
8 changes: 6 additions & 2 deletions src/scripts/visualize_detectron2.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,26 @@ def visualize(cfg: DictConfig) -> None:
with open(cfg.input_file, 'r') as f:
dets = json.load(f)
logging.info(f'Loaded {len(dets)} detections')

dets = [det for det in dets if det['score'] > cfg.conf_threshold]
logging.info(f'Keeping only {len(dets)} detections having score > {cfg.conf_threshold}')
lmo_transform_id = {1:0, 5:1, 6:2, 8:3, 9:4,10:5,11:6,12:7}


# sort by (scene_id, frame_id)
dets = sorted(dets, key=lambda x: (x['scene_id'], x['image_id']))


list_scene_id_and_frame_id = [(det['scene_id'], det['image_id']) for det in dets]


os.makedirs(cfg.output_dir, exist_ok=True)
for idx, (scene_id, image_id) in tqdm(enumerate(list_scene_id_and_frame_id)):

if cfg.dataset_name == 'itodd':
img = Image.open(f'{cfg.root_dir}/{cfg.dataset_name}/{split}/{scene_id:06d}/gray/{image_id:06d}.tif')
img = img.convert('L')
else:
img = Image.open(f'{cfg.root_dir}/{cfg.dataset_name}/{split}/{scene_id:06d}/rgb/{image_id:06d}.png')
img = Image.open(f'{"/home/panda/repos/SAM-6D/SAM-6D/Data/BOP"}/{cfg.dataset_name}/{split}/{scene_id:06d}/rgb/{image_id:06d}.png')
rgb = img.copy()
img = np.array(img)
visualizer = CNOSVisualizer(object_names, img_size=img.shape[:2])
Expand Down
12 changes: 9 additions & 3 deletions test_gdino_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,25 @@ def main(argv):
logging.info("GDINO: Predict bounding boxes, phrases, and confidence scores")
bboxes, phrases, gdino_conf = gdino.predict(image_pil, text_prompt)

print(bboxes)

logging.info("GDINO post processing")
w, h = image_pil.size # Get image width and height
# Scale bounding boxes to match the original image size
image_pil_bboxes = gdino.bbox_to_scaled_xyxy(bboxes, w, h)


logging.info("SAM prediction")
image_pil_bboxes, masks = SAM.predict(image_pil, image_pil_bboxes)
print(image_pil_bboxes)

logging.info("Annotate the scaled image with bounding boxes, confidence scores, and labels, and display")
bbox_annotated_pil = annotate(overlay_masks(image_pil, masks), image_pil_bboxes, gdino_conf, phrases)

bbox_annotated_pil.show()



bbox_annotated_pil.save("./imgs/annotedImg.jpg")

except Exception as e:
# Handle unexpected errors
Expand All @@ -44,5 +50,5 @@ def main(argv):
# Run the main function with the input image path
# app.run(main, ['imgs/color-000078.png'])
# app.run(main, ['imgs/color-000019.png'])
app.run(main, ['imgs/clutter-test.png'])
#app.run(main, ["/home/yangxiao/Documents/datasets/some_objects/data_fetch/object1/color-000007.jpg"])
app.run(main, ['imgs/rgb_0.png'])
# app.run(main, ["/home/yangxiao/Documents/datasets/some_objects/data_fetch/object1/color-000007.jpg"])
56 changes: 56 additions & 0 deletions trans_robi_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
import shutil
from pathlib import Path
from tqdm import tqdm

def reorganize_dataset(root_dir):
    """
    Reorganize dataset from:
        robi/obj_000001/rgb_0.png, mask_0.png, *.npy
    to:
        robi/obj_000001/images/0.png
        robi/obj_000001/masks/0.png
    and remove all .npy files.

    Args:
        root_dir: Path (str or Path) to the dataset root containing
            ``obj_*`` sub-directories, one per object.
    """
    root_path = Path(root_dir)

    # Collect every per-object folder (e.g. obj_000001).
    obj_dirs = [d for d in root_path.iterdir() if d.is_dir() and d.name.startswith('obj_')]

    # Each RGB/mask file moves into its own sub-folder, keyed by prefix.
    for obj_dir in tqdm(obj_dirs, desc="Processing objects"):
        # Create the new folder structure (idempotent if re-run).
        images_dir = obj_dir / 'images'
        masks_dir = obj_dir / 'masks'
        images_dir.mkdir(exist_ok=True)
        masks_dir.mkdir(exist_ok=True)

        # Delete all .npy files — they are not part of the target layout.
        for npy_file in obj_dir.glob('*.npy'):
            npy_file.unlink()

        dest_by_prefix = {'rgb': images_dir, 'mask': masks_dir}

        # Move each view file; everything after the first underscore is the
        # view identifier and becomes the new file name. Using partition
        # (instead of split('_')[1]) keeps names like 'rgb_0_1.png' unique
        # as '0_1.png' rather than collapsing them onto '0.png'.
        for file_path in obj_dir.glob('*_*.png'):
            prefix, _, view_num = file_path.stem.partition('_')
            dest_dir = dest_by_prefix.get(prefix)
            if dest_dir is not None:
                shutil.move(str(file_path), str(dest_dir / f"{view_num}.png"))

if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser(description='Reorganize dataset structure')
parser.add_argument('--root_dir', type=str, required=True, help='Root directory of the dataset')

args = parser.parse_args()

reorganize_dataset(args.root_dir)
print("Dataset reorganization completed!")
Loading