From 03488ac4a7498282eb25df1fc364cd88c8bcd1b5 Mon Sep 17 00:00:00 2001
From: generatedunixname89002005287564
Date: Wed, 25 Sep 2024 09:37:58 -0700
Subject: [PATCH] removing dead targets] fbcode//surreal/data_services/atek/atek/data_loaders/TARGETS

Reviewed By: azad-meta

Differential Revision: D63018396

fbshipit-source-id: b7af7a174404f6809ef4cdc9af3783ac301fb91e
---
 atek/data_loaders/cubercnn_model_adaptor.py | 389 --------
 1 file changed, 389 deletions(-)
 delete mode 100644 atek/data_loaders/cubercnn_model_adaptor.py

diff --git a/atek/data_loaders/cubercnn_model_adaptor.py b/atek/data_loaders/cubercnn_model_adaptor.py
deleted file mode 100644
index 344dc45..0000000
--- a/atek/data_loaders/cubercnn_model_adaptor.py
+++ /dev/null
@@ -1,389 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Dict, List, Optional
-
-import numpy as np
-
-import torch
-
-import webdataset as wds
-
-from atek.data_loaders.atek_wds_dataloader import load_atek_wds_dataset
-from atek.util.atek_constants import ATEK_CATEGORY_ID_TO_NAME, ATEK_CATEGORY_NAME_TO_ID
-from projectaria_tools.core.sophus import SE3
-
-from webdataset.filters import pipelinefilter
-
-
-class CubeRCNNModelAdaptor:
-    def __init__(
-        self,
-        # TODO: make these a DictConfig
-        min_bb2d_area: Optional[float] = 100,
-        min_bb3d_depth: Optional[float] = 0.3,
-        max_bb3d_depth: Optional[float] = 5.0,
-    ):
-        self.min_bb2d_area = min_bb2d_area
-        self.min_bb3d_depth = min_bb3d_depth
-        self.max_bb3d_depth = max_bb3d_depth
-
-    @staticmethod
-    def get_dict_key_mapping_all():
-        dict_key_mapping = {
-            "mfcd#camera-rgb+images": "image",
-            "mfcd#camera-rgb+projection_params": "camera_params",
-            "mfcd#camera-rgb+camera_model_name": "camera_model",
-            "mfcd#camera-rgb+t_device_camera": "t_device_rgbcam",
-            "mfcd#camera-rgb+frame_ids": "frame_id",
-            "mfcd#camera-rgb+capture_timestamps_ns": "timestamp_ns",
-            "mtd#ts_world_device": "ts_world_device",
-            "sequence_name": "sequence_name",
-            "gt_data": "gt_data",
-        }
-        return dict_key_mapping
-
-    def atek_to_cubercnn(self, data):
-        """
-        A helper data transform function to convert an ATEK webdataset sample built
-        by CubeRCNNSampleBuilder into unbatched CubeRCNN samples. Yields one
-        unbatched sample at a time, so that webdataset's collation and batching
-        mechanisms are used properly.
-        """
-        for atek_wds_sample in data:
-            sample = {}
-            self._update_camera_data_in_sample(atek_wds_sample, sample)
-            self._update_T_world_camera(atek_wds_sample, sample)
-
-            # Skip if no gt data
-            if "gt_data" in atek_wds_sample and len(atek_wds_sample["gt_data"]) > 0:
-                self._update_gt_data_in_sample(atek_wds_sample, sample)
-
-            yield sample
-
-    def _update_camera_data_in_sample(self, atek_wds_sample, sample):
-        """
-        Initialize the sample image, process the camera K-matrix information, and
-        update the sample dictionary.
- """ - assert atek_wds_sample["image"].shape[0] == 1, "Only support 1 frame" - image_height, image_width = atek_wds_sample["image"].shape[2:] - - # calculate K-matrix - camera_model = atek_wds_sample["camera_model"] - assert ( - camera_model == "CameraModelType.LINEAR" - ), f"Only linear camera model supported in CubeRCNN model, this data has {camera_model} instead." - k_matrix = torch.zeros((3, 3), dtype=torch.float32) - params = atek_wds_sample["camera_params"] - k_matrix[0, 0], k_matrix[1, 1] = params[0], params[1] - k_matrix[0, 2], k_matrix[1, 2] = params[2], params[3] - k_matrix[2, 2] = 1.0 - - sample.update( - { - # rgb -> bgr - "image": atek_wds_sample["image"][0, [2, 1, 0], :, :].clone().detach(), - "K": k_matrix.tolist(), - "height": image_height, - "width": image_width, - "K_matrix": k_matrix, - "timestamp_ns": atek_wds_sample["timestamp_ns"], - "frame_id": atek_wds_sample["frame_id"], - "sequence_name": atek_wds_sample["sequence_name"], - } - ) - - def _update_T_world_camera(self, atek_wds_sample, sample): - """ - Compute world-to-camera transformation matrices, and update this field in sample dict. - """ - T_world_device = SE3.from_matrix3x4(atek_wds_sample["ts_world_device"][0]) - T_device_rgbCam = SE3.from_matrix3x4(atek_wds_sample["t_device_rgbcam"]) - T_world_rgbCam = T_world_device @ T_device_rgbCam - sample["T_world_camera"] = T_world_rgbCam.to_matrix3x4() - - def _process_2d_bbox_dict(self, bb2d_dict): - """ - Process 2D bounding boxes by rearranging the bounding box coordinates to be - in the order x0, y0, x1, y1 and calculating the area of each 2D bounding box. - """ - bb2ds_x0y0x1y1 = bb2d_dict["box_ranges"] - bb2ds_x0y0x1y1 = bb2ds_x0y0x1y1[:, [0, 2, 1, 3]] - bb2ds_area = (bb2ds_x0y0x1y1[:, 2] - bb2ds_x0y0x1y1[:, 0]) * ( - bb2ds_x0y0x1y1[:, 3] - bb2ds_x0y0x1y1[:, 1] - ) - - return bb2ds_x0y0x1y1, bb2ds_area - - def _process_3d_bbox_dict(self, bbox3d_dict, T_world_rgbCam): - """ - This function processes 3D bounding box data from a given dictionary, - extracting dimensions, calculating depths, and computing transformation - matrices relative to the camera. - """ - bb3d_dimensions = bbox3d_dict["object_dimensions"] - - bb3d_depths_list = [] - Ts_world_object_list = [] - Ts_cam_object_list = [] - for _, pose_as_tensor in enumerate(bbox3d_dict["ts_world_object"]): - T_world_object = SE3.from_matrix3x4(pose_as_tensor.numpy()) - T_cam_object = T_world_rgbCam.inverse() @ T_world_object - - # Add to lists - Ts_world_object_list.append( - torch.tensor(T_world_object.to_matrix3x4(), dtype=torch.float32) - ) - - Ts_cam_object_list.append( - torch.tensor(T_cam_object.to_matrix3x4(), dtype=torch.float32) - ) - bb3d_depths_list.append(T_cam_object.translation()[:, 2].item()) - - # Convert lists to tensors - bb3d_depths = torch.tensor(bb3d_depths_list, dtype=torch.float32) - Ts_world_object = torch.stack(Ts_world_object_list, dim=0) - Ts_cam_object = torch.stack(Ts_cam_object_list, dim=0) - - return bb3d_dimensions, bb3d_depths, Ts_world_object, Ts_cam_object - - def _update_gt_data_in_sample(self, atek_wds_sample, sample): - """ - updates the sample dictionary with filtered ground truth data for both 2D and 3D bounding boxes. 
- """ - from detectron2.structures import Boxes, Instances - - bbox2d_dict = atek_wds_sample["gt_data"]["obb2_gt"]["camera-rgb"] - bbox3d_dict = atek_wds_sample["gt_data"]["obb3_gt"]["camera-rgb"] - - # Instance id between obb3 and obb2 should be the same - assert torch.allclose( - bbox3d_dict["instance_ids"], bbox2d_dict["instance_ids"], atol=0 - ), "instance ids in obb2 and obb3 needs to be exactly the same!" - - category_ids = bbox3d_dict["category_ids"] - - T_world_rgbCam = SE3.from_matrix3x4(sample["T_world_camera"]) - - bb2ds_x0y0x1y1, bb2ds_area = self._process_2d_bbox_dict(bbox2d_dict) - bb3d_dimensions, bb3d_depths, Ts_world_object, Ts_cam_object = ( - self._process_3d_bbox_dict(bbox3d_dict, T_world_rgbCam) - ) - - # Filter 1: ignore category = -1, meaning "Other". - category_id_filter = category_ids > 0 # filter out -1 category = "Other" - - # Filter 2: ignore bboxes with small area - bb2d_area_filter = bb2ds_area > self.min_bb2d_area - - # Filter 3: ignore bboxes with small depth - bb3d_depth_filter = (self.min_bb3d_depth <= bb3d_depths) & ( - bb3d_depths <= self.max_bb3d_depth - ) - - # Combine all filters - final_filter = category_id_filter & bb2d_area_filter & bb3d_depth_filter - - # Apply filter to create instances - image_height = sample["height"] - image_width = sample["width"] - instances = Instances((image_height, image_width)) - instances.gt_classes = category_ids[final_filter] - instances.gt_boxes = Boxes(bb2ds_x0y0x1y1[final_filter]) - - # Create 3D bboxes - Ts_cam_object_filtered = Ts_cam_object[final_filter] - trans_cam_object_filtered = Ts_cam_object_filtered[:, :, 3] - k_matrix = sample["K_matrix"] - filtered_projection_2d = ( - k_matrix.repeat(len(trans_cam_object_filtered), 1, 1) - @ trans_cam_object_filtered.unsqueeze(-1) - ).squeeze(-1) - filtered_projection_2d = filtered_projection_2d[:, :2] / filtered_projection_2d[ - :, 2 - ].unsqueeze(-1) - instances.gt_boxes3D = torch.cat( - [ - filtered_projection_2d, # [N, 2] - bb3d_depths[final_filter].unsqueeze(-1).clone().detach(), # [N, 1] - # Omni3d has the inverted zyx dimensions - # https://github.com/facebookresearch/omni3d/blob/main/cubercnn/util/math_util.py#L144C1-L181C40 - bb3d_dimensions[final_filter].flip(-1).clone().detach(), # [N, 3] - trans_cam_object_filtered, # [N, 3] - ], - axis=-1, - ) - instances.gt_poses = Ts_cam_object_filtered[:, :, :3].clone().detach() - - # Update sample with filtered instance data - sample["instances"] = instances - sample["Ts_world_object"] = Ts_world_object[final_filter].clone().detach() - sample["object_dimensions"] = bb3d_dimensions[final_filter].clone().detach() - sample["category"] = category_ids[final_filter].clone().detach() - - @staticmethod - def cubercnn_gt_to_atek_gt( - cubercnn_dict: Dict, - T_world_camera_np: np.array, - camera_label: str = "camera-rgb", - cubercnn_id_to_atek_id: Optional[Dict[int, int]] = None, - ) -> Optional[Dict]: - """ - A helper data transform function to convert the model input (gt) dict, or output (prediction) dict from CubeRCNN format, - back to ATEK GT dict format (defined in `obb_sample_builder`, which is effectively obb3_gt_processor + obb2_gt_processor) - CubeRCNN model is ran only on one camera stream, so user should specific which camera stream to use. By default, it is "camera-rgb". - """ - cubercnn_instances = cubercnn_dict["instances"] - # Skip if no instances - if len(cubercnn_instances) == 0: - return None - - # Check if the cubercnn_dict is a prediction dict or gt dict. 
-        # If it is a gt dict, "transfer" it to a prediction dict by filling in
-        # the pred fields. TODO: Consider another way to handle this!
-        pred_flag = hasattr(cubercnn_instances, "pred_classes")
-        if not pred_flag:
-            # fake pred fields using gt fields
-            num_instances = len(cubercnn_instances.gt_classes)
-            cubercnn_instances.pred_classes = cubercnn_instances.gt_classes
-            cubercnn_instances.pred_boxes = cubercnn_instances.gt_boxes
-            cubercnn_instances.pred_dimensions = cubercnn_instances.gt_boxes3D[:, 3:6]
-            cubercnn_instances.pred_center_cam = cubercnn_instances.gt_boxes3D[:, 6:9]
-            cubercnn_instances.pred_pose = cubercnn_instances.gt_poses
-            cubercnn_instances.scores = torch.ones(num_instances, dtype=torch.float32)
-
-        # initialize ATEK GT dict
-        atek_dict = {
-            "obb3_gt": {},
-            "obb2_gt": {},
-            "scores": cubercnn_instances.scores.detach().cpu(),  # tensor, shape: [num_instances], float32
-        }
-        atek_dict["obb3_gt"][camera_label] = {
-            "instance_ids": None,
-            "category_names": None,
-            "category_ids": cubercnn_instances.pred_classes.detach().cpu(),
-        }
-        atek_dict["obb2_gt"][camera_label] = {
-            "instance_ids": None,
-            "category_names": None,
-            "category_ids": cubercnn_instances.pred_classes.detach().cpu(),
-            "visibility_ratios": None,
-        }
-
-        # Fill in category ids
-        if cubercnn_id_to_atek_id is not None:
-            atek_id_list = [
-                cubercnn_id_to_atek_id[id.item()]
-                for id in cubercnn_instances.pred_classes
-            ]
-            atek_dict["obb3_gt"][camera_label]["category_ids"] = torch.tensor(
-                atek_id_list, dtype=torch.int32
-            )
-            atek_dict["obb2_gt"][camera_label]["category_ids"] = torch.tensor(
-                atek_id_list, dtype=torch.int32
-            )
-        else:
-            atek_dict["obb3_gt"][camera_label][
-                "category_ids"
-            ] = cubercnn_instances.pred_classes.detach().cpu()
-            atek_dict["obb2_gt"][camera_label][
-                "category_ids"
-            ] = cubercnn_instances.pred_classes.detach().cpu()
-
-        # Fill category names
-        atek_dict["obb3_gt"][camera_label]["category_names"] = [
-            ATEK_CATEGORY_ID_TO_NAME[id.item()]
-            for id in atek_dict["obb3_gt"][camera_label]["category_ids"]
-        ]
-        atek_dict["obb2_gt"][camera_label]["category_names"] = [
-            ATEK_CATEGORY_ID_TO_NAME[id.item()]
-            for id in atek_dict["obb2_gt"][camera_label]["category_ids"]
-        ]
-
-        # CubeRCNN dimensions are in reversed order (zyx) compared to ATEK (xyz)
-        bbox3d_dim = (
-            cubercnn_instances.pred_dimensions.detach().cpu()
-        )  # tensor, shape [num_instances, 3]
-        atek_dict["obb3_gt"][camera_label]["object_dimensions"] = torch.flip(
-            bbox3d_dim, dims=[1]
-        )
-
-        # Fill in pose
-        rotations = cubercnn_instances.pred_pose.detach().cpu()  # [num_instances, 3, 3]
-        translations = (
-            cubercnn_instances.pred_center_cam.detach().cpu().unsqueeze(2)
-        )  # [num_instances, 3, 1]
-
-        Ts_cam_object = SE3.from_matrix3x4(
-            torch.cat((rotations, translations), dim=2).numpy()
-        )
-        T_world_cam = SE3.from_matrix3x4(T_world_camera_np)
-
-        Ts_world_object = T_world_cam @ Ts_cam_object
-        Ts_world_object = SE3.to_matrix3x4(Ts_world_object)  # [num_instances, 3, 4]
-        if Ts_world_object.shape == (3, 4):
-            Ts_world_object = Ts_world_object.reshape(1, 3, 4)
-        atek_dict["obb3_gt"][camera_label]["ts_world_object"] = torch.tensor(
-            Ts_world_object, dtype=torch.float32
-        )
-
-        # Fill in 2d bbox ranges
-        bbox2d = (
-            cubercnn_instances.pred_boxes.tensor.detach().cpu()
-        )  # tensor, shape [num_instances, 4]
-        # x0,y0,x1,y1 -> x0,x1,y0,y1
-        atek_dict["obb2_gt"][camera_label]["box_ranges"] = torch.stack(
-            (bbox2d[:, 0], bbox2d[:, 2], bbox2d[:, 1], bbox2d[:, 3]), dim=1
-        )
-
-        return atek_dict
-
-
-def cubercnn_collation_fn(batch):
-    # Simply collate as a list
-    return list(batch)
-
-
-def load_atek_wds_dataset_as_cubercnn(
-    urls: List, batch_size: Optional[int], repeat_flag: bool, shuffle_flag: bool = False
-) -> wds.WebDataset:
-    cubercnn_model_adaptor = CubeRCNNModelAdaptor()
-
-    return load_atek_wds_dataset(
-        urls,
-        batch_size=batch_size,
-        dict_key_mapping=CubeRCNNModelAdaptor.get_dict_key_mapping_all(),
-        data_transform_fn=pipelinefilter(cubercnn_model_adaptor.atek_to_cubercnn)(),
-        collation_fn=cubercnn_collation_fn,
-        repeat_flag=repeat_flag,
-        shuffle_flag=shuffle_flag,
-    )
-
-
-def create_atek_dataloader_as_cubercnn(
-    urls: List[str],
-    batch_size: Optional[int] = None,
-    repeat_flag: bool = False,
-    shuffle_flag: bool = False,
-    num_workers: int = 0,
-) -> torch.utils.data.DataLoader:
-    wds_dataset = load_atek_wds_dataset_as_cubercnn(
-        urls,
-        batch_size=batch_size,
-        repeat_flag=repeat_flag,
-        shuffle_flag=shuffle_flag,
-    )
-
-    return torch.utils.data.DataLoader(
-        wds_dataset, batch_size=None, num_workers=num_workers, pin_memory=True
-    )
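
Reference notes on the removed adaptor, for anyone migrating off this module.

The removed _update_camera_data_in_sample assumes a linear (pinhole) camera whose
"camera_params" are ordered [fx, fy, cx, cy], per the K-matrix assignments in the
diff. A minimal standalone sketch of that construction; the helper name and the
sample parameter values are hypothetical:

    import torch

    def k_matrix_from_linear_params(params: torch.Tensor) -> torch.Tensor:
        # Build a 3x3 pinhole K from [fx, fy, cx, cy], mirroring the removed code.
        fx, fy, cx, cy = params.tolist()
        return torch.tensor(
            [[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=torch.float32
        )

    # Hypothetical parameters for a 512x512 linear camera stream.
    k = k_matrix_from_linear_params(torch.tensor([240.0, 240.0, 255.5, 255.5]))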
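
_update_T_world_camera chains the device pose with the device-to-camera
extrinsics via projectaria_tools' SE3, exactly as in the diff. A sketch of the
same composition with hypothetical 3x4 [R|t] inputs (in the adaptor they come
from the "ts_world_device" and "t_device_rgbcam" sample fields):

    import numpy as np
    from projectaria_tools.core.sophus import SE3

    # Hypothetical identity-rotation poses with small translations.
    t_world_device = np.hstack([np.eye(3), [[0.1], [0.2], [1.5]]])
    t_device_rgbcam = np.hstack([np.eye(3), [[0.0], [0.0], [0.05]]])

    T_world_device = SE3.from_matrix3x4(t_world_device)
    T_device_rgbCam = SE3.from_matrix3x4(t_device_rgbcam)

    # Composition maps points from the RGB camera frame into the world frame.
    T_world_rgbCam = T_world_device @ T_device_rgbCam
    print(T_world_rgbCam.to_matrix3x4())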
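
ATEK's "box_ranges" store 2D boxes as [x0, x1, y0, y1], while CubeRCNN /
detectron2 Boxes expect [x0, y0, x1, y1]; the adaptor converts with a column
permutation in one direction (_process_2d_bbox_dict) and a torch.stack in the
other (cubercnn_gt_to_atek_gt). A round-trip sketch with a hypothetical box:

    import torch

    # Hypothetical ATEK-style ranges: [x0, x1, y0, y1] per row.
    box_ranges = torch.tensor([[10.0, 50.0, 20.0, 80.0]])

    # ATEK [x0, x1, y0, y1] -> CubeRCNN [x0, y0, x1, y1].
    bb2ds_x0y0x1y1 = box_ranges[:, [0, 2, 1, 3]]

    # And back, as done when converting predictions to ATEK GT format.
    b = bb2ds_x0y0x1y1
    round_trip = torch.stack((b[:, 0], b[:, 2], b[:, 1], b[:, 3]), dim=1)
    assert torch.equal(round_trip, box_ranges)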
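
The gt_boxes3D tensor assembled in _update_gt_data_in_sample packs, per
instance: projected 2D center (2), depth (1), dimensions flipped to Omni3D's
zyx order (3), and the camera-frame center (3). A sketch of the projection and
flip steps alone, with hypothetical centers, dims, and K values:

    import torch

    # Hypothetical camera-frame object centers [N, 3] and pinhole K [3, 3].
    trans_cam_object = torch.tensor([[0.2, -0.1, 2.0], [1.0, 0.5, 4.0]])
    k_matrix = torch.tensor(
        [[240.0, 0.0, 255.5], [0.0, 240.0, 255.5], [0.0, 0.0, 1.0]]
    )

    # Project homogeneously, then divide by depth (same math as the adaptor).
    proj = (
        k_matrix.repeat(len(trans_cam_object), 1, 1)
        @ trans_cam_object.unsqueeze(-1)
    ).squeeze(-1)
    centers_2d = proj[:, :2] / proj[:, 2].unsqueeze(-1)  # [N, 2] pixel coords

    # Omni3D stores box dimensions in zyx order, so ATEK's xyz dims are flipped.
    dims_xyz = torch.tensor([[0.5, 0.3, 0.2], [1.2, 0.8, 0.4]])
    dims_zyx = dims_xyz.flip(-1)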
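
Finally, the deleted entry point create_atek_dataloader_as_cubercnn was the
public way to get a CubeRCNN-format dataloader from ATEK WDS shards. A hedged
usage sketch against a pre-deletion revision; the shard paths and batch size
are hypothetical:

    import glob

    # This import targets the module deleted above; it only works on a
    # revision pinned before this diff.
    from atek.data_loaders.cubercnn_model_adaptor import (
        create_atek_dataloader_as_cubercnn,
    )

    # Hypothetical local shard paths; any list of ATEK WDS tar URLs works.
    tar_urls = sorted(glob.glob("/data/atek_wds/shards-*.tar"))

    dataloader = create_atek_dataloader_as_cubercnn(
        urls=tar_urls, batch_size=8, shuffle_flag=True, num_workers=4
    )

    for batch in dataloader:
        # Each batch is a plain list of CubeRCNN-format dicts (see
        # cubercnn_collation_fn), ready to feed to the CubeRCNN model.
        print(batch[0]["sequence_name"], batch[0]["K"])
        break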