diff --git a/docs/source/user_guide/tutorials/custom_tasks.md b/docs/source/user_guide/tutorials/custom_tasks.md
index 73c5a6d3..5a4cfdae 100644
--- a/docs/source/user_guide/tutorials/custom_tasks.md
+++ b/docs/source/user_guide/tutorials/custom_tasks.md
@@ -159,7 +159,7 @@ In general one use case of setting a positive `reconfiguration_freq` value is fo
 
 ## Episode Initialization / Randomization
 
-Task initialization and randomization is handled in the `_initalize_actors` function and is called whenever `env.reset` is called. The objective here is to set the initial states of objects, including the robot. As the task ideally should be simulatable on the GPU, batched code is unavoidable. Note that furthermore, by default everything in ManiSkill tries to stay batched, even if there is only one element. Finally, like `_load_scene` the options argument is also passed down here if needed.
+Task initialization and randomization are handled in the `_initialize_episode` function, which is called whenever `env.reset` is called. The objective here is to set the initial states of objects, including the robot. As the task ideally should be simulatable on the GPU, batched code is unavoidable. Furthermore, note that by default everything in ManiSkill tries to stay batched, even if there is only one element. Finally, as with `_load_scene`, the `options` argument is also passed down here if needed.
 
 An example from part of the PushCube task
 
@@ -168,7 +168,7 @@ from mani_skill.utils.structs.pose import Pose
 import torch
 class PushCubeEnv(BaseEnv):
     # ...
-    def _initialize_actors(self, env_idx: torch.Tensor, options: dict):
+    def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
         # use the torch.device context manager to automatically create tensors on CPU or CUDA depending on self.device, the device the environment runs on
         with torch.device(self.device):
             b = len(env_idx)
@@ -184,7 +184,7 @@ class PushCubeEnv(BaseEnv):
             self.obj.set_pose(obj_pose)
 ```
 
-An `env_idx` is one of the arguments to this function, and is a list of environment IDs that need initialization. This is given as ManiSkill supports **partial resets**, where at each timestep potentially only a subset of parallel environments will undergo a reset, which calls `_initialize_actors` here.
+`env_idx`, one of the arguments to this function, is a list of the environment IDs that need initialization. It is provided because ManiSkill supports **partial resets**, where at each timestep potentially only a subset of the parallel environments undergo a reset, which in turn calls `_initialize_episode` here.
 
 Since a scene builder is used, to initialize objects to their original states, we simply call `self.table_scene.initialize(env_idx)`, a function all scene builders implement.
 
diff --git a/examples/baselines/ppo/README.md b/examples/baselines/ppo/README.md
index 1b231342..be1d7b96 100644
--- a/examples/baselines/ppo/README.md
+++ b/examples/baselines/ppo/README.md
@@ -79,7 +79,7 @@ python ppo.py --env_id="OpenCabinetDrawer-v1" \
 
 ## Visual Based RL
 
-Below is a sample of various commands for training a image-based policy with PPO that are lightly tuned. The fastest again is also PushCube-v1 which can take about 1-5 minutes and PickCube-v1 which takes 30-60 minutes. You will need to tune the `--num_envs` argument according to how much GPU memory you have as rendering visual observations uses a lot of memory. The settings below should all take less than 15GB of GPU memory. Note that while if you have enough memory you can easily increase the number of environments, this does not necessarily mean wall-time or sample efficiency improve.
+Below is a sample of various lightly tuned commands for training an image-based policy with PPO. The fastest again is PushCube-v1, which can take about 1-5 minutes, followed by PickCube-v1, which takes 15-45 minutes. You will need to tune the `--num_envs` argument according to how much GPU memory you have, as rendering visual observations uses a lot of memory. The settings below should all take less than 15GB of GPU memory. Note that while you can easily increase the number of environments if you have enough memory, this does not necessarily mean wall-time or sample efficiency will improve.
 
 The visual PPO baseline is not guaranteed to work for tasks not tested below as some tasks do not have dense rewards yet or well tuned ones, or simply are too hard with standard PPO (or our team has not had time to verify results yet)
 
@@ -87,11 +87,11 @@
 
 ```bash
 python ppo_rgb.py --env_id="PushCube-v1" \
-  --num_envs=512 --update_epochs=8 --num_minibatches=16 \
+  --num_envs=256 --update_epochs=8 --num_minibatches=8 \
   --total_timesteps=1_000_000 --eval_freq=10 --num-steps=20
-python ppo_rgb.py --env_id="OpenCabinetDrawer-v1" \
-  --num_envs=256 --update_epochs=8 --num_minibatches=16 \
-  --total_timesteps=100_000_000 --num-steps=100 --num-eval-steps=100
+python ppo_rgb.py --env_id="PickCube-v1" \
+  --num_envs=256 --update_epochs=8 --num_minibatches=8 \
+  --total_timesteps=10_000_000
 ```
 
 To evaluate a trained policy you can run
diff --git a/examples/baselines/ppo/ppo.py b/examples/baselines/ppo/ppo.py
index 396cb140..15488a6d 100644
--- a/examples/baselines/ppo/ppo.py
+++ b/examples/baselines/ppo/ppo.py
@@ -22,7 +22,7 @@
 
 @dataclass
 class Args:
-    exp_name: str = os.path.basename(__file__)[: -len(".py")]
+    exp_name: Optional[str] = None
     """the name of this experiment"""
     seed: int = 1
     """seed of the experiment"""
@@ -158,7 +158,11 @@ def get_action_and_value(self, x, action=None):
     args.batch_size = int(args.num_envs * args.num_steps)
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
     args.num_iterations = args.total_timesteps // args.batch_size
-    run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
+    if args.exp_name is None:
+        args.exp_name = os.path.basename(__file__)[: -len(".py")]
+        run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
+    else:
+        run_name = args.exp_name
     writer = None
     if not args.evaluate:
         print("Running training")
@@ -205,7 +209,7 @@ def get_action_and_value(self, x, action=None):
         if args.save_train_video_freq is not None:
             save_video_trigger = lambda x : (x // args.num_steps) % args.save_train_video_freq == 0
             envs = RecordEpisode(envs, output_dir=f"runs/{run_name}/train_videos", save_trajectory=False, save_video_trigger=save_video_trigger, max_steps_per_video=args.num_steps, video_fps=30)
-        eval_envs = RecordEpisode(eval_envs, output_dir=eval_output_dir, save_trajectory=args.evaluate, max_steps_per_video=args.num_eval_steps, video_fps=30)
+        eval_envs = RecordEpisode(eval_envs, output_dir=eval_output_dir, save_trajectory=args.evaluate, trajectory_name="trajectory", max_steps_per_video=args.num_eval_steps, video_fps=30)
     envs = ManiSkillVectorEnv(envs, args.num_envs, ignore_terminations=not args.partial_reset, **env_kwargs)
     eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=not args.partial_reset, **env_kwargs)
     assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
diff --git a/examples/baselines/ppo/ppo_rgb.py b/examples/baselines/ppo/ppo_rgb.py
index c282679e..8e62a52b 100644
--- a/examples/baselines/ppo/ppo_rgb.py
+++ b/examples/baselines/ppo/ppo_rgb.py
@@ -22,7 +22,7 @@
 
 @dataclass
 class Args:
-    exp_name: str = os.path.basename(__file__)[: -len(".py")]
+    exp_name: Optional[str] = None
    """the name of this experiment"""
     seed: int = 1
     """seed of the experiment"""
@@ -193,8 +193,8 @@ def __init__(self, sample_obs):
                 self.out_features += feature_size
 
         # for state data we simply pass it through a single linear layer
-        extractors["state"] = nn.Linear(state_size, 64)
-        self.out_features += 64
+        extractors["state"] = nn.Linear(state_size, 256)
+        self.out_features += 256
 
         self.extractors = nn.ModuleDict(extractors)
 
@@ -217,12 +217,12 @@ def __init__(self, envs, sample_obs):
         latent_size = self.feature_net.out_features
         self.critic = nn.Sequential(
             layer_init(nn.Linear(latent_size, 512)),
-            nn.Tanh(),
+            nn.ReLU(inplace=True),
             layer_init(nn.Linear(512, 1)),
         )
         self.actor_mean = nn.Sequential(
             layer_init(nn.Linear(latent_size, 512)),
-            nn.Tanh(),
+            nn.ReLU(inplace=True),
             layer_init(nn.Linear(512, np.prod(envs.unwrapped.single_action_space.shape)), std=0.01*np.sqrt(2)),
         )
         self.actor_logstd = nn.Parameter(torch.ones(1, np.prod(envs.unwrapped.single_action_space.shape)) * -0.5)
@@ -256,8 +256,11 @@ def get_action_and_value(self, x, action=None):
     args.batch_size = int(args.num_envs * args.num_steps)
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
     args.num_iterations = args.total_timesteps // args.batch_size
-    run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
-
+    if args.exp_name is None:
+        args.exp_name = os.path.basename(__file__)[: -len(".py")]
+        run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
+    else:
+        run_name = args.exp_name
     writer = None
     if not args.evaluate:
         print("Running training")
@@ -297,6 +300,7 @@ def get_action_and_value(self, x, action=None):
     # rgbd obs mode returns a dict of data, we flatten it so there is just a rgbd key and state key
     envs = FlattenRGBDObservationWrapper(envs, rgb_only=True)
     eval_envs = FlattenRGBDObservationWrapper(eval_envs, rgb_only=True)
+
     if isinstance(envs.action_space, gym.spaces.Dict):
         envs = FlattenActionSpaceWrapper(envs)
         eval_envs = FlattenActionSpaceWrapper(eval_envs)
@@ -308,7 +312,7 @@ def get_action_and_value(self, x, action=None):
         if args.save_train_video_freq is not None:
             save_video_trigger = lambda x : (x // args.num_steps) % args.save_train_video_freq == 0
             envs = RecordEpisode(envs, output_dir=f"runs/{run_name}/train_videos", save_trajectory=False, save_video_trigger=save_video_trigger, max_steps_per_video=args.num_steps, video_fps=30)
-        eval_envs = RecordEpisode(eval_envs, output_dir=eval_output_dir, save_trajectory=args.evaluate, max_steps_per_video=args.num_eval_steps, video_fps=30)
+        eval_envs = RecordEpisode(eval_envs, output_dir=eval_output_dir, save_trajectory=args.evaluate, trajectory_name="trajectory", max_steps_per_video=args.num_eval_steps, video_fps=30)
     envs = ManiSkillVectorEnv(envs, args.num_envs, ignore_terminations=False, **env_kwargs)
     eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=False, **env_kwargs)
     assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
diff --git a/mani_skill/envs/sapien_env.py b/mani_skill/envs/sapien_env.py
index 421388f6..5bd46077 100644
--- a/mani_skill/envs/sapien_env.py
+++ b/mani_skill/envs/sapien_env.py
@@ -275,8 +275,8 @@ def __init__(
         self.single_observation_space
         self.observation_space
 
-    def _update_obs_space(self, obs: Any):
-        """call this function if you modify the observations returned by env.step and env.reset via an observation wrapper. The given observation must be a numpy array"""
+    def update_obs_space(self, obs: Any):
+        """Call this function if you modify the observations returned by env.step and env.reset via an observation wrapper."""
         self._init_raw_obs = common.to_numpy(obs)
         del self.single_observation_space
         del self.observation_space
@@ -346,6 +346,10 @@ def _default_human_render_camera_configs(
         """Add default cameras for rendering when using render_mode='rgb_array'. These can be overriden by the user at env creation time """
         return []
 
+    @property
+    def scene(self):
+        return self._scene
+
     @property
     def sim_freq(self):
         return self._sim_freq
@@ -902,7 +906,10 @@ def _after_simulation_step(self):
    # Simulation and other gym interfaces
    # -------------------------------------------------------------------------- #
    def _set_scene_config(self):
-        physx.set_scene_config(**self.sim_cfg.scene_cfg.dict())
+        # scene_cfg fields are split across the separate shape, body, and scene config calls below
+        physx.set_shape_config(contact_offset=self.sim_cfg.scene_cfg.contact_offset, rest_offset=self.sim_cfg.scene_cfg.rest_offset)
+        physx.set_body_config(solver_position_iterations=self.sim_cfg.scene_cfg.solver_position_iterations, solver_velocity_iterations=self.sim_cfg.scene_cfg.solver_velocity_iterations, sleep_threshold=self.sim_cfg.scene_cfg.sleep_threshold)
+        physx.set_scene_config(gravity=self.sim_cfg.scene_cfg.gravity, bounce_threshold=self.sim_cfg.scene_cfg.bounce_threshold, enable_pcm=self.sim_cfg.scene_cfg.enable_pcm, enable_tgs=self.sim_cfg.scene_cfg.enable_tgs, enable_ccd=self.sim_cfg.scene_cfg.enable_ccd, enable_enhanced_determinism=self.sim_cfg.scene_cfg.enable_enhanced_determinism, enable_friction_every_iteration=self.sim_cfg.scene_cfg.enable_friction_every_iteration, cpu_workers=self.sim_cfg.scene_cfg.cpu_workers)
         physx.set_default_material(**self.sim_cfg.default_materials_cfg.dict())
 
     def _setup_scene(self):
@@ -1169,3 +1176,34 @@ def render(self):
     # scene_mesh = merge_meshes(meshes)
     # scene_pcd = scene_mesh.sample(num_points)
     # return scene_pcd
+
+
+    # Printing metrics/info
+    def print_sim_details(self):
+        sensor_settings_str = []
+        for uid, cam in self._sensors.items():
+            if isinstance(cam, Camera):
+                cfg = cam.cfg
+                sensor_settings_str.append(f"RGBD ({cfg.width}x{cfg.height})")
+        sensor_settings_str = "_".join(sensor_settings_str)
+        sim_backend = "gpu" if physx.is_gpu_enabled() else "cpu"
+        print(
+            "# -------------------------------------------------------------------------- #"
+        )
+        print(
+            f"Task ID: {self.spec.id}, {self.num_envs} parallel environments, sim_backend={sim_backend}"
+        )
+        print(
+            f"obs_mode={self.obs_mode}, control_mode={self.control_mode}"
+        )
+        print(
+            f"render_mode={self.render_mode}, sensor_details={sensor_settings_str}"
+        )
+        print(
+            f"sim_freq={self.sim_freq}, control_freq={self.control_freq}"
+        )
+        print(f"observation space: {self.observation_space}")
+        print(f"(single) action space: {self.single_action_space}")
+        print(
+            "# -------------------------------------------------------------------------- #"
+        )
diff --git a/mani_skill/envs/scene.py b/mani_skill/envs/scene.py
index d06f369e..22748c0d 100644
--- a/mani_skill/envs/scene.py
+++ b/mani_skill/envs/scene.py
@@ -521,9 +521,9 @@ def _setup_gpu(self):
         # As physx_system.gpu_init() was called a single physx step was also taken. So we need to reset
         # all the actors and articulations to their original poses as they likely have collided
         for actor in self.non_static_actors:
-            actor.set_pose(actor.inital_pose)
+            actor.set_pose(actor.initial_pose)
         for articulation in self.articulations.values():
-            articulation.set_pose(articulation.inital_pose)
+            articulation.set_pose(articulation.initial_pose)
 
         self.px.gpu_apply_rigid_dynamic_data()
         self.px.gpu_apply_articulation_root_pose()
diff --git a/mani_skill/envs/tasks/control/cartpole.py b/mani_skill/envs/tasks/control/cartpole.py
index 3e7de906..a5caef2f 100644
--- a/mani_skill/envs/tasks/control/cartpole.py
+++ b/mani_skill/envs/tasks/control/cartpole.py
@@ -73,7 +73,9 @@ def __init__(self, *args, robot_uids=CartPoleRobot, **kwargs):
     @property
     def _default_sim_config(self):
         return SimConfig(
-            sim_freq=100, control_freq=100, scene_cfg=SceneConfig(solver_iterations=2)
+            sim_freq=100,
+            control_freq=100,
+            scene_cfg=SceneConfig(solver_position_iterations=2),
         )
 
     @property
diff --git a/mani_skill/envs/tasks/quadruped_run.py b/mani_skill/envs/tasks/quadruped_run.py
index 0ab4bd13..9db0cce3 100644
--- a/mani_skill/envs/tasks/quadruped_run.py
+++ b/mani_skill/envs/tasks/quadruped_run.py
@@ -52,7 +52,7 @@ def _default_sim_config(self):
                 max_rigid_contact_count=2**20,
             ),
             scene_cfg=SceneConfig(
-                solver_iterations=4,
+                solver_position_iterations=4,
                 bounce_threshold=0.2,
             ),
         )
diff --git a/mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py b/mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py
index 82ba6c5b..70ef752b 100644
--- a/mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py
+++ b/mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py
@@ -164,11 +164,11 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
             # reset objects to original poses
             if b == self.num_envs:
                 # if all envs reset
-                self.all_objects.pose = self.all_objects.inital_pose
+                self.all_objects.pose = self.all_objects.initial_pose
             else:
                 # if only some envs reset, we unfortunately still have to do some mask wrangling
                 mask = torch.isin(self.all_objects._scene_idxs, env_idx)
-                self.all_objects.pose = self.all_objects.inital_pose[mask]
+                self.all_objects.pose = self.all_objects.initial_pose[mask]
 
     def evaluate(self):
         return {
diff --git a/mani_skill/envs/tasks/tabletop/pick_cube.py b/mani_skill/envs/tasks/tabletop/pick_cube.py
index 347ac31e..b8fe03ae 100644
--- a/mani_skill/envs/tasks/tabletop/pick_cube.py
+++ b/mani_skill/envs/tasks/tabletop/pick_cube.py
@@ -70,8 +70,11 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
             self.goal_site.set_pose(Pose.create_from_pq(goal_xyz))
 
     def _get_obs_extra(self, info: Dict):
+        # in reality some people hack is_grasped into observations by checking if the gripper can close fully or not
         obs = dict(
-            tcp_pose=self.agent.tcp.pose.raw_pose, goal_pos=self.goal_site.pose.p
+            is_grasped=info["is_grasped"],
+            tcp_pose=self.agent.tcp.pose.raw_pose,
+            goal_pos=self.goal_site.pose.p,
         )
         if "state" in self.obs_mode:
             obs.update(
@@ -86,11 +89,13 @@ def evaluate(self):
             torch.linalg.norm(self.goal_site.pose.p - self.cube.pose.p, axis=1)
             <= self.goal_thresh
         )
+        is_grasped = self.agent.is_grasping(self.cube)
         is_robot_static = self.agent.is_static(0.2)
         return {
             "success": is_obj_placed & is_robot_static,
             "is_obj_placed": is_obj_placed,
             "is_robot_static": is_robot_static,
+            "is_grasped": is_grasped,
         }
 
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
@@ -100,7 +105,7 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         reaching_reward = 1 - torch.tanh(5 * tcp_to_obj_dist)
         reward = reaching_reward
 
-        is_grasped = self.agent.is_grasping(self.cube)
+        is_grasped = info["is_grasped"]
         reward += is_grasped
 
         obj_to_goal_dist = torch.linalg.norm(
diff --git a/mani_skill/examples/benchmarking/gpu_sim.py b/mani_skill/examples/benchmarking/gpu_sim.py
index 54837e64..ad27661e 100644
--- a/mani_skill/examples/benchmarking/gpu_sim.py
+++ b/mani_skill/examples/benchmarking/gpu_sim.py
@@ -38,31 +38,8 @@ def main(args):
     else:
         env = gym.make_vec(args.env_id, num_envs=args.num_envs, vectorization_mode="async", vector_kwargs=dict(context="spawn"), obs_mode=args.obs_mode,)
     base_env = gym.make(args.env_id, obs_mode=args.obs_mode).unwrapped
-    sensor_settings_str = []
-    for uid, cam in base_env._sensors.items():
-        cfg = cam.cfg
-        sensor_settings_str.append(f"{cfg.width}x{cfg.height}")
-    sensor_settings_str = "_".join(sensor_settings_str)
-    print(
-        "# -------------------------------------------------------------------------- #"
-    )
-    print(
-        f"Benchmarking ManiSkill GPU Simulation with {num_envs} parallel environments"
-    )
-    print(
-        f"env_id={args.env_id}, obs_mode={args.obs_mode}, control_mode={args.control_mode}"
-    )
-    print(
-        f"render_mode={args.render_mode}, sensor_details={sensor_settings_str}, save_video={args.save_video}"
-    )
-    print(
-        f"sim_freq={base_env.sim_freq}, control_freq={base_env.control_freq}"
-    )
-    print(f"observation space: {env.observation_space}")
-    print(f"action space: {base_env.single_action_space}")
-    print(
-        "# -------------------------------------------------------------------------- #"
-    )
+
+    base_env.print_sim_details()
     images = []
     video_nrows = int(np.sqrt(num_envs))
     with torch.inference_mode():
diff --git a/mani_skill/utils/building/actor_builder.py b/mani_skill/utils/building/actor_builder.py
index 312f1ffb..173b1964 100644
--- a/mani_skill/utils/building/actor_builder.py
+++ b/mani_skill/utils/building/actor_builder.py
@@ -212,8 +212,10 @@ def build(self, name):
             and initial_pose_b == 1
             and physx.is_gpu_enabled()
         ):
-            actor.inital_pose = Pose.create(initial_pose.raw_pose.repeat(num_actors, 1))
+            actor.initial_pose = Pose.create(
+                initial_pose.raw_pose.repeat(num_actors, 1)
+            )
         else:
-            actor.inital_pose = initial_pose
+            actor.initial_pose = initial_pose
         self.scene.actors[self.name] = actor
         return actor
diff --git a/mani_skill/utils/building/articulation_builder.py b/mani_skill/utils/building/articulation_builder.py
index c5c1462d..fd5466fc 100644
--- a/mani_skill/utils/building/articulation_builder.py
+++ b/mani_skill/utils/building/articulation_builder.py
@@ -188,6 +188,6 @@ def build(
         articulation: Articulation = Articulation.create_from_physx_articulations(
             articulations, self.scene, self.scene_idxs
         )
-        articulation.inital_pose = initial_pose
+        articulation.initial_pose = initial_pose
         self.scene.articulations[self.name] = articulation
         return articulation
diff --git a/mani_skill/utils/structs/actor.py b/mani_skill/utils/structs/actor.py
index 36c68013..e7a18124 100644
--- a/mani_skill/utils/structs/actor.py
+++ b/mani_skill/utils/structs/actor.py
@@ -32,7 +32,7 @@ class Actor(PhysxRigidDynamicComponentStruct[sapien.Entity]):
     px_body_type: Literal["kinematic", "static", "dynamic"] = None
     hidden: bool = False
 
-    inital_pose: Pose = None
+    initial_pose: Pose = None
     """
     The initial pose of this Actor, as defined when creating the actor via the ActorBuilder.
     It is necessary to track this pose to ensure the actor is still at the correct pose once gpu system is initialized. It may also be useful
@@ -111,11 +111,11 @@ def merge(cls, actors: List["Actor"], name: str = None):
         for actor in actors:
             objs += actor._objs
             merged_scene_idxs.append(actor._scene_idxs)
-            _builder_initial_poses.append(actor.inital_pose.raw_pose)
+            _builder_initial_poses.append(actor.initial_pose.raw_pose)
         merged_scene_idxs = torch.concat(merged_scene_idxs)
         merged_actor = Actor.create_from_entities(objs, scene, merged_scene_idxs)
         merged_actor.name = name
-        merged_actor.inital_pose = Pose.create(torch.vstack(_builder_initial_poses))
+        merged_actor.initial_pose = Pose.create(torch.vstack(_builder_initial_poses))
         merged_actor.merged = True
         scene.actor_views[merged_actor.name] = merged_actor
         return merged_actor
@@ -234,7 +234,7 @@ def pose(self) -> Pose:
         if self.px_body_type == "static":
             # NOTE (stao): usually _builder_initial_pose is just one pose, but for static objects in GPU sim we repeat it if necessary so it can be used
             # as part of observations if needed
-            return self.inital_pose
+            return self.initial_pose
         else:
             if self.hidden:
                 return Pose.create(self.before_hide_pose)
diff --git a/mani_skill/utils/structs/articulation.py b/mani_skill/utils/structs/articulation.py
index 489dc4d7..4a1b0859 100644
--- a/mani_skill/utils/structs/articulation.py
+++ b/mani_skill/utils/structs/articulation.py
@@ -42,7 +42,7 @@ class Articulation(BaseStruct[physx.PhysxArticulation]):
     name: str = None
     """Name of this articulation"""
 
-    inital_pose: Pose = None
+    initial_pose: Pose = None
     """The initial pose of this articulation"""
 
     merged: bool = False
diff --git a/mani_skill/utils/structs/types.py b/mani_skill/utils/structs/types.py
index 94553a91..303ece70 100644
--- a/mani_skill/utils/structs/types.py
+++ b/mani_skill/utils/structs/types.py
@@ -46,7 +46,8 @@ class SceneConfig:
     bounce_threshold: float = 2.0
     sleep_threshold: float = 0.005
     contact_offset: float = 0.02
-    solver_iterations: int = 15
+    rest_offset: float = 0
+    solver_position_iterations: int = 15
     solver_velocity_iterations: int = 1
     enable_pcm: bool = True
     enable_tgs: bool = True
diff --git a/mani_skill/utils/wrappers/flatten.py b/mani_skill/utils/wrappers/flatten.py
index 5fe90295..d0d07dec 100644
--- a/mani_skill/utils/wrappers/flatten.py
+++ b/mani_skill/utils/wrappers/flatten.py
@@ -21,7 +21,7 @@ def __init__(self, env, rgb_only=False) -> None:
         super().__init__(env)
         self.rgb_only = rgb_only
         new_obs = self.observation(common.to_cpu_tensor(self.base_env._init_raw_obs))
-        self.base_env._update_obs_space(new_obs)
+        self.base_env.update_obs_space(new_obs)
 
     def observation(self, observation: Dict):
         sensor_data = observation.pop("sensor_data")
@@ -47,7 +47,7 @@ class FlattenObservationWrapper(gym.ObservationWrapper):
 
     def __init__(self, env) -> None:
         super().__init__(env)
-        self.base_env._update_obs_space(
+        self.base_env.update_obs_space(
             common.flatten_state_dict(self.base_env._init_raw_obs)
         )
 
diff --git a/mani_skill/utils/wrappers/visual_encoders.py b/mani_skill/utils/wrappers/visual_encoders.py
index 6e482dec..8a1c1ad3 100644
--- a/mani_skill/utils/wrappers/visual_encoders.py
+++ b/mani_skill/utils/wrappers/visual_encoders.py
@@ -30,7 +30,7 @@ def __init__(self, env, encoder: Literal["r3m"], encoder_cfg=dict()):
         )  # HWC -> CHW
         self.single_image_embedding_size = 512  # for resnet18
 
-        self.base_env._update_obs_space(
+        self.base_env.update_obs_space(
             common.to_numpy(
                 self.observation(common.to_tensor(self.base_env._init_raw_obs))
             )