diff --git a/README.md b/README.md index 681dfbb..71070c5 100644 --- a/README.md +++ b/README.md @@ -258,7 +258,7 @@ Example: generate multiple parallel simulation envs with `gym.make` and record s python embodied_gen/scripts/parallel_sim.py \ --layout_file "outputs/layouts_gen/task_0000/layout.json" \ --output_dir "outputs/parallel_sim/task_0000" \ ---num_envs 20 +--num_envs 16 ``` ### 🖼️ Real-to-Sim Digital Twin diff --git a/embodied_gen/envs/pick_embodiedgen.py b/embodied_gen/envs/pick_embodiedgen.py index 9b14070..ea1cac6 100644 --- a/embodied_gen/envs/pick_embodiedgen.py +++ b/embodied_gen/envs/pick_embodiedgen.py @@ -16,7 +16,6 @@ import json import os -from copy import deepcopy import numpy as np import sapien @@ -26,6 +25,7 @@ from mani_skill.envs.sapien_env import BaseEnv from mani_skill.sensors.camera import CameraConfig from mani_skill.utils import sapien_utils from mani_skill.utils.building import actors +from mani_skill.utils.building.ground import build_ground from mani_skill.utils.registration import register_env from mani_skill.utils.structs.actor import Actor from mani_skill.utils.structs.pose import Pose @@ -78,6 +78,14 @@ class PickEmbodiedGen(BaseEnv): # Add small offset in z-axis to avoid collision. 
self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002) self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002) + self.camera_cfg = kwargs.pop("camera_cfg", None) + if self.camera_cfg is None: + self.camera_cfg = dict( + camera_eye=[0.9, 0.0, 1.1], + camera_target_pt=[0.0, 0.0, 0.9], + image_hw=[256, 256], + fovy_deg=75, + ) self.layouts = self.init_env_layouts( layout_file, num_envs, replace_objs @@ -106,22 +114,30 @@ class PickEmbodiedGen(BaseEnv): def init_env_layouts( layout_file: str, num_envs: int, replace_objs: bool ) -> list[LayoutInfo]: - layout = LayoutInfo.from_dict(json.load(open(layout_file, "r"))) layouts = [] for env_idx in range(num_envs): if replace_objs and env_idx > 0: - layout = bfs_placement(deepcopy(layout)) - layouts.append(layout) + layout_info = bfs_placement(layout_file) + else: + layout_info = json.load(open(layout_file, "r")) + layout_info = LayoutInfo.from_dict(layout_info) + + layout_path = layout_file.replace(".json", f"_env{env_idx}.json") + with open(layout_path, "w") as f: + json.dump(layout_info.to_dict(), f, indent=4) + + layouts.append(layout_path) return layouts @staticmethod def compute_robot_init_pose( - layouts: list[LayoutInfo], num_envs: int, z_offset: float = 0.0 + layouts: list[str], num_envs: int, z_offset: float = 0.0 ) -> list[list[float]]: robot_pose = [] for env_idx in range(num_envs): - layout = layouts[env_idx] + layout = json.load(open(layouts[env_idx], "r")) + layout = LayoutInfo.from_dict(layout) robot_node = layout.relation[Scene3DItemEnum.ROBOT.value] x, y, z, qx, qy, qz, qw = layout.position[robot_node] robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz]) @@ -154,19 +170,27 @@ class PickEmbodiedGen(BaseEnv): @property def _default_human_render_camera_configs(self): pose = sapien_utils.look_at( - eye=[0.9, 0.0, 1.1], target=[0.0, 0.0, 0.9] + eye=self.camera_cfg["camera_eye"], + target=self.camera_cfg["camera_target_pt"], ) return CameraConfig( - "render_camera", pose, 256, 256, np.deg2rad(75), 0.01, 100 
+ "render_camera", + pose, + self.camera_cfg["image_hw"][1], + self.camera_cfg["image_hw"][0], + np.deg2rad(self.camera_cfg["fovy_deg"]), + 0.01, + 100, ) def _load_agent(self, options: dict): + self.ground = build_ground(self.scene) super()._load_agent(options, sapien.Pose(p=[-10, 0, 10])) def _load_scene(self, options: dict): all_objects = [] - logger.info(f"Loading assets and decomposition mesh collisions...") + logger.info(f"Loading EmbodiedGen assets...") for env_idx in range(self.num_envs): env_actors = load_assets_from_layout_file( self.scene, @@ -229,7 +253,7 @@ class PickEmbodiedGen(BaseEnv): self.agent.controller.controllers["gripper"].reset() def render_gs3d_images( - self, layouts: list[LayoutInfo], num_envs: int, init_quat: list[float] + self, layouts: list[str], num_envs: int, init_quat: list[float] ) -> dict[str, np.ndarray]: sim_coord_align = ( torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device) @@ -237,12 +261,18 @@ class PickEmbodiedGen(BaseEnv): cameras = self.scene.sensors.copy() cameras.update(self.scene.human_render_cameras) - bg_node = layouts[0].relation[Scene3DItemEnum.BACKGROUND.value] - gs_path = os.path.join(layouts[0].assets[bg_node], "gs_model.ply") + # Preload the background Gaussian Splatting model. 
+ asset_root = os.path.dirname(layouts[0]) + layout = LayoutInfo.from_dict(json.load(open(layouts[0], "r"))) + bg_node = layout.relation[Scene3DItemEnum.BACKGROUND.value] + gs_path = os.path.join( + asset_root, layout.assets[bg_node], "gs_model.ply" + ) raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path) bg_images = dict() for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"): - layout = layouts[env_idx] + layout = json.load(open(layouts[env_idx], "r")) + layout = LayoutInfo.from_dict(layout) x, y, z, qx, qy, qz, qw = layout.position[bg_node] qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat) init_pose = torch.tensor([x, y, z, qx, qy, qz, qw]) diff --git a/embodied_gen/scripts/compose_layout.py b/embodied_gen/scripts/compose_layout.py index d9051a1..3451fbc 100644 --- a/embodied_gen/scripts/compose_layout.py +++ b/embodied_gen/scripts/compose_layout.py @@ -50,10 +50,7 @@ def entrypoint(**kwargs): out_scene_path = f"{output_dir}/Iscene.glb" out_layout_path = f"{output_dir}/layout.json" - with open(args.layout_path, "r") as f: - layout_info = LayoutInfo.from_dict(json.load(f)) - - layout_info = bfs_placement(layout_info, seed=args.seed) + layout_info = bfs_placement(args.layout_path, seed=args.seed) with open(out_layout_path, "w") as f: json.dump(layout_info.to_dict(), f, indent=4) diff --git a/embodied_gen/scripts/gen_layout.py b/embodied_gen/scripts/gen_layout.py index 5df6973..5e6ec9d 100644 --- a/embodied_gen/scripts/gen_layout.py +++ b/embodied_gen/scripts/gen_layout.py @@ -119,11 +119,15 @@ def entrypoint() -> None: match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}" bg_save_dir = os.path.join(output_root, "background") copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True) - layout_info.assets[bg_node] = bg_save_dir + layout_info.assets[bg_node] = "background" # BFS layout placement. 
+    layout_path = f"{output_root}/layout.json" +    with open(layout_path, "w") as f: +        json.dump(layout_info.to_dict(), f, indent=4) + layout_info = bfs_placement( -        layout_info, +        layout_path, limit_reach_range=True if args.insert_robot else False, seed=args.seed_layout, ) diff --git a/embodied_gen/scripts/parallel_sim.py b/embodied_gen/scripts/parallel_sim.py index 91d3fed..5bf932e 100644 --- a/embodied_gen/scripts/parallel_sim.py +++ b/embodied_gen/scripts/parallel_sim.py @@ -20,7 +20,7 @@ from embodied_gen.utils.monkey_patches import monkey_patch_maniskill monkey_patch_maniskill() import json from collections import defaultdict -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Literal import gymnasium as gym @@ -69,6 +69,18 @@ class ParallelSimConfig: reach_target_only: bool = True """Whether to only reach target without full action""" + # Camera settings + camera_eye: list[float] = field(default_factory=lambda: [0.9, 0.0, 1.1]) + """Camera eye position [x, y, z] in global coordinate system""" + camera_target_pt: list[float] = field( + default_factory=lambda: [0.0, 0.0, 0.9] + ) + """Camera target (look-at) point [x, y, z] in global coordinate system""" + image_hw: list[int] = field(default_factory=lambda: [512, 512]) + """Rendered image height and width [height, width]""" + fovy_deg: float = 75 + """Camera vertical field of view in degrees""" + def entrypoint(**kwargs): if kwargs is None or len(kwargs) == 0: @@ -83,6 +95,12 @@ def entrypoint(**kwargs): enable_shadow=cfg.enable_shadow, layout_file=cfg.layout_file, control_mode=cfg.control_mode, + camera_cfg=dict( + camera_eye=cfg.camera_eye, + camera_target_pt=cfg.camera_target_pt, + image_hw=cfg.image_hw, + fovy_deg=cfg.fovy_deg, + ), ) env = RecordEpisode( env, diff --git a/embodied_gen/scripts/simulate_sapien.py b/embodied_gen/scripts/simulate_sapien.py index 01dcd7c..a4de98b 100644 --- a/embodied_gen/scripts/simulate_sapien.py +++ 
b/embodied_gen/scripts/simulate_sapien.py @@ -91,17 +91,16 @@ def entrypoint(**kwargs): fovy_deg=cfg.fovy_deg, ) with open(cfg.layout_path, "r") as f: - layout_data = json.load(f) - layout_data: LayoutInfo = LayoutInfo.from_dict(layout_data) + layout_data: LayoutInfo = LayoutInfo.from_dict(json.load(f)) actors = load_assets_from_layout_file( scene_manager.scene, - layout_data, + cfg.layout_path, cfg.z_offset, cfg.init_quat, ) agent = load_mani_skill_robot( - scene_manager.scene, layout_data, cfg.control_freq + scene_manager.scene, cfg.layout_path, cfg.control_freq ) frames = defaultdict(list) @@ -134,8 +133,9 @@ def entrypoint(**kwargs): if "Foreground" not in cfg.render_keys: return + asset_root = os.path.dirname(cfg.layout_path) bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value] - gs_path = f"{layout_data.assets[bg_node]}/gs_model.ply" + gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply" gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path) x, y, z, qx, qy, qz, qw = layout_data.position[bg_node] qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat) diff --git a/embodied_gen/scripts/textto3d.py b/embodied_gen/scripts/textto3d.py index 64475e3..4e96063 100644 --- a/embodied_gen/scripts/textto3d.py +++ b/embodied_gen/scripts/textto3d.py @@ -187,7 +187,7 @@ def text_to_3d(**kwargs) -> dict: logger.warning( f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}" ) - results["assets"][node] = f"{node_save_dir}/result" + results["assets"][node] = f"asset3d/{save_node}/result" results["quality"][node] = qa_result if qa_flag is None or qa_flag is True: diff --git a/embodied_gen/utils/geometry.py b/embodied_gen/utils/geometry.py index 92ea57f..cc48658 100644 --- a/embodied_gen/utils/geometry.py +++ b/embodied_gen/utils/geometry.py @@ -14,6 +14,7 @@ # implied. See the License for the specific language governing # permissions and limitations under the License. 
+import json import os import random from collections import defaultdict, deque @@ -32,7 +33,6 @@ from embodied_gen.utils.enum import LayoutInfo, Scene3DItemEnum from embodied_gen.utils.log import logger __all__ = [ - "bfs_placement", "with_seed", "matrix_to_pose", "pose_to_matrix", @@ -222,7 +222,7 @@ def check_reachable( @with_seed("seed") def bfs_placement( - layout_info: LayoutInfo, + layout_file: str, floor_margin: float = 0, beside_margin: float = 0.1, max_attempts: int = 3000, @@ -232,6 +232,8 @@ def bfs_placement( robot_dim: float = 0.12, seed: int = None, ) -> LayoutInfo: + layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r"))) + asset_dir = os.path.dirname(layout_file) object_mapping = layout_info.objs_mapping position = {} # node: [x, y, z, qx, qy, qz, qw] parent_bbox_xy = {} @@ -254,6 +256,7 @@ def bfs_placement( mesh_path = ( f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj" ) + mesh_path = os.path.join(asset_dir, mesh_path) mesh_info[node]["path"] = mesh_path mesh = trimesh.load(mesh_path) vertices = mesh.vertices diff --git a/embodied_gen/utils/monkey_patches.py b/embodied_gen/utils/monkey_patches.py index 81076cd..b5d35cf 100644 --- a/embodied_gen/utils/monkey_patches.py +++ b/embodied_gen/utils/monkey_patches.py @@ -175,7 +175,7 @@ def monkey_patch_maniskill(): seg_labels = camera.get_obs( rgb=False, depth=False, segmentation=True, position=False )["segmentation"] - masks = np.where((seg_labels.cpu() > 0), 255, 0).astype( + masks = np.where((seg_labels.cpu() > 1), 255, 0).astype( np.uint8 ) masks = torch.tensor(masks).to(color.device) diff --git a/embodied_gen/utils/simulation.py b/embodied_gen/utils/simulation.py index c069aaf..814719d 100644 --- a/embodied_gen/utils/simulation.py +++ b/embodied_gen/utils/simulation.py @@ -124,7 +124,7 @@ def load_actor_from_urdf( def load_assets_from_layout_file( scene: ManiSkillScene | sapien.Scene, - layout: LayoutInfo | str, + layout: str, z_offset: float = 0.0, init_quat: 
list[float] = [0, 0, 0, 1], env_idx: int = None, @@ -133,19 +133,18 @@ def load_assets_from_layout_file( Args: scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into. - layout (LayoutInfo): The layout information data. + layout (str): The layout file path. z_offset (float): Offset to apply to the Z-coordinate of non-context objects. init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment. env_idx (int): Environment index for multi-environment setup. """ - if isinstance(layout, str) and layout.endswith(".json"): - layout = LayoutInfo.from_dict(json.load(open(layout, "r"))) - + asset_root = os.path.dirname(layout) + layout = LayoutInfo.from_dict(json.load(open(layout, "r"))) actors = dict() for node in layout.assets: file_dir = layout.assets[node] file_name = f"{node.replace(' ', '_')}.urdf" - urdf_file = os.path.join(file_dir, file_name) + urdf_file = os.path.join(asset_root, file_dir, file_name) if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value: continue