chore(layout): Expose the camera setting for interface parallel_sim. (#39)

* chore(layout): Expose the camera setting for interface parallel_sim and update layout file.
This commit is contained in:
Xinjie 2025-09-10 20:58:35 +08:00 committed by GitHub
parent cf3b919b65
commit 1272b80926
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 87 additions and 36 deletions

View File

@ -258,7 +258,7 @@ Example: generate multiple parallel simulation envs with `gym.make` and record s
python embodied_gen/scripts/parallel_sim.py \ python embodied_gen/scripts/parallel_sim.py \
--layout_file "outputs/layouts_gen/task_0000/layout.json" \ --layout_file "outputs/layouts_gen/task_0000/layout.json" \
--output_dir "outputs/parallel_sim/task_0000" \ --output_dir "outputs/parallel_sim/task_0000" \
--num_envs 20 --num_envs 16
``` ```
### 🖼️ Real-to-Sim Digital Twin ### 🖼️ Real-to-Sim Digital Twin

View File

@ -16,7 +16,6 @@
import json import json
import os import os
from copy import deepcopy
import numpy as np import numpy as np
import sapien import sapien
@ -26,6 +25,7 @@ from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils from mani_skill.utils import sapien_utils
from mani_skill.utils.building import actors from mani_skill.utils.building import actors
from mani_skill.utils.building.ground import build_ground
from mani_skill.utils.registration import register_env from mani_skill.utils.registration import register_env
from mani_skill.utils.structs.actor import Actor from mani_skill.utils.structs.actor import Actor
from mani_skill.utils.structs.pose import Pose from mani_skill.utils.structs.pose import Pose
@ -78,6 +78,14 @@ class PickEmbodiedGen(BaseEnv):
# Add small offset in z-axis to avoid collision. # Add small offset in z-axis to avoid collision.
self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002) self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002) self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
self.camera_cfg = kwargs.pop("camera_cfg", None)
if self.camera_cfg is None:
self.camera_cfg = dict(
camera_eye=[0.9, 0.0, 1.1],
camera_target_pt=[0.0, 0.0, 0.9],
image_hw=[256, 256],
fovy_deg=75,
)
self.layouts = self.init_env_layouts( self.layouts = self.init_env_layouts(
layout_file, num_envs, replace_objs layout_file, num_envs, replace_objs
@ -106,22 +114,30 @@ class PickEmbodiedGen(BaseEnv):
def init_env_layouts( def init_env_layouts(
layout_file: str, num_envs: int, replace_objs: bool layout_file: str, num_envs: int, replace_objs: bool
) -> list[LayoutInfo]: ) -> list[LayoutInfo]:
layout = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
layouts = [] layouts = []
for env_idx in range(num_envs): for env_idx in range(num_envs):
if replace_objs and env_idx > 0: if replace_objs and env_idx > 0:
layout = bfs_placement(deepcopy(layout)) layout_info = bfs_placement(layout_file)
layouts.append(layout) else:
layout_info = json.load(open(layout_file, "r"))
layout_info = LayoutInfo.from_dict(layout_info)
layout_path = layout_file.replace(".json", f"_env{env_idx}.json")
with open(layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4)
layouts.append(layout_path)
return layouts return layouts
@staticmethod @staticmethod
def compute_robot_init_pose( def compute_robot_init_pose(
layouts: list[LayoutInfo], num_envs: int, z_offset: float = 0.0 layouts: list[str], num_envs: int, z_offset: float = 0.0
) -> list[list[float]]: ) -> list[list[float]]:
robot_pose = [] robot_pose = []
for env_idx in range(num_envs): for env_idx in range(num_envs):
layout = layouts[env_idx] layout = json.load(open(layouts[env_idx], "r"))
layout = LayoutInfo.from_dict(layout)
robot_node = layout.relation[Scene3DItemEnum.ROBOT.value] robot_node = layout.relation[Scene3DItemEnum.ROBOT.value]
x, y, z, qx, qy, qz, qw = layout.position[robot_node] x, y, z, qx, qy, qz, qw = layout.position[robot_node]
robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz]) robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz])
@ -154,19 +170,27 @@ class PickEmbodiedGen(BaseEnv):
@property @property
def _default_human_render_camera_configs(self): def _default_human_render_camera_configs(self):
pose = sapien_utils.look_at( pose = sapien_utils.look_at(
eye=[0.9, 0.0, 1.1], target=[0.0, 0.0, 0.9] eye=self.camera_cfg["camera_eye"],
target=self.camera_cfg["camera_target_pt"],
) )
return CameraConfig( return CameraConfig(
"render_camera", pose, 256, 256, np.deg2rad(75), 0.01, 100 "render_camera",
pose,
self.camera_cfg["image_hw"][1],
self.camera_cfg["image_hw"][0],
np.deg2rad(self.camera_cfg["fovy_deg"]),
0.01,
100,
) )
def _load_agent(self, options: dict): def _load_agent(self, options: dict):
self.ground = build_ground(self.scene)
super()._load_agent(options, sapien.Pose(p=[-10, 0, 10])) super()._load_agent(options, sapien.Pose(p=[-10, 0, 10]))
def _load_scene(self, options: dict): def _load_scene(self, options: dict):
all_objects = [] all_objects = []
logger.info(f"Loading assets and decomposition mesh collisions...") logger.info(f"Loading EmbodiedGen assets...")
for env_idx in range(self.num_envs): for env_idx in range(self.num_envs):
env_actors = load_assets_from_layout_file( env_actors = load_assets_from_layout_file(
self.scene, self.scene,
@ -229,7 +253,7 @@ class PickEmbodiedGen(BaseEnv):
self.agent.controller.controllers["gripper"].reset() self.agent.controller.controllers["gripper"].reset()
def render_gs3d_images( def render_gs3d_images(
self, layouts: list[LayoutInfo], num_envs: int, init_quat: list[float] self, layouts: list[str], num_envs: int, init_quat: list[float]
) -> dict[str, np.ndarray]: ) -> dict[str, np.ndarray]:
sim_coord_align = ( sim_coord_align = (
torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device) torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device)
@ -237,12 +261,18 @@ class PickEmbodiedGen(BaseEnv):
cameras = self.scene.sensors.copy() cameras = self.scene.sensors.copy()
cameras.update(self.scene.human_render_cameras) cameras.update(self.scene.human_render_cameras)
bg_node = layouts[0].relation[Scene3DItemEnum.BACKGROUND.value] # Preload the background Gaussian Splatting model.
gs_path = os.path.join(layouts[0].assets[bg_node], "gs_model.ply") asset_root = os.path.dirname(layouts[0])
layout = LayoutInfo.from_dict(json.load(open(layouts[0], "r")))
bg_node = layout.relation[Scene3DItemEnum.BACKGROUND.value]
gs_path = os.path.join(
asset_root, layout.assets[bg_node], "gs_model.ply"
)
raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path) raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
bg_images = dict() bg_images = dict()
for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"): for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"):
layout = layouts[env_idx] layout = json.load(open(layouts[env_idx], "r"))
layout = LayoutInfo.from_dict(layout)
x, y, z, qx, qy, qz, qw = layout.position[bg_node] x, y, z, qx, qy, qz, qw = layout.position[bg_node]
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat) qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat)
init_pose = torch.tensor([x, y, z, qx, qy, qz, qw]) init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])

View File

@ -50,10 +50,7 @@ def entrypoint(**kwargs):
out_scene_path = f"{output_dir}/Iscene.glb" out_scene_path = f"{output_dir}/Iscene.glb"
out_layout_path = f"{output_dir}/layout.json" out_layout_path = f"{output_dir}/layout.json"
with open(args.layout_path, "r") as f: layout_info = bfs_placement(args.layout_path, seed=args.seed)
layout_info = LayoutInfo.from_dict(json.load(f))
layout_info = bfs_placement(layout_info, seed=args.seed)
with open(out_layout_path, "w") as f: with open(out_layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4) json.dump(layout_info.to_dict(), f, indent=4)

View File

@ -119,11 +119,15 @@ def entrypoint() -> None:
match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}" match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
bg_save_dir = os.path.join(output_root, "background") bg_save_dir = os.path.join(output_root, "background")
copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True) copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
layout_info.assets[bg_node] = bg_save_dir layout_info.assets[bg_node] = "background"
# BFS layout placement. # BFS layout placement.
layout_path = f"{output_root}/layout.json"
with open(layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4)
layout_info = bfs_placement( layout_info = bfs_placement(
layout_info, layout_path,
limit_reach_range=True if args.insert_robot else False, limit_reach_range=True if args.insert_robot else False,
seed=args.seed_layout, seed=args.seed_layout,
) )

View File

@ -20,7 +20,7 @@ from embodied_gen.utils.monkey_patches import monkey_patch_maniskill
monkey_patch_maniskill() monkey_patch_maniskill()
import json import json
from collections import defaultdict from collections import defaultdict
from dataclasses import dataclass from dataclasses import dataclass, field
from typing import Literal from typing import Literal
import gymnasium as gym import gymnasium as gym
@ -69,6 +69,18 @@ class ParallelSimConfig:
reach_target_only: bool = True reach_target_only: bool = True
"""Whether to only reach target without full action""" """Whether to only reach target without full action"""
# Camera settings
camera_eye: list[float] = field(default_factory=lambda: [0.9, 0.0, 1.1])
"""Camera eye position [x, y, z] in global coordiante system"""
camera_target_pt: list[float] = field(
default_factory=lambda: [0.0, 0.0, 0.9]
)
"""Camera target(look-at) point [x, y, z] in global coordiante system"""
image_hw: list[int] = field(default_factory=lambda: [512, 512])
"""Rendered image height and width [height, width]"""
fovy_deg: float = 75
"""Camera vertical field of view in degrees"""
def entrypoint(**kwargs): def entrypoint(**kwargs):
if kwargs is None or len(kwargs) == 0: if kwargs is None or len(kwargs) == 0:
@ -83,6 +95,12 @@ def entrypoint(**kwargs):
enable_shadow=cfg.enable_shadow, enable_shadow=cfg.enable_shadow,
layout_file=cfg.layout_file, layout_file=cfg.layout_file,
control_mode=cfg.control_mode, control_mode=cfg.control_mode,
camera_cfg=dict(
camera_eye=cfg.camera_eye,
camera_target_pt=cfg.camera_target_pt,
image_hw=cfg.image_hw,
fovy_deg=cfg.fovy_deg,
),
) )
env = RecordEpisode( env = RecordEpisode(
env, env,

View File

@ -91,17 +91,16 @@ def entrypoint(**kwargs):
fovy_deg=cfg.fovy_deg, fovy_deg=cfg.fovy_deg,
) )
with open(cfg.layout_path, "r") as f: with open(cfg.layout_path, "r") as f:
layout_data = json.load(f) layout_data: LayoutInfo = LayoutInfo.from_dict(json.load(f))
layout_data: LayoutInfo = LayoutInfo.from_dict(layout_data)
actors = load_assets_from_layout_file( actors = load_assets_from_layout_file(
scene_manager.scene, scene_manager.scene,
layout_data, cfg.layout_path,
cfg.z_offset, cfg.z_offset,
cfg.init_quat, cfg.init_quat,
) )
agent = load_mani_skill_robot( agent = load_mani_skill_robot(
scene_manager.scene, layout_data, cfg.control_freq scene_manager.scene, cfg.layout_path, cfg.control_freq
) )
frames = defaultdict(list) frames = defaultdict(list)
@ -134,8 +133,9 @@ def entrypoint(**kwargs):
if "Foreground" not in cfg.render_keys: if "Foreground" not in cfg.render_keys:
return return
asset_root = os.path.dirname(cfg.layout_path)
bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value] bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value]
gs_path = f"{layout_data.assets[bg_node]}/gs_model.ply" gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path) gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
x, y, z, qx, qy, qz, qw = layout_data.position[bg_node] x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat) qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)

View File

@ -187,7 +187,7 @@ def text_to_3d(**kwargs) -> dict:
logger.warning( logger.warning(
f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}" f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}"
) )
results["assets"][node] = f"{node_save_dir}/result" results["assets"][node] = f"asset3d/{save_node}/result"
results["quality"][node] = qa_result results["quality"][node] = qa_result
if qa_flag is None or qa_flag is True: if qa_flag is None or qa_flag is True:

View File

@ -14,6 +14,7 @@
# implied. See the License for the specific language governing # implied. See the License for the specific language governing
# permissions and limitations under the License. # permissions and limitations under the License.
import json
import os import os
import random import random
from collections import defaultdict, deque from collections import defaultdict, deque
@ -32,7 +33,6 @@ from embodied_gen.utils.enum import LayoutInfo, Scene3DItemEnum
from embodied_gen.utils.log import logger from embodied_gen.utils.log import logger
__all__ = [ __all__ = [
"bfs_placement",
"with_seed", "with_seed",
"matrix_to_pose", "matrix_to_pose",
"pose_to_matrix", "pose_to_matrix",
@ -222,7 +222,7 @@ def check_reachable(
@with_seed("seed") @with_seed("seed")
def bfs_placement( def bfs_placement(
layout_info: LayoutInfo, layout_file: str,
floor_margin: float = 0, floor_margin: float = 0,
beside_margin: float = 0.1, beside_margin: float = 0.1,
max_attempts: int = 3000, max_attempts: int = 3000,
@ -232,6 +232,8 @@ def bfs_placement(
robot_dim: float = 0.12, robot_dim: float = 0.12,
seed: int = None, seed: int = None,
) -> LayoutInfo: ) -> LayoutInfo:
layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
asset_dir = os.path.dirname(layout_file)
object_mapping = layout_info.objs_mapping object_mapping = layout_info.objs_mapping
position = {} # node: [x, y, z, qx, qy, qz, qw] position = {} # node: [x, y, z, qx, qy, qz, qw]
parent_bbox_xy = {} parent_bbox_xy = {}
@ -254,6 +256,7 @@ def bfs_placement(
mesh_path = ( mesh_path = (
f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj" f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj"
) )
mesh_path = os.path.join(asset_dir, mesh_path)
mesh_info[node]["path"] = mesh_path mesh_info[node]["path"] = mesh_path
mesh = trimesh.load(mesh_path) mesh = trimesh.load(mesh_path)
vertices = mesh.vertices vertices = mesh.vertices

View File

@ -175,7 +175,7 @@ def monkey_patch_maniskill():
seg_labels = camera.get_obs( seg_labels = camera.get_obs(
rgb=False, depth=False, segmentation=True, position=False rgb=False, depth=False, segmentation=True, position=False
)["segmentation"] )["segmentation"]
masks = np.where((seg_labels.cpu() > 0), 255, 0).astype( masks = np.where((seg_labels.cpu() > 1), 255, 0).astype(
np.uint8 np.uint8
) )
masks = torch.tensor(masks).to(color.device) masks = torch.tensor(masks).to(color.device)

View File

@ -124,7 +124,7 @@ def load_actor_from_urdf(
def load_assets_from_layout_file( def load_assets_from_layout_file(
scene: ManiSkillScene | sapien.Scene, scene: ManiSkillScene | sapien.Scene,
layout: LayoutInfo | str, layout: str,
z_offset: float = 0.0, z_offset: float = 0.0,
init_quat: list[float] = [0, 0, 0, 1], init_quat: list[float] = [0, 0, 0, 1],
env_idx: int = None, env_idx: int = None,
@ -133,19 +133,18 @@ def load_assets_from_layout_file(
Args: Args:
scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into. scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into.
layout (LayoutInfo): The layout information data. layout (str): The layout file path.
z_offset (float): Offset to apply to the Z-coordinate of non-context objects. z_offset (float): Offset to apply to the Z-coordinate of non-context objects.
init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment. init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment.
env_idx (int): Environment index for multi-environment setup. env_idx (int): Environment index for multi-environment setup.
""" """
if isinstance(layout, str) and layout.endswith(".json"): asset_root = os.path.dirname(layout)
layout = LayoutInfo.from_dict(json.load(open(layout, "r"))) layout = LayoutInfo.from_dict(json.load(open(layout, "r")))
actors = dict() actors = dict()
for node in layout.assets: for node in layout.assets:
file_dir = layout.assets[node] file_dir = layout.assets[node]
file_name = f"{node.replace(' ', '_')}.urdf" file_name = f"{node.replace(' ', '_')}.urdf"
urdf_file = os.path.join(file_dir, file_name) urdf_file = os.path.join(asset_root, file_dir, file_name)
if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value: if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value:
continue continue