chore(layout): Expose the camera setting for interface parallel_sim. (#39)

* chore(layout): Expose the camera setting for interface parallel_sim and update layout file.
This commit is contained in:
Xinjie 2025-09-10 20:58:35 +08:00 committed by GitHub
parent cf3b919b65
commit 1272b80926
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 87 additions and 36 deletions

View File

@@ -258,7 +258,7 @@ Example: generate multiple parallel simulation envs with `gym.make` and record s
python embodied_gen/scripts/parallel_sim.py \
--layout_file "outputs/layouts_gen/task_0000/layout.json" \
--output_dir "outputs/parallel_sim/task_0000" \
--num_envs 20
--num_envs 16
```
### 🖼️ Real-to-Sim Digital Twin

View File

@@ -16,7 +16,6 @@
import json
import os
from copy import deepcopy
import numpy as np
import sapien
@@ -26,6 +25,7 @@ from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils.building import actors
from mani_skill.utils.building.ground import build_ground
from mani_skill.utils.registration import register_env
from mani_skill.utils.structs.actor import Actor
from mani_skill.utils.structs.pose import Pose
@@ -78,6 +78,14 @@ class PickEmbodiedGen(BaseEnv):
# Add small offset in z-axis to avoid collision.
self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
self.camera_cfg = kwargs.pop("camera_cfg", None)
if self.camera_cfg is None:
self.camera_cfg = dict(
camera_eye=[0.9, 0.0, 1.1],
camera_target_pt=[0.0, 0.0, 0.9],
image_hw=[256, 256],
fovy_deg=75,
)
self.layouts = self.init_env_layouts(
layout_file, num_envs, replace_objs
@@ -106,22 +114,30 @@ class PickEmbodiedGen(BaseEnv):
def init_env_layouts(
layout_file: str, num_envs: int, replace_objs: bool
) -> list[LayoutInfo]:
layout = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
layouts = []
for env_idx in range(num_envs):
if replace_objs and env_idx > 0:
layout = bfs_placement(deepcopy(layout))
layouts.append(layout)
layout_info = bfs_placement(layout_file)
else:
layout_info = json.load(open(layout_file, "r"))
layout_info = LayoutInfo.from_dict(layout_info)
layout_path = layout_file.replace(".json", f"_env{env_idx}.json")
with open(layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4)
layouts.append(layout_path)
return layouts
@staticmethod
def compute_robot_init_pose(
layouts: list[LayoutInfo], num_envs: int, z_offset: float = 0.0
layouts: list[str], num_envs: int, z_offset: float = 0.0
) -> list[list[float]]:
robot_pose = []
for env_idx in range(num_envs):
layout = layouts[env_idx]
layout = json.load(open(layouts[env_idx], "r"))
layout = LayoutInfo.from_dict(layout)
robot_node = layout.relation[Scene3DItemEnum.ROBOT.value]
x, y, z, qx, qy, qz, qw = layout.position[robot_node]
robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz])
@@ -154,19 +170,27 @@ class PickEmbodiedGen(BaseEnv):
@property
def _default_human_render_camera_configs(self):
pose = sapien_utils.look_at(
eye=[0.9, 0.0, 1.1], target=[0.0, 0.0, 0.9]
eye=self.camera_cfg["camera_eye"],
target=self.camera_cfg["camera_target_pt"],
)
return CameraConfig(
"render_camera", pose, 256, 256, np.deg2rad(75), 0.01, 100
"render_camera",
pose,
self.camera_cfg["image_hw"][1],
self.camera_cfg["image_hw"][0],
np.deg2rad(self.camera_cfg["fovy_deg"]),
0.01,
100,
)
def _load_agent(self, options: dict):
self.ground = build_ground(self.scene)
super()._load_agent(options, sapien.Pose(p=[-10, 0, 10]))
def _load_scene(self, options: dict):
all_objects = []
logger.info(f"Loading assets and decomposition mesh collisions...")
logger.info(f"Loading EmbodiedGen assets...")
for env_idx in range(self.num_envs):
env_actors = load_assets_from_layout_file(
self.scene,
@@ -229,7 +253,7 @@ class PickEmbodiedGen(BaseEnv):
self.agent.controller.controllers["gripper"].reset()
def render_gs3d_images(
self, layouts: list[LayoutInfo], num_envs: int, init_quat: list[float]
self, layouts: list[str], num_envs: int, init_quat: list[float]
) -> dict[str, np.ndarray]:
sim_coord_align = (
torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device)
@@ -237,12 +261,18 @@
cameras = self.scene.sensors.copy()
cameras.update(self.scene.human_render_cameras)
bg_node = layouts[0].relation[Scene3DItemEnum.BACKGROUND.value]
gs_path = os.path.join(layouts[0].assets[bg_node], "gs_model.ply")
# Preload the background Gaussian Splatting model.
asset_root = os.path.dirname(layouts[0])
layout = LayoutInfo.from_dict(json.load(open(layouts[0], "r")))
bg_node = layout.relation[Scene3DItemEnum.BACKGROUND.value]
gs_path = os.path.join(
asset_root, layout.assets[bg_node], "gs_model.ply"
)
raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
bg_images = dict()
for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"):
layout = layouts[env_idx]
layout = json.load(open(layouts[env_idx], "r"))
layout = LayoutInfo.from_dict(layout)
x, y, z, qx, qy, qz, qw = layout.position[bg_node]
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat)
init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])

View File

@@ -50,10 +50,7 @@ def entrypoint(**kwargs):
out_scene_path = f"{output_dir}/Iscene.glb"
out_layout_path = f"{output_dir}/layout.json"
with open(args.layout_path, "r") as f:
layout_info = LayoutInfo.from_dict(json.load(f))
layout_info = bfs_placement(layout_info, seed=args.seed)
layout_info = bfs_placement(args.layout_path, seed=args.seed)
with open(out_layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4)

View File

@@ -119,11 +119,15 @@ def entrypoint() -> None:
match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
bg_save_dir = os.path.join(output_root, "background")
copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
layout_info.assets[bg_node] = bg_save_dir
layout_info.assets[bg_node] = "background"
# BFS layout placement.
layout_path = f"{output_root}/layout.json"
with open(layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4)
layout_info = bfs_placement(
layout_info,
layout_path,
limit_reach_range=True if args.insert_robot else False,
seed=args.seed_layout,
)

View File

@@ -20,7 +20,7 @@ from embodied_gen.utils.monkey_patches import monkey_patch_maniskill
monkey_patch_maniskill()
import json
from collections import defaultdict
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Literal
import gymnasium as gym
@@ -69,6 +69,18 @@ class ParallelSimConfig:
reach_target_only: bool = True
"""Whether to only reach target without full action"""
# Camera settings
camera_eye: list[float] = field(default_factory=lambda: [0.9, 0.0, 1.1])
"""Camera eye position [x, y, z] in global coordinate system"""
camera_target_pt: list[float] = field(
default_factory=lambda: [0.0, 0.0, 0.9]
)
"""Camera target (look-at) point [x, y, z] in global coordinate system"""
image_hw: list[int] = field(default_factory=lambda: [512, 512])
"""Rendered image height and width [height, width]"""
fovy_deg: float = 75
"""Camera vertical field of view in degrees"""
def entrypoint(**kwargs):
if kwargs is None or len(kwargs) == 0:
@@ -83,6 +95,12 @@ def entrypoint(**kwargs):
enable_shadow=cfg.enable_shadow,
layout_file=cfg.layout_file,
control_mode=cfg.control_mode,
camera_cfg=dict(
camera_eye=cfg.camera_eye,
camera_target_pt=cfg.camera_target_pt,
image_hw=cfg.image_hw,
fovy_deg=cfg.fovy_deg,
),
)
env = RecordEpisode(
env,

View File

@@ -91,17 +91,16 @@ def entrypoint(**kwargs):
fovy_deg=cfg.fovy_deg,
)
with open(cfg.layout_path, "r") as f:
layout_data = json.load(f)
layout_data: LayoutInfo = LayoutInfo.from_dict(layout_data)
layout_data: LayoutInfo = LayoutInfo.from_dict(json.load(f))
actors = load_assets_from_layout_file(
scene_manager.scene,
layout_data,
cfg.layout_path,
cfg.z_offset,
cfg.init_quat,
)
agent = load_mani_skill_robot(
scene_manager.scene, layout_data, cfg.control_freq
scene_manager.scene, cfg.layout_path, cfg.control_freq
)
frames = defaultdict(list)
@@ -134,8 +133,9 @@ def entrypoint(**kwargs):
if "Foreground" not in cfg.render_keys:
return
asset_root = os.path.dirname(cfg.layout_path)
bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value]
gs_path = f"{layout_data.assets[bg_node]}/gs_model.ply"
gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)

View File

@@ -187,7 +187,7 @@ def text_to_3d(**kwargs) -> dict:
logger.warning(
f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}"
)
results["assets"][node] = f"{node_save_dir}/result"
results["assets"][node] = f"asset3d/{save_node}/result"
results["quality"][node] = qa_result
if qa_flag is None or qa_flag is True:

View File

@@ -14,6 +14,7 @@
# implied. See the License for the specific language governing
# permissions and limitations under the License.
import json
import os
import random
from collections import defaultdict, deque
@@ -32,7 +33,6 @@ from embodied_gen.utils.enum import LayoutInfo, Scene3DItemEnum
from embodied_gen.utils.log import logger
__all__ = [
"bfs_placement",
"with_seed",
"matrix_to_pose",
"pose_to_matrix",
@@ -222,7 +222,7 @@ def check_reachable(
@with_seed("seed")
def bfs_placement(
layout_info: LayoutInfo,
layout_file: str,
floor_margin: float = 0,
beside_margin: float = 0.1,
max_attempts: int = 3000,
@@ -232,6 +232,8 @@
robot_dim: float = 0.12,
seed: int = None,
) -> LayoutInfo:
layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
asset_dir = os.path.dirname(layout_file)
object_mapping = layout_info.objs_mapping
position = {} # node: [x, y, z, qx, qy, qz, qw]
parent_bbox_xy = {}
@@ -254,6 +256,7 @@
mesh_path = (
f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj"
)
mesh_path = os.path.join(asset_dir, mesh_path)
mesh_info[node]["path"] = mesh_path
mesh = trimesh.load(mesh_path)
vertices = mesh.vertices

View File

@@ -175,7 +175,7 @@ def monkey_patch_maniskill():
seg_labels = camera.get_obs(
rgb=False, depth=False, segmentation=True, position=False
)["segmentation"]
masks = np.where((seg_labels.cpu() > 0), 255, 0).astype(
masks = np.where((seg_labels.cpu() > 1), 255, 0).astype(
np.uint8
)
masks = torch.tensor(masks).to(color.device)

View File

@@ -124,7 +124,7 @@ def load_actor_from_urdf(
def load_assets_from_layout_file(
scene: ManiSkillScene | sapien.Scene,
layout: LayoutInfo | str,
layout: str,
z_offset: float = 0.0,
init_quat: list[float] = [0, 0, 0, 1],
env_idx: int = None,
@@ -133,19 +133,18 @@ def load_assets_from_layout_file(
Args:
scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into.
layout (LayoutInfo): The layout information data.
layout (str): The layout file path.
z_offset (float): Offset to apply to the Z-coordinate of non-context objects.
init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment.
env_idx (int): Environment index for multi-environment setup.
"""
if isinstance(layout, str) and layout.endswith(".json"):
asset_root = os.path.dirname(layout)
layout = LayoutInfo.from_dict(json.load(open(layout, "r")))
actors = dict()
for node in layout.assets:
file_dir = layout.assets[node]
file_name = f"{node.replace(' ', '_')}.urdf"
urdf_file = os.path.join(file_dir, file_name)
urdf_file = os.path.join(asset_root, file_dir, file_name)
if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value:
continue