fix(layout): Fix layout axis order after urdf origin rotation change. (#41)

This commit is contained in:
Xinjie 2025-09-26 17:23:01 +08:00 committed by GitHub
parent ee03a089b1
commit a34b1dacea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 73 additions and 23 deletions

View File

@ -37,6 +37,7 @@ def decompose_convex_coacd(
params: dict, params: dict,
verbose: bool = False, verbose: bool = False,
auto_scale: bool = True, auto_scale: bool = True,
scale_factor: float = 1.0,
) -> None: ) -> None:
coacd.set_log_level("info" if verbose else "warn") coacd.set_log_level("info" if verbose else "warn")
@ -53,6 +54,7 @@ def decompose_convex_coacd(
rescale = visual_mesh_shape / convex_mesh_shape rescale = visual_mesh_shape / convex_mesh_shape
combined.vertices *= rescale combined.vertices *= rescale
combined.vertices *= scale_factor
combined.export(outfile) combined.export(outfile)
@ -71,6 +73,7 @@ def decompose_convex_mesh(
merge: bool = True, merge: bool = True,
seed: int = 0, seed: int = 0,
auto_scale: bool = True, auto_scale: bool = True,
scale_factor: float = 1.005,
verbose: bool = False, verbose: bool = False,
) -> str: ) -> str:
"""Decompose a mesh into convex parts using the CoACD algorithm.""" """Decompose a mesh into convex parts using the CoACD algorithm."""
@ -95,7 +98,9 @@ def decompose_convex_mesh(
) )
try: try:
decompose_convex_coacd(filename, outfile, params, verbose, auto_scale) decompose_convex_coacd(
filename, outfile, params, verbose, auto_scale, scale_factor
)
if os.path.exists(outfile): if os.path.exists(outfile):
return outfile return outfile
except Exception as e: except Exception as e:
@ -106,7 +111,7 @@ def decompose_convex_mesh(
try: try:
params["preprocess_mode"] = "on" params["preprocess_mode"] = "on"
decompose_convex_coacd( decompose_convex_coacd(
filename, outfile, params, verbose, auto_scale filename, outfile, params, verbose, auto_scale, scale_factor
) )
if os.path.exists(outfile): if os.path.exists(outfile):
return outfile return outfile

View File

@ -74,7 +74,9 @@ class PickEmbodiedGen(BaseEnv):
layout_file = kwargs.pop("layout_file", None) layout_file = kwargs.pop("layout_file", None)
replace_objs = kwargs.pop("replace_objs", True) replace_objs = kwargs.pop("replace_objs", True)
self.enable_grasp = kwargs.pop("enable_grasp", False) self.enable_grasp = kwargs.pop("enable_grasp", False)
self.init_quat = kwargs.pop("init_quat", [0.7071, 0, 0, 0.7071]) self.init_3dgs_quat = kwargs.pop(
"init_3dgs_quat", [0.7071, 0, 0, 0.7071]
)
# Add small offset in z-axis to avoid collision. # Add small offset in z-axis to avoid collision.
self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002) self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002) self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
@ -107,7 +109,7 @@ class PickEmbodiedGen(BaseEnv):
self.bg_images = dict() self.bg_images = dict()
if self.render_mode == "hybrid": if self.render_mode == "hybrid":
self.bg_images = self.render_gs3d_images( self.bg_images = self.render_gs3d_images(
self.layouts, num_envs, self.init_quat self.layouts, num_envs, self.init_3dgs_quat
) )
@staticmethod @staticmethod

View File

@ -77,9 +77,9 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
- {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be common - {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be common
household or office items or furniture, not abstract concepts, not too small like needle. household or office items or furniture, not abstract concepts, not too small like needle.
- If the input includes a plural or grouped object (e.g., "pens", "bottles", "plates", "fruit"), - If the input includes a plural or grouped object (e.g., "pens", "bottles", "plates", "fruit"),
you must decompose it into multiple individual instances (e.g., ["pen", "pen"], ["apple", "pear"]). you must decompose it into multiple individual instances (e.g., ["pen1", "pen2"], ["apple", "pear"]).
- Containers that hold objects (e.g., "bowl of apples", "box of tools") must - Containers that hold objects (e.g., "bowl of apples", "box of tools") must
be separated into individual items (e.g., ["bowl", "apple", "apple"]). be separated into individual items (e.g., ["bowl", "apple1", "apple2"]).
- Do not include transparent objects such as "glass", "plastic", etc. - Do not include transparent objects such as "glass", "plastic", etc.
- The output must be in compact JSON format and use Markdown syntax, just like the output in the example below. - The output must be in compact JSON format and use Markdown syntax, just like the output in the example below.
@ -170,7 +170,7 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
"robot": "franka", "robot": "franka",
"background": "office", "background": "office",
"context": "table", "context": "table",
"manipulated_objs": ["pen", "pen", "grey bowl"], "manipulated_objs": ["pen1", "pen2", "grey bowl"],
"distractor_objs": ["notepad", "cup"] "distractor_objs": ["notepad", "cup"]
}} }}
``` ```

View File

@ -16,6 +16,7 @@
import json import json
import os import os
import shutil
from dataclasses import dataclass from dataclasses import dataclass
import tyro import tyro
@ -51,6 +52,12 @@ def entrypoint(**kwargs):
out_layout_path = f"{output_dir}/layout.json" out_layout_path = f"{output_dir}/layout.json"
layout_info = bfs_placement(args.layout_path, seed=args.seed) layout_info = bfs_placement(args.layout_path, seed=args.seed)
origin_dir = os.path.dirname(args.layout_path)
for key in layout_info.assets:
src = f"{origin_dir}/{layout_info.assets[key]}"
dst = f"{output_dir}/{layout_info.assets[key]}"
shutil.copytree(src, dst, dirs_exist_ok=True)
with open(out_layout_path, "w") as f: with open(out_layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4) json.dump(layout_info.to_dict(), f, indent=4)

View File

@ -49,7 +49,7 @@ class SapienSimConfig:
sim_freq: int = 200 sim_freq: int = 200
sim_step: int = 400 sim_step: int = 400
z_offset: float = 0.004 z_offset: float = 0.004
init_quat: list[float] = field( init_3dgs_quat: list[float] = field(
default_factory=lambda: [0.7071, 0, 0, 0.7071] default_factory=lambda: [0.7071, 0, 0, 0.7071]
) # xyzw ) # xyzw
device: str = "cuda" device: str = "cuda"
@ -137,7 +137,7 @@ def entrypoint(**kwargs):
gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply" gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path) gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
x, y, z, qx, qy, qz, qw = layout_data.position[bg_node] x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat) qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_3dgs_quat)
init_pose = torch.tensor([x, y, z, qx, qy, qz, qw]) init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
gs_model = gs_model.get_gaussians(instance_pose=init_pose) gs_model = gs_model.get_gaussians(instance_pose=init_pose)

View File

@ -80,7 +80,7 @@ def pose_to_matrix(pose: list[float]) -> np.ndarray:
def compute_xy_bbox( def compute_xy_bbox(
vertices: np.ndarray, col_x: int = 0, col_y: int = 2 vertices: np.ndarray, col_x: int = 0, col_y: int = 1
) -> list[float]: ) -> list[float]:
x_vals = vertices[:, col_x] x_vals = vertices[:, col_x]
y_vals = vertices[:, col_y] y_vals = vertices[:, col_y]
@ -139,11 +139,14 @@ def compute_convex_hull_path(
z_threshold: float = 0.05, z_threshold: float = 0.05,
interp_per_edge: int = 3, interp_per_edge: int = 3,
margin: float = -0.02, margin: float = -0.02,
x_axis: int = 0,
y_axis: int = 1,
z_axis: int = 2,
) -> Path: ) -> Path:
top_vertices = vertices[ top_vertices = vertices[
vertices[:, 1] > vertices[:, 1].max() - z_threshold vertices[:, z_axis] > vertices[:, z_axis].max() - z_threshold
] ]
top_xy = top_vertices[:, [0, 2]] top_xy = top_vertices[:, [x_axis, y_axis]]
if len(top_xy) < 3: if len(top_xy) < 3:
raise ValueError("Not enough points to form a convex hull") raise ValueError("Not enough points to form a convex hull")
@ -184,11 +187,11 @@ def all_corners_inside(hull: Path, box: list, threshold: int = 3) -> bool:
def compute_axis_rotation_quat( def compute_axis_rotation_quat(
axis: Literal["x", "y", "z"], angle_rad: float axis: Literal["x", "y", "z"], angle_rad: float
) -> list[float]: ) -> list[float]:
if axis.lower() == 'x': if axis.lower() == "x":
q = Quaternion(axis=[1, 0, 0], angle=angle_rad) q = Quaternion(axis=[1, 0, 0], angle=angle_rad)
elif axis.lower() == 'y': elif axis.lower() == "y":
q = Quaternion(axis=[0, 1, 0], angle=angle_rad) q = Quaternion(axis=[0, 1, 0], angle=angle_rad)
elif axis.lower() == 'z': elif axis.lower() == "z":
q = Quaternion(axis=[0, 0, 1], angle=angle_rad) q = Quaternion(axis=[0, 0, 1], angle=angle_rad)
else: else:
raise ValueError(f"Unsupported axis '{axis}', must be one of x, y, z") raise ValueError(f"Unsupported axis '{axis}', must be one of x, y, z")
@ -226,12 +229,34 @@ def bfs_placement(
floor_margin: float = 0, floor_margin: float = 0,
beside_margin: float = 0.1, beside_margin: float = 0.1,
max_attempts: int = 3000, max_attempts: int = 3000,
init_rpy: tuple = (1.5708, 0.0, 0.0),
rotate_objs: bool = True, rotate_objs: bool = True,
rotate_bg: bool = True, rotate_bg: bool = True,
rotate_context: bool = True,
limit_reach_range: bool = True, limit_reach_range: bool = True,
robot_dim: float = 0.12, robot_dim: float = 0.12,
seed: int = None, seed: int = None,
) -> LayoutInfo: ) -> LayoutInfo:
"""Place objects in the layout using BFS traversal.
Args:
layout_file: Path to the JSON file defining the layout structure and assets.
floor_margin: Z-offset for the background object, typically for objects placed on the floor.
beside_margin: Minimum margin for objects placed 'beside' their parent, used when 'on' placement fails.
max_attempts: Maximum number of attempts to find a non-overlapping position for an object.
init_rpy: Initial roll-pitch-yaw rotation (in radians) applied to all object meshes to align the mesh's
coordinate system with the world's (e.g., Z-up).
rotate_objs: If True, apply a random rotation around the Z-axis for manipulated and distractor objects.
rotate_bg: If True, apply a random rotation around the Y-axis for the background object.
rotate_context: If True, apply a random rotation around the Z-axis for the context object.
limit_reach_range: If True, enforce a check that manipulated objects are within the robot's reach.
robot_dim: The approximate dimension (e.g., diameter) of the robot for box representation.
seed: Random seed for reproducible placement.
Returns:
A :class:`LayoutInfo` object containing the objects and their final computed 7D poses
([x, y, z, qx, qy, qz, qw]).
"""
layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r"))) layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
asset_dir = os.path.dirname(layout_file) asset_dir = os.path.dirname(layout_file)
object_mapping = layout_info.objs_mapping object_mapping = layout_info.objs_mapping
@ -259,13 +284,23 @@ def bfs_placement(
mesh_path = os.path.join(asset_dir, mesh_path) mesh_path = os.path.join(asset_dir, mesh_path)
mesh_info[node]["path"] = mesh_path mesh_info[node]["path"] = mesh_path
mesh = trimesh.load(mesh_path) mesh = trimesh.load(mesh_path)
vertices = mesh.vertices rotation = R.from_euler("xyz", init_rpy, degrees=False)
z1 = np.percentile(vertices[:, 1], 1) vertices = mesh.vertices @ rotation.as_matrix().T
z2 = np.percentile(vertices[:, 1], 99) z1 = np.percentile(vertices[:, 2], 1)
z2 = np.percentile(vertices[:, 2], 99)
if object_mapping[node] == Scene3DItemEnum.CONTEXT.value: if object_mapping[node] == Scene3DItemEnum.CONTEXT.value:
object_quat = [0, 0, 0, 1] object_quat = [0, 0, 0, 1]
if rotate_context:
angle_rad = np.random.uniform(0, 2 * np.pi)
object_quat = compute_axis_rotation_quat(
axis="z", angle_rad=angle_rad
)
rotation = R.from_quat(object_quat).as_matrix()
vertices = vertices @ rotation.T
mesh_info[node]["surface"] = compute_convex_hull_path(vertices) mesh_info[node]["surface"] = compute_convex_hull_path(vertices)
# Put robot in the CONTEXT edge. # Put robot in the CONTEXT edge.
x, y = random.choice(mesh_info[node]["surface"].vertices) x, y = random.choice(mesh_info[node]["surface"].vertices)
theta = np.arctan2(y, x) theta = np.arctan2(y, x)
@ -288,9 +323,7 @@ def bfs_placement(
axis="z", angle_rad=angle_rad axis="z", angle_rad=angle_rad
) )
rotation = R.from_quat(object_quat).as_matrix() rotation = R.from_quat(object_quat).as_matrix()
vertices = np.dot(mesh.vertices, rotation.T) vertices = vertices @ rotation.T
z1 = np.percentile(vertices[:, 1], 1)
z2 = np.percentile(vertices[:, 1], 99)
x1, x2, y1, y2 = compute_xy_bbox(vertices) x1, x2, y1, y2 = compute_xy_bbox(vertices)
mesh_info[node]["pose"] = [x1, x2, y1, y2, z1, z2, *object_quat] mesh_info[node]["pose"] = [x1, x2, y1, y2, z1, z2, *object_quat]

View File

@ -552,9 +552,8 @@ class SemanticMatcher(BaseChecker):
def test_semantic_matcher( def test_semantic_matcher(
bg_file: str = "outputs/bg_scenes/bg_scene_list.txt", bg_file: str = "outputs/bg_scenes/scene_list.txt",
): ):
bg_file = "outputs/bg_scenes/bg_scene_list.txt"
scene_dict = {} scene_dict = {}
with open(bg_file, "r") as f: with open(bg_file, "r") as f:
for line in f: for line in f:

View File

@ -23,3 +23,7 @@ layout-cli --task_descs "Place the pen in the mug on the desk" \
--bg_list "outputs/bg_scenes/scene_list.txt" \ --bg_list "outputs/bg_scenes/scene_list.txt" \
--output_root "${output_dir}/layouts_gen" --insert_robot --output_root "${output_dir}/layouts_gen" --insert_robot
python embodied_gen/scripts/compose_layout.py \
--layout_path "outputs/layouts_gen/task_0000/layout.json" \
--output_dir "outputs/layouts_gen/task_0000/recompose" --insert_robot