fix(layout): Fix layout axis order after urdf origin rotation change. (#41)

This commit is contained in:
Xinjie 2025-09-26 17:23:01 +08:00 committed by GitHub
parent ee03a089b1
commit a34b1dacea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 73 additions and 23 deletions

View File

@ -37,6 +37,7 @@ def decompose_convex_coacd(
params: dict,
verbose: bool = False,
auto_scale: bool = True,
scale_factor: float = 1.0,
) -> None:
coacd.set_log_level("info" if verbose else "warn")
@ -53,6 +54,7 @@ def decompose_convex_coacd(
rescale = visual_mesh_shape / convex_mesh_shape
combined.vertices *= rescale
combined.vertices *= scale_factor
combined.export(outfile)
@ -71,6 +73,7 @@ def decompose_convex_mesh(
merge: bool = True,
seed: int = 0,
auto_scale: bool = True,
scale_factor: float = 1.005,
verbose: bool = False,
) -> str:
"""Decompose a mesh into convex parts using the CoACD algorithm."""
@ -95,7 +98,9 @@ def decompose_convex_mesh(
)
try:
decompose_convex_coacd(filename, outfile, params, verbose, auto_scale)
decompose_convex_coacd(
filename, outfile, params, verbose, auto_scale, scale_factor
)
if os.path.exists(outfile):
return outfile
except Exception as e:
@ -106,7 +111,7 @@ def decompose_convex_mesh(
try:
params["preprocess_mode"] = "on"
decompose_convex_coacd(
filename, outfile, params, verbose, auto_scale
filename, outfile, params, verbose, auto_scale, scale_factor
)
if os.path.exists(outfile):
return outfile

View File

@ -74,7 +74,9 @@ class PickEmbodiedGen(BaseEnv):
layout_file = kwargs.pop("layout_file", None)
replace_objs = kwargs.pop("replace_objs", True)
self.enable_grasp = kwargs.pop("enable_grasp", False)
self.init_quat = kwargs.pop("init_quat", [0.7071, 0, 0, 0.7071])
self.init_3dgs_quat = kwargs.pop(
"init_3dgs_quat", [0.7071, 0, 0, 0.7071]
)
# Add small offset in z-axis to avoid collision.
self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
@ -107,7 +109,7 @@ class PickEmbodiedGen(BaseEnv):
self.bg_images = dict()
if self.render_mode == "hybrid":
self.bg_images = self.render_gs3d_images(
self.layouts, num_envs, self.init_quat
self.layouts, num_envs, self.init_3dgs_quat
)
@staticmethod

View File

@ -77,9 +77,9 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
- {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be common
household or office items or furniture, not abstract concepts, not too small like needle.
- If the input includes a plural or grouped object (e.g., "pens", "bottles", "plates", "fruit"),
you must decompose it into multiple individual instances (e.g., ["pen", "pen"], ["apple", "pear"]).
you must decompose it into multiple individual instances (e.g., ["pen1", "pen2"], ["apple", "pear"]).
- Containers that hold objects (e.g., "bowl of apples", "box of tools") must
be separated into individual items (e.g., ["bowl", "apple", "apple"]).
be separated into individual items (e.g., ["bowl", "apple1", "apple2"]).
- Do not include transparent objects such as "glass", "plastic", etc.
- The output must be in compact JSON format and use Markdown syntax, just like the output in the example below.
@ -170,7 +170,7 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
"robot": "franka",
"background": "office",
"context": "table",
"manipulated_objs": ["pen", "pen", "grey bowl"],
"manipulated_objs": ["pen1", "pen2", "grey bowl"],
"distractor_objs": ["notepad", "cup"]
}}
```

View File

@ -16,6 +16,7 @@
import json
import os
import shutil
from dataclasses import dataclass
import tyro
@ -51,6 +52,12 @@ def entrypoint(**kwargs):
out_layout_path = f"{output_dir}/layout.json"
layout_info = bfs_placement(args.layout_path, seed=args.seed)
origin_dir = os.path.dirname(args.layout_path)
for key in layout_info.assets:
src = f"{origin_dir}/{layout_info.assets[key]}"
dst = f"{output_dir}/{layout_info.assets[key]}"
shutil.copytree(src, dst, dirs_exist_ok=True)
with open(out_layout_path, "w") as f:
json.dump(layout_info.to_dict(), f, indent=4)

View File

@ -49,7 +49,7 @@ class SapienSimConfig:
sim_freq: int = 200
sim_step: int = 400
z_offset: float = 0.004
init_quat: list[float] = field(
init_3dgs_quat: list[float] = field(
default_factory=lambda: [0.7071, 0, 0, 0.7071]
) # xyzw
device: str = "cuda"
@ -137,7 +137,7 @@ def entrypoint(**kwargs):
gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)
qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_3dgs_quat)
init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
gs_model = gs_model.get_gaussians(instance_pose=init_pose)

View File

@ -80,7 +80,7 @@ def pose_to_matrix(pose: list[float]) -> np.ndarray:
def compute_xy_bbox(
vertices: np.ndarray, col_x: int = 0, col_y: int = 2
vertices: np.ndarray, col_x: int = 0, col_y: int = 1
) -> list[float]:
x_vals = vertices[:, col_x]
y_vals = vertices[:, col_y]
@ -139,11 +139,14 @@ def compute_convex_hull_path(
z_threshold: float = 0.05,
interp_per_edge: int = 3,
margin: float = -0.02,
x_axis: int = 0,
y_axis: int = 1,
z_axis: int = 2,
) -> Path:
top_vertices = vertices[
vertices[:, 1] > vertices[:, 1].max() - z_threshold
vertices[:, z_axis] > vertices[:, z_axis].max() - z_threshold
]
top_xy = top_vertices[:, [0, 2]]
top_xy = top_vertices[:, [x_axis, y_axis]]
if len(top_xy) < 3:
raise ValueError("Not enough points to form a convex hull")
@ -184,11 +187,11 @@ def all_corners_inside(hull: Path, box: list, threshold: int = 3) -> bool:
def compute_axis_rotation_quat(
axis: Literal["x", "y", "z"], angle_rad: float
) -> list[float]:
if axis.lower() == 'x':
if axis.lower() == "x":
q = Quaternion(axis=[1, 0, 0], angle=angle_rad)
elif axis.lower() == 'y':
elif axis.lower() == "y":
q = Quaternion(axis=[0, 1, 0], angle=angle_rad)
elif axis.lower() == 'z':
elif axis.lower() == "z":
q = Quaternion(axis=[0, 0, 1], angle=angle_rad)
else:
raise ValueError(f"Unsupported axis '{axis}', must be one of x, y, z")
@ -226,12 +229,34 @@ def bfs_placement(
floor_margin: float = 0,
beside_margin: float = 0.1,
max_attempts: int = 3000,
init_rpy: tuple = (1.5708, 0.0, 0.0),
rotate_objs: bool = True,
rotate_bg: bool = True,
rotate_context: bool = True,
limit_reach_range: bool = True,
robot_dim: float = 0.12,
seed: int = None,
) -> LayoutInfo:
"""Place objects in the layout using BFS traversal.
Args:
layout_file: Path to the JSON file defining the layout structure and assets.
floor_margin: Z-offset for the background object, typically for objects placed on the floor.
beside_margin: Minimum margin for objects placed 'beside' their parent, used when 'on' placement fails.
max_attempts: Maximum number of attempts to find a non-overlapping position for an object.
init_rpy: Initial roll-pitch-yaw rotation (in radians) applied to all object meshes to align the mesh's
coordinate system with the world's (e.g., Z-up).
rotate_objs: If True, apply a random rotation around the Z-axis for manipulated and distractor objects.
rotate_bg: If True, apply a random rotation around the Y-axis for the background object.
rotate_context: If True, apply a random rotation around the Z-axis for the context object.
limit_reach_range: If True, enforce a check that manipulated objects are within the robot's reach.
robot_dim: The approximate dimension (e.g., diameter) of the robot for box representation.
seed: Random seed for reproducible placement.
Returns:
A :class:`LayoutInfo` object containing the objects and their final computed 7D poses
([x, y, z, qx, qy, qz, qw]).
"""
layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
asset_dir = os.path.dirname(layout_file)
object_mapping = layout_info.objs_mapping
@ -259,13 +284,23 @@ def bfs_placement(
mesh_path = os.path.join(asset_dir, mesh_path)
mesh_info[node]["path"] = mesh_path
mesh = trimesh.load(mesh_path)
vertices = mesh.vertices
z1 = np.percentile(vertices[:, 1], 1)
z2 = np.percentile(vertices[:, 1], 99)
rotation = R.from_euler("xyz", init_rpy, degrees=False)
vertices = mesh.vertices @ rotation.as_matrix().T
z1 = np.percentile(vertices[:, 2], 1)
z2 = np.percentile(vertices[:, 2], 99)
if object_mapping[node] == Scene3DItemEnum.CONTEXT.value:
object_quat = [0, 0, 0, 1]
if rotate_context:
angle_rad = np.random.uniform(0, 2 * np.pi)
object_quat = compute_axis_rotation_quat(
axis="z", angle_rad=angle_rad
)
rotation = R.from_quat(object_quat).as_matrix()
vertices = vertices @ rotation.T
mesh_info[node]["surface"] = compute_convex_hull_path(vertices)
# Place the robot at a randomly chosen vertex on the edge of the CONTEXT surface hull.
x, y = random.choice(mesh_info[node]["surface"].vertices)
theta = np.arctan2(y, x)
@ -288,9 +323,7 @@ def bfs_placement(
axis="z", angle_rad=angle_rad
)
rotation = R.from_quat(object_quat).as_matrix()
vertices = np.dot(mesh.vertices, rotation.T)
z1 = np.percentile(vertices[:, 1], 1)
z2 = np.percentile(vertices[:, 1], 99)
vertices = vertices @ rotation.T
x1, x2, y1, y2 = compute_xy_bbox(vertices)
mesh_info[node]["pose"] = [x1, x2, y1, y2, z1, z2, *object_quat]

View File

@ -552,9 +552,8 @@ class SemanticMatcher(BaseChecker):
def test_semantic_matcher(
bg_file: str = "outputs/bg_scenes/bg_scene_list.txt",
bg_file: str = "outputs/bg_scenes/scene_list.txt",
):
bg_file = "outputs/bg_scenes/bg_scene_list.txt"
scene_dict = {}
with open(bg_file, "r") as f:
for line in f:

View File

@ -23,3 +23,7 @@ layout-cli --task_descs "Place the pen in the mug on the desk" \
--bg_list "outputs/bg_scenes/scene_list.txt" \
--output_root "${output_dir}/layouts_gen" --insert_robot
python embodied_gen/scripts/compose_layout.py \
--layout_path "outputs/layouts_gen/task_0000/layout.json" \
--output_dir "outputs/layouts_gen/task_0000/recompose" --insert_robot