fix(layout): Fix layout axis order after urdf origin rotation change. (#41)

2025-09-26 17:23:01 +08:00 · 2025-09-26 17:23:01 +08:00 · a34b1dacea
commit a34b1dacea
parent ee03a089b1
8 changed files with 73 additions and 23 deletions
--- a/embodied_gen/data/convex_decomposer.py
+++ b/embodied_gen/data/convex_decomposer.py
@ -37,6 +37,7 @@ def decompose_convex_coacd(
    params: dict,
    verbose: bool = False,
    auto_scale: bool = True,
    scale_factor: float = 1.0,
 ) -> None:
    coacd.set_log_level("info" if verbose else "warn")
@ -53,6 +54,7 @@ def decompose_convex_coacd(
        rescale = visual_mesh_shape / convex_mesh_shape
        combined.vertices *= rescale
    combined.vertices *= scale_factor
    combined.export(outfile)
@ -71,6 +73,7 @@ def decompose_convex_mesh(
    merge: bool = True,
    seed: int = 0,
    auto_scale: bool = True,
    scale_factor: float = 1.005,
    verbose: bool = False,
 ) -> str:
    """Decompose a mesh into convex parts using the CoACD algorithm."""
@ -95,7 +98,9 @@ def decompose_convex_mesh(
    )
    try:
-        decompose_convex_coacd(filename, outfile, params, verbose, auto_scale)
+        decompose_convex_coacd(
            filename, outfile, params, verbose, auto_scale, scale_factor
        )
        if os.path.exists(outfile):
            return outfile
    except Exception as e:
@ -106,7 +111,7 @@ def decompose_convex_mesh(
        try:
            params["preprocess_mode"] = "on"
            decompose_convex_coacd(
-                filename, outfile, params, verbose, auto_scale
+                filename, outfile, params, verbose, auto_scale, scale_factor
            )
            if os.path.exists(outfile):
                return outfile
--- a/embodied_gen/envs/pick_embodiedgen.py
+++ b/embodied_gen/envs/pick_embodiedgen.py
@ -74,7 +74,9 @@ class PickEmbodiedGen(BaseEnv):
        layout_file = kwargs.pop("layout_file", None)
        replace_objs = kwargs.pop("replace_objs", True)
        self.enable_grasp = kwargs.pop("enable_grasp", False)
-        self.init_quat = kwargs.pop("init_quat", [0.7071, 0, 0, 0.7071])
+        self.init_3dgs_quat = kwargs.pop(
            "init_3dgs_quat", [0.7071, 0, 0, 0.7071]
        )
        # Add small offset in z-axis to avoid collision.
        self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
        self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
@ -107,7 +109,7 @@ class PickEmbodiedGen(BaseEnv):
        self.bg_images = dict()
        if self.render_mode == "hybrid":
            self.bg_images = self.render_gs3d_images(
-                self.layouts, num_envs, self.init_quat
+                self.layouts, num_envs, self.init_3dgs_quat
            )
    @staticmethod
--- a/embodied_gen/models/layout.py
+++ b/embodied_gen/models/layout.py
@ -77,9 +77,9 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
    - {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be common
        household or office items or furniture, not abstract concepts, not too small like needle.
    - If the input includes a plural or grouped object (e.g., "pens", "bottles", "plates", "fruit"),
-        you must decompose it into multiple individual instances (e.g., ["pen", "pen"], ["apple", "pear"]).
+        you must decompose it into multiple individual instances (e.g., ["pen1", "pen2"], ["apple", "pear"]).
    - Containers that hold objects (e.g., "bowl of apples", "box of tools") must
-        be separated into individual items (e.g., ["bowl", "apple", "apple"]).
+        be separated into individual items (e.g., ["bowl", "apple1", "apple2"]).
    - Do not include transparent objects such as "glass", "plastic", etc.
    - The output must be in compact JSON format and use Markdown syntax, just like the output in the example below.
@ -170,7 +170,7 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
        "robot": "franka",
        "background": "office",
        "context": "table",
-        "manipulated_objs": ["pen", "pen", "grey bowl"],
+        "manipulated_objs": ["pen1", "pen2", "grey bowl"],
        "distractor_objs": ["notepad", "cup"]
    }}
    ```
--- a/embodied_gen/scripts/compose_layout.py
+++ b/embodied_gen/scripts/compose_layout.py
@ -16,6 +16,7 @@
 import json
 import os
 import shutil
 from dataclasses import dataclass
 import tyro
@ -51,6 +52,12 @@ def entrypoint(**kwargs):
    out_layout_path = f"{output_dir}/layout.json"
    layout_info = bfs_placement(args.layout_path, seed=args.seed)
    origin_dir = os.path.dirname(args.layout_path)
    for key in layout_info.assets:
        src = f"{origin_dir}/{layout_info.assets[key]}"
        dst = f"{output_dir}/{layout_info.assets[key]}"
        shutil.copytree(src, dst, dirs_exist_ok=True)
    with open(out_layout_path, "w") as f:
        json.dump(layout_info.to_dict(), f, indent=4)
--- a/embodied_gen/scripts/simulate_sapien.py
+++ b/embodied_gen/scripts/simulate_sapien.py
@ -49,7 +49,7 @@ class SapienSimConfig:
    sim_freq: int = 200
    sim_step: int = 400
    z_offset: float = 0.004
-    init_quat: list[float] = field(
+    init_3dgs_quat: list[float] = field(
        default_factory=lambda: [0.7071, 0, 0, 0.7071]
    )  # xyzw
    device: str = "cuda"
@ -137,7 +137,7 @@ def entrypoint(**kwargs):
    gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
    gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
    x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
-    qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)
+    qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_3dgs_quat)
    init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
    gs_model = gs_model.get_gaussians(instance_pose=init_pose)
--- a/embodied_gen/utils/geometry.py
+++ b/embodied_gen/utils/geometry.py
@ -80,7 +80,7 @@ def pose_to_matrix(pose: list[float]) -> np.ndarray:
 def compute_xy_bbox(
-    vertices: np.ndarray, col_x: int = 0, col_y: int = 2
+    vertices: np.ndarray, col_x: int = 0, col_y: int = 1
 ) -> list[float]:
    x_vals = vertices[:, col_x]
    y_vals = vertices[:, col_y]
@ -139,11 +139,14 @@ def compute_convex_hull_path(
    z_threshold: float = 0.05,
    interp_per_edge: int = 3,
    margin: float = -0.02,
    x_axis: int = 0,
    y_axis: int = 1,
    z_axis: int = 2,
 ) -> Path:
    top_vertices = vertices[
-        vertices[:, 1] > vertices[:, 1].max() - z_threshold
+        vertices[:, z_axis] > vertices[:, z_axis].max() - z_threshold
    ]
-    top_xy = top_vertices[:, [0, 2]]
+    top_xy = top_vertices[:, [x_axis, y_axis]]
    if len(top_xy) < 3:
        raise ValueError("Not enough points to form a convex hull")
@ -184,11 +187,11 @@ def all_corners_inside(hull: Path, box: list, threshold: int = 3) -> bool:
 def compute_axis_rotation_quat(
    axis: Literal["x", "y", "z"], angle_rad: float
 ) -> list[float]:
-    if axis.lower() == 'x':
+    if axis.lower() == "x":
        q = Quaternion(axis=[1, 0, 0], angle=angle_rad)
-    elif axis.lower() == 'y':
+    elif axis.lower() == "y":
        q = Quaternion(axis=[0, 1, 0], angle=angle_rad)
-    elif axis.lower() == 'z':
+    elif axis.lower() == "z":
        q = Quaternion(axis=[0, 0, 1], angle=angle_rad)
    else:
        raise ValueError(f"Unsupported axis '{axis}', must be one of x, y, z")
@ -226,12 +229,34 @@ def bfs_placement(
    floor_margin: float = 0,
    beside_margin: float = 0.1,
    max_attempts: int = 3000,
    init_rpy: tuple = (1.5708, 0.0, 0.0),
    rotate_objs: bool = True,
    rotate_bg: bool = True,
    rotate_context: bool = True,
    limit_reach_range: bool = True,
    robot_dim: float = 0.12,
    seed: int = None,
 ) -> LayoutInfo:
    """Place objects in the layout using BFS traversal.
    Args:
        layout_file: Path to the JSON file defining the layout structure and assets.
        floor_margin: Z-offset for the background object, typically for objects placed on the floor.
        beside_margin: Minimum margin for objects placed 'beside' their parent, used when 'on' placement fails.
        max_attempts: Maximum number of attempts to find a non-overlapping position for an object.
        init_rpy: Initial roll-pitch-yaw rotation, in radians, applied to all object meshes to align the mesh's
            coordinate system with the world's (e.g., Z-up).
        rotate_objs: If True, apply a random rotation around the Z-axis for manipulated and distractor objects.
        rotate_bg: If True, apply a random rotation around the Y-axis for the background object.
        rotate_context: If True, apply a random rotation around the Z-axis for the context object.
        limit_reach_range: If True, enforce a check that manipulated objects are within the robot's reach.
        robot_dim: The approximate dimension (e.g., diameter) of the robot for box representation.
        seed: Random seed for reproducible placement.
    Returns:
        A :class:`LayoutInfo` object containing the objects and their final computed 7D poses
        ([x, y, z, qx, qy, qz, qw]).
    """
    layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
    asset_dir = os.path.dirname(layout_file)
    object_mapping = layout_info.objs_mapping
@ -259,13 +284,23 @@ def bfs_placement(
        mesh_path = os.path.join(asset_dir, mesh_path)
        mesh_info[node]["path"] = mesh_path
        mesh = trimesh.load(mesh_path)
-        vertices = mesh.vertices
+        rotation = R.from_euler("xyz", init_rpy, degrees=False)
-        z1 = np.percentile(vertices[:, 1], 1)
+        vertices = mesh.vertices @ rotation.as_matrix().T
-        z2 = np.percentile(vertices[:, 1], 99)
+        z1 = np.percentile(vertices[:, 2], 1)
        z2 = np.percentile(vertices[:, 2], 99)
        if object_mapping[node] == Scene3DItemEnum.CONTEXT.value:
            object_quat = [0, 0, 0, 1]
            if rotate_context:
                angle_rad = np.random.uniform(0, 2 * np.pi)
                object_quat = compute_axis_rotation_quat(
                    axis="z", angle_rad=angle_rad
                )
                rotation = R.from_quat(object_quat).as_matrix()
                vertices = vertices @ rotation.T
            mesh_info[node]["surface"] = compute_convex_hull_path(vertices)
            # Put robot in the CONTEXT edge.
            x, y = random.choice(mesh_info[node]["surface"].vertices)
            theta = np.arctan2(y, x)
@ -288,9 +323,7 @@ def bfs_placement(
                axis="z", angle_rad=angle_rad
            )
            rotation = R.from_quat(object_quat).as_matrix()
-            vertices = np.dot(mesh.vertices, rotation.T)
+            vertices = vertices @ rotation.T
            z1 = np.percentile(vertices[:, 1], 1)
            z2 = np.percentile(vertices[:, 1], 99)
        x1, x2, y1, y2 = compute_xy_bbox(vertices)
        mesh_info[node]["pose"] = [x1, x2, y1, y2, z1, z2, *object_quat]
--- a/embodied_gen/validators/quality_checkers.py
+++ b/embodied_gen/validators/quality_checkers.py
@ -552,9 +552,8 @@ class SemanticMatcher(BaseChecker):
 def test_semantic_matcher(
-    bg_file: str = "outputs/bg_scenes/bg_scene_list.txt",
+    bg_file: str = "outputs/bg_scenes/scene_list.txt",
 ):
    bg_file = "outputs/bg_scenes/bg_scene_list.txt"
    scene_dict = {}
    with open(bg_file, "r") as f:
        for line in f:
--- a/tests/test_integration/test_pipeline.sh
+++ b/tests/test_integration/test_pipeline.sh
@ -23,3 +23,7 @@ layout-cli --task_descs "Place the pen in the mug on the desk" \
 --bg_list "outputs/bg_scenes/scene_list.txt" \
 --output_root "${output_dir}/layouts_gen" --insert_robot
 python embodied_gen/scripts/compose_layout.py \
 --layout_path "outputs/layouts_gen/task_0000/layout.json" \
 --output_dir "outputs/layouts_gen/task_0000/recompose" --insert_robot