diff --git a/embodied_gen/data/convex_decomposer.py b/embodied_gen/data/convex_decomposer.py index 648f72a..57af815 100644 --- a/embodied_gen/data/convex_decomposer.py +++ b/embodied_gen/data/convex_decomposer.py @@ -37,6 +37,7 @@ def decompose_convex_coacd( params: dict, verbose: bool = False, auto_scale: bool = True, + scale_factor: float = 1.0, ) -> None: coacd.set_log_level("info" if verbose else "warn") @@ -53,6 +54,7 @@ def decompose_convex_coacd( rescale = visual_mesh_shape / convex_mesh_shape combined.vertices *= rescale + combined.vertices *= scale_factor combined.export(outfile) @@ -71,6 +73,7 @@ def decompose_convex_mesh( merge: bool = True, seed: int = 0, auto_scale: bool = True, + scale_factor: float = 1.005, verbose: bool = False, ) -> str: """Decompose a mesh into convex parts using the CoACD algorithm.""" @@ -95,7 +98,9 @@ def decompose_convex_mesh( ) try: - decompose_convex_coacd(filename, outfile, params, verbose, auto_scale) + decompose_convex_coacd( + filename, outfile, params, verbose, auto_scale, scale_factor + ) if os.path.exists(outfile): return outfile except Exception as e: @@ -106,7 +111,7 @@ def decompose_convex_mesh( try: params["preprocess_mode"] = "on" decompose_convex_coacd( - filename, outfile, params, verbose, auto_scale + filename, outfile, params, verbose, auto_scale, scale_factor ) if os.path.exists(outfile): return outfile diff --git a/embodied_gen/envs/pick_embodiedgen.py b/embodied_gen/envs/pick_embodiedgen.py index b8fde92..b654bcc 100644 --- a/embodied_gen/envs/pick_embodiedgen.py +++ b/embodied_gen/envs/pick_embodiedgen.py @@ -74,7 +74,9 @@ class PickEmbodiedGen(BaseEnv): layout_file = kwargs.pop("layout_file", None) replace_objs = kwargs.pop("replace_objs", True) self.enable_grasp = kwargs.pop("enable_grasp", False) - self.init_quat = kwargs.pop("init_quat", [0.7071, 0, 0, 0.7071]) + self.init_3dgs_quat = kwargs.pop( + "init_3dgs_quat", [0.7071, 0, 0, 0.7071] + ) # Add small offset in z-axis to avoid collision. 
self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002) self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002) @@ -107,7 +109,7 @@ class PickEmbodiedGen(BaseEnv): self.bg_images = dict() if self.render_mode == "hybrid": self.bg_images = self.render_gs3d_images( - self.layouts, num_envs, self.init_quat + self.layouts, num_envs, self.init_3dgs_quat ) @staticmethod diff --git a/embodied_gen/models/layout.py b/embodied_gen/models/layout.py index 97ea38b..c3e9f23 100644 --- a/embodied_gen/models/layout.py +++ b/embodied_gen/models/layout.py @@ -77,9 +77,9 @@ LAYOUT_DISASSEMBLE_PROMPT = f""" - {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be common household or office items or furniture, not abstract concepts, not too small like needle. - If the input includes a plural or grouped object (e.g., "pens", "bottles", "plates", "fruit"), - you must decompose it into multiple individual instances (e.g., ["pen", "pen"], ["apple", "pear"]). + you must decompose it into multiple individual instances (e.g., ["pen1", "pen2"], ["apple", "pear"]). - Containers that hold objects (e.g., "bowl of apples", "box of tools") must - be separated into individual items (e.g., ["bowl", "apple", "apple"]). + be separated into individual items (e.g., ["bowl", "apple1", "apple2"]). - Do not include transparent objects such as "glass", "plastic", etc. - The output must be in compact JSON format and use Markdown syntax, just like the output in the example below. 
@@ -170,7 +170,7 @@ LAYOUT_DISASSEMBLE_PROMPT = f""" "robot": "franka", "background": "office", "context": "table", - "manipulated_objs": ["pen", "pen", "grey bowl"], + "manipulated_objs": ["pen1", "pen2", "grey bowl"], "distractor_objs": ["notepad", "cup"] }} ``` diff --git a/embodied_gen/scripts/compose_layout.py b/embodied_gen/scripts/compose_layout.py index 4875574..0dae279 100644 --- a/embodied_gen/scripts/compose_layout.py +++ b/embodied_gen/scripts/compose_layout.py @@ -16,6 +16,7 @@ import json import os +import shutil from dataclasses import dataclass import tyro @@ -51,6 +52,12 @@ def entrypoint(**kwargs): out_layout_path = f"{output_dir}/layout.json" layout_info = bfs_placement(args.layout_path, seed=args.seed) + origin_dir = os.path.dirname(args.layout_path) + for key in layout_info.assets: + src = f"{origin_dir}/{layout_info.assets[key]}" + dst = f"{output_dir}/{layout_info.assets[key]}" + shutil.copytree(src, dst, dirs_exist_ok=True) + with open(out_layout_path, "w") as f: json.dump(layout_info.to_dict(), f, indent=4) diff --git a/embodied_gen/scripts/simulate_sapien.py b/embodied_gen/scripts/simulate_sapien.py index f62c950..8f25c47 100644 --- a/embodied_gen/scripts/simulate_sapien.py +++ b/embodied_gen/scripts/simulate_sapien.py @@ -49,7 +49,7 @@ class SapienSimConfig: sim_freq: int = 200 sim_step: int = 400 z_offset: float = 0.004 - init_quat: list[float] = field( + init_3dgs_quat: list[float] = field( default_factory=lambda: [0.7071, 0, 0, 0.7071] ) # xyzw device: str = "cuda" @@ -137,7 +137,7 @@ def entrypoint(**kwargs): gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply" gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path) x, y, z, qx, qy, qz, qw = layout_data.position[bg_node] - qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat) + qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_3dgs_quat) init_pose = torch.tensor([x, y, z, qx, qy, qz, qw]) gs_model = 
gs_model.get_gaussians(instance_pose=init_pose) diff --git a/embodied_gen/utils/geometry.py b/embodied_gen/utils/geometry.py index b1059ea..c265db6 100644 --- a/embodied_gen/utils/geometry.py +++ b/embodied_gen/utils/geometry.py @@ -80,7 +80,7 @@ def pose_to_matrix(pose: list[float]) -> np.ndarray: def compute_xy_bbox( - vertices: np.ndarray, col_x: int = 0, col_y: int = 2 + vertices: np.ndarray, col_x: int = 0, col_y: int = 1 ) -> list[float]: x_vals = vertices[:, col_x] y_vals = vertices[:, col_y] @@ -139,11 +139,14 @@ def compute_convex_hull_path( z_threshold: float = 0.05, interp_per_edge: int = 3, margin: float = -0.02, + x_axis: int = 0, + y_axis: int = 1, + z_axis: int = 2, ) -> Path: top_vertices = vertices[ - vertices[:, 1] > vertices[:, 1].max() - z_threshold + vertices[:, z_axis] > vertices[:, z_axis].max() - z_threshold ] - top_xy = top_vertices[:, [0, 2]] + top_xy = top_vertices[:, [x_axis, y_axis]] if len(top_xy) < 3: raise ValueError("Not enough points to form a convex hull") @@ -184,11 +187,11 @@ def all_corners_inside(hull: Path, box: list, threshold: int = 3) -> bool: def compute_axis_rotation_quat( axis: Literal["x", "y", "z"], angle_rad: float ) -> list[float]: - if axis.lower() == 'x': + if axis.lower() == "x": q = Quaternion(axis=[1, 0, 0], angle=angle_rad) - elif axis.lower() == 'y': + elif axis.lower() == "y": q = Quaternion(axis=[0, 1, 0], angle=angle_rad) - elif axis.lower() == 'z': + elif axis.lower() == "z": q = Quaternion(axis=[0, 0, 1], angle=angle_rad) else: raise ValueError(f"Unsupported axis '{axis}', must be one of x, y, z") @@ -226,12 +229,34 @@ def bfs_placement( floor_margin: float = 0, beside_margin: float = 0.1, max_attempts: int = 3000, + init_rpy: tuple = (1.5708, 0.0, 0.0), rotate_objs: bool = True, rotate_bg: bool = True, + rotate_context: bool = True, limit_reach_range: bool = True, robot_dim: float = 0.12, seed: int = None, ) -> LayoutInfo: + """Place objects in the layout using BFS traversal. 
+ + Args: + layout_file: Path to the JSON file defining the layout structure and assets. + floor_margin: Z-offset for the background object, typically for objects placed on the floor. + beside_margin: Minimum margin for objects placed 'beside' their parent, used when 'on' placement fails. + max_attempts: Maximum number of attempts to find a non-overlapping position for an object. + init_rpy: Initial Roll-Pitch-Yaw rotation (in radians) applied to all object meshes to align the mesh's + coordinate system with the world's (e.g., Z-up). + rotate_objs: If True, apply a random rotation around the Z-axis for manipulated and distractor objects. + rotate_bg: If True, apply a random rotation around the Y-axis for the background object. + rotate_context: If True, apply a random rotation around the Z-axis for the context object. + limit_reach_range: If True, enforce a check that manipulated objects are within the robot's reach. + robot_dim: The approximate dimension (e.g., diameter) of the robot for box representation. + seed: Random seed for reproducible placement. + + Returns: + A :class:`LayoutInfo` object containing the objects and their final computed 7D poses + ([x, y, z, qx, qy, qz, qw]). 
+ """ layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r"))) asset_dir = os.path.dirname(layout_file) object_mapping = layout_info.objs_mapping @@ -259,13 +284,23 @@ def bfs_placement( mesh_path = os.path.join(asset_dir, mesh_path) mesh_info[node]["path"] = mesh_path mesh = trimesh.load(mesh_path) - vertices = mesh.vertices - z1 = np.percentile(vertices[:, 1], 1) - z2 = np.percentile(vertices[:, 1], 99) + rotation = R.from_euler("xyz", init_rpy, degrees=False) + vertices = mesh.vertices @ rotation.as_matrix().T + z1 = np.percentile(vertices[:, 2], 1) + z2 = np.percentile(vertices[:, 2], 99) if object_mapping[node] == Scene3DItemEnum.CONTEXT.value: object_quat = [0, 0, 0, 1] + if rotate_context: + angle_rad = np.random.uniform(0, 2 * np.pi) + object_quat = compute_axis_rotation_quat( + axis="z", angle_rad=angle_rad + ) + rotation = R.from_quat(object_quat).as_matrix() + vertices = vertices @ rotation.T + mesh_info[node]["surface"] = compute_convex_hull_path(vertices) + # Put robot in the CONTEXT edge. 
x, y = random.choice(mesh_info[node]["surface"].vertices) theta = np.arctan2(y, x) @@ -288,9 +323,7 @@ def bfs_placement( axis="z", angle_rad=angle_rad ) rotation = R.from_quat(object_quat).as_matrix() - vertices = np.dot(mesh.vertices, rotation.T) - z1 = np.percentile(vertices[:, 1], 1) - z2 = np.percentile(vertices[:, 1], 99) + vertices = vertices @ rotation.T x1, x2, y1, y2 = compute_xy_bbox(vertices) mesh_info[node]["pose"] = [x1, x2, y1, y2, z1, z2, *object_quat] diff --git a/embodied_gen/validators/quality_checkers.py b/embodied_gen/validators/quality_checkers.py index d7642c4..e289310 100644 --- a/embodied_gen/validators/quality_checkers.py +++ b/embodied_gen/validators/quality_checkers.py @@ -552,9 +552,8 @@ class SemanticMatcher(BaseChecker): def test_semantic_matcher( - bg_file: str = "outputs/bg_scenes/bg_scene_list.txt", + bg_file: str = "outputs/bg_scenes/scene_list.txt", ): - bg_file = "outputs/bg_scenes/bg_scene_list.txt" scene_dict = {} with open(bg_file, "r") as f: for line in f: diff --git a/tests/test_integration/test_pipeline.sh b/tests/test_integration/test_pipeline.sh index 12cca6f..7135723 100644 --- a/tests/test_integration/test_pipeline.sh +++ b/tests/test_integration/test_pipeline.sh @@ -23,3 +23,7 @@ layout-cli --task_descs "Place the pen in the mug on the desk" \ --bg_list "outputs/bg_scenes/scene_list.txt" \ --output_root "${output_dir}/layouts_gen" --insert_robot + +python embodied_gen/scripts/compose_layout.py \ +--layout_path "outputs/layouts_gen/task_0000/layout.json" \ +--output_dir "outputs/layouts_gen/task_0000/recompose" --insert_robot \ No newline at end of file