feat(texture): Optimize backprojected texture quality and add texture-cli. (#38)

* feat(sim): Add auto scale in convex decomposition. * feat(texture): Optimize back-projected texture quality. * feat(texture): Add `texture-cli`.
2025-09-08 11:15:04 +08:00 · 2025-09-08 11:15:04 +08:00 · cf3b919b65
commit cf3b919b65
parent 768d1fbb1d
18 changed files with 266 additions and 67 deletions
--- a/README.md
+++ b/README.md
@ -147,15 +147,12 @@ python apps/texture_edit.py
 ### ⚡ API
 Support Chinese and English prompts.
 ```sh
-bash embodied_gen/scripts/texture_gen.sh \
+texture-cli --mesh_path "apps/assets/example_texture/meshes/robot_text.obj" \
-    --mesh_path "apps/assets/example_texture/meshes/robot_text.obj" \
+"apps/assets/example_texture/meshes/horse.obj" \
-    --prompt "举着牌子的写实风格机器人，大眼睛，牌子上写着“Hello”的文字" \
+--prompt "举着牌子的写实风格机器人，大眼睛，牌子上写着“Hello”的文字" \
-    --output_root "outputs/texture_gen/robot_text"
+"A gray horse head with flying mane and brown eyes" \
-
+--output_root "outputs/texture_gen" \
-bash embodied_gen/scripts/texture_gen.sh \
+--seed 0
    --mesh_path "apps/assets/example_texture/meshes/horse.obj" \
    --prompt "A gray horse head with flying mane and brown eyes" \
    --output_root "outputs/texture_gen/gray_horse"
 ```
 ---
@ -185,7 +182,7 @@ CUDA_VISIBLE_DEVICES=0 scene3d-cli \
 🚧 *Coming Soon*
-<img src="apps/assets/articulate.gif" alt="articulate" style="width: 430px;">
+<img src="apps/assets/articulate.gif" alt="articulate" style="width: 500px;">
 ---
--- a/apps/common.py
+++ b/apps/common.py
@ -503,7 +503,12 @@ def extract_3d_representations_v2(
        device="cpu",
    )
    color_path = os.path.join(user_dir, "color.png")
-    render_gs_api(aligned_gs_path, color_path)
+    render_gs_api(
        input_gs=aligned_gs_path,
        output_path=color_path,
        elevation=[20, -10, 60, -50],
        num_images=12,
    )
    mesh = trimesh.Trimesh(
        vertices=mesh_model.vertices.cpu().numpy(),
@ -524,6 +529,8 @@ def extract_3d_representations_v2(
        skip_fix_mesh=False,
        delight=enable_delight,
        texture_wh=[texture_size, texture_size],
        elevation=[20, -10, 60, -50],
        num_images=12,
    )
    mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
--- a/embodied_gen/data/backproject_v2.py
+++ b/embodied_gen/data/backproject_v2.py
@ -33,6 +33,7 @@ from embodied_gen.data.mesh_operator import MeshFixer
 from embodied_gen.data.utils import (
    CameraSetting,
    DiffrastRender,
    as_list,
    get_images_from_grid,
    init_kal_camera,
    normalize_vertices_array,
@ -41,6 +42,7 @@ from embodied_gen.data.utils import (
 )
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.sr_model import ImageRealESRGAN
 from embodied_gen.utils.process_media import vcat_pil_images
 logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
@ -541,8 +543,9 @@ def parse_args():
    parser = argparse.ArgumentParser(description="Backproject texture")
    parser.add_argument(
        "--color_path",
        nargs="+",
        type=str,
-        help="Multiview color image in 6x512x512 file path",
+        help="Multiview color image in grid file paths",
    )
    parser.add_argument(
        "--mesh_path",
@ -559,7 +562,7 @@ def parse_args():
    )
    parser.add_argument(
        "--elevation",
-        nargs=2,
+        nargs="+",
        type=float,
        default=[20.0, -10.0],
        help="Elevation angles for the camera (default: [20.0, -10.0])",
@ -647,19 +650,23 @@ def entrypoint(
        fov=math.radians(args.fov),
        device=args.device,
    )
    view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
-    color_grid = Image.open(args.color_path)
+    args.color_path = as_list(args.color_path)
-    if args.delight:
+    if args.delight and delight_model is None:
        if delight_model is None:
        delight_model = DelightingModel()
-        save_dir = os.path.dirname(args.output_path)
+
-        os.makedirs(save_dir, exist_ok=True)
+    color_grid = [Image.open(color_path) for color_path in args.color_path]
    color_grid = vcat_pil_images(color_grid, image_mode="RGBA")
    if args.delight:
        color_grid = delight_model(color_grid)
        if not args.no_save_delight_img:
-            color_grid.save(f"{save_dir}/color_grid_delight.png")
+            save_dir = os.path.dirname(args.output_path)
            os.makedirs(save_dir, exist_ok=True)
            color_grid.save(f"{save_dir}/color_delight.png")
    multiviews = get_images_from_grid(color_grid, img_size=512)
    view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
    view_weights += [0.01] * (len(multiviews) - len(view_weights))
    # Use RealESRGAN_x4plus for x4 (512->2048) image super resolution.
    if imagesr_model is None:
@ -688,7 +695,7 @@ def entrypoint(
    texture_backer = TextureBacker(
        camera_params=camera_params,
        view_weights=view_weights,
-        render_wh=camera_params.resolution_hw,
+        render_wh=args.resolution_hw,
        texture_wh=args.texture_wh,
        smooth_texture=not args.no_smooth_texture,
    )
--- a/embodied_gen/data/differentiable_render.py
+++ b/embodied_gen/data/differentiable_render.py
@ -503,7 +503,7 @@ def parse_args():
        help="Whether to generate global normal .mp4 rendering file.",
    )
    parser.add_argument(
-        "--prompts",
+        "--video_prompts",
        type=str,
        nargs="+",
        default=None,
@ -579,7 +579,7 @@ def entrypoint(**kwargs) -> None:
        mesh_path=args.mesh_path,
        output_root=args.output_root,
        uuid=args.uuid,
-        prompts=args.prompts,
+        prompts=args.video_prompts,
    )
    return
--- a/embodied_gen/data/utils.py
+++ b/embodied_gen/data/utils.py
@ -28,7 +28,7 @@ import numpy as np
 import nvdiffrast.torch as dr
 import torch
 import torch.nn.functional as F
-from PIL import Image
+from PIL import Image, ImageEnhance
 try:
    from kolors.models.modeling_chatglm import ChatGLMModel
@ -698,6 +698,8 @@ def as_list(obj):
        return obj
    elif isinstance(obj, set):
        return list(obj)
    elif obj is None:
        return obj
    else:
        return [obj]
@ -742,6 +744,8 @@ def _compute_az_el_by_camera_params(
 ):
    num_view = camera_params.num_images // len(camera_params.elevation)
    view_interval = 2 * np.pi / num_view / 2
    if num_view == 1:
        view_interval = np.pi / 2
    azimuths = []
    elevations = []
    for idx, el in enumerate(camera_params.elevation):
@ -758,8 +762,13 @@ def _compute_az_el_by_camera_params(
    return azimuths, elevations
-def init_kal_camera(camera_params: CameraSetting) -> Camera:
+def init_kal_camera(
-    azimuths, elevations = _compute_az_el_by_camera_params(camera_params)
+    camera_params: CameraSetting,
    flip_az: bool = False,
 ) -> Camera:
    azimuths, elevations = _compute_az_el_by_camera_params(
        camera_params, flip_az
    )
    cam_pts = _compute_cam_pts_by_az_el(
        azimuths, elevations, camera_params.distance
    )
@ -856,13 +865,38 @@ def get_images_from_grid(
        image = Image.open(image)
    view_images = np.array(image)
-    view_images = np.concatenate(
+    height, width, _ = view_images.shape
-        [view_images[:img_size, ...], view_images[img_size:, ...]], axis=1
+    rows = height // img_size
-    )
+    cols = width // img_size
-    images = np.split(view_images, view_images.shape[1] // img_size, axis=1)
+    blocks = []
-    images = [Image.fromarray(img) for img in images]
+    for i in range(rows):
        for j in range(cols):
            block = view_images[
                i * img_size : (i + 1) * img_size,
                j * img_size : (j + 1) * img_size,
                :,
            ]
            blocks.append(Image.fromarray(block))
-    return images
+    return blocks
 def enhance_image(
    image: Image.Image,
    contrast_factor: float = 1.3,
    color_factor: float = 1.2,
    brightness_factor: float = 0.95,
 ) -> Image.Image:
    """Apply contrast, color and brightness enhancement to an image.

    The three PIL enhancers are chained in a fixed order (contrast, then
    color, then brightness); each stage operates on the output of the
    previous one.

    Args:
        image: Input PIL image.
        contrast_factor: Factor passed to `ImageEnhance.Contrast.enhance`.
        color_factor: Factor passed to `ImageEnhance.Color.enhance`.
        brightness_factor: Factor passed to
            `ImageEnhance.Brightness.enhance`.

    Returns:
        The image produced by the last enhancement stage.
    """
    # Order matters: every enhancer is built from the previous result.
    stages = (
        (ImageEnhance.Contrast, contrast_factor),
        (ImageEnhance.Color, color_factor),
        (ImageEnhance.Brightness, brightness_factor),
    )
    result = image
    for enhancer_cls, factor in stages:
        result = enhancer_cls(result).enhance(factor)
    return result
 def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
@ -872,7 +906,14 @@ def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
            texture, d=5, sigmaColor=20, sigmaSpace=20
        )
-    return texture
+    texture = enhance_image(
        image=Image.fromarray(texture),
        contrast_factor=1.3,
        color_factor=1.2,
        brightness_factor=0.95,
    )
    return np.array(texture)
 def quat_mult(q1, q2):
--- a/embodied_gen/models/delight_model.py
+++ b/embodied_gen/models/delight_model.py
@ -29,6 +29,7 @@ from diffusers import (
 from huggingface_hub import snapshot_download
 from PIL import Image
 from embodied_gen.models.segment_model import RembgRemover
 from embodied_gen.utils.log import logger
 __all__ = [
    "DelightingModel",
@ -84,6 +85,7 @@ class DelightingModel(object):
    def _lazy_init_pipeline(self):
        if self.pipeline is None:
            logger.info("Loading Delighting Model...")
            pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
                self.model_path,
                torch_dtype=torch.float16,
--- a/embodied_gen/models/layout.py
+++ b/embodied_gen/models/layout.py
@ -43,7 +43,7 @@ __all__ = [
 ]
-DISTRACTOR_NUM = 3  # Maximum number of distractor objects allowed
+DISTRACTOR_NUM = 2  # Maximum number of distractor objects allowed
 LAYOUT_DISASSEMBLE_PROMPT = f"""
    You are an intelligent 3D scene planner. Given a natural language
    description of a robotic task, output a structured description of
--- a/embodied_gen/models/texture_model.py
+++ b/embodied_gen/models/texture_model.py
@ -29,6 +29,7 @@ from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import (
 )
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 from embodied_gen.models.text_model import download_kolors_weights
 from embodied_gen.utils.log import logger
 __all__ = [
    "build_texture_gen_pipe",
@ -42,7 +43,7 @@ def build_texture_gen_pipe(
    device: str = "cuda",
 ) -> DiffusionPipeline:
    download_kolors_weights(f"{base_ckpt_dir}/Kolors")
-
+    logger.info(f"Load Kolors weights...")
    tokenizer = ChatGLMTokenizer.from_pretrained(
        f"{base_ckpt_dir}/Kolors/text_encoder"
    )
--- a/embodied_gen/scripts/gen_texture.py
+++ b/embodied_gen/scripts/gen_texture.py
@ -0,0 +1,123 @@
 import os
 import shutil
 from dataclasses import dataclass
 import tyro
 from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
 from embodied_gen.data.differentiable_render import entrypoint as drender_api
 from embodied_gen.data.utils import as_list
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.sr_model import ImageRealESRGAN
 from embodied_gen.scripts.render_mv import (
    build_texture_gen_pipe,
 )
 from embodied_gen.scripts.render_mv import infer_pipe as render_mv_api
 from embodied_gen.utils.log import logger
# Command-line options for texture generation; parsed with `tyro.cli` in
# `entrypoint`. Field order and defaults define the CLI surface.
@dataclass
 class TextureGenConfig:
    # Input mesh file path(s); a single value is wrapped into a list.
    mesh_path: str | list[str]
    # Text prompt(s), matched to `mesh_path` by position (counts must agree).
    prompt: str | list[str]
    # Output root; each mesh gets a sub-folder named after its file stem.
    output_root: str
    # The next four values are forwarded unchanged to the multi-view
    # generation call (`render_mv.infer_pipe`).
    controlnet_cond_scale: float = 0.7
    guidance_scale: float = 9
    strength: float = 0.9
    num_inference_steps: int = 40
    # Whether to load `DelightingModel` and enable delighting during
    # back-projection.
    delight: bool = True
    # Seed forwarded to the multi-view generation call.
    seed: int = 0
    # NOTE(review): presumably the checkpoint root for
    # `build_texture_gen_pipe` - confirm it is wired through in `entrypoint`.
    base_ckpt_dir: str = "./weights"
    # Edge length of the square texture (passed as
    # `texture_wh=[texture_size, texture_size]`).
    texture_size: int = 2048
    # Passed to `build_texture_gen_pipe` and `infer_pipe`.
    # NOTE(review): presumably the IP-Adapter strength, 0 = off - confirm.
    ip_adapt_scale: float = 0.0
    # Optional image path(s), indexed per mesh and passed to `infer_pipe`
    # as `ip_img_path`.
    ip_img_path: str | list[str] | None = None
 def entrypoint() -> None:
    """Generate textured meshes from the command line.

    Parses a `TextureGenConfig` with tyro, loads the models once, then for
    every (mesh, prompt) pair:

    1. renders the mesh into `<output_root>/<stem>/condition`,
    2. runs multi-view generation into `<output_root>/<stem>/multi_view`,
    3. back-projects `color_sample0.png` onto the mesh and writes
       `<stem>.obj` / `<stem>.glb` under `<output_root>/<stem>/texture_mesh`,
    4. renders the textured mesh to a color video and copies it to
       `<output_root>/<stem>/color.mp4`.

    `<stem>` is the mesh file name without its extension. Intermediate
    `condition` and per-asset render folders are removed afterwards.

    Raises:
        ValueError: If the number of prompts differs from the number of
            meshes, or fewer `ip_img_path` entries than meshes are given.
    """
    cfg = tyro.cli(TextureGenConfig)
    cfg.mesh_path = as_list(cfg.mesh_path)
    cfg.prompt = as_list(cfg.prompt)
    cfg.ip_img_path = as_list(cfg.ip_img_path)

    # Validate up front so a bad invocation fails before any model is loaded
    # (and regardless of `python -O`, which strips `assert`).
    if len(cfg.mesh_path) != len(cfg.prompt):
        raise ValueError(
            f"Got {len(cfg.mesh_path)} mesh_path value(s) but "
            f"{len(cfg.prompt)} prompt value(s); counts must match."
        )
    if cfg.ip_img_path is not None and len(cfg.ip_img_path) < len(
        cfg.mesh_path
    ):
        raise ValueError(
            f"Got {len(cfg.ip_img_path)} ip_img_path value(s) for "
            f"{len(cfg.mesh_path)} mesh(es); provide one per mesh."
        )

    # Pre-load models. Non-positive adapter scales are clamped to 0, and the
    # checkpoint directory now honours the `base_ckpt_dir` option instead of
    # a hard-coded "./weights".
    pipeline = build_texture_gen_pipe(
        base_ckpt_dir=cfg.base_ckpt_dir,
        ip_adapt_scale=cfg.ip_adapt_scale if cfg.ip_adapt_scale > 0 else 0,
        device="cuda",
    )
    delight_model = DelightingModel() if cfg.delight else None
    imagesr_model = ImageRealESRGAN(outscale=4)

    for idx, (mesh_path, prompt) in enumerate(zip(cfg.mesh_path, cfg.prompt)):
        uuid = os.path.splitext(os.path.basename(mesh_path))[0]
        output_root = os.path.join(cfg.output_root, uuid)
        ip_img_path = None if cfg.ip_img_path is None else cfg.ip_img_path[idx]

        # Step 1: render the input mesh; produces condition/index.json.
        drender_api(
            mesh_path=mesh_path,
            output_root=f"{output_root}/condition",
            uuid=uuid,
        )
        # Step 2: multi-view color generation driven by the prompt.
        render_mv_api(
            index_file=f"{output_root}/condition/index.json",
            controlnet_cond_scale=cfg.controlnet_cond_scale,
            guidance_scale=cfg.guidance_scale,
            strength=cfg.strength,
            num_inference_steps=cfg.num_inference_steps,
            ip_adapt_scale=cfg.ip_adapt_scale,
            ip_img_path=ip_img_path,
            prompt=prompt,
            save_dir=f"{output_root}/multi_view",
            sub_idxs=[[0, 1, 2], [3, 4, 5]],
            pipeline=pipeline,
            seed=cfg.seed,
        )
        # Step 3: back-project the generated views onto the mesh.
        backproject_api(
            delight_model=delight_model,
            imagesr_model=imagesr_model,
            mesh_path=mesh_path,
            color_path=f"{output_root}/multi_view/color_sample0.png",
            output_path=f"{output_root}/texture_mesh/{uuid}.obj",
            save_glb_path=f"{output_root}/texture_mesh/{uuid}.glb",
            skip_fix_mesh=True,
            delight=cfg.delight,
            no_save_delight_img=True,
            texture_wh=[cfg.texture_size, cfg.texture_size],
        )
        # Step 4: render the textured mesh to a color video.
        drender_api(
            mesh_path=f"{output_root}/texture_mesh/{uuid}.obj",
            output_root=f"{output_root}/texture_mesh",
            uuid=uuid,
            num_images=90,
            elevation=[20],
            with_mtl=True,
            gen_color_mp4=True,
            pbr_light_factor=1.2,
        )

        # Re-organize folders: keep only the mesh files and the video.
        shutil.rmtree(f"{output_root}/condition")
        shutil.copy(
            f"{output_root}/texture_mesh/{uuid}/color.mp4",
            f"{output_root}/color.mp4",
        )
        shutil.rmtree(f"{output_root}/texture_mesh/{uuid}")

        logger.info(
            f"Successfully generate textured mesh in {output_root}/texture_mesh"
        )
 # Run the CLI when this module is executed directly as a script.
 if __name__ == "__main__":
    entrypoint()
--- a/embodied_gen/scripts/imageto3d.py
+++ b/embodied_gen/scripts/imageto3d.py
@ -108,6 +108,9 @@ def parse_args():
        default=2,
    )
    parser.add_argument("--disable_decompose_convex", action="store_true")
    parser.add_argument(
        "--texture_wh", type=int, nargs=2, default=[2048, 2048]
    )
    args, unknown = parser.parse_known_args()
    return args
@ -209,11 +212,17 @@ def entrypoint(**kwargs):
                    device="cpu",
                )
                color_path = os.path.join(output_root, "color.png")
-                render_gs_api(aligned_gs_path, color_path)
+                render_gs_api(
-
+                    input_gs=aligned_gs_path,
-                geo_flag, geo_result = GEO_CHECKER(
+                    output_path=color_path,
-                    [color_path], text=asset_node
+                    elevation=[20, -10, 60, -50],
                    num_images=12,
                )
                color_img = Image.open(color_path)
                keep_height = int(color_img.height * 2 / 3)
                crop_img = color_img.crop((0, 0, color_img.width, keep_height))
                geo_flag, geo_result = GEO_CHECKER([crop_img], text=asset_node)
                logger.warning(
                    f"{GEO_CHECKER.__class__.__name__}: {geo_result} for {seg_path}"
                )
@ -246,7 +255,9 @@ def entrypoint(**kwargs):
                output_path=mesh_obj_path,
                skip_fix_mesh=False,
                delight=True,
-                texture_wh=[2048, 2048],
+                texture_wh=args.texture_wh,
                elevation=[20, -10, 60, -50],
                num_images=12,
            )
            mesh_glb_path = os.path.join(output_root, f"{filename}.glb")
--- a/embodied_gen/scripts/render_gs.py
+++ b/embodied_gen/scripts/render_gs.py
@ -18,12 +18,11 @@
 import argparse
 import logging
 import math
 import os
 import cv2
 import numpy as np
 import spaces
 import torch
 from PIL import Image
 from tqdm import tqdm
 from embodied_gen.data.utils import (
    CameraSetting,
@ -31,6 +30,7 @@ from embodied_gen.data.utils import (
    normalize_vertices_array,
 )
 from embodied_gen.models.gs_model import GaussianOperator
 from embodied_gen.utils.process_media import combine_images_to_grid
 logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
@ -104,7 +104,7 @@ def load_gs_model(
    # Normalize vertices to [-1, 1], center to (0, 0, 0).
    _, scale, center = normalize_vertices_array(gs_model._means)
    scale, center = float(scale), center.tolist()
-    transpose = [*[-v for v in center], *pre_quat]
+    transpose = [*[v for v in center], *pre_quat]
    instance_pose = torch.tensor(transpose).to(gs_model.device)
    gs_model = gs_model.get_gaussians(instance_pose=instance_pose)
    gs_model.rescale(scale)
@ -113,12 +113,11 @@ def load_gs_model(
@spaces.GPU
-def entrypoint(input_gs: str = None, output_path: str = None) -> None:
+def entrypoint(**kwargs) -> None:
    args = parse_args()
-    if isinstance(input_gs, str):
+    for k, v in kwargs.items():
-        args.input_gs = input_gs
+        if hasattr(args, k) and v is not None:
-    if isinstance(output_path, str):
+            setattr(args, k, v)
        args.output_path = output_path
    # Setup camera parameters
    camera_params = CameraSetting(
@ -129,7 +128,7 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
        fov=math.radians(args.fov),
        device=args.device,
    )
-    camera = init_kal_camera(camera_params)
+    camera = init_kal_camera(camera_params, flip_az=True)
    matrix_mv = camera.view_matrix()  # (n_cam 4 4) world2cam
    matrix_mv[:, :3, 3] = -matrix_mv[:, :3, 3]
    w2cs = matrix_mv.to(camera_params.device)
@ -153,21 +152,11 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
            (args.image_size, args.image_size),
            interpolation=cv2.INTER_AREA,
        )
-        images.append(color)
+        color = cv2.cvtColor(color, cv2.COLOR_BGRA2RGBA)
        images.append(Image.fromarray(color))
-    # Cat color images into grid image and save.
+    combine_images_to_grid(images, image_mode="RGBA")[0].save(args.output_path)
    select_idxs = [[0, 2, 1], [5, 4, 3]]  # fix order for 6 views
    grid_image = []
    for row_idxs in select_idxs:
        row_image = []
        for row_idx in row_idxs:
            row_image.append(images[row_idx])
        row_image = np.concatenate(row_image, axis=1)
        grid_image.append(row_image)
    grid_image = np.concatenate(grid_image, axis=0)
    os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
    cv2.imwrite(args.output_path, grid_image)
    logger.info(f"Saved grid image to {args.output_path}")
--- a/embodied_gen/scripts/simulate_sapien.py
+++ b/embodied_gen/scripts/simulate_sapien.py
@ -170,7 +170,8 @@ def entrypoint(**kwargs):
        for node in actions:
            if actions[node] is None:
                continue
-            for action in tqdm(actions[node]):
+            logger.info(f"Render SIM grasping in camera {idx} for {node}...")
            for action in actions[node]:
                grasp_frames = scene_manager.step_action(
                    agent,
                    torch.Tensor(action[None, ...]),
--- a/embodied_gen/scripts/texture_gen.sh
+++ b/embodied_gen/scripts/texture_gen.sh
@ -28,6 +28,7 @@ if [[ -z "$mesh_path" || -z "$prompt" || -z "$output_root" ]]; then
    exit 1
 fi
 echo "Will be deprecated, recommended to use 'texture-cli' instead."
 uuid=$(basename "$output_root")
 # Step 1: drender-cli for condition rendering
 drender-cli --mesh_path ${mesh_path} \
--- a/embodied_gen/utils/process_media.py
+++ b/embodied_gen/utils/process_media.py
@ -49,6 +49,7 @@ __all__ = [
    "is_image_file",
    "parse_text_prompts",
    "check_object_edge_truncated",
    "vcat_pil_images",
 ]
@ -166,6 +167,7 @@ def combine_images_to_grid(
    images: list[str | Image.Image],
    cat_row_col: tuple[int, int] = None,
    target_wh: tuple[int, int] = (512, 512),
    image_mode: str = "RGB",
 ) -> list[Image.Image]:
    n_images = len(images)
    if n_images == 1:
@ -178,13 +180,13 @@ def combine_images_to_grid(
        n_row, n_col = cat_row_col
    images = [
-        Image.open(p).convert("RGB") if isinstance(p, str) else p
+        Image.open(p).convert(image_mode) if isinstance(p, str) else p
        for p in images
    ]
    images = [img.resize(target_wh) for img in images]
    grid_w, grid_h = n_col * target_wh[0], n_row * target_wh[1]
-    grid = Image.new("RGB", (grid_w, grid_h), (0, 0, 0))
+    grid = Image.new(image_mode, (grid_w, grid_h), (0, 0, 0))
    for idx, img in enumerate(images):
        row, col = divmod(idx, n_col)
@ -435,6 +437,21 @@ def check_object_edge_truncated(
    return not (top or bottom or left or right)
 def vcat_pil_images(
    images: list[Image.Image], image_mode: str = "RGB"
 ) -> Image.Image:
    """Stack PIL images vertically into one image.

    Images are pasted top to bottom in list order, left-aligned at x=0. The
    canvas is as wide as the widest input and as tall as the sum of all
    heights; areas not covered by a narrower image keep the default fill of
    `Image.new`.

    Args:
        images: Non-empty list of PIL images to concatenate.
        image_mode: Mode of the output canvas (e.g. "RGB", "RGBA").

    Returns:
        A new image containing all inputs stacked vertically.

    Raises:
        ValueError: If `images` is empty.
    """
    if not images:
        # Previously surfaced as an opaque tuple-unpacking ValueError.
        raise ValueError("vcat_pil_images expects at least one image.")

    canvas_w = max(img.size[0] for img in images)
    canvas_h = sum(img.size[1] for img in images)
    canvas = Image.new(image_mode, (canvas_w, canvas_h))

    y_offset = 0
    for img in images:
        canvas.paste(img, (0, y_offset))
        y_offset += img.size[1]

    return canvas
 if __name__ == "__main__":
    image_paths = [
        "outputs/layouts_sim/task_0000/images/pen.png",
--- a/embodied_gen/validators/quality_checkers.py
+++ b/embodied_gen/validators/quality_checkers.py
@ -249,7 +249,7 @@ class SemanticConsistChecker(BaseChecker):
                fewer than four legs or if the legs are unevenly distributed, are not allowed. Do not assume
                hidden legs unless they are clearly visible.)
            - Geometric completeness is required: the object must not have missing, truncated, or cropped parts.
-            - The image must contain exactly one object. Multiple distinct objects are not allowed.
+            - The image must contain exactly one object. Multiple distinct objects (e.g. multiple pens) are not allowed.
                A single composite object (e.g., a chair with legs) is acceptable.
            - The object should be shown from a slightly angled (three-quarter) perspective,
                not a flat, front-facing view showing only one surface.
--- a/embodied_gen/validators/urdf_convertor.py
+++ b/embodied_gen/validators/urdf_convertor.py
@ -266,7 +266,7 @@ class URDFGenerator(object):
            if self.decompose_convex:
                try:
                    d_params = dict(
-                        threshold=0.05, max_convex_hull=64, verbose=False
+                        threshold=0.05, max_convex_hull=100, verbose=False
                    )
                    filename = f"{os.path.splitext(obj_name)[0]}_collision.ply"
                    output_path = os.path.join(mesh_folder, filename)
--- a/pyproject.toml
+++ b/pyproject.toml
@ -31,6 +31,7 @@ drender-cli = "embodied_gen.data.differentiable_render:entrypoint"
 backproject-cli = "embodied_gen.data.backproject_v2:entrypoint"
 img3d-cli = "embodied_gen.scripts.imageto3d:entrypoint"
 text3d-cli = "embodied_gen.scripts.textto3d:text_to_3d"
 texture-cli = "embodied_gen.scripts.gen_texture:entrypoint"
 scene3d-cli = "embodied_gen.scripts.gen_scene3d:entrypoint"
 layout-cli = "embodied_gen.scripts.gen_layout:entrypoint"
 sim-cli = "embodied_gen.scripts.simulate_sapien:entrypoint"
--- a/tests/test_examples/test_quality_checkers.py
+++ b/tests/test_examples/test_quality_checkers.py
@ -142,6 +142,7 @@ def test_semantic_checker(semantic_checker):
        ("desk", "outputs/utest_cases/semantic_checker/task_0016_desk.png"),
        ("shelf", "outputs/utest_cases/semantic_checker/task_0018_shelf.png"),
        ("table", "outputs/utest_cases/semantic_checker/task_0000_table.png"),
        ("pen", "outputs/layouts_gens2/task_0000/images/pen_raw.png"),
    ]
    for test_case in test_cases:
        flag, result = semantic_checker(