feat(urdf): Improve the scale restoration logic to make it more robust. (#17)

Improve the scale restoration logic to make it more robust.
This commit is contained in:
Xinjie 2025-06-27 00:39:42 +08:00 committed by GitHub
parent 52983c8de2
commit e8de0e44df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 55 additions and 37 deletions

View File

@ -40,6 +40,8 @@ from common import (
) )
with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo: with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
gr.HTML(image_css, visible=False)
gr.HTML(lighting_css, visible=False)
gr.Markdown( gr.Markdown(
""" """
## ***EmbodiedGen***: Image-to-3D Asset ## ***EmbodiedGen***: Image-to-3D Asset
@ -54,21 +56,18 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
<a href="https://github.com/HorizonRobotics/EmbodiedGen"> <a href="https://github.com/HorizonRobotics/EmbodiedGen">
<img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github"> <img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
</a> </a>
<a href="https://www.youtube.com/watch?v=SnHhzHeb_aI"> <a href="https://www.youtube.com/watch?v=rG4odybuJRk">
<img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red"> <img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
</a> </a>
</p> </p>
🖼 Generate physically plausible 3D asset from single input image. 🖼 Generate physically plausible 3D asset from single input image.
""".format( """.format(
VERSION=VERSION VERSION=VERSION
), ),
elem_classes=["header"], elem_classes=["header"],
) )
gr.HTML(image_css)
gr.HTML(lighting_css)
with gr.Row(): with gr.Row():
with gr.Column(scale=2): with gr.Column(scale=2):
with gr.Tabs() as input_tabs: with gr.Tabs() as input_tabs:
@ -239,9 +238,8 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
) )
gr.Markdown( gr.Markdown(
""" NOTE: If `Asset Attributes` are provided, the provided """ NOTE: If `Asset Attributes` are provided, it will guide
properties will be used; otherwise, the GPT-preset properties GPT to perform physical attributes restoration. \n
will be applied. \n
The `Download URDF` file is restored to the real scale and The `Download URDF` file is restored to the real scale and
has quality inspection, open with an editor to view details. has quality inspection, open with an editor to view details.
""" """
@ -279,6 +277,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
examples_per_page=10, examples_per_page=10,
) )
with gr.Column(scale=1): with gr.Column(scale=1):
gr.Markdown("<br>")
video_output = gr.Video( video_output = gr.Video(
label="Generated 3D Asset", label="Generated 3D Asset",
autoplay=True, autoplay=True,

View File

@ -40,6 +40,8 @@ from common import (
) )
with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo: with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
gr.HTML(image_css, visible=False)
gr.HTML(lighting_css, visible=False)
gr.Markdown( gr.Markdown(
""" """
## ***EmbodiedGen***: Text-to-3D Asset ## ***EmbodiedGen***: Text-to-3D Asset
@ -54,20 +56,18 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
<a href="https://github.com/HorizonRobotics/EmbodiedGen"> <a href="https://github.com/HorizonRobotics/EmbodiedGen">
<img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github"> <img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
</a> </a>
<a href="https://www.youtube.com/watch?v=SnHhzHeb_aI"> <a href="https://www.youtube.com/watch?v=rG4odybuJRk">
<img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red"> <img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
</a> </a>
</p> </p>
📝 Create 3D assets from text descriptions for a wide range of geometry and styles. 📝 Create 3D assets from text descriptions for a wide range of geometry and styles.
""".format( """.format(
VERSION=VERSION VERSION=VERSION
), ),
elem_classes=["header"], elem_classes=["header"],
) )
gr.HTML(image_css)
gr.HTML(lighting_css)
with gr.Row(): with gr.Row():
with gr.Column(scale=1): with gr.Column(scale=1):
raw_image_cache = gr.Image( raw_image_cache = gr.Image(
@ -267,8 +267,8 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
visible=False, visible=False,
) )
gr.Markdown( gr.Markdown(
"The generated image may be of poor quality due to auto " "Generated image may be poor quality due to auto seg."
"segmentation. Try adjusting the text prompt or seed." "Retry by adjusting text prompt, seed or switch seg model in `Image Gen Settings`."
) )
with gr.Row(): with gr.Row():
video_output = gr.Video( video_output = gr.Video(

View File

@ -50,6 +50,8 @@ def active_btn_by_content(mesh_content: gr.Model3D, text_content: gr.Textbox):
with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo: with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
gr.HTML(image_css, visible=False)
gr.HTML(lighting_css, visible=False)
gr.Markdown( gr.Markdown(
""" """
## ***EmbodiedGen***: Texture Generation ## ***EmbodiedGen***: Texture Generation
@ -64,30 +66,33 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
<a href="https://github.com/HorizonRobotics/EmbodiedGen"> <a href="https://github.com/HorizonRobotics/EmbodiedGen">
<img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github"> <img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
</a> </a>
<a href="https://www.youtube.com/watch?v=SnHhzHeb_aI"> <a href="https://www.youtube.com/watch?v=rG4odybuJRk">
<img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red"> <img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
</a> </a>
</p> </p>
🎨 Generate visually rich textures for 3D mesh. 🎨 Generate visually rich textures for 3D mesh.
""".format( """.format(
VERSION=VERSION VERSION=VERSION
), ),
elem_classes=["header"], elem_classes=["header"],
) )
gr.HTML(image_css)
gr.HTML(lighting_css)
with gr.Row(): with gr.Row():
with gr.Column(scale=1): with gr.Column(scale=1):
gr.Markdown(
"You can select input in `Mesh Gallery` at page bottom."
)
mesh_input = gr.Model3D( mesh_input = gr.Model3D(
label="Upload Mesh File(.obj or .glb)", height=300 label="Upload Mesh File(.obj or .glb)", height=270
) )
local_mesh = gr.Textbox(visible=False) local_mesh = gr.Textbox(visible=False)
text_prompt = gr.Textbox( text_prompt = gr.Textbox(
label="Text Prompt (Chinese or English)", label="Text Prompt (Chinese or English)",
placeholder="Input text prompt here", placeholder="Input text prompt here",
) )
gr.Markdown("<br>")
ip_image = gr.Image( ip_image = gr.Image(
label="Reference Image(optional)", label="Reference Image(optional)",
format="png", format="png",
@ -97,8 +102,8 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
elem_classes=["image_fit"], elem_classes=["image_fit"],
) )
gr.Markdown( gr.Markdown(
"Note: The `reference image` is optional. If provided, please " "Note: The `reference image` is optional. If provided, "
"increase the `Condition Scale` in Generation Settings." "increase `Condition Scale` in Generation Settings."
) )
with gr.Accordion(label="Generation Settings", open=False): with gr.Accordion(label="Generation Settings", open=False):
@ -139,12 +144,6 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
512, 2048, label="Video Resolution", value=512, step=256 512, 2048, label="Video Resolution", value=512, step=256
) )
generate_mv_btn = gr.Button(
"🎨 1. Generate MV Images(~1min)",
variant="primary",
interactive=False,
)
with gr.Column(scale=3): with gr.Column(scale=3):
with gr.Row(): with gr.Row():
image_sample1 = gr.Image( image_sample1 = gr.Image(
@ -194,10 +193,10 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
visible=False, visible=False,
) )
gr.Markdown( # gr.Markdown(
"Note: Select samples with consistent textures from various " # "Note: Select samples with consistent textures from various "
"perspectives and no obvious reflections." # "perspectives and no obvious reflections."
) # )
with gr.Row(): with gr.Row():
with gr.Column(scale=1): with gr.Column(scale=1):
with gr.Row(): with gr.Row():
@ -222,6 +221,11 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
) )
with gr.Column(scale=1): with gr.Column(scale=1):
generate_mv_btn = gr.Button(
"🎨 1. Generate MV Images(~1min)",
variant="primary",
interactive=False,
)
texture_bake_btn = gr.Button( texture_bake_btn = gr.Button(
"🛠️ 2. Texture Baking(~2min)", "🛠️ 2. Texture Baking(~2min)",
variant="primary", variant="primary",
@ -237,7 +241,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
mesh_output = gr.Model3D( mesh_output = gr.Model3D(
label="Mesh Edit Result", label="Mesh Edit Result",
clear_color=[0.8, 0.8, 0.8, 1], clear_color=[0.8, 0.8, 0.8, 1],
height=380, height=340,
interactive=False, interactive=False,
elem_id="lighter_mesh", elem_id="lighter_mesh",
) )
@ -246,7 +250,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
label="Mesh Edit Video", label="Mesh Edit Video",
autoplay=True, autoplay=True,
loop=True, loop=True,
height=380, height=340,
) )
with gr.Row(): with gr.Row():

View File

@ -53,6 +53,9 @@ __all__ = [
] ]
PROMPT_APPEND = "Full view of one {}, no cropping, centered, no occlusion, isolated product photo, matte, 3D style, on a plain clean surface"
def download_kolors_weights(local_dir: str = "weights/Kolors") -> None: def download_kolors_weights(local_dir: str = "weights/Kolors") -> None:
logger.info(f"Download kolors weights from huggingface...") logger.info(f"Download kolors weights from huggingface...")
os.makedirs(local_dir, exist_ok=True) os.makedirs(local_dir, exist_ok=True)
@ -179,8 +182,9 @@ def text2img_gen(
ip_image_size: int = 512, ip_image_size: int = 512,
seed: int = None, seed: int = None,
) -> list[Image.Image]: ) -> list[Image.Image]:
prompt = "Single " + prompt + ", in the center of the image" # prompt = "Single " + prompt + ", in the center of the image"
prompt += ", high quality, high resolution, best quality, white background, 3D style" # noqa # prompt += ", high quality, high resolution, best quality, white background, 3D style" # noqa
prompt = PROMPT_APPEND.format(prompt.strip())
logger.info(f"Processing prompt: {prompt}") logger.info(f"Processing prompt: {prompt}")
generator = None generator = None

View File

@ -102,6 +102,7 @@ class URDFGenerator(object):
view_desc view_desc
+ """of the 3D object asset, + """of the 3D object asset,
category: {category}. category: {category}.
You are an expert in 3D object analysis and physical property estimation.
Give the category of this object asset (within 3 words), Give the category of this object asset (within 3 words),
(if category is already provided, use it directly), (if category is already provided, use it directly),
accurately describe this 3D object asset (within 15 words), accurately describe this 3D object asset (within 15 words),
@ -109,9 +110,19 @@ class URDFGenerator(object):
weight range (unit: kilogram), the average static friction weight range (unit: kilogram), the average static friction
coefficient of the object relative to rubber and the average coefficient of the object relative to rubber and the average
dynamic friction coefficient of the object relative to rubber. dynamic friction coefficient of the object relative to rubber.
Return response format as shown in Example. Return response format as shown in Output Example.
Example: IMPORTANT:
Inputed images are orthographic projection showing the front, left, right and back views,
the first image is always the front view. Use the object's pose and orientation in the
rendered images to estimate its **true vertical height as it appears in the image**,
not the real-world length or width of the object.
For example:
- A pen standing upright in the front view vertical height: 0.15-0.2 m
- A pen lying horizontally in the front view vertical height: 0.01-0.02 m
(based on its thickness in the image)
Output Example:
Category: cup Category: cup
Description: shiny golden cup with floral design Description: shiny golden cup with floral design
Height: 0.1-0.15 m Height: 0.1-0.15 m

View File

@ -30,7 +30,7 @@ realesrgan==0.3.0
pydantic==2.9.2 pydantic==2.9.2
vtk==9.3.1 vtk==9.3.1
spaces spaces
utils3d@git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8 utils3d@git+https://github.com/EasternJournalist/utils3d.git#egg=9a4eb15
clip@git+https://github.com/openai/CLIP.git clip@git+https://github.com/openai/CLIP.git
kolors@git+https://github.com/Kwai-Kolors/Kolors.git#egg=038818d kolors@git+https://github.com/Kwai-Kolors/Kolors.git#egg=038818d
segment-anything@git+https://github.com/facebookresearch/segment-anything.git#egg=dca509f segment-anything@git+https://github.com/facebookresearch/segment-anything.git#egg=dca509f