feat(urdf): Improve the scale restoration logic to make it more robust. (#17)
Improve the scale restoration logic to make it more robust.
This commit is contained in:
parent
52983c8de2
commit
e8de0e44df
@ -40,6 +40,8 @@ from common import (
|
||||
)
|
||||
|
||||
with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
gr.HTML(image_css, visible=False)
|
||||
gr.HTML(lighting_css, visible=False)
|
||||
gr.Markdown(
|
||||
"""
|
||||
## ***EmbodiedGen***: Image-to-3D Asset
|
||||
@ -54,21 +56,18 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
<a href="https://github.com/HorizonRobotics/EmbodiedGen">
|
||||
<img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
|
||||
</a>
|
||||
<a href="https://www.youtube.com/watch?v=SnHhzHeb_aI">
|
||||
<a href="https://www.youtube.com/watch?v=rG4odybuJRk">
|
||||
<img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
🖼️ Generate physically plausible 3D asset from single input image.
|
||||
|
||||
""".format(
|
||||
VERSION=VERSION
|
||||
),
|
||||
elem_classes=["header"],
|
||||
)
|
||||
|
||||
gr.HTML(image_css)
|
||||
gr.HTML(lighting_css)
|
||||
with gr.Row():
|
||||
with gr.Column(scale=2):
|
||||
with gr.Tabs() as input_tabs:
|
||||
@ -239,9 +238,8 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
)
|
||||
|
||||
gr.Markdown(
|
||||
""" NOTE: If `Asset Attributes` are provided, the provided
|
||||
properties will be used; otherwise, the GPT-preset properties
|
||||
will be applied. \n
|
||||
""" NOTE: If `Asset Attributes` are provided, it will guide
|
||||
GPT to perform physical attributes restoration. \n
|
||||
The `Download URDF` file is restored to the real scale and
|
||||
has quality inspection, open with an editor to view details.
|
||||
"""
|
||||
@ -279,6 +277,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
examples_per_page=10,
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
gr.Markdown("<br>")
|
||||
video_output = gr.Video(
|
||||
label="Generated 3D Asset",
|
||||
autoplay=True,
|
||||
|
||||
@ -40,6 +40,8 @@ from common import (
|
||||
)
|
||||
|
||||
with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
gr.HTML(image_css, visible=False)
|
||||
gr.HTML(lighting_css, visible=False)
|
||||
gr.Markdown(
|
||||
"""
|
||||
## ***EmbodiedGen***: Text-to-3D Asset
|
||||
@ -54,20 +56,18 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
<a href="https://github.com/HorizonRobotics/EmbodiedGen">
|
||||
<img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
|
||||
</a>
|
||||
<a href="https://www.youtube.com/watch?v=SnHhzHeb_aI">
|
||||
<a href="https://www.youtube.com/watch?v=rG4odybuJRk">
|
||||
<img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
📝 Create 3D assets from text descriptions for a wide range of geometry and styles.
|
||||
|
||||
""".format(
|
||||
VERSION=VERSION
|
||||
),
|
||||
elem_classes=["header"],
|
||||
)
|
||||
gr.HTML(image_css)
|
||||
gr.HTML(lighting_css)
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1):
|
||||
raw_image_cache = gr.Image(
|
||||
@ -267,8 +267,8 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
visible=False,
|
||||
)
|
||||
gr.Markdown(
|
||||
"The generated image may be of poor quality due to auto "
|
||||
"segmentation. Try adjusting the text prompt or seed."
|
||||
"Generated image may be poor quality due to auto seg."
|
||||
"Retry by adjusting text prompt, seed or switch seg model in `Image Gen Settings`."
|
||||
)
|
||||
with gr.Row():
|
||||
video_output = gr.Video(
|
||||
|
||||
@ -50,6 +50,8 @@ def active_btn_by_content(mesh_content: gr.Model3D, text_content: gr.Textbox):
|
||||
|
||||
|
||||
with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
gr.HTML(image_css, visible=False)
|
||||
gr.HTML(lighting_css, visible=False)
|
||||
gr.Markdown(
|
||||
"""
|
||||
## ***EmbodiedGen***: Texture Generation
|
||||
@ -64,30 +66,33 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
<a href="https://github.com/HorizonRobotics/EmbodiedGen">
|
||||
<img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
|
||||
</a>
|
||||
<a href="https://www.youtube.com/watch?v=SnHhzHeb_aI">
|
||||
<a href="https://www.youtube.com/watch?v=rG4odybuJRk">
|
||||
<img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
🎨 Generate visually rich textures for 3D mesh.
|
||||
|
||||
""".format(
|
||||
VERSION=VERSION
|
||||
),
|
||||
elem_classes=["header"],
|
||||
)
|
||||
gr.HTML(image_css)
|
||||
gr.HTML(lighting_css)
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1):
|
||||
gr.Markdown(
|
||||
"You can select input in `Mesh Gallery` at page bottom."
|
||||
)
|
||||
mesh_input = gr.Model3D(
|
||||
label="Upload Mesh File(.obj or .glb)", height=300
|
||||
label="Upload Mesh File(.obj or .glb)", height=270
|
||||
)
|
||||
local_mesh = gr.Textbox(visible=False)
|
||||
text_prompt = gr.Textbox(
|
||||
label="Text Prompt (Chinese or English)",
|
||||
placeholder="Input text prompt here",
|
||||
)
|
||||
gr.Markdown("<br>")
|
||||
|
||||
ip_image = gr.Image(
|
||||
label="Reference Image(optional)",
|
||||
format="png",
|
||||
@ -97,8 +102,8 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
elem_classes=["image_fit"],
|
||||
)
|
||||
gr.Markdown(
|
||||
"Note: The `reference image` is optional. If provided, please "
|
||||
"increase the `Condition Scale` in Generation Settings."
|
||||
"Note: The `reference image` is optional. If provided, "
|
||||
"increase `Condition Scale` in Generation Settings."
|
||||
)
|
||||
|
||||
with gr.Accordion(label="Generation Settings", open=False):
|
||||
@ -139,12 +144,6 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
512, 2048, label="Video Resolution", value=512, step=256
|
||||
)
|
||||
|
||||
generate_mv_btn = gr.Button(
|
||||
"🎨 1. Generate MV Images(~1min)",
|
||||
variant="primary",
|
||||
interactive=False,
|
||||
)
|
||||
|
||||
with gr.Column(scale=3):
|
||||
with gr.Row():
|
||||
image_sample1 = gr.Image(
|
||||
@ -194,10 +193,10 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
visible=False,
|
||||
)
|
||||
|
||||
gr.Markdown(
|
||||
"Note: Select samples with consistent textures from various "
|
||||
"perspectives and no obvious reflections."
|
||||
)
|
||||
# gr.Markdown(
|
||||
# "Note: Select samples with consistent textures from various "
|
||||
# "perspectives and no obvious reflections."
|
||||
# )
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1):
|
||||
with gr.Row():
|
||||
@ -222,6 +221,11 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
)
|
||||
|
||||
with gr.Column(scale=1):
|
||||
generate_mv_btn = gr.Button(
|
||||
"🎨 1. Generate MV Images(~1min)",
|
||||
variant="primary",
|
||||
interactive=False,
|
||||
)
|
||||
texture_bake_btn = gr.Button(
|
||||
"🛠️ 2. Texture Baking(~2min)",
|
||||
variant="primary",
|
||||
@ -237,7 +241,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
mesh_output = gr.Model3D(
|
||||
label="Mesh Edit Result",
|
||||
clear_color=[0.8, 0.8, 0.8, 1],
|
||||
height=380,
|
||||
height=340,
|
||||
interactive=False,
|
||||
elem_id="lighter_mesh",
|
||||
)
|
||||
@ -246,7 +250,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
|
||||
label="Mesh Edit Video",
|
||||
autoplay=True,
|
||||
loop=True,
|
||||
height=380,
|
||||
height=340,
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
|
||||
@ -53,6 +53,9 @@ __all__ = [
|
||||
]
|
||||
|
||||
|
||||
PROMPT_APPEND = "Full view of one {}, no cropping, centered, no occlusion, isolated product photo, matte, 3D style, on a plain clean surface"
|
||||
|
||||
|
||||
def download_kolors_weights(local_dir: str = "weights/Kolors") -> None:
|
||||
logger.info(f"Download kolors weights from huggingface...")
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
@ -179,8 +182,9 @@ def text2img_gen(
|
||||
ip_image_size: int = 512,
|
||||
seed: int = None,
|
||||
) -> list[Image.Image]:
|
||||
prompt = "Single " + prompt + ", in the center of the image"
|
||||
prompt += ", high quality, high resolution, best quality, white background, 3D style" # noqa
|
||||
# prompt = "Single " + prompt + ", in the center of the image"
|
||||
# prompt += ", high quality, high resolution, best quality, white background, 3D style" # noqa
|
||||
prompt = PROMPT_APPEND.format(prompt.strip())
|
||||
logger.info(f"Processing prompt: {prompt}")
|
||||
|
||||
generator = None
|
||||
|
||||
@ -102,6 +102,7 @@ class URDFGenerator(object):
|
||||
view_desc
|
||||
+ """of the 3D object asset,
|
||||
category: {category}.
|
||||
You are an expert in 3D object analysis and physical property estimation.
|
||||
Give the category of this object asset (within 3 words),
|
||||
(if category is already provided, use it directly),
|
||||
accurately describe this 3D object asset (within 15 words),
|
||||
@ -109,9 +110,19 @@ class URDFGenerator(object):
|
||||
weight range (unit: kilogram), the average static friction
|
||||
coefficient of the object relative to rubber and the average
|
||||
dynamic friction coefficient of the object relative to rubber.
|
||||
Return response format as shown in Example.
|
||||
Return response format as shown in Output Example.
|
||||
|
||||
Example:
|
||||
IMPORTANT:
|
||||
Inputed images are orthographic projection showing the front, left, right and back views,
|
||||
the first image is always the front view. Use the object's pose and orientation in the
|
||||
rendered images to estimate its **true vertical height as it appears in the image**,
|
||||
not the real-world length or width of the object.
|
||||
For example:
|
||||
- A pen standing upright in the front view → vertical height: 0.15-0.2 m
|
||||
- A pen lying horizontally in the front view → vertical height: 0.01-0.02 m
|
||||
(based on its thickness in the image)
|
||||
|
||||
Output Example:
|
||||
Category: cup
|
||||
Description: shiny golden cup with floral design
|
||||
Height: 0.1-0.15 m
|
||||
|
||||
@ -30,7 +30,7 @@ realesrgan==0.3.0
|
||||
pydantic==2.9.2
|
||||
vtk==9.3.1
|
||||
spaces
|
||||
utils3d@git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
|
||||
utils3d@git+https://github.com/EasternJournalist/utils3d.git#egg=9a4eb15
|
||||
clip@git+https://github.com/openai/CLIP.git
|
||||
kolors@git+https://github.com/Kwai-Kolors/Kolors.git#egg=038818d
|
||||
segment-anything@git+https://github.com/facebookresearch/segment-anything.git#egg=dca509f
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user