feat(texture): Optimize backprojected texture quality and add texture-cli. (#38)
* feat(sim): Add auto scale in convex decomposition.
* feat(texture): Optimize back-projected texture quality.
* feat(texture): Add `texture-cli`.
This commit is contained in:
parent
768d1fbb1d
commit
cf3b919b65
17
README.md
17
README.md
@ -147,15 +147,12 @@ python apps/texture_edit.py
|
|||||||
### ⚡ API
|
### ⚡ API
|
||||||
Support Chinese and English prompts.
|
Support Chinese and English prompts.
|
||||||
```sh
|
```sh
|
||||||
bash embodied_gen/scripts/texture_gen.sh \
|
texture-cli --mesh_path "apps/assets/example_texture/meshes/robot_text.obj" \
|
||||||
--mesh_path "apps/assets/example_texture/meshes/robot_text.obj" \
|
"apps/assets/example_texture/meshes/horse.obj" \
|
||||||
--prompt "举着牌子的写实风格机器人,大眼睛,牌子上写着“Hello”的文字" \
|
--prompt "举着牌子的写实风格机器人,大眼睛,牌子上写着“Hello”的文字" \
|
||||||
--output_root "outputs/texture_gen/robot_text"
|
"A gray horse head with flying mane and brown eyes" \
|
||||||
|
--output_root "outputs/texture_gen" \
|
||||||
bash embodied_gen/scripts/texture_gen.sh \
|
--seed 0
|
||||||
--mesh_path "apps/assets/example_texture/meshes/horse.obj" \
|
|
||||||
--prompt "A gray horse head with flying mane and brown eyes" \
|
|
||||||
--output_root "outputs/texture_gen/gray_horse"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@ -185,7 +182,7 @@ CUDA_VISIBLE_DEVICES=0 scene3d-cli \
|
|||||||
|
|
||||||
🚧 *Coming Soon*
|
🚧 *Coming Soon*
|
||||||
|
|
||||||
<img src="apps/assets/articulate.gif" alt="articulate" style="width: 430px;">
|
<img src="apps/assets/articulate.gif" alt="articulate" style="width: 500px;">
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
@ -503,7 +503,12 @@ def extract_3d_representations_v2(
|
|||||||
device="cpu",
|
device="cpu",
|
||||||
)
|
)
|
||||||
color_path = os.path.join(user_dir, "color.png")
|
color_path = os.path.join(user_dir, "color.png")
|
||||||
render_gs_api(aligned_gs_path, color_path)
|
render_gs_api(
|
||||||
|
input_gs=aligned_gs_path,
|
||||||
|
output_path=color_path,
|
||||||
|
elevation=[20, -10, 60, -50],
|
||||||
|
num_images=12,
|
||||||
|
)
|
||||||
|
|
||||||
mesh = trimesh.Trimesh(
|
mesh = trimesh.Trimesh(
|
||||||
vertices=mesh_model.vertices.cpu().numpy(),
|
vertices=mesh_model.vertices.cpu().numpy(),
|
||||||
@ -524,6 +529,8 @@ def extract_3d_representations_v2(
|
|||||||
skip_fix_mesh=False,
|
skip_fix_mesh=False,
|
||||||
delight=enable_delight,
|
delight=enable_delight,
|
||||||
texture_wh=[texture_size, texture_size],
|
texture_wh=[texture_size, texture_size],
|
||||||
|
elevation=[20, -10, 60, -50],
|
||||||
|
num_images=12,
|
||||||
)
|
)
|
||||||
|
|
||||||
mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
|
mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
|
||||||
|
|||||||
@ -33,6 +33,7 @@ from embodied_gen.data.mesh_operator import MeshFixer
|
|||||||
from embodied_gen.data.utils import (
|
from embodied_gen.data.utils import (
|
||||||
CameraSetting,
|
CameraSetting,
|
||||||
DiffrastRender,
|
DiffrastRender,
|
||||||
|
as_list,
|
||||||
get_images_from_grid,
|
get_images_from_grid,
|
||||||
init_kal_camera,
|
init_kal_camera,
|
||||||
normalize_vertices_array,
|
normalize_vertices_array,
|
||||||
@ -41,6 +42,7 @@ from embodied_gen.data.utils import (
|
|||||||
)
|
)
|
||||||
from embodied_gen.models.delight_model import DelightingModel
|
from embodied_gen.models.delight_model import DelightingModel
|
||||||
from embodied_gen.models.sr_model import ImageRealESRGAN
|
from embodied_gen.models.sr_model import ImageRealESRGAN
|
||||||
|
from embodied_gen.utils.process_media import vcat_pil_images
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
|
format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
|
||||||
@ -541,8 +543,9 @@ def parse_args():
|
|||||||
parser = argparse.ArgumentParser(description="Backproject texture")
|
parser = argparse.ArgumentParser(description="Backproject texture")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--color_path",
|
"--color_path",
|
||||||
|
nargs="+",
|
||||||
type=str,
|
type=str,
|
||||||
help="Multiview color image in 6x512x512 file path",
|
help="Multiview color image in grid file paths",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--mesh_path",
|
"--mesh_path",
|
||||||
@ -559,7 +562,7 @@ def parse_args():
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--elevation",
|
"--elevation",
|
||||||
nargs=2,
|
nargs="+",
|
||||||
type=float,
|
type=float,
|
||||||
default=[20.0, -10.0],
|
default=[20.0, -10.0],
|
||||||
help="Elevation angles for the camera (default: [20.0, -10.0])",
|
help="Elevation angles for the camera (default: [20.0, -10.0])",
|
||||||
@ -647,19 +650,23 @@ def entrypoint(
|
|||||||
fov=math.radians(args.fov),
|
fov=math.radians(args.fov),
|
||||||
device=args.device,
|
device=args.device,
|
||||||
)
|
)
|
||||||
view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
|
|
||||||
|
|
||||||
color_grid = Image.open(args.color_path)
|
args.color_path = as_list(args.color_path)
|
||||||
if args.delight:
|
if args.delight and delight_model is None:
|
||||||
if delight_model is None:
|
|
||||||
delight_model = DelightingModel()
|
delight_model = DelightingModel()
|
||||||
save_dir = os.path.dirname(args.output_path)
|
|
||||||
os.makedirs(save_dir, exist_ok=True)
|
color_grid = [Image.open(color_path) for color_path in args.color_path]
|
||||||
|
color_grid = vcat_pil_images(color_grid, image_mode="RGBA")
|
||||||
|
if args.delight:
|
||||||
color_grid = delight_model(color_grid)
|
color_grid = delight_model(color_grid)
|
||||||
if not args.no_save_delight_img:
|
if not args.no_save_delight_img:
|
||||||
color_grid.save(f"{save_dir}/color_grid_delight.png")
|
save_dir = os.path.dirname(args.output_path)
|
||||||
|
os.makedirs(save_dir, exist_ok=True)
|
||||||
|
color_grid.save(f"{save_dir}/color_delight.png")
|
||||||
|
|
||||||
multiviews = get_images_from_grid(color_grid, img_size=512)
|
multiviews = get_images_from_grid(color_grid, img_size=512)
|
||||||
|
view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
|
||||||
|
view_weights += [0.01] * (len(multiviews) - len(view_weights))
|
||||||
|
|
||||||
# Use RealESRGAN_x4plus for x4 (512->2048) image super resolution.
|
# Use RealESRGAN_x4plus for x4 (512->2048) image super resolution.
|
||||||
if imagesr_model is None:
|
if imagesr_model is None:
|
||||||
@ -688,7 +695,7 @@ def entrypoint(
|
|||||||
texture_backer = TextureBacker(
|
texture_backer = TextureBacker(
|
||||||
camera_params=camera_params,
|
camera_params=camera_params,
|
||||||
view_weights=view_weights,
|
view_weights=view_weights,
|
||||||
render_wh=camera_params.resolution_hw,
|
render_wh=args.resolution_hw,
|
||||||
texture_wh=args.texture_wh,
|
texture_wh=args.texture_wh,
|
||||||
smooth_texture=not args.no_smooth_texture,
|
smooth_texture=not args.no_smooth_texture,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -503,7 +503,7 @@ def parse_args():
|
|||||||
help="Whether to generate global normal .mp4 rendering file.",
|
help="Whether to generate global normal .mp4 rendering file.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--prompts",
|
"--video_prompts",
|
||||||
type=str,
|
type=str,
|
||||||
nargs="+",
|
nargs="+",
|
||||||
default=None,
|
default=None,
|
||||||
@ -579,7 +579,7 @@ def entrypoint(**kwargs) -> None:
|
|||||||
mesh_path=args.mesh_path,
|
mesh_path=args.mesh_path,
|
||||||
output_root=args.output_root,
|
output_root=args.output_root,
|
||||||
uuid=args.uuid,
|
uuid=args.uuid,
|
||||||
prompts=args.prompts,
|
prompts=args.video_prompts,
|
||||||
)
|
)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|||||||
@ -28,7 +28,7 @@ import numpy as np
|
|||||||
import nvdiffrast.torch as dr
|
import nvdiffrast.torch as dr
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from PIL import Image
|
from PIL import Image, ImageEnhance
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from kolors.models.modeling_chatglm import ChatGLMModel
|
from kolors.models.modeling_chatglm import ChatGLMModel
|
||||||
@ -698,6 +698,8 @@ def as_list(obj):
|
|||||||
return obj
|
return obj
|
||||||
elif isinstance(obj, set):
|
elif isinstance(obj, set):
|
||||||
return list(obj)
|
return list(obj)
|
||||||
|
elif obj is None:
|
||||||
|
return obj
|
||||||
else:
|
else:
|
||||||
return [obj]
|
return [obj]
|
||||||
|
|
||||||
@ -742,6 +744,8 @@ def _compute_az_el_by_camera_params(
|
|||||||
):
|
):
|
||||||
num_view = camera_params.num_images // len(camera_params.elevation)
|
num_view = camera_params.num_images // len(camera_params.elevation)
|
||||||
view_interval = 2 * np.pi / num_view / 2
|
view_interval = 2 * np.pi / num_view / 2
|
||||||
|
if num_view == 1:
|
||||||
|
view_interval = np.pi / 2
|
||||||
azimuths = []
|
azimuths = []
|
||||||
elevations = []
|
elevations = []
|
||||||
for idx, el in enumerate(camera_params.elevation):
|
for idx, el in enumerate(camera_params.elevation):
|
||||||
@ -758,8 +762,13 @@ def _compute_az_el_by_camera_params(
|
|||||||
return azimuths, elevations
|
return azimuths, elevations
|
||||||
|
|
||||||
|
|
||||||
def init_kal_camera(camera_params: CameraSetting) -> Camera:
|
def init_kal_camera(
|
||||||
azimuths, elevations = _compute_az_el_by_camera_params(camera_params)
|
camera_params: CameraSetting,
|
||||||
|
flip_az: bool = False,
|
||||||
|
) -> Camera:
|
||||||
|
azimuths, elevations = _compute_az_el_by_camera_params(
|
||||||
|
camera_params, flip_az
|
||||||
|
)
|
||||||
cam_pts = _compute_cam_pts_by_az_el(
|
cam_pts = _compute_cam_pts_by_az_el(
|
||||||
azimuths, elevations, camera_params.distance
|
azimuths, elevations, camera_params.distance
|
||||||
)
|
)
|
||||||
@ -856,13 +865,38 @@ def get_images_from_grid(
|
|||||||
image = Image.open(image)
|
image = Image.open(image)
|
||||||
|
|
||||||
view_images = np.array(image)
|
view_images = np.array(image)
|
||||||
view_images = np.concatenate(
|
height, width, _ = view_images.shape
|
||||||
[view_images[:img_size, ...], view_images[img_size:, ...]], axis=1
|
rows = height // img_size
|
||||||
)
|
cols = width // img_size
|
||||||
images = np.split(view_images, view_images.shape[1] // img_size, axis=1)
|
blocks = []
|
||||||
images = [Image.fromarray(img) for img in images]
|
for i in range(rows):
|
||||||
|
for j in range(cols):
|
||||||
|
block = view_images[
|
||||||
|
i * img_size : (i + 1) * img_size,
|
||||||
|
j * img_size : (j + 1) * img_size,
|
||||||
|
:,
|
||||||
|
]
|
||||||
|
blocks.append(Image.fromarray(block))
|
||||||
|
|
||||||
return images
|
return blocks
|
||||||
|
|
||||||
|
|
||||||
|
def enhance_image(
    image: Image.Image,
    contrast_factor: float = 1.3,
    color_factor: float = 1.2,
    brightness_factor: float = 0.95,
) -> Image.Image:
    """Adjust contrast, color and brightness of an image, in that order.

    Args:
        image: Input PIL image.
        contrast_factor: Factor passed to `ImageEnhance.Contrast`.
        color_factor: Factor passed to `ImageEnhance.Color`.
        brightness_factor: Factor passed to `ImageEnhance.Brightness`.

    Returns:
        The enhanced PIL image.
    """
    # Each enhancer is applied to the output of the previous one.
    adjustments = (
        (ImageEnhance.Contrast, contrast_factor),
        (ImageEnhance.Color, color_factor),
        (ImageEnhance.Brightness, brightness_factor),
    )
    result = image
    for enhancer_cls, factor in adjustments:
        result = enhancer_cls(result).enhance(factor)

    return result
|
||||||
|
|
||||||
|
|
||||||
def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
|
def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
|
||||||
@ -872,7 +906,14 @@ def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
|
|||||||
texture, d=5, sigmaColor=20, sigmaSpace=20
|
texture, d=5, sigmaColor=20, sigmaSpace=20
|
||||||
)
|
)
|
||||||
|
|
||||||
return texture
|
texture = enhance_image(
|
||||||
|
image=Image.fromarray(texture),
|
||||||
|
contrast_factor=1.3,
|
||||||
|
color_factor=1.2,
|
||||||
|
brightness_factor=0.95,
|
||||||
|
)
|
||||||
|
|
||||||
|
return np.array(texture)
|
||||||
|
|
||||||
|
|
||||||
def quat_mult(q1, q2):
|
def quat_mult(q1, q2):
|
||||||
|
|||||||
@ -29,6 +29,7 @@ from diffusers import (
|
|||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from embodied_gen.models.segment_model import RembgRemover
|
from embodied_gen.models.segment_model import RembgRemover
|
||||||
|
from embodied_gen.utils.log import logger
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"DelightingModel",
|
"DelightingModel",
|
||||||
@ -84,6 +85,7 @@ class DelightingModel(object):
|
|||||||
|
|
||||||
def _lazy_init_pipeline(self):
|
def _lazy_init_pipeline(self):
|
||||||
if self.pipeline is None:
|
if self.pipeline is None:
|
||||||
|
logger.info("Loading Delighting Model...")
|
||||||
pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
|
pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
|
||||||
self.model_path,
|
self.model_path,
|
||||||
torch_dtype=torch.float16,
|
torch_dtype=torch.float16,
|
||||||
|
|||||||
@ -43,7 +43,7 @@ __all__ = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
DISTRACTOR_NUM = 3 # Maximum number of distractor objects allowed
|
DISTRACTOR_NUM = 2 # Maximum number of distractor objects allowed
|
||||||
LAYOUT_DISASSEMBLE_PROMPT = f"""
|
LAYOUT_DISASSEMBLE_PROMPT = f"""
|
||||||
You are an intelligent 3D scene planner. Given a natural language
|
You are an intelligent 3D scene planner. Given a natural language
|
||||||
description of a robotic task, output a structured description of
|
description of a robotic task, output a structured description of
|
||||||
|
|||||||
@ -29,6 +29,7 @@ from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import (
|
|||||||
)
|
)
|
||||||
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
|
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
|
||||||
from embodied_gen.models.text_model import download_kolors_weights
|
from embodied_gen.models.text_model import download_kolors_weights
|
||||||
|
from embodied_gen.utils.log import logger
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"build_texture_gen_pipe",
|
"build_texture_gen_pipe",
|
||||||
@ -42,7 +43,7 @@ def build_texture_gen_pipe(
|
|||||||
device: str = "cuda",
|
device: str = "cuda",
|
||||||
) -> DiffusionPipeline:
|
) -> DiffusionPipeline:
|
||||||
download_kolors_weights(f"{base_ckpt_dir}/Kolors")
|
download_kolors_weights(f"{base_ckpt_dir}/Kolors")
|
||||||
|
logger.info(f"Load Kolors weights...")
|
||||||
tokenizer = ChatGLMTokenizer.from_pretrained(
|
tokenizer = ChatGLMTokenizer.from_pretrained(
|
||||||
f"{base_ckpt_dir}/Kolors/text_encoder"
|
f"{base_ckpt_dir}/Kolors/text_encoder"
|
||||||
)
|
)
|
||||||
|
|||||||
123
embodied_gen/scripts/gen_texture.py
Normal file
123
embodied_gen/scripts/gen_texture.py
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import tyro
|
||||||
|
from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
|
||||||
|
from embodied_gen.data.differentiable_render import entrypoint as drender_api
|
||||||
|
from embodied_gen.data.utils import as_list
|
||||||
|
from embodied_gen.models.delight_model import DelightingModel
|
||||||
|
from embodied_gen.models.sr_model import ImageRealESRGAN
|
||||||
|
from embodied_gen.scripts.render_mv import (
|
||||||
|
build_texture_gen_pipe,
|
||||||
|
)
|
||||||
|
from embodied_gen.scripts.render_mv import infer_pipe as render_mv_api
|
||||||
|
from embodied_gen.utils.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TextureGenConfig:
|
||||||
|
mesh_path: str | list[str]
|
||||||
|
prompt: str | list[str]
|
||||||
|
output_root: str
|
||||||
|
controlnet_cond_scale: float = 0.7
|
||||||
|
guidance_scale: float = 9
|
||||||
|
strength: float = 0.9
|
||||||
|
num_inference_steps: int = 40
|
||||||
|
delight: bool = True
|
||||||
|
seed: int = 0
|
||||||
|
base_ckpt_dir: str = "./weights"
|
||||||
|
texture_size: int = 2048
|
||||||
|
ip_adapt_scale: float = 0.0
|
||||||
|
ip_img_path: str | list[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def entrypoint() -> None:
    """Generate a textured mesh for every input mesh/prompt pair.

    Options are parsed from the command line into `TextureGenConfig`.
    Models are loaded once up front and reused for all meshes. For each
    mesh the steps are:

    1. Render condition images into `<output_root>/<uuid>/condition`.
    2. Generate multi-view color images into `.../multi_view`.
    3. Back-project the colors onto the mesh, writing `.obj` and `.glb`
       files into `.../texture_mesh`.
    4. Render a `color.mp4` preview of the textured mesh, copy it to
       `<output_root>/<uuid>/color.mp4` and remove intermediate folders.

    `uuid` is the mesh file name without its extension, so two meshes
    sharing a base name write to the same output folder.

    Raises:
        ValueError: If the number of prompts differs from the number of
            meshes, or fewer IP-Adapter images than meshes are given.
    """
    cfg = tyro.cli(TextureGenConfig)
    cfg.mesh_path = as_list(cfg.mesh_path)
    cfg.prompt = as_list(cfg.prompt)
    cfg.ip_img_path = as_list(cfg.ip_img_path)

    # Validate before the expensive model loading; explicit raises are
    # used because `assert` is stripped when Python runs with `-O`.
    if len(cfg.mesh_path) != len(cfg.prompt):
        raise ValueError(
            f"Got {len(cfg.mesh_path)} mesh path(s) but "
            f"{len(cfg.prompt)} prompt(s); they must match one-to-one."
        )
    if cfg.ip_img_path is not None and len(cfg.ip_img_path) < len(
        cfg.mesh_path
    ):
        raise ValueError(
            f"Got {len(cfg.ip_img_path)} ip_img_path item(s) for "
            f"{len(cfg.mesh_path)} mesh(es); provide one per mesh."
        )

    # Pre-load models.
    pipeline = build_texture_gen_pipe(
        # Honor the configured checkpoint directory (default "./weights").
        base_ckpt_dir=cfg.base_ckpt_dir,
        # Non-positive scales build the pipeline with a scale of 0.
        ip_adapt_scale=cfg.ip_adapt_scale if cfg.ip_adapt_scale > 0 else 0,
        device="cuda",
    )
    delight_model = DelightingModel() if cfg.delight else None
    imagesr_model = ImageRealESRGAN(outscale=4)

    for idx, (mesh_path, prompt) in enumerate(zip(cfg.mesh_path, cfg.prompt)):
        uuid = os.path.splitext(os.path.basename(mesh_path))[0]
        output_root = os.path.join(cfg.output_root, uuid)

        # Step 1: render condition images for the multi-view generator.
        drender_api(
            mesh_path=mesh_path,
            output_root=f"{output_root}/condition",
            uuid=uuid,
        )

        # Step 2: generate multi-view color images from the prompt.
        render_mv_api(
            index_file=f"{output_root}/condition/index.json",
            controlnet_cond_scale=cfg.controlnet_cond_scale,
            guidance_scale=cfg.guidance_scale,
            strength=cfg.strength,
            num_inference_steps=cfg.num_inference_steps,
            ip_adapt_scale=cfg.ip_adapt_scale,
            ip_img_path=(
                None if cfg.ip_img_path is None else cfg.ip_img_path[idx]
            ),
            prompt=prompt,
            save_dir=f"{output_root}/multi_view",
            sub_idxs=[[0, 1, 2], [3, 4, 5]],
            pipeline=pipeline,
            seed=cfg.seed,
        )

        # Step 3: back-project the generated colors onto the mesh.
        backproject_api(
            delight_model=delight_model,
            imagesr_model=imagesr_model,
            mesh_path=mesh_path,
            color_path=f"{output_root}/multi_view/color_sample0.png",
            output_path=f"{output_root}/texture_mesh/{uuid}.obj",
            save_glb_path=f"{output_root}/texture_mesh/{uuid}.glb",
            skip_fix_mesh=True,
            delight=cfg.delight,
            no_save_delight_img=True,
            texture_wh=[cfg.texture_size, cfg.texture_size],
        )

        # Step 4: render a preview video of the textured mesh.
        drender_api(
            mesh_path=f"{output_root}/texture_mesh/{uuid}.obj",
            output_root=f"{output_root}/texture_mesh",
            uuid=uuid,
            num_images=90,
            elevation=[20],
            with_mtl=True,
            gen_color_mp4=True,
            pbr_light_factor=1.2,
        )

        # Re-organize folders: keep only the preview video and the
        # textured mesh, drop intermediate render outputs.
        shutil.rmtree(f"{output_root}/condition")
        shutil.copy(
            f"{output_root}/texture_mesh/{uuid}/color.mp4",
            f"{output_root}/color.mp4",
        )
        shutil.rmtree(f"{output_root}/texture_mesh/{uuid}")

        logger.info(
            f"Successfully generate textured mesh in {output_root}/texture_mesh"
        )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
entrypoint()
|
||||||
@ -108,6 +108,9 @@ def parse_args():
|
|||||||
default=2,
|
default=2,
|
||||||
)
|
)
|
||||||
parser.add_argument("--disable_decompose_convex", action="store_true")
|
parser.add_argument("--disable_decompose_convex", action="store_true")
|
||||||
|
parser.add_argument(
|
||||||
|
"--texture_wh", type=int, nargs=2, default=[2048, 2048]
|
||||||
|
)
|
||||||
args, unknown = parser.parse_known_args()
|
args, unknown = parser.parse_known_args()
|
||||||
|
|
||||||
return args
|
return args
|
||||||
@ -209,11 +212,17 @@ def entrypoint(**kwargs):
|
|||||||
device="cpu",
|
device="cpu",
|
||||||
)
|
)
|
||||||
color_path = os.path.join(output_root, "color.png")
|
color_path = os.path.join(output_root, "color.png")
|
||||||
render_gs_api(aligned_gs_path, color_path)
|
render_gs_api(
|
||||||
|
input_gs=aligned_gs_path,
|
||||||
geo_flag, geo_result = GEO_CHECKER(
|
output_path=color_path,
|
||||||
[color_path], text=asset_node
|
elevation=[20, -10, 60, -50],
|
||||||
|
num_images=12,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
color_img = Image.open(color_path)
|
||||||
|
keep_height = int(color_img.height * 2 / 3)
|
||||||
|
crop_img = color_img.crop((0, 0, color_img.width, keep_height))
|
||||||
|
geo_flag, geo_result = GEO_CHECKER([crop_img], text=asset_node)
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"{GEO_CHECKER.__class__.__name__}: {geo_result} for {seg_path}"
|
f"{GEO_CHECKER.__class__.__name__}: {geo_result} for {seg_path}"
|
||||||
)
|
)
|
||||||
@ -246,7 +255,9 @@ def entrypoint(**kwargs):
|
|||||||
output_path=mesh_obj_path,
|
output_path=mesh_obj_path,
|
||||||
skip_fix_mesh=False,
|
skip_fix_mesh=False,
|
||||||
delight=True,
|
delight=True,
|
||||||
texture_wh=[2048, 2048],
|
texture_wh=args.texture_wh,
|
||||||
|
elevation=[20, -10, 60, -50],
|
||||||
|
num_images=12,
|
||||||
)
|
)
|
||||||
|
|
||||||
mesh_glb_path = os.path.join(output_root, f"{filename}.glb")
|
mesh_glb_path = os.path.join(output_root, f"{filename}.glb")
|
||||||
|
|||||||
@ -18,12 +18,11 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import os
|
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
|
||||||
import spaces
|
import spaces
|
||||||
import torch
|
import torch
|
||||||
|
from PIL import Image
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from embodied_gen.data.utils import (
|
from embodied_gen.data.utils import (
|
||||||
CameraSetting,
|
CameraSetting,
|
||||||
@ -31,6 +30,7 @@ from embodied_gen.data.utils import (
|
|||||||
normalize_vertices_array,
|
normalize_vertices_array,
|
||||||
)
|
)
|
||||||
from embodied_gen.models.gs_model import GaussianOperator
|
from embodied_gen.models.gs_model import GaussianOperator
|
||||||
|
from embodied_gen.utils.process_media import combine_images_to_grid
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
|
format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
|
||||||
@ -104,7 +104,7 @@ def load_gs_model(
|
|||||||
# Normalize vertices to [-1, 1], center to (0, 0, 0).
|
# Normalize vertices to [-1, 1], center to (0, 0, 0).
|
||||||
_, scale, center = normalize_vertices_array(gs_model._means)
|
_, scale, center = normalize_vertices_array(gs_model._means)
|
||||||
scale, center = float(scale), center.tolist()
|
scale, center = float(scale), center.tolist()
|
||||||
transpose = [*[-v for v in center], *pre_quat]
|
transpose = [*[v for v in center], *pre_quat]
|
||||||
instance_pose = torch.tensor(transpose).to(gs_model.device)
|
instance_pose = torch.tensor(transpose).to(gs_model.device)
|
||||||
gs_model = gs_model.get_gaussians(instance_pose=instance_pose)
|
gs_model = gs_model.get_gaussians(instance_pose=instance_pose)
|
||||||
gs_model.rescale(scale)
|
gs_model.rescale(scale)
|
||||||
@ -113,12 +113,11 @@ def load_gs_model(
|
|||||||
|
|
||||||
|
|
||||||
@spaces.GPU
|
@spaces.GPU
|
||||||
def entrypoint(input_gs: str = None, output_path: str = None) -> None:
|
def entrypoint(**kwargs) -> None:
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
if isinstance(input_gs, str):
|
for k, v in kwargs.items():
|
||||||
args.input_gs = input_gs
|
if hasattr(args, k) and v is not None:
|
||||||
if isinstance(output_path, str):
|
setattr(args, k, v)
|
||||||
args.output_path = output_path
|
|
||||||
|
|
||||||
# Setup camera parameters
|
# Setup camera parameters
|
||||||
camera_params = CameraSetting(
|
camera_params = CameraSetting(
|
||||||
@ -129,7 +128,7 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
|
|||||||
fov=math.radians(args.fov),
|
fov=math.radians(args.fov),
|
||||||
device=args.device,
|
device=args.device,
|
||||||
)
|
)
|
||||||
camera = init_kal_camera(camera_params)
|
camera = init_kal_camera(camera_params, flip_az=True)
|
||||||
matrix_mv = camera.view_matrix() # (n_cam 4 4) world2cam
|
matrix_mv = camera.view_matrix() # (n_cam 4 4) world2cam
|
||||||
matrix_mv[:, :3, 3] = -matrix_mv[:, :3, 3]
|
matrix_mv[:, :3, 3] = -matrix_mv[:, :3, 3]
|
||||||
w2cs = matrix_mv.to(camera_params.device)
|
w2cs = matrix_mv.to(camera_params.device)
|
||||||
@ -153,21 +152,11 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
|
|||||||
(args.image_size, args.image_size),
|
(args.image_size, args.image_size),
|
||||||
interpolation=cv2.INTER_AREA,
|
interpolation=cv2.INTER_AREA,
|
||||||
)
|
)
|
||||||
images.append(color)
|
color = cv2.cvtColor(color, cv2.COLOR_BGRA2RGBA)
|
||||||
|
images.append(Image.fromarray(color))
|
||||||
|
|
||||||
# Cat color images into grid image and save.
|
combine_images_to_grid(images, image_mode="RGBA")[0].save(args.output_path)
|
||||||
select_idxs = [[0, 2, 1], [5, 4, 3]] # fix order for 6 views
|
|
||||||
grid_image = []
|
|
||||||
for row_idxs in select_idxs:
|
|
||||||
row_image = []
|
|
||||||
for row_idx in row_idxs:
|
|
||||||
row_image.append(images[row_idx])
|
|
||||||
row_image = np.concatenate(row_image, axis=1)
|
|
||||||
grid_image.append(row_image)
|
|
||||||
|
|
||||||
grid_image = np.concatenate(grid_image, axis=0)
|
|
||||||
os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
|
|
||||||
cv2.imwrite(args.output_path, grid_image)
|
|
||||||
logger.info(f"Saved grid image to {args.output_path}")
|
logger.info(f"Saved grid image to {args.output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -170,7 +170,8 @@ def entrypoint(**kwargs):
|
|||||||
for node in actions:
|
for node in actions:
|
||||||
if actions[node] is None:
|
if actions[node] is None:
|
||||||
continue
|
continue
|
||||||
for action in tqdm(actions[node]):
|
logger.info(f"Render SIM grasping in camera {idx} for {node}...")
|
||||||
|
for action in actions[node]:
|
||||||
grasp_frames = scene_manager.step_action(
|
grasp_frames = scene_manager.step_action(
|
||||||
agent,
|
agent,
|
||||||
torch.Tensor(action[None, ...]),
|
torch.Tensor(action[None, ...]),
|
||||||
|
|||||||
@ -28,6 +28,7 @@ if [[ -z "$mesh_path" || -z "$prompt" || -z "$output_root" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo "Will be deprecated, recommended to use 'texture-cli' instead."
|
||||||
uuid=$(basename "$output_root")
|
uuid=$(basename "$output_root")
|
||||||
# Step 1: drender-cli for condition rendering
|
# Step 1: drender-cli for condition rendering
|
||||||
drender-cli --mesh_path ${mesh_path} \
|
drender-cli --mesh_path ${mesh_path} \
|
||||||
|
|||||||
@ -49,6 +49,7 @@ __all__ = [
|
|||||||
"is_image_file",
|
"is_image_file",
|
||||||
"parse_text_prompts",
|
"parse_text_prompts",
|
||||||
"check_object_edge_truncated",
|
"check_object_edge_truncated",
|
||||||
|
"vcat_pil_images",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -166,6 +167,7 @@ def combine_images_to_grid(
|
|||||||
images: list[str | Image.Image],
|
images: list[str | Image.Image],
|
||||||
cat_row_col: tuple[int, int] = None,
|
cat_row_col: tuple[int, int] = None,
|
||||||
target_wh: tuple[int, int] = (512, 512),
|
target_wh: tuple[int, int] = (512, 512),
|
||||||
|
image_mode: str = "RGB",
|
||||||
) -> list[Image.Image]:
|
) -> list[Image.Image]:
|
||||||
n_images = len(images)
|
n_images = len(images)
|
||||||
if n_images == 1:
|
if n_images == 1:
|
||||||
@ -178,13 +180,13 @@ def combine_images_to_grid(
|
|||||||
n_row, n_col = cat_row_col
|
n_row, n_col = cat_row_col
|
||||||
|
|
||||||
images = [
|
images = [
|
||||||
Image.open(p).convert("RGB") if isinstance(p, str) else p
|
Image.open(p).convert(image_mode) if isinstance(p, str) else p
|
||||||
for p in images
|
for p in images
|
||||||
]
|
]
|
||||||
images = [img.resize(target_wh) for img in images]
|
images = [img.resize(target_wh) for img in images]
|
||||||
|
|
||||||
grid_w, grid_h = n_col * target_wh[0], n_row * target_wh[1]
|
grid_w, grid_h = n_col * target_wh[0], n_row * target_wh[1]
|
||||||
grid = Image.new("RGB", (grid_w, grid_h), (0, 0, 0))
|
grid = Image.new(image_mode, (grid_w, grid_h), (0, 0, 0))
|
||||||
|
|
||||||
for idx, img in enumerate(images):
|
for idx, img in enumerate(images):
|
||||||
row, col = divmod(idx, n_col)
|
row, col = divmod(idx, n_col)
|
||||||
@ -435,6 +437,21 @@ def check_object_edge_truncated(
|
|||||||
return not (top or bottom or left or right)
|
return not (top or bottom or left or right)
|
||||||
|
|
||||||
|
|
||||||
|
def vcat_pil_images(
    images: list[Image.Image], image_mode: str = "RGB"
) -> Image.Image:
    """Concatenate PIL images vertically into a single image.

    Images are pasted top to bottom in the given order, left-aligned at
    x=0. The output is as wide as the widest input and as tall as the sum
    of all input heights; areas beside narrower images keep the default
    fill of `Image.new`.

    Args:
        images: Non-empty list of PIL images to stack.
        image_mode: Mode of the output image, e.g. "RGB" or "RGBA".

    Returns:
        The vertically stacked image.

    Raises:
        ValueError: If `images` is empty.
    """
    # Fail with a clear message instead of a tuple-unpacking error.
    if not images:
        raise ValueError("`images` must contain at least one image.")

    widths, heights = zip(*(img.size for img in images))
    new_image = Image.new(image_mode, (max(widths), sum(heights)))

    y_offset = 0
    for image, height in zip(images, heights):
        new_image.paste(image, (0, y_offset))
        y_offset += height

    return new_image
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
image_paths = [
|
image_paths = [
|
||||||
"outputs/layouts_sim/task_0000/images/pen.png",
|
"outputs/layouts_sim/task_0000/images/pen.png",
|
||||||
|
|||||||
@ -249,7 +249,7 @@ class SemanticConsistChecker(BaseChecker):
|
|||||||
fewer than four legs or if the legs are unevenly distributed, are not allowed. Do not assume
|
fewer than four legs or if the legs are unevenly distributed, are not allowed. Do not assume
|
||||||
hidden legs unless they are clearly visible.)
|
hidden legs unless they are clearly visible.)
|
||||||
- Geometric completeness is required: the object must not have missing, truncated, or cropped parts.
|
- Geometric completeness is required: the object must not have missing, truncated, or cropped parts.
|
||||||
- The image must contain exactly one object. Multiple distinct objects are not allowed.
|
- The image must contain exactly one object. Multiple distinct objects (e.g. multiple pens) are not allowed.
|
||||||
A single composite object (e.g., a chair with legs) is acceptable.
|
A single composite object (e.g., a chair with legs) is acceptable.
|
||||||
- The object should be shown from a slightly angled (three-quarter) perspective,
|
- The object should be shown from a slightly angled (three-quarter) perspective,
|
||||||
not a flat, front-facing view showing only one surface.
|
not a flat, front-facing view showing only one surface.
|
||||||
|
|||||||
@ -266,7 +266,7 @@ class URDFGenerator(object):
|
|||||||
if self.decompose_convex:
|
if self.decompose_convex:
|
||||||
try:
|
try:
|
||||||
d_params = dict(
|
d_params = dict(
|
||||||
threshold=0.05, max_convex_hull=64, verbose=False
|
threshold=0.05, max_convex_hull=100, verbose=False
|
||||||
)
|
)
|
||||||
filename = f"{os.path.splitext(obj_name)[0]}_collision.ply"
|
filename = f"{os.path.splitext(obj_name)[0]}_collision.ply"
|
||||||
output_path = os.path.join(mesh_folder, filename)
|
output_path = os.path.join(mesh_folder, filename)
|
||||||
|
|||||||
@ -31,6 +31,7 @@ drender-cli = "embodied_gen.data.differentiable_render:entrypoint"
|
|||||||
backproject-cli = "embodied_gen.data.backproject_v2:entrypoint"
|
backproject-cli = "embodied_gen.data.backproject_v2:entrypoint"
|
||||||
img3d-cli = "embodied_gen.scripts.imageto3d:entrypoint"
|
img3d-cli = "embodied_gen.scripts.imageto3d:entrypoint"
|
||||||
text3d-cli = "embodied_gen.scripts.textto3d:text_to_3d"
|
text3d-cli = "embodied_gen.scripts.textto3d:text_to_3d"
|
||||||
|
texture-cli = "embodied_gen.scripts.gen_texture:entrypoint"
|
||||||
scene3d-cli = "embodied_gen.scripts.gen_scene3d:entrypoint"
|
scene3d-cli = "embodied_gen.scripts.gen_scene3d:entrypoint"
|
||||||
layout-cli = "embodied_gen.scripts.gen_layout:entrypoint"
|
layout-cli = "embodied_gen.scripts.gen_layout:entrypoint"
|
||||||
sim-cli = "embodied_gen.scripts.simulate_sapien:entrypoint"
|
sim-cli = "embodied_gen.scripts.simulate_sapien:entrypoint"
|
||||||
|
|||||||
@ -142,6 +142,7 @@ def test_semantic_checker(semantic_checker):
|
|||||||
("desk", "outputs/utest_cases/semantic_checker/task_0016_desk.png"),
|
("desk", "outputs/utest_cases/semantic_checker/task_0016_desk.png"),
|
||||||
("shelf", "outputs/utest_cases/semantic_checker/task_0018_shelf.png"),
|
("shelf", "outputs/utest_cases/semantic_checker/task_0018_shelf.png"),
|
||||||
("table", "outputs/utest_cases/semantic_checker/task_0000_table.png"),
|
("table", "outputs/utest_cases/semantic_checker/task_0000_table.png"),
|
||||||
|
("pen", "outputs/layouts_gens2/task_0000/images/pen_raw.png"),
|
||||||
]
|
]
|
||||||
for test_case in test_cases:
|
for test_case in test_cases:
|
||||||
flag, result = semantic_checker(
|
flag, result = semantic_checker(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user