From 5f810f3574edae94dd51d8eb200d64d92fe456a7 Mon Sep 17 00:00:00 2001 From: Xinjie Date: Fri, 13 Jun 2025 01:09:49 +0800 Subject: [PATCH] chore(model): Put image_encoder to cuda to adapt to hf zero-gpu. (#7) chore(model): Put image_encoder to cuda to adapt to hf zero-gpu. --- README.md | 29 ++++----------- embodied_gen/models/text_model.py | 1 + install.sh | 59 +++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 22 deletions(-) create mode 100644 install.sh diff --git a/README.md b/README.md index 27d4b4b..9983225 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ You can choose between two backends for the GPT agent: [![🤗 Hugging Face](https://img.shields.io/badge/🤗-Image_to_3D_Demo-blue)](https://huggingface.co/spaces/HorizonRobotics/EmbodiedGen-Image-to-3D) Generate physically plausible 3D asset from input image. -### Local Service +### Service Run the image-to-3D generation service locally. The first run will download required models. ```sh @@ -61,7 +61,7 @@ python apps/image_to_3d.py CUDA_VISIBLE_DEVICES=0 nohup python apps/image_to_3d.py > /dev/null 2>&1 & ``` -### Local API +### API Generate a 3D model from an image using the command-line API. ```sh @@ -79,14 +79,14 @@ python3 embodied_gen/scripts/imageto3d.py \ [![🤗 Hugging Face](https://img.shields.io/badge/🤗-Text_to_3D_Demo-blue)](https://huggingface.co/spaces/HorizonRobotics/EmbodiedGen-Text-to-3D) Create 3D assets from text descriptions for a wide range of geometry and styles. -### Local Service +### Service Run the text-to-3D generation service locally. ```sh python apps/text_to_3d.py ``` -### Local API +### API ```sh bash embodied_gen/scripts/textto3d.sh \ @@ -101,14 +101,14 @@ bash embodied_gen/scripts/textto3d.sh \ [![🤗 Hugging Face](https://img.shields.io/badge/🤗-Texture_Gen_Demo-blue)](https://huggingface.co/spaces/HorizonRobotics/EmbodiedGen-Texture-Gen) Generate visually rich textures for 3D mesh. -### Local Service +### Service Run the texture generation service locally. 
```sh python apps/texture_edit.py ``` -### Local API +### API Generate textures for a 3D mesh using a text prompt. ```sh @@ -154,23 +154,8 @@ Coming Soon ## 🙌 Acknowledgement EmbodiedGen builds upon the following amazing projects and models: +🌟 [Trellis](https://github.com/microsoft/TRELLIS) | 🌟 [Hunyuan-Delight](https://huggingface.co/tencent/Hunyuan3D-2/tree/main/hunyuan3d-delight-v2-0) | 🌟 [Segment Anything](https://github.com/facebookresearch/segment-anything) | 🌟 [Rembg](https://github.com/danielgatis/rembg) | 🌟 [RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4) | 🌟 [Stable Diffusion x4](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler) | 🌟 [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) | 🌟 [Kolors](https://github.com/Kwai-Kolors/Kolors) | 🌟 [ChatGLM3](https://github.com/THUDM/ChatGLM3) | 🌟 [Aesthetic Score](http://captions.christoph-schuhmann.de/aesthetic_viz_laion_sac+logos+ava1-l14-linearMSE-en-2.37B.html) | 🌟 [Pano2Room](https://github.com/TrickyGo/Pano2Room) | 🌟 [Diffusion360](https://github.com/ArcherFMY/SD-T2I-360PanoImage) | 🌟 [Kaolin](https://github.com/NVIDIAGameWorks/kaolin) | 🌟 [diffusers](https://github.com/huggingface/diffusers) | 🌟 [gsplat](https://github.com/nerfstudio-project/gsplat) | 🌟 GPT: QWEN2.5VL, GPT4o -- 🌟 [Trellis](https://github.com/microsoft/TRELLIS) -- 🌟 [Hunyuan-Delight](https://huggingface.co/tencent/Hunyuan3D-2/tree/main/hunyuan3d-delight-v2-0) -- 🌟 [Segment Anything Model](https://github.com/facebookresearch/segment-anything) -- 🌟 [Rembg: a tool to remove images background](https://github.com/danielgatis/rembg) -- 🌟 [RMBG-1.4: BRIA Background Removal](https://huggingface.co/briaai/RMBG-1.4) -- 🌟 [stable-diffusion-x4-upscaler](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler) -- 🌟 [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) -- 🌟 [Kolors](https://github.com/Kwai-Kolors/Kolors) -- 🌟 [ChatGLM3](https://github.com/THUDM/ChatGLM3) -- 🌟 [Aesthetic Score 
Model](http://captions.christoph-schuhmann.de/aesthetic_viz_laion_sac+logos+ava1-l14-linearMSE-en-2.37B.html) -- 🌟 [Pano2Room](https://github.com/TrickyGo/Pano2Room) -- 🌟 [Diffusion360](https://github.com/ArcherFMY/SD-T2I-360PanoImage) -- 🌟 [kaolin](https://github.com/NVIDIAGameWorks/kaolin) -- 🌟 [diffusers](https://github.com/huggingface/diffusers) -- 🌟 [gsplat](https://github.com/nerfstudio-project/gsplat) -- 🌟 GPT: QWEN2.5VL, GPT4o --- diff --git a/embodied_gen/models/text_model.py b/embodied_gen/models/text_model.py index 109762b..16857de 100644 --- a/embodied_gen/models/text_model.py +++ b/embodied_gen/models/text_model.py @@ -94,6 +94,7 @@ def build_text2img_ip_pipeline( pipe.set_ip_adapter_scale([ref_scale]) pipe = pipe.to(device) + pipe.image_encoder = pipe.image_encoder.to(device) pipe.enable_model_cpu_offload() # pipe.enable_xformers_memory_efficient_attention() # pipe.enable_vae_slicing() diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..535d847 --- /dev/null +++ b/install.sh @@ -0,0 +1,59 @@ +#!/bin/bash +set -e + +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' + +echo -e "${GREEN}Starting installation process...${NC}" +git config --global http.postBuffer 524288000 + +echo -e "${GREEN}Installing dependencies from requirements.txt...${NC}" +pip install -r requirements.txt --use-deprecated=legacy-resolver --default-timeout=60 || { + echo -e "${RED}Failed to install requirements${NC}" + exit 1 +} + + +echo -e "${GREEN}Installing kaolin from GitHub...${NC}" +pip install kaolin@git+https://github.com/NVIDIAGameWorks/kaolin.git@v0.16.0 || { + echo -e "${RED}Failed to install kaolin${NC}" + exit 1 +} + + +echo -e "${GREEN}Installing flash-attn...${NC}" +pip install flash-attn==2.7.0.post2 --no-build-isolation || { + echo -e "${RED}Failed to install flash-attn${NC}" + exit 1 +} + + +echo -e "${GREEN}Installing diff-gaussian-rasterization...${NC}" +TMP_DIR="/tmp/mip-splatting" +rm -rf "$TMP_DIR" +git clone --recursive 
https://github.com/autonomousvision/mip-splatting.git "$TMP_DIR" && \
+pip install "$TMP_DIR/submodules/diff-gaussian-rasterization" && \
+rm -rf "$TMP_DIR" || {
+    echo -e "${RED}Failed to clone or install diff-gaussian-rasterization${NC}"
+    rm -rf "$TMP_DIR"  # remove the scratch clone before aborting
+    exit 1
+}
+echo -e "${GREEN}diff-gaussian-rasterization installed successfully.${NC}"  # step-level message only; more install steps follow
+
+
+echo -e "${GREEN}Installing gsplat from GitHub...${NC}"
+pip install git+https://github.com/nerfstudio-project/gsplat.git@v1.5.0 || {
+    echo -e "${RED}Failed to install gsplat${NC}"
+    exit 1
+}
+
+
+echo -e "${GREEN}Installing EmbodiedGen...${NC}"
+pip install -e . || {
+    echo -e "${RED}Failed to install local package${NC}"
+    exit 1
+}
+
+echo -e "${GREEN}Installation completed successfully!${NC}"  # printed once, after every step has succeeded
+