🐛 Fix text-only input bug (do not pass images=None to the processor)
This commit is contained in:
parent
73ce51c611
commit
e8ed31d335
@ -22,6 +22,8 @@ class OpsMMEmbeddingV1(nn.Module):
|
|||||||
load_in_4bit: bool = False,
|
load_in_4bit: bool = False,
|
||||||
load_in_8bit: bool = False,
|
load_in_8bit: bool = False,
|
||||||
torch_dtype: Optional[torch.dtype] = torch.bfloat16,
|
torch_dtype: Optional[torch.dtype] = torch.bfloat16,
|
||||||
|
processor_min_pixels: int = 128 * 28 * 28,
|
||||||
|
processor_max_pixels: int = 512 * 28 * 28,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.device = device
|
self.device = device
|
||||||
@ -49,7 +51,12 @@ class OpsMMEmbeddingV1(nn.Module):
|
|||||||
if device_map is None:
|
if device_map is None:
|
||||||
self.base_model = self.base_model.to(self.device)
|
self.base_model = self.base_model.to(self.device)
|
||||||
|
|
||||||
self.processor = AutoProcessor.from_pretrained(model_name, min_pixels=256 * 28 * 28, max_pixels=1280 * 28 * 28)
|
# Use configurable pixel limits to control VRAM usage
|
||||||
|
self.processor = AutoProcessor.from_pretrained(
|
||||||
|
model_name,
|
||||||
|
min_pixels=processor_min_pixels,
|
||||||
|
max_pixels=processor_max_pixels,
|
||||||
|
)
|
||||||
self.processor.tokenizer.padding_side = "left"
|
self.processor.tokenizer.padding_side = "left"
|
||||||
self.eval()
|
self.eval()
|
||||||
|
|
||||||
@ -139,17 +146,27 @@ class OpsMMEmbeddingV1(nn.Module):
|
|||||||
input_texts.append(msg)
|
input_texts.append(msg)
|
||||||
input_images.append(processed_image)
|
input_images.append(processed_image)
|
||||||
|
|
||||||
# Only pass to processor if we actually have images
|
# Only pass images when present; some processors expect paired inputs and
|
||||||
processed_images = input_images if any(img is not None for img in input_images) else None
|
# can raise unpack errors if we pass images=None to a multi-modal processor.
|
||||||
|
has_images = any(img is not None for img in input_images)
|
||||||
inputs = self.processor(
|
if has_images:
|
||||||
text=input_texts,
|
processed_images = input_images
|
||||||
images=processed_images,
|
inputs = self.processor(
|
||||||
padding=True,
|
text=input_texts,
|
||||||
truncation=True,
|
images=processed_images,
|
||||||
max_length=self.max_length,
|
padding=True,
|
||||||
return_tensors="pt",
|
truncation=True,
|
||||||
)
|
max_length=self.max_length,
|
||||||
|
return_tensors="pt",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
inputs = self.processor(
|
||||||
|
text=input_texts,
|
||||||
|
padding=True,
|
||||||
|
truncation=True,
|
||||||
|
max_length=self.max_length,
|
||||||
|
return_tensors="pt",
|
||||||
|
)
|
||||||
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
||||||
|
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
|
|||||||
@ -7,5 +7,8 @@ export CUDA_VISIBLE_DEVICES=0,1
|
|||||||
# Unbuffered stdout for real-time logs
|
# Unbuffered stdout for real-time logs
|
||||||
export PYTHONUNBUFFERED=1
|
export PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
# Help the PyTorch CUDA allocator avoid fragmentation (see the hint in the CUDA OOM error message)
|
||||||
|
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
||||||
|
|
||||||
# Start the local web app
|
# Start the local web app
|
||||||
exec python3 web_app_local.py "$@"
|
exec python3 web_app_local.py "$@"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user