🐛 fix txt bug

This commit is contained in:
eust-w 2025-09-23 11:12:14 +08:00
parent 73ce51c611
commit e8ed31d335
2 changed files with 32 additions and 12 deletions

View File

@@ -22,6 +22,8 @@ class OpsMMEmbeddingV1(nn.Module):
load_in_4bit: bool = False,
load_in_8bit: bool = False,
torch_dtype: Optional[torch.dtype] = torch.bfloat16,
processor_min_pixels: int = 128 * 28 * 28,
processor_max_pixels: int = 512 * 28 * 28,
):
super().__init__()
self.device = device
@@ -49,7 +51,12 @@ class OpsMMEmbeddingV1(nn.Module):
if device_map is None:
self.base_model = self.base_model.to(self.device)
self.processor = AutoProcessor.from_pretrained(model_name, min_pixels=256 * 28 * 28, max_pixels=1280 * 28 * 28)
# Use configurable pixel limits to control VRAM usage
self.processor = AutoProcessor.from_pretrained(
model_name,
min_pixels=processor_min_pixels,
max_pixels=processor_max_pixels,
)
self.processor.tokenizer.padding_side = "left"
self.eval()
@@ -139,9 +146,11 @@ class OpsMMEmbeddingV1(nn.Module):
input_texts.append(msg)
input_images.append(processed_image)
# Only pass to processor if we actually have images
processed_images = input_images if any(img is not None for img in input_images) else None
# Only pass images when present; some processors expect paired inputs and
# can raise unpack errors if images=None is passed to a multi-modal processor.
has_images = any(img is not None for img in input_images)
if has_images:
processed_images = input_images
inputs = self.processor(
text=input_texts,
images=processed_images,
@@ -150,6 +159,14 @@ class OpsMMEmbeddingV1(nn.Module):
max_length=self.max_length,
return_tensors="pt",
)
else:
inputs = self.processor(
text=input_texts,
padding=True,
truncation=True,
max_length=self.max_length,
return_tensors="pt",
)
inputs = {k: v.to(self.device) for k, v in inputs.items()}
with torch.inference_mode():

View File

@@ -7,5 +7,8 @@ export CUDA_VISIBLE_DEVICES=0,1
# Unbuffered stdout for real-time logs
export PYTHONUNBUFFERED=1
# Help PyTorch allocator avoid fragmentation (see OOM hint)
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# Start the local web app
exec python3 web_app_local.py "$@"