🐛 Fix text-only input bug (do not pass images=None to the processor)
This commit is contained in:
parent
73ce51c611
commit
e8ed31d335
@@ -22,6 +22,8 @@ class OpsMMEmbeddingV1(nn.Module):
|
||||
load_in_4bit: bool = False,
|
||||
load_in_8bit: bool = False,
|
||||
torch_dtype: Optional[torch.dtype] = torch.bfloat16,
|
||||
processor_min_pixels: int = 128 * 28 * 28,
|
||||
processor_max_pixels: int = 512 * 28 * 28,
|
||||
):
|
||||
super().__init__()
|
||||
self.device = device
|
||||
@@ -49,7 +51,12 @@ class OpsMMEmbeddingV1(nn.Module):
|
||||
if device_map is None:
|
||||
self.base_model = self.base_model.to(self.device)
|
||||
|
||||
self.processor = AutoProcessor.from_pretrained(model_name, min_pixels=256 * 28 * 28, max_pixels=1280 * 28 * 28)
|
||||
# Use configurable pixel limits to control VRAM usage
|
||||
self.processor = AutoProcessor.from_pretrained(
|
||||
model_name,
|
||||
min_pixels=processor_min_pixels,
|
||||
max_pixels=processor_max_pixels,
|
||||
)
|
||||
self.processor.tokenizer.padding_side = "left"
|
||||
self.eval()
|
||||
|
||||
@@ -139,17 +146,27 @@ class OpsMMEmbeddingV1(nn.Module):
|
||||
input_texts.append(msg)
|
||||
input_images.append(processed_image)
|
||||
|
||||
# Only pass to processor if we actually have images
|
||||
processed_images = input_images if any(img is not None for img in input_images) else None
|
||||
|
||||
inputs = self.processor(
|
||||
text=input_texts,
|
||||
images=processed_images,
|
||||
padding=True,
|
||||
truncation=True,
|
||||
max_length=self.max_length,
|
||||
return_tensors="pt",
|
||||
)
|
||||
# Only pass images when present; some processors expect paired inputs and
|
||||
# can raise unpack errors if we pass images=None to a multi-modal processor.
|
||||
has_images = any(img is not None for img in input_images)
|
||||
if has_images:
|
||||
processed_images = input_images
|
||||
inputs = self.processor(
|
||||
text=input_texts,
|
||||
images=processed_images,
|
||||
padding=True,
|
||||
truncation=True,
|
||||
max_length=self.max_length,
|
||||
return_tensors="pt",
|
||||
)
|
||||
else:
|
||||
inputs = self.processor(
|
||||
text=input_texts,
|
||||
padding=True,
|
||||
truncation=True,
|
||||
max_length=self.max_length,
|
||||
return_tensors="pt",
|
||||
)
|
||||
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
||||
|
||||
with torch.inference_mode():
|
||||
|
||||
@@ -7,5 +7,8 @@ export CUDA_VISIBLE_DEVICES=0,1
|
||||
# Unbuffered stdout for real-time logs
|
||||
export PYTHONUNBUFFERED=1
|
||||
|
||||
# Help PyTorch allocator avoid fragmentation (see OOM hint)
|
||||
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
||||
|
||||
# Start the local web app
|
||||
exec python3 web_app_local.py "$@"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user