🐛 fix txt bug

2025-09-23 11:12:14 +08:00 · 2025-09-23 11:12:14 +08:00 · e8ed31d335
commit e8ed31d335
parent 73ce51c611
2 changed files with 32 additions and 12 deletions
--- a/ops_mm_embedding_v1.py
+++ b/ops_mm_embedding_v1.py
@@ -22,6 +22,8 @@ class OpsMMEmbeddingV1(nn.Module):
        load_in_4bit: bool = False,
        load_in_8bit: bool = False,
        torch_dtype: Optional[torch.dtype] = torch.bfloat16,
+        processor_min_pixels: int = 128 * 28 * 28,
+        processor_max_pixels: int = 512 * 28 * 28,
    ):
        super().__init__()
        self.device = device
@@ -49,7 +51,12 @@ class OpsMMEmbeddingV1(nn.Module):
        if device_map is None:
            self.base_model = self.base_model.to(self.device)
-        self.processor = AutoProcessor.from_pretrained(model_name, min_pixels=256 * 28 * 28, max_pixels=1280 * 28 * 28)
+        # Use configurable pixel limits to control VRAM usage
+        self.processor = AutoProcessor.from_pretrained(
+            model_name,
+            min_pixels=processor_min_pixels,
+            max_pixels=processor_max_pixels,
+        )
        self.processor.tokenizer.padding_side = "left"
        self.eval()
@@ -139,17 +146,27 @@ class OpsMMEmbeddingV1(nn.Module):
            input_texts.append(msg)
            input_images.append(processed_image)
-        # Only pass to processor if we actually have images
-        processed_images = input_images if any(img is not None for img in input_images) else None
-
-        inputs = self.processor(
-            text=input_texts,
-            images=processed_images,
-            padding=True,
-            truncation=True,
-            max_length=self.max_length,
-            return_tensors="pt",
-        )
+        # Only pass images when present; some processors expect paired inputs and
+        # can raise unpack errors if we pass images=None with multi-modal processor.
+        has_images = any(img is not None for img in input_images)
+        if has_images:
+            processed_images = input_images
+            inputs = self.processor(
+                text=input_texts,
+                images=processed_images,
+                padding=True,
+                truncation=True,
+                max_length=self.max_length,
+                return_tensors="pt",
+            )
+        else:
+            inputs = self.processor(
+                text=input_texts,
+                padding=True,
+                truncation=True,
+                max_length=self.max_length,
+                return_tensors="pt",
+            )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        with torch.inference_mode():
--- a/run_server.sh
+++ b/run_server.sh
@@ -7,5 +7,8 @@ export CUDA_VISIBLE_DEVICES=0,1
 # Unbuffered stdout for real-time logs
 export PYTHONUNBUFFERED=1
 # Help PyTorch allocator avoid fragmentation (see OOM hint)
 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 # Start the local web app
 exec python3 web_app_local.py "$@"