🐛 Fix text-only input bug: omit `images` from the processor call when no images are present

2025-09-23 11:12:14 +08:00 · 2025-09-23 11:12:14 +08:00 · e8ed31d335
commit e8ed31d335
parent 73ce51c611
2 changed files with 32 additions and 12 deletions
--- a/ops_mm_embedding_v1.py
+++ b/ops_mm_embedding_v1.py
@@ -22,6 +22,8 @@ class OpsMMEmbeddingV1(nn.Module):
        load_in_4bit: bool = False,
        load_in_8bit: bool = False,
        torch_dtype: Optional[torch.dtype] = torch.bfloat16,
+        processor_min_pixels: int = 128 * 28 * 28,
+        processor_max_pixels: int = 512 * 28 * 28,
    ):
        super().__init__()
        self.device = device
@@ -49,7 +51,12 @@ class OpsMMEmbeddingV1(nn.Module):
        if device_map is None:
            self.base_model = self.base_model.to(self.device)

-        self.processor = AutoProcessor.from_pretrained(model_name, min_pixels=256 * 28 * 28, max_pixels=1280 * 28 * 28)
+        # Use configurable pixel limits to control VRAM usage
+        self.processor = AutoProcessor.from_pretrained(
+            model_name,
+            min_pixels=processor_min_pixels,
+            max_pixels=processor_max_pixels,
+        )
        self.processor.tokenizer.padding_side = "left"
        self.eval()

@@ -139,17 +146,27 @@ class OpsMMEmbeddingV1(nn.Module):
            input_texts.append(msg)
            input_images.append(processed_image)

-        # Only pass to processor if we actually have images
-        processed_images = input_images if any(img is not None for img in input_images) else None
-
-        inputs = self.processor(
-            text=input_texts,
-            images=processed_images,
-            padding=True,
-            truncation=True,
-            max_length=self.max_length,
-            return_tensors="pt",
-        )
+        # Pass `images` only when at least one image is present: some multi-modal
+        # processors expect paired inputs and can raise unpack errors on images=None.
+        has_images = any(img is not None for img in input_images)
+        if has_images:
+            processed_images = input_images
+            inputs = self.processor(
+                text=input_texts,
+                images=processed_images,
+                padding=True,
+                truncation=True,
+                max_length=self.max_length,
+                return_tensors="pt",
+            )
+        else:
+            inputs = self.processor(
+                text=input_texts,
+                padding=True,
+                truncation=True,
+                max_length=self.max_length,
+                return_tensors="pt",
+            )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.inference_mode():
--- a/run_server.sh
+++ b/run_server.sh
@@ -7,5 +7,8 @@ export CUDA_VISIBLE_DEVICES=0,1
 # Unbuffered stdout for real-time logs
 export PYTHONUNBUFFERED=1

+# Help the PyTorch CUDA caching allocator avoid fragmentation (setting suggested by PyTorch's CUDA out-of-memory error message)
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
 # Start the local web app
 exec python3 web_app_local.py "$@"