[Bugfix] Fix image edit RoPE crash when explicit height/width are provided (#1265)

lishunyang12 · hsliuustc0106 · web-flow · commit 8a9644c8213b · 2026-02-10T21:18:36.000Z
Signed-off-by: lishunyang <lishunyang12@163.com>
Co-authored-by: Hongsheng Liu <liuhongsheng4@huawei.com>
diff --git a/examples/online_serving/image_to_image/openai_chat_client.py b/examples/online_serving/image_to_image/openai_chat_client.py
@@ -76,6 +76,10 @@ def edit_image(
 
     # Build extra_body with generation parameters
     extra_body = {}
+    if height is not None:
+        extra_body["height"] = height
+    if width is not None:
+        extra_body["width"] = width
     if steps is not None:
         extra_body["num_inference_steps"] = steps
     if guidance_scale is not None:
diff --git a/examples/online_serving/image_to_image/run_curl_image_edit.sh b/examples/online_serving/image_to_image/run_curl_image_edit.sh
@@ -19,26 +19,25 @@ if [[ ! -f "$INPUT_IMG" ]]; then
   exit 1
 fi
 
-IMG_B64=$(base64 -w0 "$INPUT_IMG")
+REQUEST_JSON_FILE=$(mktemp)
+trap 'rm -f "$REQUEST_JSON_FILE"' EXIT
 
-REQUEST_JSON=$(
-  jq -n --arg prompt "$PROMPT" --arg img "$IMG_B64" '{
+# Pipe base64 into jq via stdin to avoid ARG_MAX limit on large images
+base64 -w0 "$INPUT_IMG" \
+  | jq -Rs --arg prompt "$PROMPT" '{
     messages: [{
       role: "user",
       content: [
         {"type": "text", "text": $prompt},
-        {"type": "image_url", "image_url": {"url": ("data:image/png;base64," + $img)}}
+        {"type": "image_url", "image_url": {"url": ("data:image/png;base64," + .)}}
       ]
     }],
     extra_body: {
-      height: 1024,
-      width: 1024,
       num_inference_steps: 50,
       guidance_scale: 1,
       seed: 42
     }
-  }'
-)
+  }' > "$REQUEST_JSON_FILE"
 
 echo "Generating edited image..."
 echo "Server: $SERVER"
@@ -48,7 +47,7 @@ echo "Output: $OUTPUT"
 
 curl -s "$SERVER/v1/chat/completions" \
   -H "Content-Type: application/json" \
-  -d "$REQUEST_JSON" \
+  -d @"$REQUEST_JSON_FILE" \
   | jq -r '.choices[0].message.content[0].image_url.url' \
   | cut -d',' -f2 \
   | base64 -d > "$OUTPUT"
diff --git a/vllm_omni/diffusion/models/qwen_image/pipeline_qwen_image_edit.py b/vllm_omni/diffusion/models/qwen_image/pipeline_qwen_image_edit.py
@@ -110,9 +110,9 @@ def pre_process_func(
             if image is not None and not (
                 isinstance(image, torch.Tensor) and len(image.shape) > 1 and image.shape[1] == latent_channels
             ):
-                image = image_processor.resize(image, height, width)
+                image = image_processor.resize(image, calculated_height, calculated_width)
                 prompt_image = image
-                image = image_processor.preprocess(image, height, width)
+                image = image_processor.preprocess(image, calculated_height, calculated_width)
                 image = image.unsqueeze(2)
 
                 # Store preprocessed image and prompt image in request