Commit 587ec72

Author: Open Source Contributor
Fix IndexError in sdpa_mask and flex_attention_mask for 0-D tensors during ONNX export

Fix for Issue #45735. When torch.onnx.export is called with ModernBERT, cache_position can be passed as a 0-dimensional (scalar) tensor, causing an IndexError when accessing cache_position.shape[0] or cache_position[0]. This fix handles the 0-D case by unsqueezing to 1-D before extracting the shape and offset information.
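The failure mode is easy to reproduce in isolation. The sketch below (the tensor value is illustrative, not taken from a real ModernBERT export trace) shows why a 0-D cache_position breaks the indexing and how the unsqueeze fix restores a usable shape and offset:

```python
import torch

# During ONNX export, cache_position can arrive as a 0-D scalar tensor.
cache_position = torch.tensor(7)
assert cache_position.ndim == 0  # shape is torch.Size([]), i.e. empty

# A 0-D tensor has an empty shape, so indexing its shape raises IndexError.
try:
    _ = cache_position.shape[0]
except IndexError:
    pass  # this is the crash the commit fixes

# The fix: promote to 1-D before extracting length and offset.
if cache_position.ndim == 0:
    cache_position = cache_position.unsqueeze(0)

q_length, q_offset = cache_position.shape[0], cache_position[0]
assert q_length == 1 and q_offset.item() == 7
```

For an already 1-D cache_position the `ndim == 0` guard is a no-op, so the patch does not change behavior on the normal eager-mode path.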
1 parent 5c1c72b commit 587ec72

1 file changed: src/transformers/masking_utils.py (4 additions, 0 deletions)
````diff
@@ -477,6 +477,8 @@ def sdpa_mask(
     ```
 
     """
+    if cache_position.ndim == 0:
+        cache_position = cache_position.unsqueeze(0)
     q_length = cache_position.shape[0]
 
     # Potentially pad the 2D mask
@@ -660,6 +662,8 @@ def flex_attention_mask(
         attention_mask (`torch.Tensor`, optional):
             The 2D attention mask corresponding to padded tokens of shape (batch_size, number_of_seen_tokens+q_length)
     """
+    if cache_position.ndim == 0:
+        cache_position = cache_position.unsqueeze(0)
     q_length, q_offset = cache_position.shape[0], cache_position[0]
 
     # Potentially add the padding 2D mask
````
