@@ -50,17 +50,47 @@ def retrieve_timesteps(
     sigmas: list[float] | None = None,
     **kwargs,
 ):
+    r"""
+    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
+    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
+
+    Args:
+        scheduler (`SchedulerMixin`):
+            The scheduler to get timesteps from.
+        num_inference_steps (`int`):
+            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
+            must be `None`.
+        device (`str` or `torch.device`, *optional*):
+            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
+        timesteps (`list[int]`, *optional*):
+            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
+            `num_inference_steps` and `sigmas` must be `None`.
+        sigmas (`list[float]`, *optional*):
+            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
+            `num_inference_steps` and `timesteps` must be `None`.
+
+    Returns:
+        `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
+        second element is the number of inference steps.
+    """
     if timesteps is not None and sigmas is not None:
-        raise ValueError("Only one of `timesteps` or `sigmas` can be passed.")
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
     if timesteps is not None:
+        accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accepts_timesteps:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" timestep schedules. Please check whether you are using the correct scheduler."
+            )
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         timesteps = scheduler.timesteps
         num_inference_steps = len(timesteps)
     elif sigmas is not None:
         accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
         if not accept_sigmas:
             raise ValueError(
-                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom sigmas."
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" sigmas schedules. Please check whether you are using the correct scheduler."
             )
         scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
         timesteps = scheduler.timesteps
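
As a sanity check for reviewers, here is a minimal usage sketch of `retrieve_timesteps` (not part of the diff; it assumes a scheduler whose `set_timesteps` accepts custom `sigmas`, such as diffusers' `FlowMatchEulerDiscreteScheduler`):

```python
# Illustrative sketch, not part of this PR. Assumes `retrieve_timesteps` as
# defined above and a scheduler whose `set_timesteps` accepts custom `sigmas`.
import torch
from diffusers import FlowMatchEulerDiscreteScheduler

scheduler = FlowMatchEulerDiscreteScheduler()

# Default path: the scheduler builds its own timestep spacing.
timesteps, num_steps = retrieve_timesteps(scheduler, num_inference_steps=30, device="cpu")

# Custom path: override the spacing with explicit sigmas. Passing `timesteps`
# and `sigmas` together raises the ValueError added in this diff.
custom_sigmas = torch.linspace(1.0, 1 / 30, 30).tolist()
timesteps, num_steps = retrieve_timesteps(scheduler, sigmas=custom_sigmas, device="cpu")
```
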
@@ -71,14 +101,11 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps


-# Copied from diffusers.pipelines.ltx.pipeline_ltx.LTXPipeline._pack_latents
 def _pack_latents(latents: torch.Tensor, patch_size: int = 1, patch_size_t: int = 1) -> torch.Tensor:
-    # Unpacked latents of shape are [B, C, F, H, W] are patched into tokens of shape
-    # [B, C, F // p_t, p_t, H // p, p, W // p, p].
+    # Unpacked latents of shape are [B, C, F, H, W] are patched into tokens of shape [B, C, F // p_t, p_t, H // p, p, W // p, p].
     # The patch dimensions are then permuted and collapsed into the channel dimension of shape:
     # [B, F // p_t * H // p * W // p, C * p_t * p * p] (an ndim=3 tensor).
-    # dim=0 is the batch size, dim=1 is the effective video sequence length,
-    # dim=2 is the effective number of input features
+    # dim=0 is the batch size, dim=1 is the effective video sequence length, dim=2 is the effective number of input features
     batch_size, num_channels, num_frames, height, width = latents.shape
     post_patch_num_frames = num_frames // patch_size_t
     post_patch_height = height // patch_size
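
To make the packing comment concrete, a quick shape walk-through (a sketch assuming the LTX-style defaults `patch_size=1`, `patch_size_t=1`; the example tensor sizes are arbitrary):

```python
# Shape walk-through for _pack_latents (illustrative; sizes are arbitrary).
import torch

latents = torch.randn(1, 128, 9, 16, 16)  # [B, C, F, H, W]
packed = _pack_latents(latents, patch_size=1, patch_size_t=1)

# With p = p_t = 1 the sequence length is F * H * W = 9 * 16 * 16 = 2304
# and each token keeps C * p_t * p * p = 128 features.
assert packed.shape == (1, 2304, 128)
```
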
@@ -97,7 +124,6 @@ def _pack_latents(latents: torch.Tensor, patch_size: int = 1, patch_size_t: int
     return latents


-# Copied from diffusers.pipelines.ltx.pipeline_ltx.LTXPipeline._normalize_latents
 def _normalize_latents(
     latents: torch.Tensor, latents_mean: torch.Tensor, latents_std: torch.Tensor, scaling_factor: float = 1.0
 ) -> torch.Tensor: