Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 33 additions & 19 deletions ppdiffusers/examples/Open-Sora/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from collections.abc import Sequence

import cv2
import imageio
import numpy as np
import paddle
import pandas as pd
Expand Down Expand Up @@ -247,40 +248,53 @@ def read_from_path(path, image_size, transform_name="center"):
assert ext.lower() in IMG_EXTENSIONS, f"Unsupported file format: {ext}"
return read_image_from_path(path, image_size=image_size, transform_name=transform_name)


def save_sample(x, fps=8, save_path=None, normalize=True, value_range=(-1.0, 1.0)):
    """
    Saves a video sample from a tensor without using OpenCV.

    Args:
        x (Tensor): Tensor of shape [C, T, H, W].
        fps (int, optional): Frames per second for the saved video. Defaults to 8.
        save_path (str, optional): Output path WITHOUT extension; ".mp4" is
            appended. Despite the ``None`` default, a value is required.
        normalize (bool, optional): Whether to clip ``x`` to ``value_range`` and
            rescale it to [0, 1] before conversion. Defaults to True.
        value_range (tuple, optional): Tuple specifying the (min, max) range for
            normalization. Defaults to (-1.0, 1.0).

    Returns:
        str: The path where the video is saved (including the ".mp4" suffix).

    Raises:
        AssertionError: If ``x`` does not have exactly 4 dimensions.
        ValueError: If ``save_path`` is None.
    """
    assert x.ndim == 4, f"Expected tensor with 4 dimensions [C, T, H, W], but got {x.ndim} dimensions."

    # Validate before the string concatenation below, which would otherwise
    # fail with an unhelpful TypeError on None.
    if save_path is None:
        raise ValueError("save_path must be provided.")

    save_path += ".mp4"

    if normalize:
        low, high = paddle.to_tensor(value_range, dtype="float32")
        # Out-of-place ops only, so the caller's tensor is left untouched.
        x = x.clip(min=low, max=high)
        # The lower bound on the divisor guards against a degenerate range.
        x = (x - low) / paddle.maximum(high - low, paddle.to_tensor(1e-5))

    # Scale to [0, 255]; adding 0.5 before the uint8 cast rounds to nearest.
    video_data = (
        x.multiply(paddle.to_tensor(255.0, dtype="float32"))
        .add(paddle.to_tensor(0.5, dtype="float32"))
        .clip(0, 255)
    )
    # [C, T, H, W] -> [T, H, W, C] so that iterating yields one frame at a time.
    video_data = video_data.transpose([1, 2, 3, 0])
    video_data = video_data.numpy().astype(np.uint8)

    # The context manager closes the writer even if appending a frame fails.
    with imageio.get_writer(save_path, fps=fps, codec="libx264", format="mp4") as writer:
        for frame in video_data:
            # Frames are written as-is, with no channel reordering
            # (presumably RGB input -- confirm against callers).
            writer.append_data(frame)

    print(f"Saved to {save_path}")
    return save_path
4 changes: 3 additions & 1 deletion ppdiffusers/examples/Open-Sora/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ pyarrow
pyav
tqdm
beartype
pandarallel
pandarallel
imageio
imageio-ffmpeg
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def __init__(self, *, betas: paddle.Tensor, model_mean_type: str, model_var_type
# Use float64 for accuracy.
self.betas = betas
assert len(self.betas.shape) == 1, "betas must be 1-D"
assert (self.betas > 0).all() and (self.betas <= 1).all()
assert (self.betas > 0).all() and (self.betas <= 1).all(), self.betas

self.num_timesteps = int(betas.shape[0])

Expand Down