From 366977c9be7c81f4dfd5ab6fefbc489697801c3c Mon Sep 17 00:00:00 2001
From: Bryce
Date: Sat, 20 Jan 2024 09:37:49 -0800
Subject: [PATCH] refactor: rename some variables to be more precise

noise => predicted_noise
denoised_x => renoised_x (denoised seems inaccurate since it does have
noise added)
---
 pyproject.toml                                |  2 +-
 .../foundationals/latent_diffusion/model.py   |  8 +++++---
 .../latent_diffusion/schedulers/ddim.py       |  8 ++++----
 .../latent_diffusion/schedulers/ddpm.py       |  2 +-
 .../latent_diffusion/schedulers/dpm_solver.py |  4 ++--
 .../latent_diffusion/schedulers/euler.py      |  8 +++++---
 .../latent_diffusion/schedulers/scheduler.py  |  4 ++--
 .../latent_diffusion/test_schedulers.py       | 18 +++++++++---------
 8 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0ca1f8b4e..43d0d13e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -96,5 +96,5 @@ black = true
 [tool.pyright]
 include = ["src/refiners", "tests", "scripts"]
 strict = ["*"]
-exclude = ["**/__pycache__"]
+exclude = ["**/__pycache__", "tests/weights"]
 reportMissingTypeStubs = "warning"
diff --git a/src/refiners/foundationals/latent_diffusion/model.py b/src/refiners/foundationals/latent_diffusion/model.py
index 44ec1cc89..caa0cbb35 100644
--- a/src/refiners/foundationals/latent_diffusion/model.py
+++ b/src/refiners/foundationals/latent_diffusion/model.py
@@ -99,15 +99,17 @@ def forward(
         unconditional_prediction, conditional_prediction = self.unet(latents).chunk(2)
 
         # classifier-free guidance
-        noise = unconditional_prediction + condition_scale * (conditional_prediction - unconditional_prediction)
+        predicted_noise = unconditional_prediction + condition_scale * (
+            conditional_prediction - unconditional_prediction
+        )
         x = x.narrow(dim=1, start=0, length=4)  # support > 4 channels for inpainting
 
         if self.has_self_attention_guidance():
-            noise += self.compute_self_attention_guidance(
+            predicted_noise += self.compute_self_attention_guidance(
                 x=x, noise=unconditional_prediction, step=step, clip_text_embedding=clip_text_embedding, **kwargs
             )
 
-        return self.scheduler(x, noise=noise, step=step)
+        return self.scheduler(x, predicted_noise=predicted_noise, step=step)
 
     def structural_copy(self: TLatentDiffusionModel) -> TLatentDiffusionModel:
         return self.__class__(
diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/ddim.py b/src/refiners/foundationals/latent_diffusion/schedulers/ddim.py
index 125dada1d..79d4fadcc 100644
--- a/src/refiners/foundationals/latent_diffusion/schedulers/ddim.py
+++ b/src/refiners/foundationals/latent_diffusion/schedulers/ddim.py
@@ -36,7 +36,7 @@ def _generate_timesteps(self) -> Tensor:
         timesteps = arange(start=0, end=self.num_inference_steps, step=1, device=self.device) * step_ratio + 1
         return timesteps.flip(0)
 
-    def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+    def __call__(self, x: Tensor, predicted_noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
         assert self.first_inference_step <= step < self.num_inference_steps, "invalid step {step}"
 
         timestep, previous_timestep = (
@@ -55,13 +55,13 @@ def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | N
                 else self.cumulative_scale_factors[0]
             ),
         )
-        predicted_x = (x - sqrt(1 - current_scale_factor**2) * noise) / current_scale_factor
+        predicted_x = (x - sqrt(1 - current_scale_factor**2) * predicted_noise) / current_scale_factor
         noise_factor = sqrt(1 - previous_scale_factor**2)
 
         # Do not add noise at the last step to avoid visual artifacts.
         if step == self.num_inference_steps - 1:
             noise_factor = 0
 
-        denoised_x = previous_scale_factor * predicted_x + noise_factor * noise
+        renoised_x = previous_scale_factor * predicted_x + noise_factor * predicted_noise
 
-        return denoised_x
+        return renoised_x
diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/ddpm.py b/src/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
index 2fb59529b..9293b158c 100644
--- a/src/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
+++ b/src/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
@@ -33,5 +33,5 @@ def _generate_timesteps(self) -> Tensor:
         timesteps = arange(start=0, end=self.num_inference_steps, step=1, device=self.device) * step_ratio
         return timesteps.flip(0)
 
-    def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+    def __call__(self, x: Tensor, predicted_noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
         raise NotImplementedError
diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py b/src/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
index 0711fd9ed..a75b1c799 100644
--- a/src/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
+++ b/src/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
@@ -94,7 +94,7 @@ def multistep_dpm_solver_second_order_update(self, x: Tensor, step: int) -> Tens
         )
         return denoised_x
 
-    def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+    def __call__(self, x: Tensor, predicted_noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
         """
         Represents one step of the backward diffusion process that iteratively denoises the input data `x`.
 
@@ -106,7 +106,7 @@ def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | N
 
         current_timestep = self.timesteps[step]
         scale_factor, noise_ratio = self.cumulative_scale_factors[current_timestep], self.noise_std[current_timestep]
-        estimated_denoised_data = (x - noise_ratio * noise) / scale_factor
+        estimated_denoised_data = (x - noise_ratio * predicted_noise) / scale_factor
         self.estimated_data.append(estimated_denoised_data)
 
         if step == self.first_inference_step or (self.last_step_first_order and step == self.num_inference_steps - 1):
diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/euler.py b/src/refiners/foundationals/latent_diffusion/schedulers/euler.py
index 28c7c2bc5..312c9bf39 100644
--- a/src/refiners/foundationals/latent_diffusion/schedulers/euler.py
+++ b/src/refiners/foundationals/latent_diffusion/schedulers/euler.py
@@ -58,7 +58,7 @@ def scale_model_input(self, x: Tensor, step: int) -> Tensor:
     def __call__(
         self,
         x: Tensor,
-        noise: Tensor,
+        predicted_noise: Tensor,
         step: int,
         generator: Generator | None = None,
         s_churn: float = 0.0,
@@ -72,13 +72,15 @@ def __call__(
 
         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0
 
-        alt_noise = torch.randn(noise.shape, generator=generator, device=noise.device, dtype=noise.dtype)
+        alt_noise = torch.randn(
+            predicted_noise.shape, generator=generator, device=predicted_noise.device, dtype=predicted_noise.dtype
+        )
         eps = alt_noise * s_noise
         sigma_hat = sigma * (gamma + 1)
         if gamma > 0:
             x = x + eps * (sigma_hat**2 - sigma**2) ** 0.5
 
-        predicted_x = x - sigma_hat * noise
+        predicted_x = x - sigma_hat * predicted_noise
 
         # 1st order Euler
         derivative = (x - predicted_x) / sigma_hat
diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/scheduler.py b/src/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
index 37f9bebf8..b4e3b3ba4 100644
--- a/src/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
+++ b/src/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
@@ -52,9 +52,9 @@ def __init__(
         self.timesteps = self._generate_timesteps()
 
     @abstractmethod
-    def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+    def __call__(self, x: Tensor, predicted_noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
         """
-        Applies a step of the diffusion process to the input tensor `x` using the provided `noise` and `timestep`.
+        Applies a step of the diffusion process to the input tensor `x` using the provided `predicted_noise` and `timestep`.
 
         This method should be overridden by subclasses to implement the specific diffusion process.
""" @@ -104,7 +104,7 @@ def sample_noise_schedule(self) -> Tensor: case "karras": return 1 - self.sample_power_distribution(7) case _: - raise ValueError(f"Unknown noise schedule: {self.noise_schedule}") + raise ValueError(f"Unknown predicted_noise schedule: {self.noise_schedule}") def add_noise( self, diff --git a/src/refiners/training_utils/latent_diffusion.py b/src/refiners/training_utils/latent_diffusion.py index f4f8ccf0a..4faa8db8b 100644 --- a/src/refiners/training_utils/latent_diffusion.py +++ b/src/refiners/training_utils/latent_diffusion.py @@ -214,9 +214,9 @@ def sample_noise( dtype: DType | None = None, generator: Generator | None = None, ) -> Tensor: - """Sample noise from a normal distribution. + """Sample predicted_noise from a normal distribution. - If `offset_noise` is more than 0, the noise will be offset by a small amount. It allows the model to generate + If `offset_noise` is more than 0, the predicted_noise will be offset by a small amount. It allows the model to generate images with a wider range of contrast https://www.crosslabs.org/blog/diffusion-with-offset-noise. """ device = Device(device) diff --git a/tests/e2e/test_diffusion.py b/tests/e2e/test_diffusion.py index bb3ec4a66..3489a00e1 100644 --- a/tests/e2e/test_diffusion.py +++ b/tests/e2e/test_diffusion.py @@ -1127,7 +1127,7 @@ def test_diffusion_refonly( torch.randn(2, 4, 64, 64, device=test_device) # for SD Web UI reproductibility only predicted_image = sd15.lda.decode_latents(x) - # min_psnr lowered to 33 because this reference image was generated without noise removal (see #192) + # min_psnr lowered to 33 because this reference image was generated without predicted_noise removal (see #192) ensure_similar_images(predicted_image, expected_image_refonly, min_psnr=33, min_ssim=0.99) diff --git a/tests/foundationals/latent_diffusion/test_schedulers.py b/tests/foundationals/latent_diffusion/test_schedulers.py index 3737c267d..842335685 100644 --- a/tests/foundationals/latent_diffusion/test_schedulers.py +++ b/tests/foundationals/latent_diffusion/test_schedulers.py @@ -35,11 +35,11 @@ def test_dpm_solver_diffusers(n_steps: int, last_step_first_order: bool): refiners_scheduler = DPMSolver(num_inference_steps=n_steps, last_step_first_order=last_step_first_order) sample = randn(1, 3, 32, 32) - noise = randn(1, 3, 32, 32) + predicted_noise = randn(1, 3, 32, 32) for step, timestep in enumerate(diffusers_scheduler.timesteps): - diffusers_output = cast(Tensor, diffusers_scheduler.step(noise, timestep, sample).prev_sample) # type: ignore - refiners_output = refiners_scheduler(x=sample, noise=noise, step=step) + diffusers_output = cast(Tensor, diffusers_scheduler.step(predicted_noise, timestep, sample).prev_sample) # type: ignore + refiners_output = refiners_scheduler(x=sample, predicted_noise=predicted_noise, step=step) assert allclose(diffusers_output, refiners_output, rtol=0.01), f"outputs differ at step {step}" @@ -60,11 +60,11 @@ def test_ddim_diffusers(): refiners_scheduler = DDIM(num_inference_steps=30) sample = randn(1, 4, 32, 32) - noise = randn(1, 4, 32, 32) + predicted_noise = randn(1, 4, 32, 32) for step, timestep in enumerate(diffusers_scheduler.timesteps): - diffusers_output = cast(Tensor, diffusers_scheduler.step(noise, timestep, sample).prev_sample) # type: ignore - refiners_output = refiners_scheduler(x=sample, noise=noise, step=step) + diffusers_output = cast(Tensor, diffusers_scheduler.step(predicted_noise, timestep, sample).prev_sample) # type: ignore + refiners_output = refiners_scheduler(x=sample, 
predicted_noise=predicted_noise, step=step) assert allclose(diffusers_output, refiners_output, rtol=0.01), f"outputs differ at step {step}" @@ -86,15 +86,15 @@ def test_euler_diffusers(): refiners_scheduler = EulerScheduler(num_inference_steps=30) sample = randn(1, 4, 32, 32) - noise = randn(1, 4, 32, 32) + predicted_noise = randn(1, 4, 32, 32) ref_init_noise_sigma = diffusers_scheduler.init_noise_sigma # type: ignore assert isinstance(ref_init_noise_sigma, Tensor) assert isclose(ref_init_noise_sigma, refiners_scheduler.init_noise_sigma), "init_noise_sigma differ" for step, timestep in enumerate(diffusers_scheduler.timesteps): - diffusers_output = cast(Tensor, diffusers_scheduler.step(noise, timestep, sample).prev_sample) # type: ignore - refiners_output = refiners_scheduler(x=sample, noise=noise, step=step) + diffusers_output = cast(Tensor, diffusers_scheduler.step(predicted_noise, timestep, sample).prev_sample) # type: ignore + refiners_output = refiners_scheduler(x=sample, predicted_noise=predicted_noise, step=step) assert allclose(diffusers_output, refiners_output, rtol=0.01), f"outputs differ at step {step}"
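
Note for reviewers on the rationale behind the renames: the UNet output is an
estimate of the noise present in `x` (hence `predicted_noise` rather than
`noise`), and a scheduler step re-adds noise at the previous, smaller noise
level, so its output still contains noise (hence `renoised_x` rather than
`denoised_x`). A minimal DDIM-style sketch of that idea, illustrative only and
not part of the patch (the helper `ddim_step` and its signature are made up
here; it mirrors the update in ddim.py above):

    import torch

    def ddim_step(
        x: torch.Tensor,
        predicted_noise: torch.Tensor,  # the UNet's noise *estimate*, not pure noise
        current_scale_factor: float,
        previous_scale_factor: float,
    ) -> torch.Tensor:
        # Recover the clean-image estimate from the current noisy sample.
        predicted_x = (x - (1 - current_scale_factor**2) ** 0.5 * predicted_noise) / current_scale_factor
        # Re-add noise at the previous (smaller) noise level: the result is
        # "renoised", not fully "denoised".
        noise_factor = (1 - previous_scale_factor**2) ** 0.5
        return previous_scale_factor * predicted_x + noise_factor * predicted_noise

    # e.g.: ddim_step(torch.randn(1, 4, 64, 64), torch.randn(1, 4, 64, 64), 0.5, 0.7)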