add Flux2FlowDenoiser

leejet · leejet · commit 2ddbfe5ddeec · 2025-11-29T15:07:06.000+08:00
diff --git a/denoiser.hpp b/denoiser.hpp
@@ -356,7 +356,7 @@ struct Denoiser {
     virtual ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) = 0;
     virtual ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent)             = 0;
 
-    virtual std::vector<float> get_sigmas(uint32_t n, scheduler_t scheduler_type, SDVersion version) {
+    virtual std::vector<float> get_sigmas(uint32_t n, int /*image_seq_len*/, scheduler_t scheduler_type, SDVersion version) {
         auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
         std::shared_ptr<SigmaScheduler> scheduler;
         switch (scheduler_type) {
@@ -582,10 +582,14 @@ struct FluxFlowDenoiser : public Denoiser {
         set_parameters(shift);
     }
 
-    void set_parameters(float shift = 1.15f) {
+    void set_shift(float shift) {  // Stores the shift only; the sigmas[] table is not recomputed here.
         this->shift = shift;
-        for (int i = 1; i < TIMESTEPS + 1; i++) {
-            sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
+    }
+
+    void set_parameters(float shift) {  // Stores the shift, then refills every entry of sigmas[].
+        set_shift(shift);  // presumably t_to_sigma() reads this->shift, so this must stay ahead of the loop - TODO confirm
+        for (int i = 0; i < TIMESTEPS; i++) {  // one table entry per index in [0, TIMESTEPS)
+            sigmas[i] = t_to_sigma(i);  // the index itself is passed to t_to_sigma() as the timestep
         }
     }
 
@@ -627,6 +631,38 @@ struct FluxFlowDenoiser : public Denoiser {
     }
 };
 
+struct Flux2FlowDenoiser : public FluxFlowDenoiser {  // FluxFlowDenoiser variant that recomputes its shift on every get_sigmas() call
+    Flux2FlowDenoiser() = default;  // relies on whatever default construction FluxFlowDenoiser provides
+
+    float compute_empirical_mu(uint32_t n, int image_seq_len) {  // n: step count given to get_sigmas(); returns the shift value (mu)
+        const float a1 = 8.73809524e-05f;  // slope of the fit that yields m_10 (mu at n = 10)
+        const float b1 = 1.89833333f;      // intercept of the m_10 fit
+        const float a2 = 0.00016927f;      // slope of the fit that yields m_200 (mu at n = 200)
+        const float b2 = 0.45666666f;      // intercept of the m_200 fit; NOTE(review): constants look like diffusers' Flux2 compute_empirical_mu - confirm
+
+        if (image_seq_len > 4300) {  // above this length the result ignores n entirely
+            float mu = a2 * image_seq_len + b2;
+            return mu;
+        }
+
+        float m_200 = a2 * image_seq_len + b2;  // mu the function returns when n == 200
+        float m_10  = a1 * image_seq_len + b1;  // mu the function returns when n == 10
+
+        float a  = (m_200 - m_10) / 190.0f;  // slope of the line through (10, m_10) and (200, m_200)
+        float b  = m_200 - 200.0f * a;       // intercept of that same line
+        float mu = a * n + b;                // evaluated at n; values outside [10, 200] extrapolate, nothing is clamped
+
+        return mu;
+    }
+
+    std::vector<float> get_sigmas(uint32_t n, int image_seq_len, scheduler_t scheduler_type, SDVersion version) override {  // derives the shift from (n, image_seq_len), then delegates
+        float mu = compute_empirical_mu(n, image_seq_len);
+        LOG_DEBUG("Flux2FlowDenoiser: set shift to %.3f", mu);
+        set_shift(mu);  // NOTE(review): mutates this->shift; set_shift() does not refill sigmas[] (only set_parameters() does) - confirm nothing reads the table afterwards
+        return Denoiser::get_sigmas(n, image_seq_len, scheduler_type, version);  // qualified call: runs the Denoiser implementation directly, with all arguments forwarded unchanged
+    }
+};
+
 typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
 
 // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t
diff --git a/examples/cli/README.md b/examples/cli/README.md
@@ -101,7 +101,7 @@ Options:
   -s, --seed                               RNG seed (default: 42, use random seed for < 0)
   --sampling-method                        sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
                                            tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
-  --prediction                             prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]
+  --prediction                             prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]
   --lora-apply-mode                        the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights
                                            contain any quantized parameters, the at_runtime mode will be used; otherwise,
                                            immediately will be used.The immediately mode may have precision and
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -1193,7 +1193,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
          on_sample_method_arg},
         {"",
          "--prediction",
-         "prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]",
+         "prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]",
          on_prediction_arg},
         {"",
          "--lora-apply-mode",
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -802,6 +802,11 @@ class StableDiffusionGGML {
                     denoiser = std::make_shared<FluxFlowDenoiser>(shift);
                     break;
                 }
+                case FLUX2_FLOW_PRED: {
+                    LOG_INFO("running in Flux2 FLOW mode");
+                    denoiser = std::make_shared<Flux2FlowDenoiser>();
+                    break;
+                }
                 default: {
                     LOG_ERROR("Unknown parametrization %i", sd_ctx_params->prediction);
                     return false;
@@ -834,7 +839,7 @@ class StableDiffusionGGML {
                     shift = 3.0;
                 }
                 denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
-            } else if (sd_version_is_flux(version) || sd_version_is_flux2(version)) {
+            } else if (sd_version_is_flux(version)) {
                 LOG_INFO("running in Flux FLOW mode");
                 float shift = sd_ctx_params->flow_shift;
                 if (shift == INFINITY) {
@@ -844,11 +849,11 @@ class StableDiffusionGGML {
                             shift = 1.15f;
                         }
                     }
-                    if (sd_version_is_flux2(version)) {
-                        shift = 2.05f;
-                    }
                 }
                 denoiser = std::make_shared<FluxFlowDenoiser>(shift);
+            } else if (sd_version_is_flux2(version)) {
+                LOG_INFO("running in Flux2 FLOW mode");
+                denoiser = std::make_shared<Flux2FlowDenoiser>();
             } else if (sd_version_is_wan(version)) {
                 LOG_INFO("running in FLOW mode");
                 float shift = sd_ctx_params->flow_shift;
@@ -1869,6 +1874,11 @@ class StableDiffusionGGML {
         return latent_channel;
     }
 
+    int get_image_seq_len(int h, int w) {  // h, w: height and width as passed by generate_image; units presumably pixels - TODO confirm
+        int vae_scale_factor = get_vae_scale_factor();  // NOTE(review): assumed non-zero; a zero factor would divide by zero on the next line
+        return (h / vae_scale_factor) * (w / vae_scale_factor);  // integer division: each dimension is truncated before the multiply
+    }
+
     ggml_tensor* generate_init_latent(ggml_context* work_ctx,
                                       int width,
                                       int height,
@@ -2361,6 +2371,7 @@ const char* prediction_to_str[] = {
     "edm_v",
     "sd3_flow",
     "flux_flow",
+    "flux2_flow",
 };
 
 const char* sd_prediction_name(enum prediction_t prediction) {
@@ -3131,7 +3142,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
     LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]);
 
     int sample_steps          = sd_img_gen_params->sample_params.sample_steps;
-    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps, sd_img_gen_params->sample_params.scheduler, sd_ctx->sd->version);
+    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps,
+                                                                 sd_ctx->sd->get_image_seq_len(height, width),
+                                                                 sd_img_gen_params->sample_params.scheduler,
+                                                                 sd_ctx->sd->version);
 
     ggml_tensor* init_latent   = nullptr;
     ggml_tensor* concat_latent = nullptr;
@@ -3384,7 +3398,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
     if (high_noise_sample_steps > 0) {
         total_steps += high_noise_sample_steps;
     }
-    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(total_steps, sd_vid_gen_params->sample_params.scheduler, sd_ctx->sd->version);
+    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(total_steps, 0, sd_vid_gen_params->sample_params.scheduler, sd_ctx->sd->version);
 
     if (high_noise_sample_steps < 0) {
         // timesteps &prop; sigmas for Flow models (like wan2.2 a14b)
diff --git a/stable-diffusion.h b/stable-diffusion.h
@@ -71,6 +71,7 @@ enum prediction_t {
     EDM_V_PRED,
     SD3_FLOW_PRED,
     FLUX_FLOW_PRED,
+    FLUX2_FLOW_PRED,
     PREDICTION_COUNT
 };