@@ -802,6 +802,11 @@ class StableDiffusionGGML {
802802 denoiser = std::make_shared<FluxFlowDenoiser>(shift);
803803 break ;
804804 }
805+ case FLUX2_FLOW_PRED: {
806+ LOG_INFO (" running in Flux2 FLOW mode" );
807+ denoiser = std::make_shared<Flux2FlowDenoiser>();
808+ break ;
809+ }
805810 default : {
806811 LOG_ERROR (" Unknown parametrization %i" , sd_ctx_params->prediction );
807812 return false ;
@@ -834,7 +839,7 @@ class StableDiffusionGGML {
834839 shift = 3.0 ;
835840 }
836841 denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
837- } else if (sd_version_is_flux (version) || sd_version_is_flux2 (version) ) {
842+ } else if (sd_version_is_flux (version)) {
838843 LOG_INFO (" running in Flux FLOW mode" );
839844 float shift = sd_ctx_params->flow_shift ;
840845 if (shift == INFINITY) {
@@ -844,11 +849,11 @@ class StableDiffusionGGML {
844849 shift = 1 .15f ;
845850 }
846851 }
847- if (sd_version_is_flux2 (version)) {
848- shift = 2 .05f ;
849- }
850852 }
851853 denoiser = std::make_shared<FluxFlowDenoiser>(shift);
854+ } else if (sd_version_is_flux2 (version)) {
855+ LOG_INFO (" running in Flux2 FLOW mode" );
856+ denoiser = std::make_shared<Flux2FlowDenoiser>();
852857 } else if (sd_version_is_wan (version)) {
853858 LOG_INFO (" running in FLOW mode" );
854859 float shift = sd_ctx_params->flow_shift ;
@@ -1869,6 +1874,11 @@ class StableDiffusionGGML {
18691874 return latent_channel;
18701875 }
18711876
// Computes (h / vae_scale_factor) * (w / vae_scale_factor), with the scale factor
// obtained from get_vae_scale_factor(). Both divisions are integer divisions, so
// any remainder of h or w with respect to the scale factor is discarded.
// NOTE(review): presumably h and w are image dimensions in pixels and the result is
// the number of latent positions — confirm against get_vae_scale_factor() and callers.
1877+ int get_image_seq_len (int h, int w) {
1878+ int vae_scale_factor = get_vae_scale_factor ();
1879+ return (h / vae_scale_factor) * (w / vae_scale_factor);
1880+ }
1881+
18721882 ggml_tensor* generate_init_latent (ggml_context* work_ctx,
18731883 int width,
18741884 int height,
@@ -2361,6 +2371,7 @@ const char* prediction_to_str[] = {
23612371 " edm_v" ,
23622372 " sd3_flow" ,
23632373 " flux_flow" ,
2374+ " flux2_flow" ,
23642375};
23652376
23662377const char * sd_prediction_name (enum prediction_t prediction) {
@@ -3131,7 +3142,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
31313142 LOG_INFO (" sampling using %s method" , sampling_methods_str[sample_method]);
31323143
31333144 int sample_steps = sd_img_gen_params->sample_params .sample_steps ;
3134- std::vector<float > sigmas = sd_ctx->sd ->denoiser ->get_sigmas (sample_steps, sd_img_gen_params->sample_params .scheduler , sd_ctx->sd ->version );
3145+ std::vector<float > sigmas = sd_ctx->sd ->denoiser ->get_sigmas (sample_steps,
3146+ sd_ctx->sd ->get_image_seq_len (height, width),
3147+ sd_img_gen_params->sample_params .scheduler ,
3148+ sd_ctx->sd ->version );
31353149
31363150 ggml_tensor* init_latent = nullptr ;
31373151 ggml_tensor* concat_latent = nullptr ;
@@ -3384,7 +3398,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
33843398 if (high_noise_sample_steps > 0 ) {
33853399 total_steps += high_noise_sample_steps;
33863400 }
3387- std::vector<float > sigmas = sd_ctx->sd ->denoiser ->get_sigmas (total_steps, sd_vid_gen_params->sample_params .scheduler , sd_ctx->sd ->version );
3401+ std::vector<float > sigmas = sd_ctx->sd ->denoiser ->get_sigmas (total_steps, 0 , sd_vid_gen_params->sample_params .scheduler , sd_ctx->sd ->version );
33883402
33893403 if (high_noise_sample_steps < 0 ) {
33903404 // timesteps ∝ sigmas for Flow models (like wan2.2 a14b)
0 commit comments