Commit bcc9c0d

stduhpf, idostyle, and leejet authored
feat: handle ggml compute failures without crashing the program (leejet#1003)
* Feat: handle compute failures more gracefully
* Fix unreachable code after return
* Adjust z_image.hpp

Co-authored-by: idostyle <[email protected]>
Co-authored-by: leejet <[email protected]>
1 parent 5865b5e commit bcc9c0d

20 files changed: +163, -79 lines
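The commit applies one pattern across the runner hierarchy: every compute() now returns bool instead of void, failures are logged and reported upward, and callers check the result and unwind instead of hitting a GGML_ASSERT or abort(). The sketch below only illustrates that calling convention; ToyRunner and generate_step are hypothetical names, not code from this repository.

#include <cstdio>

// Hypothetical stand-in for a GGMLRunner-derived runner.
struct ToyRunner {
    // After this commit, compute() reports failure instead of aborting the process.
    bool compute(int n_threads) {
        bool backend_ok = (n_threads > 0);  // placeholder for the real ggml backend call
        if (!backend_ok) {
            fprintf(stderr, "compute failed\n");
            return false;
        }
        return true;
    }
};

// Callers propagate the failure up the call chain instead of crashing.
bool generate_step(ToyRunner& runner, int n_threads) {
    if (!runner.compute(n_threads)) {
        return false;
    }
    // ... consume the output tensor here ...
    return true;
}

int main() {
    ToyRunner runner;
    return generate_step(runner, 4) ? 0 : 1;
}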

clip.hpp

Lines changed: 2 additions & 2 deletions
@@ -963,7 +963,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  struct ggml_tensor* input_ids,
                  int num_custom_embeddings,
                  void* custom_embeddings_data,
@@ -975,7 +975,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
         };
-        GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
     }
 };

conditioner.hpp

Lines changed: 2 additions & 2 deletions
@@ -703,7 +703,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  ggml_tensor* pixel_values,
                  bool return_pooled,
                  int clip_skip,
@@ -712,7 +712,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(pixel_values, return_pooled, clip_skip);
         };
-        GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
     }
 };

control.hpp

Lines changed: 7 additions & 3 deletions
@@ -414,7 +414,7 @@ struct ControlNet : public GGMLRunner {
         return gf;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* hint,
                  struct ggml_tensor* timesteps,
@@ -430,8 +430,12 @@ struct ControlNet : public GGMLRunner {
             return build_graph(x, hint, timesteps, context, y);
         };
 
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
-        guided_hint_cached = true;
+        bool res = GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        if (res) {
+            // cache guided_hint
+            guided_hint_cached = true;
+        }
+        return res;
     }
 
     bool load_from_file(const std::string& file_path, int n_threads) {
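Note the ordering in the ControlNet change: guided_hint_cached is set only after GGMLRunner::compute() succeeds, so a failed run does not mark a never-computed hint as cached and a later call will recompute it. A minimal, self-contained illustration of that cache-on-success guard (HintCache and its members are hypothetical, not names from control.hpp):

#include <optional>

struct HintCache {
    std::optional<int> hint;   // stands in for the cached guided_hint tensor
    bool cached = false;

    bool compute_hint(bool backend_ok) {
        if (!backend_ok) {
            return false;      // leave cached == false so the next call retries
        }
        hint   = 42;           // placeholder for the real hint computation
        cached = true;         // mark the cache valid only on success
        return true;
    }
};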

denoiser.hpp

Lines changed: 42 additions & 5 deletions
@@ -666,7 +666,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser {
 typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
 
 // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t
-static void sample_k_diffusion(sample_method_t method,
+static bool sample_k_diffusion(sample_method_t method,
                                denoise_cb_t model,
                                ggml_context* work_ctx,
                                ggml_tensor* x,
@@ -685,6 +685,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -738,6 +741,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -769,6 +775,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], -(i + 1));
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -803,7 +812,10 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == nullptr) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1];
                     vec_d[j] = (vec_d[j] + d2) / 2;
@@ -819,6 +831,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -855,7 +870,10 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigma_mid, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == nullptr) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     float d2 = (vec_x2[j] - vec_denoised[j]) / sigma_mid;
                     vec_x[j] = vec_x[j] + d2 * dt_2;
@@ -871,6 +889,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // get_ancestral_step
                 float sigma_up = std::min(sigmas[i + 1],
@@ -907,6 +928,9 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // Second half-step
                 for (int j = 0; j < ggml_nelements(x); j++) {
@@ -937,6 +961,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 float t = t_fn(sigmas[i]);
                 float t_next = t_fn(sigmas[i + 1]);
@@ -976,6 +1003,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 float t = t_fn(sigmas[i]);
                 float t_next = t_fn(sigmas[i + 1]);
@@ -1026,7 +1056,10 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // Denoising step
                 ggml_tensor* denoised = model(x_cur, sigma, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == nullptr) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 // d_cur = (x_cur - denoised) / sigma
                 struct ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x_cur);
                 float* vec_d_cur = (float*)d_cur->data;
@@ -1169,6 +1202,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // x = denoised
                 {
@@ -1561,8 +1597,9 @@ static void sample_k_diffusion(sample_method_t method,
 
         default:
             LOG_ERROR("Attempting to sample with nonexisting sample method %i", method);
-            abort();
+            return false;
     }
+    return true;
 }
 
 #endif // __DENOISER_HPP__
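Inside sample_k_diffusion the failure signal travels through the denoise_cb_t callback: when the diffusion model's compute() fails, the callback returns nullptr, every sampler branch checks for it and returns false, and the unknown-method default case returns false instead of calling abort(). A compressed, self-contained sketch of that flow follows; std::vector<float> stands in for ggml_tensor and all names are illustrative, not code from denoiser.hpp.

#include <cstdio>
#include <functional>
#include <vector>

// Stand-in for denoise_cb_t: a null result means the underlying compute failed.
using denoise_cb = std::function<std::vector<float>*(std::vector<float>*, float, int)>;

// Sketch of a sampler loop that unwinds on failure instead of dereferencing null.
static bool sample_sketch(const denoise_cb& model, std::vector<float>& x, const std::vector<float>& sigmas) {
    for (size_t i = 0; i + 1 < sigmas.size(); i++) {
        std::vector<float>* denoised = model(&x, sigmas[i], (int)i + 1);
        if (denoised == nullptr) {
            return false;  // propagate the compute failure to the caller
        }
        // ... Euler update of x using *denoised would go here ...
    }
    return true;
}

int main() {
    denoise_cb failing_model = [](std::vector<float>*, float, int) -> std::vector<float>* {
        return nullptr;  // simulate a ggml compute failure
    };
    std::vector<float> x(4, 0.0f);
    std::vector<float> sigmas = {1.0f, 0.5f, 0.0f};
    if (!sample_sketch(failing_model, x, sigmas)) {
        fprintf(stderr, "sampling aborted cleanly\n");
        return 1;
    }
    return 0;
}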

diffusion_model.hpp

Lines changed: 7 additions & 7 deletions
@@ -27,7 +27,7 @@ struct DiffusionParams {
 
 struct DiffusionModel {
     virtual std::string get_desc() = 0;
-    virtual void compute(int n_threads,
+    virtual bool compute(int n_threads,
                          DiffusionParams diffusion_params,
                          struct ggml_tensor** output = nullptr,
                          struct ggml_context* output_ctx = nullptr) = 0;
@@ -87,7 +87,7 @@ struct UNetModel : public DiffusionModel {
         unet.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -148,7 +148,7 @@ struct MMDiTModel : public DiffusionModel {
         mmdit.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -210,7 +210,7 @@ struct FluxModel : public DiffusionModel {
         flux.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -277,7 +277,7 @@ struct WanModel : public DiffusionModel {
         wan.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -343,7 +343,7 @@ struct QwenImageModel : public DiffusionModel {
         qwen_image.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -406,7 +406,7 @@ struct ZImageModel : public DiffusionModel {
         z_image.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
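Because DiffusionModel::compute is a pure virtual, the return type has to change in the base declaration and in every override within the same commit; a void override of a bool virtual does not compile. A minimal illustration of that constraint, with hypothetical types rather than the project's own:

#include <cstdio>

struct Model {
    virtual ~Model() = default;
    virtual bool compute(int n_threads) = 0;  // was void before this commit
};

struct UNetLike : Model {
    // The override's return type must match the new bool signature exactly.
    bool compute(int n_threads) override {
        return n_threads > 0;  // placeholder for the real runner call
    }
};

int main() {
    UNetLike m;
    Model* dm = &m;
    if (!dm->compute(4)) {
        fprintf(stderr, "diffusion step failed\n");
        return 1;
    }
    return 0;
}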

esrgan.hpp

Lines changed: 2 additions & 2 deletions
@@ -353,14 +353,14 @@ struct ESRGAN : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  struct ggml_tensor* x,
                  ggml_tensor** output,
                  ggml_context* output_ctx = nullptr) {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(x);
         };
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
     }
 };

flux.hpp

Lines changed: 2 additions & 2 deletions
@@ -1413,7 +1413,7 @@ namespace Flux {
             return gf;
         }
 
-        void compute(int n_threads,
+        bool compute(int n_threads,
                      struct ggml_tensor* x,
                      struct ggml_tensor* timesteps,
                      struct ggml_tensor* context,
@@ -1434,7 +1434,7 @@ namespace Flux {
                 return build_graph(x, timesteps, context, c_concat, y, guidance, ref_latents, increase_ref_index, skip_layers);
             };
 
-            GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+            return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
         }
 
         void test() {

ggml_extend.hpp

Lines changed: 16 additions & 5 deletions
@@ -1938,25 +1938,35 @@ struct GGMLRunner {
         return ggml_get_tensor(cache_ctx, name.c_str());
     }
 
-    void compute(get_graph_cb_t get_graph,
+    bool compute(get_graph_cb_t get_graph,
                  int n_threads,
                  bool free_compute_buffer_immediately = true,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) {
         if (!offload_params_to_runtime_backend()) {
             LOG_ERROR("%s offload params to runtime backend failed", get_desc().c_str());
-            return;
+            return false;
+        }
+        if (!alloc_compute_buffer(get_graph)) {
+            LOG_ERROR("%s alloc compute buffer failed", get_desc().c_str());
+            return false;
         }
-        alloc_compute_buffer(get_graph);
         reset_compute_ctx();
         struct ggml_cgraph* gf = get_compute_graph(get_graph);
-        GGML_ASSERT(ggml_gallocr_alloc_graph(compute_allocr, gf));
+        if (!ggml_gallocr_alloc_graph(compute_allocr, gf)) {
+            LOG_ERROR("%s alloc compute graph failed", get_desc().c_str());
+            return false;
+        }
         copy_data_to_backend_tensor();
         if (ggml_backend_is_cpu(runtime_backend)) {
             ggml_backend_cpu_set_n_threads(runtime_backend, n_threads);
         }
 
-        ggml_backend_graph_compute(runtime_backend, gf);
+        ggml_status status = ggml_backend_graph_compute(runtime_backend, gf);
+        if (status != GGML_STATUS_SUCCESS) {
+            LOG_ERROR("%s compute failed: %s", get_desc().c_str(), ggml_status_to_string(status));
+            return false;
+        }
 #ifdef GGML_PERF
         ggml_graph_print(gf);
 #endif
@@ -1974,6 +1984,7 @@ struct GGMLRunner {
         if (free_compute_buffer_immediately) {
             free_compute_buffer();
         }
+        return true;
     }
 
     void set_flash_attention_enabled(bool enabled) {
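The GGMLRunner change relies on two ggml return values that were previously ignored or asserted on: ggml_gallocr_alloc_graph() returns a bool, and ggml_backend_graph_compute() returns a ggml_status that can be turned into a message with ggml_status_to_string(). Below is a standalone sketch of that check-the-status idiom against ggml's public API; it is not code from this repository, and header layout (for example ggml-cpu.h) varies between ggml versions.

#include <cstdio>
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"  // ggml_backend_cpu_init() is declared in ggml-backend.h on older versions

int main() {
    // Graph-only context: tensor data will live in a backend buffer, not here.
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 16 + ggml_graph_overhead(),
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context* ctx = ggml_init(params);

    struct ggml_tensor* a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor* b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor* c = ggml_add(ctx, a, b);

    struct ggml_cgraph* gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_gallocr_t galloc  = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
    if (!ggml_gallocr_alloc_graph(galloc, gf)) {
        fprintf(stderr, "alloc compute graph failed\n");
        return 1;
    }

    float a_data[4] = {1, 2, 3, 4};
    float b_data[4] = {10, 20, 30, 40};
    ggml_backend_tensor_set(a, a_data, 0, sizeof(a_data));
    ggml_backend_tensor_set(b, b_data, 0, sizeof(b_data));

    // The key pattern from the diff: inspect the status instead of asserting on it.
    ggml_status status = ggml_backend_graph_compute(backend, gf);
    if (status != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "compute failed: %s\n", ggml_status_to_string(status));
        return 1;
    }

    float c_data[4];
    ggml_backend_tensor_get(c, c_data, 0, sizeof(c_data));
    printf("c[0] = %.1f\n", c_data[0]);

    ggml_gallocr_free(galloc);
    ggml_backend_free(backend);
    ggml_free(ctx);
    return 0;
}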

llm.hpp

Lines changed: 2 additions & 2 deletions
@@ -1191,7 +1191,7 @@ namespace LLM {
            return gf;
        }
 
-        void compute(const int n_threads,
+        bool compute(const int n_threads,
                      struct ggml_tensor* input_ids,
                      std::vector<std::pair<int, ggml_tensor*>> image_embeds,
                      std::set<int> out_layers,
@@ -1200,7 +1200,7 @@ namespace LLM {
             auto get_graph = [&]() -> struct ggml_cgraph* {
                 return build_graph(input_ids, image_embeds, out_layers);
             };
-            GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+            return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
         }
 
         int64_t get_num_image_tokens(int64_t t, int64_t h, int64_t w) {

mmdit.hpp

Lines changed: 2 additions & 2 deletions
@@ -894,7 +894,7 @@ struct MMDiTRunner : public GGMLRunner {
         return gf;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
@@ -910,7 +910,7 @@ struct MMDiTRunner : public GGMLRunner {
             return build_graph(x, timesteps, context, y, skip_layers);
         };
 
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
     }
 
     void test() {
