Commit bcc9c0d

stduhpf, idostyle, and leejet authored
feat: handle ggml compute failures without crashing the program (leejet#1003)
* Feat: handle compute failures more gracefully
* Fix unreachable code after return
* Adjust z_image.hpp

Co-authored-by: idostyle <[email protected]>
Co-authored-by: leejet <[email protected]>
1 parent 5865b5e commit bcc9c0d

20 files changed: +163, -79 lines
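The commit applies one pattern across the runner hierarchy: every compute() now returns bool instead of void, failures are logged and reported upward, and callers check the result and unwind instead of hitting a GGML_ASSERT or abort(). The sketch below only illustrates that calling convention; ToyRunner and generate_step are hypothetical names, not code from this repository.

#include <cstdio>

// Hypothetical stand-in for a GGMLRunner-derived runner.
struct ToyRunner {
    // After this commit, compute() reports failure instead of aborting the process.
    bool compute(int n_threads) {
        bool backend_ok = (n_threads > 0);  // placeholder for the real ggml backend call
        if (!backend_ok) {
            fprintf(stderr, "compute failed\n");
            return false;
        }
        return true;
    }
};

// Callers propagate the failure up the call chain instead of crashing.
bool generate_step(ToyRunner& runner, int n_threads) {
    if (!runner.compute(n_threads)) {
        return false;
    }
    // ... consume the output tensor here ...
    return true;
}

int main() {
    ToyRunner runner;
    return generate_step(runner, 4) ? 0 : 1;
}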

clip.hpp

Lines changed: 2 additions & 2 deletions
@@ -963,7 +963,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  struct ggml_tensor* input_ids,
                  int num_custom_embeddings,
                  void* custom_embeddings_data,
@@ -975,7 +975,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
         };
-        GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
     }
 };

conditioner.hpp

Lines changed: 2 additions & 2 deletions
@@ -703,7 +703,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  ggml_tensor* pixel_values,
                  bool return_pooled,
                  int clip_skip,
@@ -712,7 +712,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(pixel_values, return_pooled, clip_skip);
         };
-        GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
     }
 };

control.hpp

Lines changed: 7 additions & 3 deletions
@@ -414,7 +414,7 @@ struct ControlNet : public GGMLRunner {
         return gf;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* hint,
                  struct ggml_tensor* timesteps,
@@ -430,8 +430,12 @@ struct ControlNet : public GGMLRunner {
             return build_graph(x, hint, timesteps, context, y);
         };
 
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
-        guided_hint_cached = true;
+        bool res = GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        if (res) {
+            // cache guided_hint
+            guided_hint_cached = true;
+        }
+        return res;
     }
 
     bool load_from_file(const std::string& file_path, int n_threads) {
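Note the ordering in the ControlNet change: guided_hint_cached is set only after GGMLRunner::compute() succeeds, so a failed run does not mark a never-computed hint as cached and a later call will recompute it. A minimal, self-contained illustration of that cache-on-success guard (HintCache and its members are hypothetical, not names from control.hpp):

#include <optional>

struct HintCache {
    std::optional<int> hint;   // stands in for the cached guided_hint tensor
    bool cached = false;

    bool compute_hint(bool backend_ok) {
        if (!backend_ok) {
            return false;      // leave cached == false so the next call retries
        }
        hint   = 42;           // placeholder for the real hint computation
        cached = true;         // mark the cache valid only on success
        return true;
    }
};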

denoiser.hpp

Lines changed: 42 additions & 5 deletions
@@ -666,7 +666,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser {
 typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
 
 // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t
-static void sample_k_diffusion(sample_method_t method,
+static bool sample_k_diffusion(sample_method_t method,
                                denoise_cb_t model,
                                ggml_context* work_ctx,
                                ggml_tensor* x,
@@ -685,6 +685,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -738,6 +741,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -769,6 +775,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], -(i + 1));
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -803,7 +812,10 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == nullptr) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1];
                     vec_d[j] = (vec_d[j] + d2) / 2;
@@ -819,6 +831,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -855,7 +870,10 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigma_mid, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == nullptr) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     float d2 = (vec_x2[j] - vec_denoised[j]) / sigma_mid;
                     vec_x[j] = vec_x[j] + d2 * dt_2;
@@ -871,6 +889,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // get_ancestral_step
                 float sigma_up = std::min(sigmas[i + 1],
@@ -907,6 +928,9 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // Second half-step
                 for (int j = 0; j < ggml_nelements(x); j++) {
@@ -937,6 +961,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 float t = t_fn(sigmas[i]);
                 float t_next = t_fn(sigmas[i + 1]);
@@ -976,6 +1003,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 float t = t_fn(sigmas[i]);
                 float t_next = t_fn(sigmas[i + 1]);
@@ -1026,7 +1056,10 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // Denoising step
                 ggml_tensor* denoised = model(x_cur, sigma, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == nullptr) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 // d_cur = (x_cur - denoised) / sigma
                 struct ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x_cur);
                 float* vec_d_cur = (float*)d_cur->data;
@@ -1169,6 +1202,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == nullptr) {
+                    return false;
+                }
 
                 // x = denoised
                 {
@@ -1561,8 +1597,9 @@ static void sample_k_diffusion(sample_method_t method,
 
         default:
             LOG_ERROR("Attempting to sample with nonexisting sample method %i", method);
-            abort();
+            return false;
     }
+    return true;
 }
 
 #endif // __DENOISER_HPP__
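Inside sample_k_diffusion the failure signal travels through the denoise_cb_t callback: when the diffusion model's compute() fails, the callback returns nullptr, every sampler branch checks for it and returns false, and the unknown-method default case returns false instead of calling abort(). A compressed, self-contained sketch of that flow follows; std::vector<float> stands in for ggml_tensor and all names are illustrative, not code from denoiser.hpp.

#include <cstdio>
#include <functional>
#include <vector>

// Stand-in for denoise_cb_t: a null result means the underlying compute failed.
using denoise_cb = std::function<std::vector<float>*(std::vector<float>*, float, int)>;

// Sketch of a sampler loop that unwinds on failure instead of dereferencing null.
static bool sample_sketch(const denoise_cb& model, std::vector<float>& x, const std::vector<float>& sigmas) {
    for (size_t i = 0; i + 1 < sigmas.size(); i++) {
        std::vector<float>* denoised = model(&x, sigmas[i], (int)i + 1);
        if (denoised == nullptr) {
            return false;  // propagate the compute failure to the caller
        }
        // ... Euler update of x using *denoised would go here ...
    }
    return true;
}

int main() {
    denoise_cb failing_model = [](std::vector<float>*, float, int) -> std::vector<float>* {
        return nullptr;  // simulate a ggml compute failure
    };
    std::vector<float> x(4, 0.0f);
    std::vector<float> sigmas = {1.0f, 0.5f, 0.0f};
    if (!sample_sketch(failing_model, x, sigmas)) {
        fprintf(stderr, "sampling aborted cleanly\n");
        return 1;
    }
    return 0;
}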

diffusion_model.hpp

Lines changed: 7 additions & 7 deletions
@@ -27,7 +27,7 @@ struct DiffusionParams {
 
 struct DiffusionModel {
     virtual std::string get_desc() = 0;
-    virtual void compute(int n_threads,
+    virtual bool compute(int n_threads,
                          DiffusionParams diffusion_params,
                          struct ggml_tensor** output = nullptr,
                          struct ggml_context* output_ctx = nullptr) = 0;
@@ -87,7 +87,7 @@ struct UNetModel : public DiffusionModel {
         unet.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -148,7 +148,7 @@ struct MMDiTModel : public DiffusionModel {
         mmdit.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -210,7 +210,7 @@ struct FluxModel : public DiffusionModel {
         flux.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -277,7 +277,7 @@ struct WanModel : public DiffusionModel {
         wan.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -343,7 +343,7 @@ struct QwenImageModel : public DiffusionModel {
         qwen_image.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
@@ -406,7 +406,7 @@ struct ZImageModel : public DiffusionModel {
         z_image.set_flash_attention_enabled(enabled);
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) override {
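Because DiffusionModel::compute is a pure virtual, the return type has to change in the base declaration and in every override within the same commit; a void override of a bool virtual does not compile. A minimal illustration of that constraint, with hypothetical types rather than the project's own:

#include <cstdio>

struct Model {
    virtual ~Model() = default;
    virtual bool compute(int n_threads) = 0;  // was void before this commit
};

struct UNetLike : Model {
    // The override's return type must match the new bool signature exactly.
    bool compute(int n_threads) override {
        return n_threads > 0;  // placeholder for the real runner call
    }
};

int main() {
    UNetLike m;
    Model* dm = &m;
    if (!dm->compute(4)) {
        fprintf(stderr, "diffusion step failed\n");
        return 1;
    }
    return 0;
}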

esrgan.hpp

Lines changed: 2 additions & 2 deletions
@@ -353,14 +353,14 @@ struct ESRGAN : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  struct ggml_tensor* x,
                  ggml_tensor** output,
                  ggml_context* output_ctx = nullptr) {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(x);
         };
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
     }
 };

flux.hpp

Lines changed: 2 additions & 2 deletions
@@ -1413,7 +1413,7 @@ namespace Flux {
             return gf;
         }
 
-        void compute(int n_threads,
+        bool compute(int n_threads,
                      struct ggml_tensor* x,
                      struct ggml_tensor* timesteps,
                      struct ggml_tensor* context,
@@ -1434,7 +1434,7 @@ namespace Flux {
                 return build_graph(x, timesteps, context, c_concat, y, guidance, ref_latents, increase_ref_index, skip_layers);
             };
 
-            GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+            return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
         }
 
         void test() {

ggml_extend.hpp

Lines changed: 16 additions & 5 deletions
@@ -1938,25 +1938,35 @@ struct GGMLRunner {
         return ggml_get_tensor(cache_ctx, name.c_str());
     }
 
-    void compute(get_graph_cb_t get_graph,
+    bool compute(get_graph_cb_t get_graph,
                  int n_threads,
                  bool free_compute_buffer_immediately = true,
                  struct ggml_tensor** output = nullptr,
                  struct ggml_context* output_ctx = nullptr) {
         if (!offload_params_to_runtime_backend()) {
             LOG_ERROR("%s offload params to runtime backend failed", get_desc().c_str());
-            return;
+            return false;
+        }
+        if (!alloc_compute_buffer(get_graph)) {
+            LOG_ERROR("%s alloc compute buffer failed", get_desc().c_str());
+            return false;
         }
-        alloc_compute_buffer(get_graph);
         reset_compute_ctx();
         struct ggml_cgraph* gf = get_compute_graph(get_graph);
-        GGML_ASSERT(ggml_gallocr_alloc_graph(compute_allocr, gf));
+        if (!ggml_gallocr_alloc_graph(compute_allocr, gf)) {
+            LOG_ERROR("%s alloc compute graph failed", get_desc().c_str());
+            return false;
+        }
         copy_data_to_backend_tensor();
         if (ggml_backend_is_cpu(runtime_backend)) {
             ggml_backend_cpu_set_n_threads(runtime_backend, n_threads);
         }
 
-        ggml_backend_graph_compute(runtime_backend, gf);
+        ggml_status status = ggml_backend_graph_compute(runtime_backend, gf);
+        if (status != GGML_STATUS_SUCCESS) {
+            LOG_ERROR("%s compute failed: %s", get_desc().c_str(), ggml_status_to_string(status));
+            return false;
+        }
 #ifdef GGML_PERF
         ggml_graph_print(gf);
 #endif
@@ -1974,6 +1984,7 @@ struct GGMLRunner {
         if (free_compute_buffer_immediately) {
             free_compute_buffer();
         }
+        return true;
     }
 
     void set_flash_attention_enabled(bool enabled) {
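The GGMLRunner change relies on two ggml return values that were previously ignored or asserted on: ggml_gallocr_alloc_graph() returns a bool, and ggml_backend_graph_compute() returns a ggml_status that can be turned into a message with ggml_status_to_string(). Below is a standalone sketch of that check-the-status idiom against ggml's public API; it is not code from this repository, and header layout (for example ggml-cpu.h) varies between ggml versions.

#include <cstdio>
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"  // ggml_backend_cpu_init() is declared in ggml-backend.h on older versions

int main() {
    // Graph-only context: tensor data will live in a backend buffer, not here.
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 16 + ggml_graph_overhead(),
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context* ctx = ggml_init(params);

    struct ggml_tensor* a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor* b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor* c = ggml_add(ctx, a, b);

    struct ggml_cgraph* gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_gallocr_t galloc  = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
    if (!ggml_gallocr_alloc_graph(galloc, gf)) {
        fprintf(stderr, "alloc compute graph failed\n");
        return 1;
    }

    float a_data[4] = {1, 2, 3, 4};
    float b_data[4] = {10, 20, 30, 40};
    ggml_backend_tensor_set(a, a_data, 0, sizeof(a_data));
    ggml_backend_tensor_set(b, b_data, 0, sizeof(b_data));

    // The key pattern from the diff: inspect the status instead of asserting on it.
    ggml_status status = ggml_backend_graph_compute(backend, gf);
    if (status != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "compute failed: %s\n", ggml_status_to_string(status));
        return 1;
    }

    float c_data[4];
    ggml_backend_tensor_get(c, c_data, 0, sizeof(c_data));
    printf("c[0] = %.1f\n", c_data[0]);

    ggml_gallocr_free(galloc);
    ggml_backend_free(backend);
    ggml_free(ctx);
    return 0;
}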

llm.hpp

Lines changed: 2 additions & 2 deletions
@@ -1191,7 +1191,7 @@ namespace LLM {
            return gf;
        }
 
-        void compute(const int n_threads,
+        bool compute(const int n_threads,
                      struct ggml_tensor* input_ids,
                      std::vector<std::pair<int, ggml_tensor*>> image_embeds,
                      std::set<int> out_layers,
@@ -1200,7 +1200,7 @@ namespace LLM {
             auto get_graph = [&]() -> struct ggml_cgraph* {
                 return build_graph(input_ids, image_embeds, out_layers);
             };
-            GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+            return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
         }
 
         int64_t get_num_image_tokens(int64_t t, int64_t h, int64_t w) {

mmdit.hpp

Lines changed: 2 additions & 2 deletions
@@ -894,7 +894,7 @@ struct MMDiTRunner : public GGMLRunner {
         return gf;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
@@ -910,7 +910,7 @@ struct MMDiTRunner : public GGMLRunner {
             return build_graph(x, timesteps, context, y, skip_layers);
         };
 
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
     }
 
     void test() {
