Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
09f716d
Add llama_model_quantize_params parameters
EAddario Mar 13, 2025
ac908af
Add new quantize parameters parsing and validation
EAddario Mar 13, 2025
337d979
Update usage
EAddario Mar 13, 2025
6f8d16d
Add new parameters defaults
EAddario Mar 13, 2025
71c9f93
Add new quantization parameters logic
EAddario Mar 13, 2025
8e18131
Add llama_model_quantize_params parameters
EAddario Mar 13, 2025
a77d947
Add new quantize parameters parsing and validation
EAddario Mar 13, 2025
2414eaa
Update usage
EAddario Mar 13, 2025
0dd66b8
Add new parameters defaults
EAddario Mar 13, 2025
1d841c6
Add new quantization parameters logic
EAddario Mar 13, 2025
120f71b
Merge main changes into branch
EAddario Mar 14, 2025
dbcc0b5
Merge branch 'master' into quantize
EAddario Mar 14, 2025
d86de03
Minor refactoring as per the contributors' coding guidelines
EAddario Mar 14, 2025
99bae5e
Update descriptions to match existing style
EAddario Mar 14, 2025
60b0a53
Merge branch 'master' into quantize
EAddario Mar 14, 2025
3e2063d
Merge branch 'master' into quantize
EAddario Mar 16, 2025
b99fa62
Merge branch 'master' into quantize
EAddario Mar 19, 2025
f97b693
Add llama_model_quantize_params parameters
EAddario Mar 19, 2025
f11e3da
Add new quantize parameters parsing and validation
EAddario Mar 19, 2025
ad1e352
Update usage
EAddario Mar 19, 2025
4e5c96a
Add new parameters defaults
EAddario Mar 19, 2025
9b3ccb5
Add new quantization parameters logic
EAddario Mar 19, 2025
35f45f1
Minor refactoring as per the contributors' guidelines
EAddario Mar 19, 2025
071e9ef
Merge branch 'master' into quantize
EAddario Mar 22, 2025
54e13cf
Implement general --tensor-type instead of tensor-specific command op…
EAddario Mar 29, 2025
31d642c
Merge branch 'master' into quantize
EAddario Mar 29, 2025
b3c7db5
Fix implied type bug
EAddario Mar 30, 2025
625f0ae
Restore missing #includes
EAddario Mar 31, 2025
2fd0b41
Add regex capability for tensor selection
EAddario Apr 1, 2025
3e9f565
Merge branch 'master' into quantize
EAddario Apr 2, 2025
054ede4
Refactor function name and update ALLOWED_TENSOR_TYPE
EAddario Apr 3, 2025
5a304b8
Add missing #include
EAddario Apr 3, 2025
1acb9f4
Handle edge case when tensor name is cls.output
EAddario Apr 3, 2025
04604a4
Minor logging improvement
EAddario Apr 7, 2025
30443a5
Merge branch 'master' into quantize
EAddario Apr 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add new quantize parameters parsing and validation
  • Loading branch information
EAddario committed Mar 19, 2025
commit f11e3da291f2bf367d4d528640d3f1ebdabcdb3c
114 changes: 111 additions & 3 deletions examples/quantize/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,15 @@ int main(int argc, char ** argv) {
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--attention-qkv-type") == 0) {
if (arg_idx < argc-1) {
params.attn_qkv_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.attn_qkv_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--attention-q-type") == 0) {
if (arg_idx < argc-1) {
params.attn_q_tensor_type = parse_ggml_type(argv[++arg_idx]);
Expand Down Expand Up @@ -312,10 +321,37 @@ int main(int argc, char ** argv) {
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--attention-qkv-type") == 0) {
} else if (strcmp(argv[arg_idx], "--attention-qa-type") == 0) {
if (arg_idx < argc-1) {
params.attn_qkv_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.attn_qkv_tensor_type == GGML_TYPE_COUNT) {
params.attn_qa_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.attn_qa_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--attention-qb-type") == 0) {
if (arg_idx < argc-1) {
params.attn_qb_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.attn_qb_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--attention-kva-type") == 0) {
if (arg_idx < argc-1) {
params.attn_kva_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.attn_kva_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--attention-kvb-type") == 0) {
if (arg_idx < argc-1) {
params.attn_kvb_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.attn_kvb_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
Expand Down Expand Up @@ -357,6 +393,78 @@ int main(int argc, char ** argv) {
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--feedforward-up-exp-type") == 0) {
if (arg_idx < argc-1) {
params.ffn_up_exp_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.ffn_up_exp_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--feedforward-gate-exp-type") == 0) {
if (arg_idx < argc-1) {
params.ffn_gate_exp_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.ffn_gate_exp_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--feedforward-down-exp-type") == 0) {
if (arg_idx < argc-1) {
params.ffn_down_exp_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.ffn_down_exp_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--feedforward-up-shexp-type") == 0) {
if (arg_idx < argc-1) {
params.ffn_up_shexp_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.ffn_up_shexp_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--feedforward-gate-shexp-type") == 0) {
if (arg_idx < argc-1) {
params.ffn_gate_shexp_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.ffn_gate_shexp_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--feedforward-down-shexp-type") == 0) {
if (arg_idx < argc-1) {
params.ffn_down_shexp_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.ffn_down_shexp_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--classifier-type") == 0) {
if (arg_idx < argc-1) {
params.cls_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.cls_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--classifier-output-type") == 0) {
if (arg_idx < argc-1) {
params.cls_output_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.cls_output_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--override-kv") == 0) {
if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) {
usage(argv[0]);
Expand Down