Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
d8e902e
Add --show-statistics option
EAddario Apr 1, 2025
f46693b
Add --show-statistics logic
EAddario Apr 1, 2025
b3ac78b
Merge branch 'master' into imatrix
EAddario Apr 1, 2025
dc3373e
Add tensor name parsing
EAddario Apr 2, 2025
0589c3e
Tidy output format
EAddario Apr 2, 2025
e1fd1af
Fix typo in title
EAddario Apr 2, 2025
490a8fe
Merge branch 'master' into imatrix
EAddario Apr 7, 2025
62ac268
Improve tensor influence ranking
EAddario Apr 8, 2025
73d8ecb
Add better statistics
EAddario Apr 13, 2025
200d88c
Merge branch 'master' into imatrix
EAddario Apr 13, 2025
0b7f9c4
Change statistics' sort order
EAddario Apr 15, 2025
52e86e2
Merge branch 'master' into imatrix
EAddario Apr 15, 2025
91d48da
Merge branch 'master' into imatrix
EAddario Apr 19, 2025
755c1ef
Add Cosine Similarity
EAddario Apr 22, 2025
72a5ec1
Merge branch 'master' into imatrix
EAddario May 3, 2025
5cd20e4
Add header search path
EAddario May 3, 2025
1dbe6c3
Change header search path to private
EAddario May 3, 2025
bb47f0d
Merge branch 'master' into imatrix
EAddario May 11, 2025
a3ac66c
Merge branch 'master' into imatrix
EAddario May 25, 2025
3eb556e
Add weighted statistics per layer
EAddario May 25, 2025
0276d71
Merge branch 'master' into imatrix
EAddario Jun 3, 2025
1f8dc23
Merge branch 'master' into imatrix
EAddario Jun 13, 2025
8ecd5fa
Merge branch 'master' into imatrix
EAddario Jun 14, 2025
8302a8a
Merge branch 'master' into imatrix
EAddario Jun 15, 2025
bfc0dfc
Merge branch 'master' into imatrix
EAddario Jun 21, 2025
5cfc443
Update report title
EAddario Jun 21, 2025
280dfdd
Merge branch 'master' into imatrix
EAddario Jun 22, 2025
235442a
Refactor compute_statistics out of main
EAddario Jun 22, 2025
c823d16
Refactor compute_cossim out of load_imatrix
EAddario Jun 22, 2025
a5c4640
Refactor compute_statistics out of load_imatrix
EAddario Jun 22, 2025
655be19
Move imatrix statistics calculation into its own functions
EAddario Jun 22, 2025
23ecca8
Add checks and validations
EAddario Jun 22, 2025
a4166a8
Remove unnecessary include directory
EAddario Jun 22, 2025
ed4ba31
Merge branch 'master' into imatrix
EAddario Jun 23, 2025
19f8e15
Rename labels
EAddario Jun 24, 2025
f5fd2b7
Add m_stats getter and refactor compute_statistics out of load_imatrix
EAddario Jun 24, 2025
bc3bd57
Refactor variable names
EAddario Jun 24, 2025
c3ede42
Merge branch 'master' into imatrix
EAddario Jun 24, 2025
1389753
Merge branch 'master' into imatrix
EAddario Jun 29, 2025
fde3089
Minor cosmetic change
EAddario Jun 29, 2025
c5a3d0a
Retrigger checks (empty commit)
EAddario Jul 1, 2025
688d0c2
Merge branch 'master' into imatrix
EAddario Jul 5, 2025
b1c481a
Rerun checks (empty commit)
EAddario Jul 5, 2025
dd13175
Fix unnecessary type promotion
EAddario Jul 7, 2025
0cd8e67
Reverting change to improve code readability
EAddario Jul 7, 2025
6c72d8e
Merge branch 'master' into imatrix
EAddario Jul 7, 2025
6826341
Rerun checks (empty commit)
EAddario Jul 7, 2025
432650b
Rerun checks (empty commit)
EAddario Jul 8, 2025
61a21a4
Rerun checks - third time's the Charm 🤞 (empty commit)
EAddario Jul 9, 2025
1a43247
Merge branch 'master' into imatrix
EAddario Jul 11, 2025
a3fdb2b
Minor cosmetic change
EAddario Jul 12, 2025
f9391bd
Update README
EAddario Jul 12, 2025
98bcd3e
Fix typo
EAddario Jul 12, 2025
71d8492
Merge branch 'master' into imatrix
EAddario Jul 13, 2025
69a0b17
Update README
EAddario Jul 13, 2025
9f2c558
Rerun checks (empty commit)
EAddario Jul 13, 2025
2b45dca
Merge branch 'master' into imatrix
EAddario Jul 17, 2025
3c1e250
Merge branch 'master' into imatrix
EAddario Jul 19, 2025
f7b1ab2
Re-implement changes on top of #9400
EAddario Jul 19, 2025
fa7c3da
Update README.md
EAddario Jul 19, 2025
e9ab48f
Update README
EAddario Jul 19, 2025
c02952a
Update README.md
EAddario Jul 19, 2025
75cd6e0
Update README.md
EAddario Jul 19, 2025
50e2ecf
Update README.md
EAddario Jul 19, 2025
139ff4b
Remove duplicate option in print_usage()
EAddario Jul 19, 2025
ac0922f
Update README.md
EAddario Jul 19, 2025
693dfb2
Update README.md
EAddario Jul 19, 2025
aef8914
Update README.md
EAddario Jul 19, 2025
fc77a54
Remove input check
EAddario Jul 20, 2025
360c8a5
Remove commented out code
EAddario Jul 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Re-implement changes on top of #9400
  • Loading branch information
EAddario committed Jul 19, 2025
commit f7b1ab2772a8114147d109b4853da799ba6178e9
268 changes: 263 additions & 5 deletions tools/imatrix/imatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include <fstream>
#include <unordered_map>
#include <map>
#include <regex>
#include <numeric>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
Expand All @@ -24,10 +26,10 @@
static void print_usage(int, char ** argv) {
    // Prints a usage example for the imatrix tool. Each option is listed
    // exactly once (--no-ppl previously appeared twice).
    LOG("\nexample usage:\n");
    LOG("\n    %s \\\n"
        "       -m model.gguf -f some-text.txt [-o imatrix.gguf] [--process-output] \\\n"
        "       [--chunk 123] [--output-frequency 10] [--save-frequency 0] [--show-statistics] \\\n"
        "       [--no-ppl] [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] \\\n"
        "       [--parse-special] [...]\n" , argv[0]);
    LOG("\n");
}

Expand All @@ -40,6 +42,21 @@ struct Stats {
std::vector<int64_t> counts;
};

// Per-tensor summary statistics derived from the accumulated (squared)
// activations in a Stats entry; filled in by compute_statistics() and
// compute_cossim().
struct tensor_statistics {
    std::string tensor;         // full tensor name (e.g. "blk.0.attn_k.weight")
    Stats stats;                // copy of the raw accumulated values/counts
    float total_sqract = 0.0f;  // sum of the per-row mean squared activations
    float mean_sqract = 0.0f;   // mean of the squared activations
    float max_sqract = 0.0f;    // largest squared activation
    float min_sqract = 0.0f;    // smallest squared activation
    int elements = 0;           // number of activation entries considered
    float stddev = 0.0f;        // standard deviation of the squared activations
    float active = 0.0f;        // fraction of activations with |v| above a small threshold
    float entropy = 0.0f;       // Shannon entropy of the normalized activation distribution
    float zd = 0.0f;            // fraction of activations more than 1 stddev above the mean
    float cossim = 0.0f;        // cosine similarity vs. the same tensor in the previous block (0 if none)
};

class IMatrixCollector {
public:
IMatrixCollector() = default;
Expand All @@ -49,6 +66,7 @@ class IMatrixCollector {
void save_imatrix(int32_t n_chunk = -1) const;
bool load_imatrix_legacy(const char * fname);
bool load_imatrix(const char * file_name);
const std::unordered_map<std::string, Stats> & get_mstats() const { return m_stats; }
private:
std::unordered_map<std::string, Stats> m_stats;
common_params m_params;
Expand Down Expand Up @@ -78,6 +96,127 @@ static std::string filter_tensor_name(const char * name) {
return wname;
}

// Splits a dotted tensor name into its layer id and base tensor name.
// The layer is the component following "blk" ("-" when absent); the tensor is
// the component preceding "weight" (the whole input when absent).
static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) {
    std::vector<std::string> parts;
    std::istringstream ss(input);
    for (std::string tok; std::getline(ss, tok, '.'); ) {
        parts.push_back(tok);
    }

    // layer id: the component that immediately follows a "blk" component
    for (size_t i = 0; i + 1 < parts.size(); ++i) {
        if (parts[i] == "blk") {
            layer = parts[i + 1];
            break;
        }
    }
    // tensor name: the component that immediately precedes a "weight" component
    for (size_t i = 1; i < parts.size(); ++i) {
        if (parts[i] == "weight") {
            tensor = parts[i - 1];
            break;
        }
    }

    if (tensor.empty()) {
        tensor = input; // no ".weight" suffix — fall back to the full name
    }
    if (layer.empty()) {
        layer = "-";    // not a per-block tensor
    }
}

// Computes summary statistics (total/mean/min/max, spread, sparsity, entropy,
// z-score density) over the per-row mean activations of a tensor and appends
// the result to tstats. Logs and returns without appending on invalid input.
static void compute_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
    // The empty check must come first: the divisibility test below would
    // otherwise modulo-divide by zero.
    if (e.counts.empty()) {
        LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str());
        return;
    }
    if (e.values.size() % e.counts.size() != 0) {
        LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.values.size());
        return;
    }

    const int n_mat = e.counts.size();
    const int row_size = e.values.size() / n_mat;

    // Normalize each accumulated activation by its row's sample count.
    std::vector<float> activations;
    activations.reserve(e.values.size());
    for (int i = 0; i < n_mat; ++i) {
        for (int j = 0; j < row_size; ++j) {
            activations.push_back(e.values[i*row_size + j] / e.counts[i]);
        }
    }

    const float act_total     = std::accumulate(activations.begin(), activations.end(), 0.0f);
    const float act_max       = *std::max_element(activations.begin(), activations.end());
    const float act_min       = *std::min_element(activations.begin(), activations.end());
    const float act_mean      = act_total / activations.size();
    const float act_sqr_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
    const float act_var       = (act_sqr_total / activations.size()) - (act_mean * act_mean);
    // max() clamps tiny negative variances produced by floating-point rounding
    const float act_dev       = std::sqrt(std::max(0.0f, act_var));

    // Fraction of activations whose magnitude exceeds a small threshold.
    constexpr float threshold = 1e-5f;
    const int inactive_count = std::count_if(activations.begin(), activations.end(),
                                             [](const float v) { return fabsf(v) <= threshold; });
    const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();

    // Shannon entropy of the activations treated as a probability distribution
    // (each value divided by the total); only meaningful when the total is positive.
    float entropy = 0;
    if (act_total > 0) {
        for (const auto act : activations) {
            if (const float p = act / act_total; p > 0) {
                entropy -= p * std::log2(p);
            }
        }
    }

    // Number of activations more than one standard deviation above the mean.
    int z_score = 0;
    if (act_dev > 0.0f) {
        for (const auto act : activations) {
            if (const float p = (act - act_mean) / act_dev; p > 1) {
                z_score++;
            }
        }
    }

    auto & ts = tstats.emplace_back();
    ts.tensor       = name;
    ts.stats        = e;
    ts.total_sqract = act_total;
    ts.mean_sqract  = act_mean;
    ts.max_sqract   = act_max;
    ts.min_sqract   = act_min;
    ts.elements     = static_cast<int>(activations.size());
    ts.stddev       = act_dev;
    ts.active       = active_ratio;
    ts.entropy      = entropy;
    ts.zd           = static_cast<float>(z_score) / ts.elements;
}

// For every tensor belonging to block N, computes the cosine similarity of its
// raw accumulated activations against the same tensor in block N-1.
// Tensors without a block number, without a matching predecessor, with a
// size-mismatched predecessor, or with a zero-magnitude vector keep cossim == 0.
static void compute_cossim(std::vector<tensor_statistics> & tstats) {
    static const std::regex pattern(R"(blk\.(\d+)\.)");
    for (auto & ts : tstats) {
        if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
            const int blk = std::stoi(match[1]);
            // Build the name of the same tensor one block earlier.
            std::string tname(ts.tensor);
            tname.replace(match.position(1), match.length(1), std::to_string(blk-1));
            auto prev = std::find_if(tstats.begin(), tstats.end(),
                                     [&tname](const tensor_statistics & t) { return t.tensor == tname; });
            // The length check prevents inner_product from reading past the
            // end of the (shorter) predecessor vector.
            if (prev != tstats.end() && prev->stats.values.size() == ts.stats.values.size()) {
                const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
                                                    prev->stats.values.begin(), 0.0f);
                const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
                                                                    ts.stats.values.begin(), 0.0f));
                const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(),
                                                                    prev->stats.values.begin(), 0.0f));
                // Guard against division by zero (NaN) when either vector is all zeros.
                ts.cossim = (curr_mag > 0.0f && prev_mag > 0.0f) ? dp / (curr_mag * prev_mag) : 0.0f;
            }
        } else {
            ts.cossim = 0;
        }
    }
}

bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
GGML_UNUSED(user_data);

Expand Down Expand Up @@ -678,7 +817,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat
return g_collector.collect_imatrix(t, ask, user_data);
}


struct results_log_softmax {
double log_softmax;
float logit;
Expand Down Expand Up @@ -926,6 +1064,113 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c
return true;
}

// Loads a single imatrix file, computes per-tensor statistics and cosine
// similarities, then prints a per-tensor table followed by element-weighted
// per-layer averages. Returns false on any input or load error.
static bool show_statistics(const common_params & params) {
    std::vector<tensor_statistics> ts;
    if (params.in_files.size() != 1) {
        LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
        return false;
    }
    if (g_collector.load_imatrix(params.in_files[0].c_str())) {
        for (const auto & [name, stats] : g_collector.get_mstats()) {
            compute_statistics(ts, name, stats);
        }
    } else {
        LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
        return false;
    }
    if (!ts.empty()) {
        compute_cossim(ts);
    } else {
        LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
        return false;
    }

    // Order by tensor type name, then by descending total squared activation,
    // so the most influential layer of each tensor type is listed first.
    struct tensor_comparer {
        bool operator()(const tensor_statistics & a, const tensor_statistics & b) const {
            std::string layer, name_a, name_b;
            process_tensor_name(a.tensor, layer, name_a);
            process_tensor_name(b.tensor, layer, name_b);
            return name_a < name_b || (name_a == name_b && a.total_sqract > b.total_sqract);
        }
    };
    std::sort(ts.begin(), ts.end(), tensor_comparer());

    // Element-weighted accumulators, keyed by block number (-1 = non-block tensors).
    struct weighted_stats {
        float weighted_bias   = 0.0f;
        float weighted_zd     = 0.0f;
        float weighted_cossim = 0.0f;
        int   total_elements  = 0;
    };
    std::map<int, weighted_stats> ws;

    LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
    LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", " Layer", " Tensor", " Σ(Act²)",
            " Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD",
            " CosSim");
    LOG_INF(
        "=============================================================================================================="
        "===========================================================\n");
    for (const auto & tstat : ts) {
        std::string layer, name;
        process_tensor_name(tstat.tensor, layer, name);

        int blk;
        try {
            blk = std::stoi(layer);
        } catch (const std::exception &) {
            blk = -1; // not a block layer
        }

        LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
                layer.c_str(), name.c_str(), tstat.total_sqract, tstat.min_sqract, tstat.max_sqract, tstat.mean_sqract,
                tstat.stddev, tstat.active * 100.0f, tstat.elements, tstat.entropy,
                100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * tstat.zd, tstat.cossim);

        // std::map::operator[] value-initializes a missing entry, so the
        // accumulators start at zero and can be updated unconditionally.
        auto & cur = ws[blk];
        cur.weighted_bias   += tstat.elements * tstat.total_sqract;
        cur.weighted_zd     += tstat.elements * tstat.zd;
        cur.weighted_cossim += tstat.elements * tstat.cossim;
        cur.total_elements  += tstat.elements;
    }

    const int layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; });
    LOG_INF("\nComputing weighted average statistics per layer (%d layers)\n", layers);
    LOG_INF("\n%s\t%s\t%s\t%s\n", " Layer", "  μΣ(Act²)", "  μZD", "μCosSim");
    LOG_INF("================================================\n");
    for (const auto & [layer, stats] : ws) {
        // Skip non-block tensors (-1) and empty accumulators.
        if (layer < 0 || stats.total_elements == 0) {
            continue;
        }
        const float bias   = stats.weighted_bias   / stats.total_elements;
        const float zd     = stats.weighted_zd     / stats.total_elements;
        const float cossim = stats.weighted_cossim / stats.total_elements;

        LOG_INF("%5d\t%14.2f\t%10.4f%%\t%6.4f\n", layer, bias, 100.0f * zd, cossim);
    }
    LOG_INF("\n");

    return true;
}

int main(int argc, char ** argv) {
common_params params;

Expand All @@ -938,6 +1183,19 @@ int main(int argc, char ** argv) {
return 1;
}

if (params.in_files.empty() || params.model.path.empty()) {
LOG_ERR("%s: an input file is required", __func__);
print_usage(argc, argv);
return 1;
}

if (params.show_statistics) {
if (!show_statistics(params)) {
return 1;
}
return 0;
}

common_init();

const int32_t n_ctx = params.n_ctx;
Expand Down