Commit 289c329

use atomic
1 parent e7cd3ca commit 289c329
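
The diff below replaces the plain int64_t timing counters (read_time_ms, memcpy_time_ms, convert_time_ms, copy_to_backend_time_ms) with std::atomic<int64_t>. These counters are updated from the worker threads that load tensor data, so an unsynchronized `+=` on a shared int64_t was a data race; fetch_add makes each update an atomic read-modify-write. The commit also hoists the thread-count computation into num_threads_to_use and records last_n_threads so the final log can report approximate per-thread averages.

A minimal, self-contained sketch of the accumulation pattern (hypothetical names; std::chrono stands in for ggml_time_ms):

#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

// Wall-clock milliseconds, standing in for ggml_time_ms().
static int64_t now_ms() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    std::atomic<int64_t> read_time_ms(0);  // shared accumulator, as in the diff
    const int n_threads = 4;

    std::vector<std::thread> workers;
    for (int i = 0; i < n_threads; i++) {
        workers.emplace_back([&read_time_ms] {
            int64_t t0 = now_ms();
            std::this_thread::sleep_for(std::chrono::milliseconds(50));  // stand-in for file I/O
            int64_t t1 = now_ms();
            read_time_ms.fetch_add(t1 - t0);  // race-free, unlike += on a plain int64_t
        });
    }
    for (auto& w : workers) w.join();

    // Summed thread-time divided by the thread count approximates the
    // average time each thread spent, mirroring the logging change below.
    printf("read: %.2fs (avg per thread)\n",
           (read_time_ms.load() / (float)n_threads) / 1000.f);
    return 0;
}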


model.cpp

Lines changed: 48 additions & 53 deletions
@@ -1970,24 +1970,22 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
 }
 
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p) {
-    int64_t process_time_ms = 0;
-    int64_t read_time_ms = 0;
-    int64_t memcpy_time_ms = 0;
-    int64_t copy_to_backend_time_ms = 0;
-    int64_t convert_time_ms = 0;
-
-    int64_t prev_time_ms = 0;
-    int64_t curr_time_ms = 0;
-    int64_t start_time = ggml_time_ms();
-    prev_time_ms = start_time;
+    int64_t process_time_ms = 0;
+    std::atomic<int64_t> read_time_ms(0);
+    std::atomic<int64_t> memcpy_time_ms(0);
+    std::atomic<int64_t> copy_to_backend_time_ms(0);
+    std::atomic<int64_t> convert_time_ms(0);
+
+    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : (int)std::thread::hardware_concurrency();
+
+    int64_t start_time = ggml_time_ms();
     std::vector<TensorStorage> processed_tensor_storages;
 
     {
         std::unordered_map<std::string, TensorStorage> processed_map;
         std::mutex map_mutex;
 
-        int num_threads = n_threads_p > 0 ? n_threads_p : (int)std::thread::hardware_concurrency();
-        int n_threads = std::min(num_threads, (int)tensor_storages.size());
+        int n_threads = std::min(num_threads_to_use, (int)tensor_storages.size());
         if (n_threads < 1) {
             n_threads = 1;
         }
@@ -2028,14 +2026,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
         }
     }
 
-    curr_time_ms = ggml_time_ms();
-    process_time_ms = curr_time_ms - prev_time_ms;
-    prev_time_ms = curr_time_ms;
+    process_time_ms = ggml_time_ms() - start_time;
 
     bool success = true;
     size_t total_tensors_processed = 0;
     const size_t total_tensors_to_process = processed_tensor_storages.size();
     const int64_t t_start = ggml_time_ms();
+    int last_n_threads = 1;
 
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
         std::string file_path = file_paths_[file_index];
@@ -2059,11 +2056,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
            }
        }
 
-        int num_threads = n_threads_p > 0 ? n_threads_p : (int)std::thread::hardware_concurrency();
-        int n_threads = is_zip ? 1 : std::min(num_threads, (int)file_tensors.size());
+        int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
         if (n_threads < 1) {
             n_threads = 1;
         }
+        last_n_threads = n_threads;
 
         std::atomic<size_t> tensor_idx(0);
         std::atomic<bool> failed(false);
@@ -2093,6 +2090,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
             std::vector<uint8_t> convert_buffer;
 
             while (true) {
+                int64_t t0, t1;
                 size_t idx = tensor_idx.fetch_add(1);
                 if (idx >= file_tensors.size() || failed) {
                     break;
@@ -2101,13 +2099,17 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                 const TensorStorage& tensor_storage = *file_tensors[idx];
                 ggml_tensor* dst_tensor = NULL;
 
+                t0 = ggml_time_ms();
+
                 if (!on_new_tensor_cb(tensor_storage, &dst_tensor)) {
                     LOG_WARN("process tensor failed: '%s'", tensor_storage.name.c_str());
                     failed = true;
                     break;
                 }
 
                 if (dst_tensor == NULL) {
+                    t1 = ggml_time_ms();
+                    read_time_ms.fetch_add(t1 - t0);
                     continue;
                 }
 
@@ -2118,28 +2120,19 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                        zip_entry_openbyindex(zip, tensor_storage.index_in_zip);
                        size_t entry_size = zip_entry_size(zip);
                        if (entry_size != n) {
+                            int64_t t_memcpy_start;
                            read_buffer.resize(entry_size);
-                            prev_time_ms = ggml_time_ms();
                            zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
-                            curr_time_ms = ggml_time_ms();
-                            read_time_ms += curr_time_ms - prev_time_ms;
-                            prev_time_ms = curr_time_ms;
+                            t_memcpy_start = ggml_time_ms();
                            memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
-                            curr_time_ms = ggml_time_ms();
-                            memcpy_time_ms += curr_time_ms - prev_time_ms;
+                            memcpy_time_ms.fetch_add(ggml_time_ms() - t_memcpy_start);
                        } else {
-                            prev_time_ms = ggml_time_ms();
                            zip_entry_noallocread(zip, (void*)buf, n);
-                            curr_time_ms = ggml_time_ms();
-                            read_time_ms += curr_time_ms - prev_time_ms;
                        }
                        zip_entry_close(zip);
                    } else {
-                        prev_time_ms = ggml_time_ms();
                        file.seekg(tensor_storage.offset);
                        file.read(buf, n);
-                        curr_time_ms = ggml_time_ms();
-                        read_time_ms += curr_time_ms - prev_time_ms;
                        if (!file) {
                            LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
                            failed = true;
@@ -2156,8 +2149,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                    } else {
                        read_data((char*)dst_tensor->data, nbytes_to_read);
                    }
+                    t1 = ggml_time_ms();
+                    read_time_ms.fetch_add(t1 - t0);
 
-                    prev_time_ms = ggml_time_ms();
+                    t0 = ggml_time_ms();
                    if (tensor_storage.is_bf16) {
                        // inplace op
                        bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
@@ -2172,13 +2167,15 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                    } else if (tensor_storage.is_i64) {
                        i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
                    }
-                    curr_time_ms = ggml_time_ms();
-                    convert_time_ms += curr_time_ms - prev_time_ms;
+                    t1 = ggml_time_ms();
+                    convert_time_ms.fetch_add(t1 - t0);
                } else {
                    read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
                    read_data((char*)read_buffer.data(), nbytes_to_read);
+                    t1 = ggml_time_ms();
+                    read_time_ms.fetch_add(t1 - t0);
 
-                    prev_time_ms = ggml_time_ms();
+                    t0 = ggml_time_ms();
                    if (tensor_storage.is_bf16) {
                        // inplace op
                        bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2195,17 +2192,17 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                        // inplace op
                        i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
                    }
-
-                    convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
-                                   dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
-                    curr_time_ms = ggml_time_ms();
-                    convert_time_ms += curr_time_ms - prev_time_ms;
+                    convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data, dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
+                    t1 = ggml_time_ms();
+                    convert_time_ms.fetch_add(t1 - t0);
                }
            } else {
                read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
                read_data((char*)read_buffer.data(), nbytes_to_read);
+                t1 = ggml_time_ms();
+                read_time_ms.fetch_add(t1 - t0);
 
-                prev_time_ms = ggml_time_ms();
+                t0 = ggml_time_ms();
                if (tensor_storage.is_bf16) {
                    // inplace op
                    bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2229,20 +2226,18 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                    convert_time_ms += curr_time_ms - prev_time_ms;
                    prev_time_ms = curr_time_ms;
                    ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
-                    curr_time_ms = ggml_time_ms();
-                    copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
+                    t1 = ggml_time_ms();
+                    copy_to_backend_time_ms.fetch_add(t1 - t0);
                } else {
                    // convert first, then copy to device memory
                    convert_buffer.resize(ggml_nbytes(dst_tensor));
-                    convert_tensor((void*)read_buffer.data(), tensor_storage.type,
-                                   (void*)convert_buffer.data(), dst_tensor->type,
-                                   (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
-                    curr_time_ms = ggml_time_ms();
-                    convert_time_ms += curr_time_ms - prev_time_ms;
-                    prev_time_ms = curr_time_ms;
+                    convert_tensor((void*)read_buffer.data(), tensor_storage.type, (void*)convert_buffer.data(), dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
+                    t1 = ggml_time_ms();
+                    convert_time_ms.fetch_add(t1 - t0);
+                    t0 = ggml_time_ms();
                    ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
-                    curr_time_ms = ggml_time_ms();
-                    copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
+                    t1 = ggml_time_ms();
+                    copy_to_backend_time_ms.fetch_add(t1 - t0);
                }
            }
        }
@@ -2281,10 +2276,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
     LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
              (end_time - start_time) / 1000.f,
             process_time_ms / 1000.f,
-             read_time_ms / 1000.f,
-             memcpy_time_ms / 1000.f,
-             convert_time_ms / 1000.f,
-             copy_to_backend_time_ms / 1000.f);
+             (read_time_ms.load() / (float)last_n_threads) / 1000.f,
+             (memcpy_time_ms.load() / (float)last_n_threads) / 1000.f,
+             (convert_time_ms.load() / (float)last_n_threads) / 1000.f,
+             (copy_to_backend_time_ms.load() / (float)last_n_threads) / 1000.f);
     return success;
 }
 
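Note on the reported numbers: because every worker thread adds its own elapsed time into the shared atomics, the accumulated totals are summed thread-time rather than wall-clock time. The LOG_INFO at the end therefore divides each total by last_n_threads to approximate a per-thread average. Since last_n_threads is overwritten on every file iteration (and forced to 1 for zip archives, which are read single-threaded), the divisor reflects only the last file processed, so the reported averages are approximate when different files are loaded with different thread counts.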