@@ -1973,57 +1973,48 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
19731973 std::vector<TensorStorage> processed_tensor_storages;
19741974
19751975 {
1976+ std::unordered_map<std::string, TensorStorage> processed_map;
1977+ std::mutex map_mutex;
1978+
19761979 int n_threads = std::min ((int )std::thread::hardware_concurrency (), (int )tensor_storages.size ());
19771980 if (n_threads < 1 ) {
19781981 n_threads = 1 ;
19791982 }
1980-
1981- std::vector<std::unordered_map<std::string, TensorStorage> > local_maps (n_threads);
19821983 std::vector<std::thread> workers;
1983- size_t chunk_size = (tensor_storages.size () + n_threads - 1 ) / n_threads;
19841984
19851985 for (int i = 0 ; i < n_threads; ++i) {
19861986 workers.emplace_back ([&, thread_id = i]() {
1987- const size_t start = thread_id * chunk_size;
1988- const size_t end = std::min (start + chunk_size, tensor_storages.size ());
19891987
1988+ std::unordered_map<std::string, TensorStorage> local_processed_map;
19901989 std::vector<TensorStorage> temp_storages;
1991- for (size_t j = start; j < end; ++j) {
1990+
1991+ for (size_t j = thread_id; j < tensor_storages.size (); j += n_threads) {
19921992 const auto & tensor_storage = tensor_storages[j];
19931993 if (is_unused_tensor (tensor_storage.name )) {
19941994 continue ;
19951995 }
1996-
1996+
19971997 temp_storages.clear ();
19981998 preprocess_tensor (tensor_storage, temp_storages);
1999-
2000- for (size_t k = 0 ; k < temp_storages. size (); ++k ) {
2001- local_maps[thread_id][temp_storages[k] .name ] = temp_storages[k] ;
1999+
2000+ for (const auto & ts : temp_storages) {
2001+ local_processed_map[ts .name ] = ts ;
20022002 }
20032003 }
2004- });
2005- }
2006-
2007- for (size_t i = 0 ; i < workers.size (); ++i) {
2008- workers[i].join ();
2009- }
20102004
2011- std::unordered_map<std::string, TensorStorage> processed_map;
2012- size_t total_keys = 0 ;
2013- for (int i = 0 ; i < n_threads; ++i) {
2014- total_keys += local_maps[i].size ();
2005+ if (!local_processed_map.empty ()) {
2006+ std::lock_guard<std::mutex> lock (map_mutex);
2007+ processed_map.merge (local_processed_map);
2008+ }
2009+ });
20152010 }
2016- processed_map.reserve (total_keys);
2017-
2018- for (int i = 0 ; i < n_threads; ++i) {
2019- for (std::unordered_map<std::string, TensorStorage>::const_iterator it = local_maps[i].begin (); it != local_maps[i].end (); ++it) {
2020- processed_map[it->first ] = it->second ;
2021- }
2011+ for (auto & w : workers) {
2012+ w.join ();
20222013 }
2023-
2014+
20242015 processed_tensor_storages.reserve (processed_map.size ());
2025- for (std::unordered_map<std::string, TensorStorage>::const_iterator it = processed_map. begin (); it != processed_map. end (); ++it ) {
2026- processed_tensor_storages.push_back (it-> second );
2016+ for (auto const & [name, ts] : processed_map) {
2017+ processed_tensor_storages.push_back (ts );
20272018 }
20282019 }
20292020
0 commit comments