@@ -1973,48 +1973,57 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     std::vector<TensorStorage> processed_tensor_storages;
 
     {
-        std::unordered_map<std::string, TensorStorage> processed_map;
-        std::mutex map_mutex;
-
         int n_threads = std::min((int)std::thread::hardware_concurrency(), (int)tensor_storages.size());
         if (n_threads < 1) {
             n_threads = 1;
         }
+
+        std::vector<std::unordered_map<std::string, TensorStorage> > local_maps(n_threads);
         std::vector<std::thread> workers;
+        size_t chunk_size = (tensor_storages.size() + n_threads - 1) / n_threads;
 
         for (int i = 0; i < n_threads; ++i) {
             workers.emplace_back([&, thread_id = i]() {
+                const size_t start = thread_id * chunk_size;
+                const size_t end = std::min(start + chunk_size, tensor_storages.size());
 
-                std::unordered_map<std::string, TensorStorage> local_processed_map;
                 std::vector<TensorStorage> temp_storages;
-
-                for (size_t j = thread_id; j < tensor_storages.size(); j += n_threads) {
+                for (size_t j = start; j < end; ++j) {
                     const auto& tensor_storage = tensor_storages[j];
                     if (is_unused_tensor(tensor_storage.name)) {
                         continue;
                     }
-
+
                     temp_storages.clear();
                     preprocess_tensor(tensor_storage, temp_storages);
-
-                    for (const auto& ts : temp_storages) {
-                        local_processed_map[ts.name] = ts;
-                    }
-                }
 
-                if (!local_processed_map.empty()) {
-                    std::lock_guard<std::mutex> lock(map_mutex);
-                    processed_map.merge(local_processed_map);
+                    for (size_t k = 0; k < temp_storages.size(); ++k) {
+                        local_maps[thread_id][temp_storages[k].name] = temp_storages[k];
+                    }
                 }
             });
         }
-        for (auto& w : workers) {
-            w.join();
+
+        for (size_t i = 0; i < workers.size(); ++i) {
+            workers[i].join();
+        }
+
+        std::unordered_map<std::string, TensorStorage> processed_map;
+        size_t total_keys = 0;
+        for (int i = 0; i < n_threads; ++i) {
+            total_keys += local_maps[i].size();
         }
-
+        processed_map.reserve(total_keys);
+
+        for (int i = 0; i < n_threads; ++i) {
+            for (std::unordered_map<std::string, TensorStorage>::const_iterator it = local_maps[i].begin(); it != local_maps[i].end(); ++it) {
+                processed_map[it->first] = it->second;
+            }
+        }
+
         processed_tensor_storages.reserve(processed_map.size());
-        for (auto const& [name, ts] : processed_map) {
-            processed_tensor_storages.push_back(ts);
+        for (std::unordered_map<std::string, TensorStorage>::const_iterator it = processed_map.begin(); it != processed_map.end(); ++it) {
+            processed_tensor_storages.push_back(it->second);
         }
     }
 
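What this hunk changes, in short: instead of each worker thread locking a shared mutex and merging its local results into processed_map, every thread now writes only into its own slot of local_maps (indexed by thread_id), walks a contiguous chunk of tensor_storages instead of a strided j += n_threads pattern, and the main thread merges the per-thread maps into processed_map after all workers have joined. Below is a minimal, self-contained sketch of that per-thread-map-then-merge pattern using placeholder types (compile with -pthread); parallel_index and the string-to-length mapping are illustrative stand-ins, not the repository's TensorStorage / preprocess_tensor / is_unused_tensor.

#include <algorithm>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

// Sketch of the accumulation pattern in the commit above: each worker owns
// one map, so no mutex is needed while the workers run; the main thread
// merges everything after join().
std::unordered_map<std::string, int> parallel_index(const std::vector<std::string>& items) {
    int n_threads = std::min((int)std::thread::hardware_concurrency(), (int)items.size());
    if (n_threads < 1) {
        n_threads = 1;
    }

    // One map per worker: thread i writes only to local_maps[i].
    std::vector<std::unordered_map<std::string, int>> local_maps(n_threads);
    std::vector<std::thread> workers;
    size_t chunk_size = (items.size() + n_threads - 1) / n_threads;  // ceiling division

    for (int i = 0; i < n_threads; ++i) {
        workers.emplace_back([&, thread_id = i]() {
            const size_t start = thread_id * chunk_size;
            const size_t end   = std::min(start + chunk_size, items.size());
            for (size_t j = start; j < end; ++j) {
                // Placeholder per-item work: record each item's length.
                local_maps[thread_id][items[j]] = (int)items[j].size();
            }
        });
    }
    for (auto& w : workers) {
        w.join();
    }

    // Single-threaded merge after join(); reserve once to avoid rehashing.
    size_t total_keys = 0;
    for (const auto& m : local_maps) {
        total_keys += m.size();
    }
    std::unordered_map<std::string, int> merged;
    merged.reserve(total_keys);
    for (const auto& m : local_maps) {
        for (const auto& kv : m) {
            merged[kv.first] = kv.second;  // last writer wins on duplicate keys
        }
    }
    return merged;
}

The trade-off mirrors the commit: contiguous chunks give better cache locality than the old strided walk but can leave work unevenly distributed when per-item cost varies, and the per-thread maps spend a little extra memory in exchange for dropping the mutex and the contended merge entirely.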