我们知道线程数量很重要:线程太多会增大线程切换的开销,太少又不能充分发挥 CPU 的性能。
std::thread::hardware_concurrency() 是新版 C++ 标准库中一个很有用的函数。这个函数返回一个程序中能够真正并发执行的线程数量。例如,在多核系统中,返回值可以是 CPU 核心的数量。返回值仅仅是一个提示;当系统信息无法获取时,函数会返回 0。
例子:
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <thread>
#include <vector>
/// Callable that folds one block of a range into a caller-owned slot.
///
/// Invoking it adds every element of [first, last) onto the value already
/// stored in `result` and writes the sum back into `result`.
template <typename Iterator, typename T>
struct accumulate_block {
    void operator()(Iterator first, Iterator last, T& result) {
        // The incoming value of the slot is the seed of the fold, so a slot
        // that already holds a partial sum keeps it.
        T& running_total = result;
        running_total = std::accumulate(first, last, running_total);
    }
};
/// Sums the range [first, last) onto `init`, spreading the work over threads.
///
/// The range is cut into `num_threads` contiguous blocks. All but the last
/// block are summed on newly started threads; the calling thread sums the
/// last block (which also absorbs the remainder of the division). Each block
/// is accumulated starting from a value-initialised `T`, and the per-block
/// sums are finally added onto `init` in block order.
///
/// Because the additions are regrouped per block, the result can differ from
/// a sequential std::accumulate when `T`'s addition is not associative
/// (e.g. floating point) or when `T()` is not an additive identity.
///
/// @param first  Start of the range to sum.
/// @param last   One past the end of the range to sum.
/// @param init   Seed value; returned unchanged when the range is empty.
/// @return `init` plus the sum of all elements in [first, last).
template <typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init) {
    const unsigned long length =
        static_cast<unsigned long>(std::distance(first, last));
    // Empty range: nothing to sum, and no thread should be started.
    if (length == 0) {
        return init;
    }

    // Smallest number of elements worth handing to a thread of its own.
    const unsigned long min_per_thread = 25;
    // Ceiling of length / min_per_thread. The "- 1" matters: without it an
    // exact multiple (25, 50, ...) would ask for one thread too many and
    // push every block below min_per_thread.
    const unsigned long max_threads =
        (length + min_per_thread - 1) / min_per_thread;

    // hardware_concurrency() is only a hint and returns 0 when the value is
    // not computable; fall back to 2 in that case.
    const unsigned long hardware_threads = std::thread::hardware_concurrency();
    const unsigned long num_threads =
        std::min(hardware_threads != 0 ? hardware_threads : 2UL, max_threads);

    // Elements per spawned thread; the remainder goes to the calling thread.
    const unsigned long block_size = length / num_threads;

    // One value-initialised slot per block; each worker writes only its own.
    std::vector<T> results(num_threads);

    // The calling thread handles the final block, so one fewer thread is
    // started than there are blocks.
    std::vector<std::thread> threads;
    threads.reserve(num_threads - 1);

    Iterator block_start = first;
    for (unsigned long i = 0; i + 1 < num_threads; ++i) {
        Iterator block_end = block_start;
        std::advance(block_end, block_size);
        // std::ref is required: std::thread copies its arguments, and the
        // worker must write into the slot inside `results`, not into a copy.
        threads.emplace_back(accumulate_block<Iterator, T>(), block_start,
                             block_end, std::ref(results[i]));
        block_start = block_end;
    }

    // Sum the last block (including any remainder) on the calling thread.
    accumulate_block<Iterator, T>()(block_start, last,
                                    results[num_threads - 1]);

    // Every worker must finish before its slot in `results` is read.
    for (std::thread& worker : threads) {
        worker.join();
    }

    return std::accumulate(results.begin(), results.end(), init);
}
// Demo entry point: sums a fixed list of integers with parallel_accumulate
// and prints the total to standard output.
int main() {
    std::vector<int> samples = {1,  2,  3,  4,  6,   66,  34, 233, 67, 89, 112,
                                33, 10, 43, 1,  5,   43,  645, 654, 6,  45, 7,
                                54, 63, 45, 2,  5,   2,   342, 5,   2,  99};

    // Seed of 0, so the printed value is just the sum of the samples.
    const int total =
        parallel_accumulate(std::begin(samples), std::end(samples), 0);

    std::cout << "result: " << total << std::endl;
    return 0;
}
代码来自《C++并发编程实战》