一直以来对 io 有一个疑问, 多线程下并行做 io 会发生什么?
今天我写了个小实验来试一下
首先是一次写入, 打开文件后, 按 BATCH_SIZE 写入 N 次.
其次是测试维度, 写入文件个数 * 不同写入方案
写入方案有:
单线程全部完成, 主线程 + io 线程, 多个线程轮询(模拟线程池), 每个任务一个线程.
在我的 macbook 上得到的结果如下
one_thread_per_task = [20202, 39416, 204716, 445375, 2275959, 4546641]
fake_thread_pool = [15302, 31865, 143714, 292649, 1492380, 2948938]
one_io_thread = [12598, 25251, 121179, 243781, 1188644, 4753603, 2389170]
one_thread_in_all = [12910, 24894, 122469, 238501, 1251449, 2471593]
write_time = [5, 10, 50, 100, 500, 1000]
读的数据与之相当, 可以看到每个任务跑一个线程是最慢的, 单独起一个 io 线程的速度与全部任务都在主线程实现效率相当, 多个线程轮询投递任务的效率介于他们之间.
挺符合预期, 做 io 的核数不会造成瓶颈. 当然关于 io 还有诸多因素没有考虑, 但比较多线程io的性能也足够了.
下面贴一下测试的源代码
#include <fcntl.h>
#include <unistd.h>

#include <atomic>
#include <cerrno>
#include <chrono>
#include <condition_variable>
#include <ctime>
#include <functional>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>
// The whole program uses unqualified standard-library names.
using namespace std;
// Uncomment to make fileWriter open its files with O_APPEND.
// #define APPEND_MODE
// Number of write() calls fileWriter issues per file.
const int N = 10 * 512;
// Size in bytes of each write() call.
const int BATCH_SIZE = 1024 * 16;
// Bytes per megabyte; divisor used in main when reporting throughput in MB/s.
const int _1MB = 1024 * 1024;
// Writes N batches of BATCH_SIZE zero bytes to `filename`, then fsyncs and
// closes the file.
//
// The file is created with mode 0644 if it does not exist and is left on disk
// afterwards. With APPEND_MODE defined the data is appended; otherwise it is
// written starting at offset 0 over any existing content (no truncation).
// System-call failures are reported on stderr and stop further writes to this
// file.
void fileWriter(string filename) {
  int flags = O_WRONLY | O_CREAT;
#ifdef APPEND_MODE
  flags |= O_APPEND;
#endif
  // O_CREAT requires the third (mode) argument; omitting it reads garbage.
  const int fd = open(filename.c_str(), flags, 0644);
  if (fd < 0) {
    cerr << "open " << filename << " failed (errno " << errno << ")\n";
    return;
  }

  // Zero-filled so the benchmark never writes indeterminate stack bytes.
  char buf[BATCH_SIZE] = {};
  const size_t batch = static_cast<size_t>(BATCH_SIZE);
  bool ok = true;
  for (int i = 0; ok && i < N; ++i) {
    // write() may transfer fewer bytes than requested; loop until the whole
    // batch has been handed to the kernel.
    size_t done = 0;
    while (done < batch) {
      const ssize_t written = write(fd, buf + done, batch - done);
      if (written < 0) {
        if (errno == EINTR) {
          continue;  // interrupted before any data was written: retry
        }
        cerr << "write " << filename << " failed (errno " << errno << ")\n";
        ok = false;
        break;
      }
      done += static_cast<size_t>(written);
    }
  }

  if (fsync(fd) != 0) {
    cerr << "fsync " << filename << " failed (errno " << errno << ")\n";
  }
  if (close(fd) != 0) {
    cerr << "close " << filename << " failed (errno " << errno << ")\n";
  }
}
// A single background worker that runs file-writing tasks in FIFO order.
//
// Usage: call Run() once to start the worker, PostTask() any number of times,
// then WaitAndExit() to drain the queue and join the worker. Each task is a
// filename that the worker passes to fileWriter().
//
// All shared state (data_, exit_) is only touched while holding mu_.
class IoThread {
public:
  // Joins the worker if the owner did not; destroying a joinable std::thread
  // would otherwise call std::terminate.
  ~IoThread() { WaitAndExit(); }

  // Starts the worker thread. Must be called at most once per object.
  void Run() {
    thread_ = thread([this]() {
      unique_lock<mutex> lc(mu_);
      while (true) {
        cv_.wait(lc, [this] { return !data_.empty() || exit_; });
        if (data_.empty()) {
          // The predicate held with nothing queued, so exit was requested
          // and every posted task has already been processed.
          break;
        }
        // Take the whole backlog in one go so the lock is not held during I/O.
        queue<string> batch;
        batch.swap(data_);
        lc.unlock();
        while (!batch.empty()) {
          fileWriter(std::move(batch.front()));
          batch.pop();
        }
        lc.lock();
      }
    });
  }

  // Asks the worker to stop once its queue is empty and blocks until it has
  // finished. Safe to call more than once, or without a prior Run().
  void WaitAndExit() {
    {
      // Set the flag under the mutex *before* notifying; otherwise the worker
      // can re-check the predicate, see exit_ == false and sleep forever.
      lock_guard<mutex> lg(mu_);
      exit_ = true;
    }
    cv_.notify_one();
    if (thread_.joinable()) {
      thread_.join();
    }
  }

  // Queues `task` (a filename) for the worker and wakes it.
  void PostTask(string task) {
    {
      lock_guard<mutex> lg(mu_);
      data_.push(std::move(task));
    }
    cv_.notify_one();
  }

private:
  condition_variable cv_;
  mutex mu_;
  bool exit_ = false;   // guarded by mu_
  queue<string> data_;  // guarded by mu_
  thread thread_;
};
// Benchmark variant: spawns one thread per file, each running fileWriter, and
// joins them all before returning.
//
// write_time is the number of files to write; files are named "ft0", "ft1", ...
void one_thread_per_task(int write_time) {
  const string file = "ft";
  vector<thread> workers;
  if (write_time > 0) {
    workers.reserve(static_cast<size_t>(write_time));
  }
  for (int i = 0; i < write_time; ++i) {
    // to_string keeps every name distinct; char(i + '0') is only a digit for
    // i < 10 and wraps into duplicates, NUL and '/' for larger i.
    workers.emplace_back(fileWriter, file + to_string(i));
  }
  for (thread &worker : workers) {
    worker.join();
  }
}
// Benchmark variant: writes every file sequentially on the calling thread.
//
// write_time is the number of files to write; files are named "ft0", "ft1", ...
void one_thread_in_all(int write_time) {
  const string file = "ft";
  for (int i = 0; i < write_time; ++i) {
    // to_string keeps every name distinct; char(i + '0') is only a digit for
    // i < 10 and wraps into duplicates, NUL and '/' for larger i.
    fileWriter(file + to_string(i));
  }
}
// Benchmark variant: the calling thread posts every file to one dedicated
// IoThread and then waits for that worker to finish.
//
// write_time is the number of files to write; files are named "ft0", "ft1", ...
void one_io_thread(int write_time) {
  const string file = "ft";
  IoThread io;
  io.Run();
  for (int i = 0; i < write_time; ++i) {
    // to_string keeps every name distinct; char(i + '0') is only a digit for
    // i < 10 and wraps into duplicates, NUL and '/' for larger i.
    io.PostTask(file + to_string(i));
  }
  io.WaitAndExit();
}
void fake_thread_pool(int write_time) {
const int n_thread = 3;
string file = "ft";
vector<IoThread> iothreads(n_thread);
for (int i = 0; i < n_thread; ++i)
iothreads[i].Run();
for (int i = 0; i < write_time; ++i)
iothreads[i % n_thread].PostTask(file + char(i + '0'));
for (int i = 0; i < n_thread; ++i)
iothreads[i].WaitAndExit();
}
int main() {
vector<int> write_times = {5, 10, 50, 100, 500, 1000};
vector<pair<string, function<void(int)>>> bench_functions = {
{"one_thread_per_task", one_thread_per_task},
{"one_thread_in_all", one_thread_in_all},
{"fake_thread_pool", fake_thread_pool},
{"one_io_thread", one_io_thread}};
int start, end, cost;
// cout << "CLOCKS_PER_SEC " << CLOCKS_PER_SEC << '\n';
for (size_t time : write_times) {
size_t total_writed = N * BATCH_SIZE * time;
cout << "write_times " << time << " total write " << total_writed << '\n';
for (auto p : bench_functions) {
start = clock();
p.second(time);
end = clock();
cost = (end - start);
cout << p.first << ": " << cost << ", speed "
<< total_writed * CLOCKS_PER_SEC / cost / _1MB << " MB/s" << '\n';
}
cout << '\n';
}
return 0;
}