并行 io 的一个小实验

300 阅读2分钟

一直以来对 io 有一个疑问, 多线程下并行做 io 会发生什么?

今天我写了个小实验来试一下

首先是一次写入, 打开文件后, 按 BATCH_SIZE 写入 N 次.

其次是测试维度, 写入文件个数 * 不同写入方案

写入方案有:

单线程全部完成, 主线程 + io 线程, 多个线程轮询(模拟线程池), 每个任务一个线程.

在我的 macbook 上得到的结果如下

one_thread_per_task = [20202, 39416, 204716, 445375, 2275959, 4546641]
fake_thread_pool = [15302, 31865, 143714, 292649, 1492380, 2948938]
one_io_thread = [12598, 25251, 121179, 243781, 1188644, 4753603, 2389170]
one_thread_in_all = [12910, 24894, 122469, 238501, 1251449, 2471593]
write_time = [5, 10, 50, 100, 500, 1000]

读的数据与之相当, 可以看到每个任务跑一个线程是最慢的, 单独起一个 io 线程的速度与全部任务都在主线程实现效率相当, 多个线程轮询投递任务的效率介于两者之间.

挺符合预期, 做 io 的核数不会造成瓶颈. 当然关于 io 还有诸多因素没有考虑, 但比较多线程io的性能也足够了.

下面贴一下测试的源代码

#include <fcntl.h>
#include <unistd.h>

#include <atomic>
#include <cerrno>
#include <chrono>
#include <condition_variable>
#include <ctime>
#include <functional>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>
using namespace std;

// #define APPEND_MODE

// Number of write() calls issued for every file.
constexpr int N = 10 * 512;
// Number of bytes handed to each write() call (16 KiB).
constexpr int BATCH_SIZE = 1024 * 16;
// Bytes per mebibyte; used to express throughput in MB/s.
constexpr int _1MB = 1024 * 1024;

// Writes N batches of BATCH_SIZE zero bytes to `filename`, then fsyncs and
// closes the file.
//
// The file is created if it does not exist. When APPEND_MODE is defined the
// file is opened with O_APPEND, otherwise writes start at offset 0.
// Failures are reported on stderr and abort the remaining writes for this
// file; they never terminate the process.
void fileWriter(string filename) {
  int flags = O_WRONLY | O_CREAT;
#ifdef APPEND_MODE
  flags |= O_APPEND;
#endif
  // O_CREAT requires the third (mode) argument; without it the permission
  // bits of a newly created file are whatever happens to be on the stack.
  const int fd = open(filename.c_str(), flags, 0644);
  if (fd < 0) {
    cerr << "open " << filename << " failed, errno=" << errno << '\n';
    return;
  }

  // Zero-initialised so the payload is deterministic instead of stack garbage.
  char buf[BATCH_SIZE] = {};

  bool ok = true;
  for (int i = 0; ok && i < N; ++i) {
    // write() may be interrupted or accept fewer bytes than requested, so
    // keep going until the whole batch has been handed to the kernel.
    ssize_t done = 0;
    while (done < BATCH_SIZE) {
      const ssize_t n =
          write(fd, buf + done, static_cast<size_t>(BATCH_SIZE - done));
      if (n < 0) {
        if (errno == EINTR) {
          continue;
        }
        cerr << "write " << filename << " failed, errno=" << errno << '\n';
        ok = false;
        break;
      }
      done += n;
    }
  }

  if (fsync(fd) != 0) {
    cerr << "fsync " << filename << " failed, errno=" << errno << '\n';
  }
  close(fd);
  // The file is intentionally left on disk:
  // unlink(filename.c_str());
}

// A single background worker that runs fileWriter() on posted file names in
// FIFO order.
//
// Usage: call Run() once to start the worker, PostTask() any number of times,
// then WaitAndExit() to drain the queue and join the worker.
class IoThread {
public:
  IoThread() = default;
  IoThread(const IoThread &) = delete;
  IoThread &operator=(const IoThread &) = delete;

  // Drains and joins the worker if the owner did not do so explicitly;
  // destroying a joinable std::thread would call std::terminate.
  ~IoThread() { WaitAndExit(); }

  // Starts the worker thread. Must be called at most once per object.
  void Run() {
    thread_ = thread([this]() {
      while (true) {
        queue<string> batch;
        {
          unique_lock<mutex> lc(mu_);
          cv_.wait(lc, [this] { return !data_.empty() || exit_; });
          if (data_.empty()) {
            // The predicate held with nothing queued, so exit was requested
            // and every posted task has already been taken.
            break;
          }
          // Take the whole backlog so the I/O below runs without the lock.
          batch.swap(data_);
        }
        for (; !batch.empty(); batch.pop()) {
          fileWriter(std::move(batch.front()));
        }
      }
    });
  }

  // Asks the worker to stop once the queue is empty and waits for it.
  // Safe to call more than once and safe to call if Run() was never called.
  void WaitAndExit() {
    {
      // The flag must change under the mutex and before the notification,
      // otherwise the worker can test the predicate, miss the wakeup and
      // sleep forever.
      lock_guard<mutex> lg(mu_);
      exit_ = true;
    }
    cv_.notify_one();
    if (thread_.joinable()) {
      thread_.join();
    }
  }

  // Queues `task` (a file name) for the worker.
  void PostTask(string task) {
    {
      lock_guard<mutex> lg(mu_);
      data_.push(std::move(task));
    }

    cv_.notify_one();
  }

private:
  condition_variable cv_;
  // Explicitly initialised: a default-constructed atomic<bool> has an
  // indeterminate value before C++20.
  atomic<bool> exit_{false};
  mutex mu_;            // guards data_ and the exit_ hand-off
  queue<string> data_;  // pending file names, oldest first
  thread thread_;
};

// Writes `write_time` files concurrently, spawning one dedicated thread per
// file, and returns once every thread has finished.
void one_thread_per_task(int write_time) {
  const string file = "ft";
  vector<thread> tv;
  if (write_time > 0) {
    tv.reserve(static_cast<size_t>(write_time));
  }
  for (int i = 0; i < write_time; ++i) {
    // A decimal suffix gives every task its own file. A one-character suffix
    // such as char(i + '0') wraps for large i, so several threads would write
    // the same file and some names would contain '\0' or '/'.
    tv.emplace_back(fileWriter, file + to_string(i));
  }
  for (auto &th : tv) {
    th.join();
  }
}

// Writes `write_time` files one after another on the calling thread.
void one_thread_in_all(int write_time) {
  const string file = "ft";
  for (int i = 0; i < write_time; ++i) {
    // A decimal suffix gives every task its own file. A one-character suffix
    // such as char(i + '0') wraps for large i, producing colliding names and
    // names that contain '\0' or '/'.
    fileWriter(file + to_string(i));
  }
}

// Posts `write_time` file-write tasks from the calling thread to a single
// IoThread worker and waits until the worker has processed all of them.
void one_io_thread(int write_time) {
  const string file = "ft";
  IoThread io;
  io.Run();
  for (int i = 0; i < write_time; ++i) {
    // A decimal suffix gives every task its own file. A one-character suffix
    // such as char(i + '0') wraps for large i, producing colliding names and
    // names that contain '\0' or '/'.
    io.PostTask(file + to_string(i));
  }
  io.WaitAndExit();
}

void fake_thread_pool(int write_time) {
  const int n_thread = 3;
  string file = "ft";
  vector<IoThread> iothreads(n_thread);
  for (int i = 0; i < n_thread; ++i)
    iothreads[i].Run();
  for (int i = 0; i < write_time; ++i)
    iothreads[i % n_thread].PostTask(file + char(i + '0'));
  for (int i = 0; i < n_thread; ++i)
    iothreads[i].WaitAndExit();
}

int main() {
  vector<int> write_times = {5, 10, 50, 100, 500, 1000};
  vector<pair<string, function<void(int)>>> bench_functions = {
      {"one_thread_per_task", one_thread_per_task},
      {"one_thread_in_all", one_thread_in_all},
      {"fake_thread_pool", fake_thread_pool},
      {"one_io_thread", one_io_thread}};
  int start, end, cost;
  // cout << "CLOCKS_PER_SEC " << CLOCKS_PER_SEC << '\n';
  for (size_t time : write_times) {
    size_t total_writed = N * BATCH_SIZE * time;
    cout << "write_times " << time << " total write " << total_writed << '\n';
    for (auto p : bench_functions) {
      start = clock();

      p.second(time);
      end = clock();
      cost = (end - start);
      cout << p.first << ": " << cost << ", speed "
           << total_writed * CLOCKS_PER_SEC / cost / _1MB << " MB/s" << '\n';
    }
    cout << '\n';
  }
  return 0;
}