Asynchronous Processing and Distribution of 4K Camera Data


Environment

  • Processor: Rockchip RK3588
  • Camera:
Format Video Capture Multiplanar:
        Width/Height      : 3840/2160
        Pixel Format      : 'NV12' (Y/CbCr 4:2:0)
        Field             : None
        Number of planes  : 1
        Flags             :
        Colorspace        : Default
        Transfer Function : Default
        YCbCr/HSV Encoding: Default
        Quantization      : Full Range
        Plane 0           :
           Bytes per Line : 3840
           Size Image     : 12441600

Implementation

  • The 4K image must be copied into DMA memory so that the RGA can accelerate format conversion and scaling. Copying a 4K frame is slow, however, which badly hurts high-frame-rate, real-time streaming, so the copy and conversion are performed asynchronously while the frame_id order is still preserved (see the sketch after this list).
  • At high frame rates the RGA cores are also heavily loaded, and jobs keep switching between cores, which makes the image jitter constantly (see github.com/airockchip/… ). Binding each processing thread to a fixed core with imconfig resolves the jitter.
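For illustration, the conversion/scale step could look roughly like the sketch below. It is written against librga's public im2d API rather than the project's CRkRgaGraphicOp wrapper, and convert_and_scale plus its raw-pointer buffer handling are assumptions here (process_image itself is not shown in this post):

// Hypothetical sketch: NV12 4K in, scaled BGR out, in one RGA pass via
// librga's im2d API. src_virt/dst_virt should point at DMA-backed memory
// so the RGA can reach them.
#include <im2d.h>
#include <rga.h>

bool convert_and_scale(void* src_virt, void* dst_virt,
                       int src_w, int src_h, int dst_w, int dst_h) {
  rga_buffer_t src = wrapbuffer_virtualaddr(src_virt, src_w, src_h,
                                            RK_FORMAT_YCbCr_420_SP);  // NV12
  rga_buffer_t dst = wrapbuffer_virtualaddr(dst_virt, dst_w, dst_h,
                                            RK_FORMAT_BGR_888);
  im_rect srect = {0, 0, src_w, src_h};
  im_rect drect = {0, 0, dst_w, dst_h};

  // Validate the request before handing it to the hardware.
  if (imcheck(src, dst, srect, drect) != IM_STATUS_NOERROR) return false;

  // A single improcess call performs both the color conversion and the
  // scaling implied by the differing src/dst formats and sizes.
  return improcess(src, dst, {}, srect, drect, {}, IM_SYNC) == IM_STATUS_SUCCESS;
}
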
# ros2_utils.h

#ifndef __ROS2_UTILS_H__
#define __ROS2_UTILS_H__

#ifdef PLATFORM_RK3588
#ifdef __ROS2_SDK__

#include <vector>
#include <atomic>
#include <semaphore.h>
#include <unordered_map>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <thread>
#include <mutex>
#include <future>
#include <functional>
#include <chrono>
#include <condition_variable>
#include <unordered_set>
#include <utility>
#include <list>

#include "CEvent.h"
#include "buffer.h"
#include "CRkRgaGraphicOp.h"

namespace ros2 {

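// RGA scheduler/config enums; the values match librga's im2d definitions.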
typedef enum {
  IM_SCHEDULER_RGA3_CORE0 = 1 << 0,
  IM_SCHEDULER_RGA3_CORE1 = 1 << 1,
  IM_SCHEDULER_RGA2_CORE0 = 1 << 2,
  IM_SCHEDULER_RGA3_DEFAULT = IM_SCHEDULER_RGA3_CORE0,
  IM_SCHEDULER_RGA2_DEFAULT = IM_SCHEDULER_RGA2_CORE0,
  IM_SCHEDULER_MASK = 0x7,
  IM_SCHEDULER_DEFAULT = 0,
} IM_SCHEDULER_CORE;

typedef enum {
  IM_CONFIG_SCHEDULER_CORE,
  IM_CONFIG_PRIORITY,
  IM_CONFIG_CHECK,
} IM_CONFIG_NAME;

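// Frame-ordered thread pool: tasks are keyed by frame_id and run strictly in
// ascending frame_id order; every worker first binds its RGA jobs to a fixed
// core via imconfig to avoid the cross-core jitter described above.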
class ThreadPool {
 public:
  typedef std::shared_ptr<ThreadPool> ptr;

  explicit ThreadPool(size_t numThreads,
                      IM_CONFIG_NAME im_config_name = IM_CONFIG_SCHEDULER_CORE,
                      IM_SCHEDULER_CORE im_scheduler_core = IM_SCHEDULER_RGA3_CORE0,
                      int time_out_ms = 30)
      : stop(false)
      , nextFrameId(0)
      , time_out_ms(time_out_ms)
      , im_config_name(im_config_name)
      , im_scheduler_core(im_scheduler_core) {
    workers.reserve(numThreads);
    for (size_t i = 0; i < numThreads; ++i) {
      workers.emplace_back(&ThreadPool::worker, this);
    }
  }

  ~ThreadPool() {
    {
      std::lock_guard<std::mutex> lock(queueMutex);
      stop = true;
    }
    cv.notify_all();
    for (auto& worker: workers) {
      worker.join();
    }
  }

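  // Queue a task for frame_id. Producers are expected to call this with
  // monotonically increasing ids; before queueing, we wait (bounded by
  // time_out_ms) for frame_id - 1 to complete, so ordering is preserved
  // without letting a single slow frame stall the pipeline.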
  void enqueue(uint64_t frame_id, std::function<void()> task) {
    auto promisePtr = std::make_shared<std::promise<void>>();
    auto future = promisePtr->get_future();

    auto wrappedTask = [task = std::move(task), promisePtr]() mutable {
      try {
        task();
        promisePtr->set_value();
      } catch (...) {
        promisePtr->set_exception(std::current_exception());
      }
    };

    handlePreviousFrameDependency(frame_id);

    {
      std::lock_guard<std::mutex> lock(queueMutex);
      taskQueue[frame_id] = std::move(wrappedTask);
      frameCompletion[frame_id] = std::move(future);
    }
    cv.notify_one();
  }

 private:
  void worker() {
    // Bind this worker's RGA jobs to a fixed core up front; letting jobs
    // migrate between RGA cores is what caused the image jitter above.
    CRkRgaApi::imconfig(im_config_name, im_scheduler_core);

    while (true) {
      std::function<void()> task;
      {
        std::unique_lock<std::mutex> lock(queueMutex);
        cv.wait(lock, [this] { return stop || !taskQueue.empty(); });

        if (stop && taskQueue.empty()) return;

        // Only execute the task whose id equals nextFrameId, so frames run
        // strictly in order. If that id is missing (the producer dropped a
        // frame), skip ahead and look again.
        auto it = taskQueue.find(nextFrameId);
        if (it != taskQueue.end()) {
          task = std::move(it->second);
          taskQueue.erase(it);
          ++nextFrameId;
        } else {
          ++nextFrameId;
          continue;
        }
      }
      if (task) task();
    }
  }

  // Soft dependency on the previous frame: wait, bounded by time_out_ms,
  // for frame_id - 1 to finish before queueing frame_id, so order is kept
  // without letting one stuck frame stall the pipeline forever.
  void handlePreviousFrameDependency(uint64_t frame_id) {
    if (frame_id == 0) return;

    std::future<void> prevFuture;
    {
      // Take the future out of the map under the lock, so frameCompletion
      // is only ever touched while queueMutex is held.
      std::lock_guard<std::mutex> lock(queueMutex);
      auto it = frameCompletion.find(frame_id - 1);
      if (it == frameCompletion.end()) return;
      prevFuture = std::move(it->second);
      frameCompletion.erase(it);
    }
    prevFuture.wait_for(std::chrono::milliseconds(time_out_ms));
  }

  std::vector<std::thread> workers;
  std::unordered_map<uint64_t, std::function<void()>> taskQueue;
  std::unordered_map<uint64_t, std::future<void>> frameCompletion;
  std::mutex queueMutex;
  std::condition_variable cv;
  bool stop;
  uint64_t nextFrameId;
  int time_out_ms;
  IM_CONFIG_NAME im_config_name;
  IM_SCHEDULER_CORE im_scheduler_core;
};

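// Fixed pool of preallocated image buffers (avoids per-frame 4K allocations);
// acquire() blocks until a buffer is handed back via release().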
class BufferPool {
 public:
  typedef std::shared_ptr<BufferPool> ptr;

  BufferPool(size_t poolSize, int image_w, int image_h, int fps,
                   int size, EN_BUFF_TYPE buff_type, EN_DEV_BUFF_TYPE dev_buff_type)
      : indexMap(poolSize) {
    buffers.reserve(poolSize);
    for (size_t i = 0; i < poolSize; ++i) {
      auto buf = std::make_shared<Buffer>();
      buf->Init(size, buff_type, dev_buff_type, image_w, image_h);
      buf->m_iFramerate = fps;
      buffers.push_back(buf);
      freeList.push_back(i);
      indexMap[buf.get()] = i;
    }
  }

  std::shared_ptr<Buffer> acquire() {
    std::unique_lock<std::mutex> lock(mutex);
    cv.wait(lock, [this] { return !freeList.empty(); });

    size_t idx = freeList.front();
    freeList.pop_front();
    return buffers[idx];
  }

  void release(const std::shared_ptr<Buffer>& buf) {
    std::lock_guard<std::mutex> lock(mutex);
    auto it = indexMap.find(buf.get());
    if (it != indexMap.end()) {
      freeList.push_back(it->second);
      cv.notify_one();
    }
  }

 private:
  std::vector<std::shared_ptr<Buffer>> buffers;
  std::list<size_t> freeList;
  std::unordered_map<Buffer*, size_t> indexMap;
  std::mutex mutex;
  std::condition_variable cv;
};

} // namespace ros2

#endif  // __ROS2_SDK__
#endif  // PLATFORM_RK3588

#endif  // __ROS2_UTILS_H__
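
For context, the pools used in test.cpp below might be wired up roughly like this. This is a sketch only: the thread counts, core choices, and constants are assumptions, and the BufferPool construction is omitted because its EN_BUFF_TYPE / EN_DEV_BUFF_TYPE arguments come from the project's buffer.h. The NV12 size does match the 12441600-byte Size Image the camera reports above.

constexpr int kWidth  = 3840;
constexpr int kHeight = 2160;
// NV12 holds 1.5 bytes per pixel: 3840 * 2160 * 3 / 2 = 12441600 bytes,
// matching the Size Image reported by the camera; BGR needs 3 bytes/pixel.
constexpr int kNv12Size = kWidth * kHeight * 3 / 2;
constexpr int kBgrSize  = kWidth * kHeight * 3;

// One pool per pipeline stage; pinning each to its own RGA3 core keeps RGA
// jobs from migrating between cores.
auto m_imageProcessingPool = std::make_shared<ros2::ThreadPool>(
    2, ros2::IM_CONFIG_SCHEDULER_CORE, ros2::IM_SCHEDULER_RGA3_CORE0);
auto m_dataSendingPool = std::make_shared<ros2::ThreadPool>(
    2, ros2::IM_CONFIG_SCHEDULER_CORE, ros2::IM_SCHEDULER_RGA3_CORE1);
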
# test.cpp: run image processing and the subsequent send step asynchronously.

auto process_task = [this, image, frame_id] {
  // Stage 1: convert/scale into pooled buffers via the RGA.
  auto nv12_buf = m_nv12BufferPool->acquire();
  auto bgr_buf = m_bgrBufferPool->acquire();

  process_image(image, bgr_buf, nv12_buf);

  // The NV12 intermediate is no longer needed; hand it back immediately.
  m_nv12BufferPool->release(nv12_buf);

  // Publish the finished BGR frame for the matching send_task.
  {
    std::lock_guard<std::mutex> lock(m_bgrQueueMutex);
    m_processedBGRs.emplace(frame_id, bgr_buf);
  }
  m_bgrCV.notify_one();
};

auto send_task = [this, pointcloud_imus, frame_id, timeStamp]() mutable {
  std::shared_ptr<Buffer> target_bgr;

  // Stage 2: block until stage 1 has produced this frame's BGR buffer.
  {
    std::unique_lock<std::mutex> lock(m_bgrQueueMutex);
    m_bgrCV.wait(lock, [&] {
      return m_processedBGRs.count(frame_id) > 0;
    });
    target_bgr = m_processedBGRs[frame_id];
    m_processedBGRs.erase(frame_id);
  }

  // Distribute the frame, then return the buffer to the pool for reuse.
  PostDataToPipeline(frame_id, timeStamp, pointcloud_imus, target_bgr);
  m_bgrBufferPool->release(target_bgr);
};

m_imageProcessingPool->enqueue(frame_id, std::move(process_task));
m_dataSendingPool->enqueue(frame_id, std::move(send_task));
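
With the same frame_id enqueued on both pools, the two stages form a pipeline: frame N+1's process_task can already run while frame N's send_task is still waiting on or posting its BGR buffer, yet each pool's nextFrameId sequencing keeps both stages emitting in frame_id order. The one contract to respect is that frame_ids must be enqueued in increasing order from a single producer, since a worker permanently skips any id it cannot find in its queue.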