Android native fence sync 实现(virgl)

1,419 阅读3分钟

Android 系统会检测当前EGL 是否支持EGL_ANDROID_native_fence_sync 扩展。如果支持,则使用该扩展等待GPU 完成渲染;如果不支持,则使用普通的EGL sync 等待GPU 渲染完成。 规范见 https://registry.khronos.org/EGL/extensions/ANDROID/EGL_ANDROID_native_fence_sync.txt

ANDROID_native_fence_sync 相关EGL API

eglCreateSyncKHR
eglDupNativeFenceFDANDROID
eglDestroySyncKHR
eglWaitSyncKHR
sync_wait

等待fence 的demo

/*
 * Queue a wait on a native fence for the current EGL display.
 *
 * fd: native fence file descriptor; -1 means there is nothing to wait on.
 *
 * The fd is wrapped in an EGL_SYNC_NATIVE_FENCE_ANDROID sync object, a wait
 * is issued on that object with eglWaitSyncKHR, and the sync object is then
 * destroyed. A creation failure is only logged.
 */
void WaitOnNativeFence(int fd)
{
  if (fd == -1) {
    return;
  }

  EGLint attribs[] = {EGL_SYNC_NATIVE_FENCE_FD_ANDROID, fd, EGL_NONE};
  EGLSyncKHR sync = eglCreateSyncKHR(eglGetCurrentDisplay(),
                                     EGL_SYNC_NATIVE_FENCE_ANDROID, attribs);
  if (sync == EGL_NO_SYNC_KHR) {
    ALOGE("%s - Failed to Create sync from source fd", __FUNCTION__);
    return;
  }

  // The GPU waits on this sync; the calling CPU thread does not.
  EGL(eglWaitSyncKHR(eglGetCurrentDisplay(), sync, 0));
  EGL(eglDestroySyncKHR(eglGetCurrentDisplay(), sync));
}

创建fence demo

/*
 * Create a native fence sync object and hand back its file descriptor.
 *
 * Returns the value of eglDupNativeFenceFDANDROID for the new sync object
 * (EGL_NO_NATIVE_FENCE_FD_ANDROID when duplication fails), or -1 when the
 * sync object itself could not be created. Failures are logged.
 */
int CreateNativeFence()
{
  EGLSyncKHR sync = eglCreateSyncKHR(eglGetCurrentDisplay(),
                                     EGL_SYNC_NATIVE_FENCE_ANDROID, NULL);

  // The flush is issued whether or not the sync object was created.
  GL(glFlush());

  if (sync == EGL_NO_SYNC_KHR) {
    ALOGE("%s - Failed to Create Native Fence sync", __FUNCTION__);
    return -1;
  }

  int fence_fd = eglDupNativeFenceFDANDROID(eglGetCurrentDisplay(), sync);
  if (fence_fd == EGL_NO_NATIVE_FENCE_FD_ANDROID) {
    ALOGE("%s - Failed to dup sync", __FUNCTION__);
  }

  // The sync object is no longer needed once its fd has been requested.
  EGL(eglDestroySyncKHR(eglGetCurrentDisplay(), sync));

  return fence_fd;
}

eglCreateSyncKHR 的type 参数用于指定同步对象的类型,这里我们指定EGL_SYNC_NATIVE_FENCE_ANDROID;attrib_list 用于设置属性,可以为空,也可以通过它传入要等待的fd。根据EGL 扩展规范,如果attrib_list 为空(没有指定任何属性),则各属性取下面的初始值:

      Attribute Name                     Initial Attribute Value(s)
      ---------------                    --------------------------
      EGL_SYNC_TYPE_KHR                  EGL_SYNC_NATIVE_FENCE_ANDROID
      EGL_SYNC_STATUS_KHR                EGL_UNSIGNALED_KHR
      EGL_SYNC_CONDITION_KHR             EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR
      EGL_SYNC_NATIVE_FENCE_FD_ANDROID   EGL_NO_NATIVE_FENCE_FD_ANDROID

mesa 中eglCreateSyncKHR 的实现

/*
 * Excerpt of Mesa's eglCreateSyncKHR backend (the "............" line marks
 * code omitted from the listing, including the switch header).
 *
 * Allocates a dri2_egl_sync, initialises its base object from type and
 * attrib_list, and, for EGL_SYNC_NATIVE_FENCE_ANDROID, asks the display's
 * fence interface to create a fence from the sync object's SyncFd.
 *
 * Returns the base _EGLSync with refcount set to 1, or NULL after recording
 * EGL_BAD_ALLOC (allocation failure) or EGL_BAD_ATTRIBUTE (no fence could be
 * created); NULL is also returned when _eglInitSync fails.
 */
static _EGLSync *
dri2_create_sync(const _EGLDriver *drv, _EGLDisplay *disp,
                 EGLenum type, const EGLAttrib *attrib_list)
{
   _EGLContext *ctx = _eglGetCurrentContext();
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
   struct dri2_egl_sync *dri2_sync;
   EGLint ret;
   pthread_condattr_t attr;

   dri2_sync = calloc(1, sizeof(struct dri2_egl_sync));
   if (!dri2_sync) {
      _eglError(EGL_BAD_ALLOC, "eglCreateSyncKHR");
      return NULL;
   }
    // Author's note: _eglInitSync checks whether attrib_list is empty; if so
    // the sync's fence fd is set to EGL_NO_NATIVE_FENCE_FD_ANDROID.
    // If attrib_list supplies an fd, the sync's fence fd becomes that fd.
   if (!_eglInitSync(&dri2_sync->base, disp, type, attrib_list)) {
      free(dri2_sync);
      return NULL;
   }
  ............
   case EGL_SYNC_NATIVE_FENCE_ANDROID:
      if (dri2_dpy->fence->create_fence_fd) {
      // Create a sync fence from the fd stored in the sync object.
         dri2_sync->fence = dri2_dpy->fence->create_fence_fd(
                                    dri2_ctx->dri_context,
                                    dri2_sync->base.SyncFd);
      }
      // Also reached when the driver has no create_fence_fd hook, since
      // calloc left fence NULL.
      if (!dri2_sync->fence) {
         _eglError(EGL_BAD_ATTRIBUTE, "eglCreateSyncKHR");
         free(dri2_sync);
         return NULL;
      }
      break;
   }

   p_atomic_set(&dri2_sync->refcount, 1);
   return &dri2_sync->base;
}

对应的底层驱动实现(virgl):如果eglCreateSyncKHR 的attrib_list 参数为NULL,这里并不会产生新的fd;如果不为空并且传入了fd,则会用传入的fd 生成新的fd 保存到fence 中。

/*
 * Allocate a virgl_drm_fence that wraps a fence file descriptor.
 *
 * vws:      winsys; fence support is asserted.
 * fd:       fence file descriptor to wrap.
 * external: when true, the fence stores a duplicate of fd obtained from
 *           os_dupfd_cloexec() rather than fd itself.
 *
 * Returns the new fence with its reference count initialised to 1, or NULL
 * when duplicating the fd or allocating the fence fails.
 */
static struct pipe_fence_handle *
virgl_drm_fence_create(struct virgl_winsys *vws, int fd, bool external)
{
   struct virgl_drm_fence *drm_fence;
   int fence_fd = fd;

   assert(vws->supports_fences);

   if (external) {
      /* Give the fence a descriptor of its own. */
      fence_fd = os_dupfd_cloexec(fd);
      if (fence_fd < 0)
         return NULL;
   }

   drm_fence = CALLOC_STRUCT(virgl_drm_fence);
   if (drm_fence == NULL) {
      /* Closes the duplicate when external, otherwise the fd passed in. */
      close(fence_fd);
      return NULL;
   }

   drm_fence->fd = fence_fd;
   drm_fence->external = external;
   pipe_reference_init(&drm_fence->reference, 1);

   return (struct pipe_fence_handle *)drm_fence;
}

mesa中eglWaitSyncKHR实现

virgl 实现这里并没有真正等待,而是和gpu batch in fence 结合。 如果waitsync 中的fd有效则gpu 执行batch buffer 命令之前先等待这个fence。 同时生成一个新的out fence ,这个新的out fence fd 传入到用户态中,再执行eglDupNativeFenceFDANDROID 能获取新的out fence fd。 gpu 完成batch buffer 里的命令会触发这个新的fence。

dri2_server_wait_sync(const _EGLDriver *drv, _EGLDisplay *disp, _EGLSync *sync)
{
   _EGLContext *ctx = _eglGetCurrentContext();
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
   struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);

   dri2_dpy->fence->server_wait_sync(dri2_ctx->dri_context,
                                     dri2_sync->fence, 0);
   return EGL_TRUE;
}

调用驱动中wait sync (virgl)

/*
 * Attach a fence to a command buffer as something to wait on.
 *
 * Does nothing when the winsys lacks fence support or when the fence is not
 * external. Otherwise the fence's fd is accumulated into the command
 * buffer's in_fence_fd through sync_accumulate().
 */
static void virgl_fence_server_sync(struct virgl_winsys *vws,
                                    struct virgl_cmd_buf *_cbuf,
                                    struct pipe_fence_handle *_fence)
{
   struct virgl_drm_cmd_buf *drm_cbuf = virgl_drm_cmd_buf(_cbuf);
   struct virgl_drm_fence *drm_fence = virgl_drm_fence(_fence);

   /*
    * Only an external fence needs work here: if the fence is not external
    * there is nothing more to do without preemption.
    */
   if (vws->supports_fences && drm_fence->external) {
      /* Merge the fence fd and in_fence_fd into a single fence. */
      sync_accumulate("virgl", &drm_cbuf->in_fence_fd, drm_fence->fd);
   }
}

in_fence_fd 是virgl 下发batch buffer 给gpu 时携带的fence,gpu 在执行batch buffer 里所有的命令前要等待这个fence。也就是说virgl 的eglWaitSyncKHR 并没有真正的等待,而是把等待推迟到提交batch buffer 的时候;gpu 完成命令后触发的是提交时新生成的out fence。

/*
 * Submit a command buffer to the kernel with DRM_IOCTL_VIRTGPU_EXECBUFFER.
 *
 * qws:   winsys the buffer is submitted through.
 * _cbuf: command buffer; its in_fence_fd, when >= 0, is passed to the kernel
 *        as the fence to wait on and is closed and reset to -1 afterwards.
 * fence: when non-NULL and the ioctl succeeds, receives a new fence (built
 *        from the fd the kernel returned when fences are supported, a legacy
 *        fence otherwise).
 *
 * Excerpt: the "..." lines mark code omitted from the listing (including the
 * declaration of ret and the function's return).
 */
static int virgl_drm_winsys_submit_cmd(struct virgl_winsys *qws,
                                       struct virgl_cmd_buf *_cbuf,
                                       struct pipe_fence_handle **fence)
{
   struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
   struct virgl_drm_cmd_buf *cbuf = virgl_drm_cmd_buf(_cbuf);
   struct drm_virtgpu_execbuffer eb;
   ...
   memset(&eb, 0, sizeof(struct drm_virtgpu_execbuffer));
   eb.command = (unsigned long)(void*)cbuf->buf;
   eb.size = cbuf->base.cdw * 4;
   eb.num_bo_handles = cbuf->cres;
   eb.bo_handles = (unsigned long)(void *)cbuf->res_hlist;

   eb.fence_fd = -1;
   if (qws->supports_fences) {
      if (cbuf->in_fence_fd >= 0) {
         /* Fence the GPU must wait on before executing this batch buffer. */
         eb.flags |= VIRTGPU_EXECBUF_FENCE_FD_IN;
         eb.fence_fd = cbuf->in_fence_fd;
      }

      /* Ask the kernel for an out fence only when the caller wants one. */
      if (fence != NULL)
         eb.flags |= VIRTGPU_EXECBUF_FENCE_FD_OUT;
   } else {
      assert(cbuf->in_fence_fd < 0);
   }

   ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &eb);
   if (ret == -1) {
      /*
       * NOTE(review): the statement this test guarded is missing from the
       * listing (presumably error reporting - confirm against upstream).
       * The explicit empty body stops the test from capturing the fence
       * handling below, which checks ret == 0 itself and therefore has to
       * run whether or not the ioctl failed.
       */
   }

   if (qws->supports_fences) {
      /* The in fence has been handed to the kernel; drop our descriptor. */
      if (cbuf->in_fence_fd >= 0) {
         close(cbuf->in_fence_fd);
         cbuf->in_fence_fd = -1;
      }

      /*
       * Wrap the fd returned by the driver; the author notes that it is
       * already tied to a dma sync file on the kernel side.
       */
      if (fence != NULL && ret == 0)
         *fence = virgl_drm_fence_create(qws, eb.fence_fd, false);
   } else {
      if (fence != NULL && ret == 0)
         *fence = virgl_drm_fence_create_legacy(qws);
   }
   ...
}

内核中virtio_gpu_execbuffer_ioctl 的实现:取得in fence 并等待其完成,同时创建out fence

static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file)
{
      ....
	uint32_t *bo_handles = NULL;
	void __user *user_bo_handles = NULL;
	struct virtio_gpu_object_array *buflist = NULL;
	struct sync_file *sync_file;
	int in_fence_fd = exbuf->fence_fd;
	int out_fence_fd = -1;
	void *buf;

	if (vgdev->has_virgl_3d == false)
		return -ENOSYS;

	if ((exbuf->flags & ~VIRTGPU_EXECBUF_FLAGS))
		return -EINVAL;

	exbuf->fence_fd = -1;

	virtio_gpu_create_context(dev, file);
	if (exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_IN) {
		struct dma_fence *in_fence;

		in_fence = sync_file_get_fence(in_fence_fd);

		if (!in_fence)
			return -EINVAL;

		/*
		 * Wait if the fence is from a foreign context, or if the fence
		 * array contains any fence from a foreign context.
		 */
		ret = 0;
		if (!dma_fence_match_context(in_fence, vgdev->fence_drv.context))
			ret = dma_fence_wait(in_fence, true);

		dma_fence_put(in_fence);
		if (ret)
			return ret;
	}
	if (exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_OUT) {
		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
		if (out_fence_fd < 0)
			return out_fence_fd;
	}
	......
	if (out_fence_fd >= 0) {
		sync_file = sync_file_create(&out_fence->f);
		if (!sync_file) {
			dma_fence_put(&out_fence->f);
			ret = -ENOMEM;
			goto out_unresv;
		}

		exbuf->fence_fd = out_fence_fd;
		fd_install(out_fence_fd, sync_file->file); //将fd 和驱动层dma sync file 联系起来。
	}

用户态获得out fence fd 后,可以通过poll 等待事件完成。 这会调用fd->file_ops 对应的poll 函数


/*
 * File operations table for sync files: wires release, poll and ioctl
 * (native and compat) to their handlers.
 */
static const struct file_operations sync_file_fops = {
	.poll		= sync_file_poll,
	.unlocked_ioctl	= sync_file_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.release	= sync_file_release,
};

dma sync file 实现了对应的函数。

eglDupNativeFenceFDANDROID 函数功能比较简单 就是将create sync 绑定的fd 读取回来。