XV6学习(15)Lab mmap: Mmap

1,699 阅读4分钟

代码在Github上。

这一个实验是要实现最基础的mmap功能。mmap即内存映射文件,将一个文件直接映射到内存当中,之后对文件的读写就可以直接通过对内存进行读写来进行,而对文件的同步则由操作系统来负责完成。使用mmap可以避免大量read/write操作带来的内核缓冲区和用户缓冲区之间频繁的数据拷贝。Kafka消息队列等软件就借助mmap来实现零拷贝(zero-copy)。

首先定义vma结构体用于保存内存映射信息,并在proc结构体中加入struct vma *vma指针:

// Number of entries in the global vma_list table.
#define NVMA 16
// Virtual address handed to the first mapping of a process.
#define VMA_START (MAXVA / 2)
// Describes one mmap-ed region of a process.
struct vma{
  uint64 start; // first virtual address of the region (inclusive)
  uint64 end; // start + length (exclusive)
  uint64 length; // size in bytes; 0 marks the slot as unused
  uint64 off; // file offset that corresponds to start
  int permission; // PTE_* bits applied when a page of the region is mapped
  int flags; // flags argument given to mmap (tested against MAP_PRIVATE)
  struct file *file; // file backing the region
  struct vma *next; // next region in the owning process's list, 0 at the tail

  struct spinlock lock; // held while the slot is being claimed or released
};

// Per-process state
struct proc {
  ...
  struct vma *vma; // head of this process's list of mmap-ed regions, 0 when it has none
  ...
};

之后实现对vma分配的代码:

struct vma vma_list[NVMA];

struct vma* vma_alloc(){
  for(int i = 0; i < NVMA; i++){
    acquire(&vma_list[i].lock);
    if(vma_list[i].length == 0){
      return &vma_list[i];
    }else{
      release(&vma_list[i].lock);
    }
  }
  panic("no enough vma");
}

实现mmap系统调用,这个函数主要就是申请一个vma,之后查找一块空闲内存,填入相关信息,将vma插入到进程的vma链表中去:

uint64
sys_mmap(void)
{
  uint64 addr;
  int length, prot, flags, fd, offset;
  if(argaddr(0, &addr) < 0 || argint(1, &length) < 0 || argint(2, &prot) < 0 || argint(3, &flags) < 0 || argint(4, &fd) < 0 || argint(5, &offset) < 0){
    return -1;
  }

  if(addr != 0)
    panic("mmap: addr not 0");
  if(offset != 0)
    panic("mmap: offset not 0");

  struct proc *p = myproc();
  struct file* f = p->ofile[fd];

  int pte_flag = PTE_U;
  if (prot & PROT_WRITE) {
    if(!f->writable && !(flags & MAP_PRIVATE)) return -1; // map to a unwritable file with PROT_WRITE
    pte_flag |= PTE_W;
  }
  if (prot & PROT_READ) {
    if(!f->readable) return -1; // map to a unreadable file with PROT_READ
    pte_flag |= PTE_R;
  }

  struct vma* v = vma_alloc();
  v->permission = pte_flag;
  v->length = length;
  v->off = offset;
  v->file = myproc()->ofile[fd];
  v->flags = flags;
  filedup(f);
  struct vma* pv = p->vma;
  if(pv == 0){
    v->start = VMA_START;
    v->end = v->start + length;
    p->vma = v;
  }else{
    while(pv->next) pv = pv->next;
    v->start = PGROUNDUP(pv->end);
    v->end = v->start + length;
    pv->next = v;
    v->next = 0;
  }
  addr = v->start;
  printf("mmap: [%p, %p)\n", addr, v->end);

  release(&v->lock);
  return addr;
}

接下来就可以在usertrap中对缺页中断进行处理:查找进程的vma链表,判断该地址是否为映射地址,如果不是就说明出错,直接返回;如果在vma链表中,就可以申请并映射一个页面,之后根据vma从对应的文件中读取数据:

// Handle a page fault at va that may belong to one of the current process's
// mmap-ed regions: allocate a zeroed page, map it at the faulting page and
// fill it from the backing file.
//
// scause is the trap cause; 13 is treated as a read access and 15 as a write
// access (load / store page fault on RISC-V).
//
// Returns 0 on success, or
//   -1 va is not inside any region of the process,
//   -2 read fault on a region without PTE_R,
//   -3 write fault on a region without PTE_W,
//   -4 no free physical page,
//   -5 the page could not be mapped.
int
mmap_handler(uint64 va, int scause)
{
  struct proc *p = myproc();
  struct vma *region;

  // Find the region that contains the faulting address.
  for(region = p->vma; region != 0; region = region->next){
    if(region->start <= va && va < region->end)
      break;
  }
  if(region == 0)
    return -1; // not mmap addr

  if(scause == 13 && (region->permission & PTE_R) == 0)
    return -2; // unreadable vma
  if(scause == 15 && (region->permission & PTE_W) == 0)
    return -3; // unwritable vma

  // load page from file
  uint64 page_va = PGROUNDDOWN(va);
  char *page = kalloc();
  if(page == 0)
    return -4; // kalloc failed

  // Zero first so that bytes the file does not supply read as 0.
  memset(page, 0, PGSIZE);

  if(mappages(p->pagetable, page_va, PGSIZE, (uint64)page, region->permission) != 0){
    kfree(page);
    return -5; // map page failed
  }

  // The file offset of the page is its distance from the region start,
  // shifted by the region's own file offset.
  struct file *backing = region->file;
  ilock(backing->ip);
  readi(backing->ip, 0, (uint64)page, region->off + page_va - region->start, PGSIZE);
  iunlock(backing->ip);
  return 0;
}

之后就是munmap的实现,同样先从链表中找到对应的vma结构体,之后根据三种不同情况(头部、尾部、整个)来写回并释放对应的页面并更新vma信息,如果整个区域都被释放就将vma和文件释放。

uint64
sys_munmap(void)
{
  uint64 addr;
  int length;
  if(argaddr(0, &addr) < 0 || argint(1, &length) < 0){
    return -1;
  }

  struct proc *p = myproc();
  struct vma *v = p->vma;
  struct vma *pre = 0;
  while(v != 0){
    if(addr >= v->start && addr < v->end) break; // found
    pre = v;
    v = v->next;
  }

  if(v == 0) return -1; // not mapped
  printf("munmap: %p %d\n", addr, length);
  if(addr != v->start && addr + length != v->end) panic("munmap middle of vma");

  if(addr == v->start){
    writeback(v, addr, length);
    uvmunmap(p->pagetable, addr, length / PGSIZE, 1);
    if(length == v->length){
      // free all
      fileclose(v->file);
      if(pre == 0){
        p->vma = v->next; // head
      }else{
        pre->next = v->next;
        v->next = 0;
      }
      acquire(&v->lock);
      v->length = 0;
      release(&v->lock);
    }else{
      // free head
      v->start -= length;
      v->off += length;
      v->length -= length;
    }
  }else{
    // free tail
    v->length -= length;
    v->end -= length;
  }
  return 0;
}

写回函数先判断是否需要写回,当需要写回时就仿照filewrite的实现,将数据写回到对应的文件当中去,这里的实现是直接写回所有页面,但实际可以根据PTE_D来判断内存是否被写入,如果没有写入就不用写回:

// Write n bytes of the mapped region v, starting at user address addr, back
// to the file behind the mapping.
//
// Nothing is written for regions that are not writable or are MAP_PRIVATE.
// addr must be page aligned (panics otherwise) and lie inside v.
//
// The data is written in chunks that fit into one file-system transaction
// and that never cross a page boundary. A chunk that writei cannot write is
// skipped and the loop moves on; this is expected to happen for pages that
// were never faulted in and therefore hold no modified data.
void
writeback(struct vma* v, uint64 addr, int n)
{
  if(!(v->permission & PTE_W) || (v->flags & MAP_PRIVATE)) // no need to writeback
    return;

  if((addr % PGSIZE) != 0)
    panic("unmap: not aligned");

  printf("starting writeback: %p %d\n", addr, n);

  struct file* f = v->file;
  // File offset that corresponds to addr.
  uint64 base = v->off + (addr - v->start);

  int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE;
  int i = 0;
  while(i < n){
    int n1 = n - i;
    if(n1 > max)
      n1 = max;
    // Stay inside one page so that a chunk is either entirely resident or
    // entirely absent.
    int in_page = PGSIZE - (int)((addr + i) % PGSIZE);
    if(n1 > in_page)
      n1 = in_page;

    begin_op();
    ilock(f->ip);
    printf("%p %d %d\n", addr + i, (int)base, n1);
    writei(f->ip, 1, addr + i, base + i, n1);
    iunlock(f->ip);
    end_op();

    // Advance by the chunk size whatever writei returned: adding a failed
    // result would stall the loop or move it backwards.
    i += n1;
  }
}

最后就是在fork当中复制vma到子进程,在exit中将当前进程的vma链表释放,并且在exit时要对页面进行写回:

int
fork(void)
{
  ...
  np->state = RUNNABLE;

  np->vma = 0;
  struct vma *pv = p->vma;
  struct vma *pre = 0;
  while(pv){
    struct vma *vma = vma_alloc();
    vma->start = pv->start;
    vma->end = pv->end;
    vma->off = pv->off;
    vma->length = pv->length;
    vma->permission = pv->permission;
    vma->flags = pv->flags;
    vma->file = pv->file;
    filedup(vma->file);
    vma->next = 0;
    if(pre == 0){
      np->vma = vma;
    }else{
      pre->next = vma;
    }
    pre = vma;
    release(&vma->lock);
    pv = pv->next;
  }
  ...
}

void
exit(int status)
{
  struct proc *p = myproc();

  if(p == initproc)
    panic("init exiting");

  // Tear down every mapped region: write its pages back, unmap them, drop
  // the file reference and hand the vma slot back to the global table.
  struct vma* v = p->vma;
  while(v){
    // Remember the successor first; the slot may be reused by another
    // process as soon as its length is cleared.
    struct vma* next = v->next;

    writeback(v, v->start, v->length);
    uvmunmap(p->pagetable, v->start, PGROUNDUP(v->length) / PGSIZE, 1);
    fileclose(v->file);

    acquire(&v->lock);
    v->next = 0;
    v->length = 0;
    release(&v->lock);

    v = next;
  }
  // The list is gone; do not leave a pointer to released slots behind.
  p->vma = 0;
  ...
}