MIT 操作系统课程XV6 lab1: 简易shell实现

532 阅读3分钟

作业要求

参见pdos.csail.mit.edu/6.828/2018/…

目标是实现一个简单的shell程序。

环境准备

需要去参考官方的文档 pdos.csail.mit.edu/6.828/2018/… 我这里采用的是docker的形式,去docker hub上搜索xv6,找到下载次数最多的image,然后docker pull拉下来到本地,通过共享目录挂载,docker exec 进入容器进行调试。需要注意的是编译的时候需要执行 make qemu-nox(选择nox模式)。

xv6 Book(在做每一个lab之前你都需要阅读相应的章节) pdos.csail.mit.edu/6.828/2018/…

中文的翻译版本:

github.com/ranxian/xv6…

解读

shell 程序其实是横跨在kernel 与 user program 之间的一个中间层。

image.png

文档提供的 shell 示例程序如下,我们需要在此基础上作修改。完善ls,IO重定向,以及pipe。

#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>

// Simplifed xv6 shell.

#define MAXARGS 10

// All commands have at least a type. Have looked at the type, the code
// typically casts the *cmd to some specific cmd type.
struct cmd {
  int type;          //  ' ' (exec), | (pipe), '<' or '>' for redirection
};

struct execcmd {
  int type;              // ' '
  char *argv[MAXARGS];   // arguments to the command to be exec-ed
};

struct redircmd {
  int type;          // < or > 
  struct cmd *cmd;   // the command to be run (e.g., an execcmd)
  char *file;        // the input/output file
  int flags;         // flags for open() indicating read or write
  int fd;            // the file descriptor number to use for the file
};

struct pipecmd {
  int type;          // |
  struct cmd *left;  // left side of pipe
  struct cmd *right; // right side of pipe
};

int fork1(void);  // Fork but exits on failure.
struct cmd *parsecmd(char*);

// Execute cmd.  Never returns.
void
runcmd(struct cmd *cmd)
{
  int p[2], r;
  struct execcmd *ecmd;
  struct pipecmd *pcmd;
  struct redircmd *rcmd;

  if(cmd == 0)
    _exit(0);
  
  switch(cmd->type){
  default:
    fprintf(stderr, "unknown runcmd\n");
    _exit(-1);

  case ' ':
    ecmd = (struct execcmd*)cmd;
    if(ecmd->argv[0] == 0)
      _exit(0);
    fprintf(stderr, "exec not implemented\n");
    // Your code here ...
    break;

  case '>':
  case '<':
    rcmd = (struct redircmd*)cmd;
    fprintf(stderr, "redir not implemented\n");
    // Your code here ...
    runcmd(rcmd->cmd);
    break;

  case '|':
    pcmd = (struct pipecmd*)cmd;
    fprintf(stderr, "pipe not implemented\n");
    // Your code here ...
    break;
  }    
  _exit(0);
}

int
getcmd(char *buf, int nbuf)
{
  if (isatty(fileno(stdin)))
    fprintf(stdout, "6.828$ ");
  memset(buf, 0, nbuf);
  if(fgets(buf, nbuf, stdin) == 0)
    return -1; // EOF
  return 0;
}

int
main(void)
{
  static char buf[100];
  int fd, r;

  // Read and run input commands.
  while(getcmd(buf, sizeof(buf)) >= 0){
    if(buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' '){
      // Clumsy but will have to do for now.
      // Chdir has no effect on the parent if run in the child.
      buf[strlen(buf)-1] = 0;  // chop \n
      if(chdir(buf+3) < 0)
        fprintf(stderr, "cannot cd %s\n", buf+3);
      continue;
    }
    if(fork1() == 0)
      runcmd(parsecmd(buf));
    wait(&r);
  }
  exit(0);
}

int
fork1(void)
{
  int pid;
  
  pid = fork();
  if(pid == -1)
    perror("fork");
  return pid;
}

struct cmd*
execcmd(void)
{
  struct execcmd *cmd;

  cmd = malloc(sizeof(*cmd));
  memset(cmd, 0, sizeof(*cmd));
  cmd->type = ' ';
  return (struct cmd*)cmd;
}

struct cmd*
redircmd(struct cmd *subcmd, char *file, int type)
{
  struct redircmd *cmd;

  cmd = malloc(sizeof(*cmd));
  memset(cmd, 0, sizeof(*cmd));
  cmd->type = type;
  cmd->cmd = subcmd;
  cmd->file = file;
  cmd->flags = (type == '<') ?  O_RDONLY : O_WRONLY|O_CREAT|O_TRUNC;
  cmd->fd = (type == '<') ? 0 : 1;
  return (struct cmd*)cmd;
}

struct cmd*
pipecmd(struct cmd *left, struct cmd *right)
{
  struct pipecmd *cmd;

  cmd = malloc(sizeof(*cmd));
  memset(cmd, 0, sizeof(*cmd));
  cmd->type = '|';
  cmd->left = left;
  cmd->right = right;
  return (struct cmd*)cmd;
}

// Parsing

char whitespace[] = " \t\r\n\v";
char symbols[] = "<|>";

int
gettoken(char **ps, char *es, char **q, char **eq)
{
  char *s;
  int ret;
  
  s = *ps;
  while(s < es && strchr(whitespace, *s))
    s++;
  if(q)
    *q = s;
  ret = *s;
  switch(*s){
  case 0:
    break;
  case '|':
  case '<':
    s++;
    break;
  case '>':
    s++;
    break;
  default:
    ret = 'a';
    while(s < es && !strchr(whitespace, *s) && !strchr(symbols, *s))
      s++;
    break;
  }
  if(eq)
    *eq = s;
  
  while(s < es && strchr(whitespace, *s))
    s++;
  *ps = s;
  return ret;
}

int
peek(char **ps, char *es, char *toks)
{
  char *s;
  
  s = *ps;
  while(s < es && strchr(whitespace, *s))
    s++;
  *ps = s;
  return *s && strchr(toks, *s);
}

struct cmd *parseline(char**, char*);
struct cmd *parsepipe(char**, char*);
struct cmd *parseexec(char**, char*);

// make a copy of the characters in the input buffer, starting from s through es.
// null-terminate the copy to make it a string.
char 
*mkcopy(char *s, char *es)
{
  int n = es - s;
  char *c = malloc(n+1);
  assert(c);
  strncpy(c, s, n);
  c[n] = 0;
  return c;
}

struct cmd*
parsecmd(char *s)
{
  char *es;
  struct cmd *cmd;

  es = s + strlen(s);
  cmd = parseline(&s, es);
  peek(&s, es, "");
  if(s != es){
    fprintf(stderr, "leftovers: %s\n", s);
    exit(-1);
  }
  return cmd;
}

struct cmd*
parseline(char **ps, char *es)
{
  struct cmd *cmd;
  cmd = parsepipe(ps, es);
  return cmd;
}

struct cmd*
parsepipe(char **ps, char *es)
{
  struct cmd *cmd;

  cmd = parseexec(ps, es);
  if(peek(ps, es, "|")){
    gettoken(ps, es, 0, 0);
    cmd = pipecmd(cmd, parsepipe(ps, es));
  }
  return cmd;
}

struct cmd*
parseredirs(struct cmd *cmd, char **ps, char *es)
{
  int tok;
  char *q, *eq;

  while(peek(ps, es, "<>")){
    tok = gettoken(ps, es, 0, 0);
    if(gettoken(ps, es, &q, &eq) != 'a') {
      fprintf(stderr, "missing file for redirection\n");
      exit(-1);
    }
    switch(tok){
    case '<':
      cmd = redircmd(cmd, mkcopy(q, eq), '<');
      break;
    case '>':
      cmd = redircmd(cmd, mkcopy(q, eq), '>');
      break;
    }
  }
  return cmd;
}

struct cmd*
parseexec(char **ps, char *es)
{
  char *q, *eq;
  int tok, argc;
  struct execcmd *cmd;
  struct cmd *ret;
  
  ret = execcmd();
  cmd = (struct execcmd*)ret;

  argc = 0;
  ret = parseredirs(ret, ps, es);
  while(!peek(ps, es, "|")){
    if((tok=gettoken(ps, es, &q, &eq)) == 0)
      break;
    if(tok != 'a') {
      fprintf(stderr, "syntax error\n");
      exit(-1);
    }
    cmd->argv[argc] = mkcopy(q, eq);
    argc++;
    if(argc >= MAXARGS) {
      fprintf(stderr, "too many args\n");
      exit(-1);
    }
    ret = parseredirs(ret, ps, es);
  }
  cmd->argv[argc] = 0;
  return ret;
}

shell程序执行流程

main->runcmd->具体每个命令实现

int
main(void)
{
  static char buf[100];
  int fd, r;

  // Read and run input commands.
  // 读取命令字符到buf
  while(getcmd(buf, sizeof(buf)) >= 0){
    if(buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' '){
      // cd 的简易实现
      // Clumsy but will have to do for now.
      // Chdir has no effect on the parent if run in the child.
      buf[strlen(buf)-1] = 0;  // chop \n
      if(chdir(buf+3) < 0)
        fprintf(stderr, "cannot cd %s\n", buf+3);
      continue;
    }
    // fork 出新的子进程执行 runcmd
    if(fork1() == 0)
      runcmd(parsecmd(buf));
    wait(&r);
  }
  exit(0);
}

For many use cases the POSIX function isatty() is all what it is needed to detect if stdin is connected to a terminal。用来判断标准输入是否是terminal.

int
getcmd(char *buf, int nbuf)
{
  if (isatty(fileno(stdin)))
/*
The function fileno() examines the argument stream and returns
       the integer file descriptor used to implement this stream.  The
       file descriptor is still owned by stream and will be closed when
       fclose(3) is called.  Duplicate the file descriptor with dup(2)
       before passing it to code that might close it
       

*/
    fprintf(stdout, "6.828$ ");
  memset(buf, 0, nbuf);
  if(fgets(buf, nbuf, stdin) == 0)
    return -1; // EOF
  return 0;
}

ls命令实现

题目这里其实已经给了暗示:

6.828$ ls

Your shell may print an error message (unless there is a program named ls in your working directory or you are using a version of exec that searches PATH). Now type to your shell:

6.828$ /bin/ls

This should execute the program /bin/ls, which should print out the file names in your working directory. You can stop the 6.828 shell by typing ctrl-d, which should put you back in your computer's shell.

You may want to change the 6.828 shell to always try /bin, if the program doesn't exist in the current working directory, so that below you don't have to type "/bin" for each program. If you are ambitious you can implement support for a PATH variable.

我们要做的就是把ls 与/bin/ls 连接在一起。这里使用了

int execv(const char *pathname, char *const argv[]); 参考man7.org/linux/man-p…

  case ' ':
    ecmd = (struct execcmd*)cmd;
    char path[100] = "/bin/";
    if(ecmd->argv[0] == 0)
      _exit(0);
    // fprintf(stderr, "exec not implemented\n");
    // Your code here ...
    // int execv(const char *pathname, char *const argv[]);
    strcat(path,ecmd->argv[0]);
    if(execv(path,ecmd->argv)==-1)
    {
      fprintf(stderr,"exec ls failed\n");
    }
    break;

I/O redirection IO重定向

Implement I/O redirection commands so that you can run:

echo "6.828 is cool" > x.txt
cat < x.txt

The parser already recognizes ">" and "<", and builds a redircmd for you, so your job is just filling out the missing code in runcmd for those symbols. You might find the man pages for open and close useful.

Note that the mode field in redircmd contains access modes (e.g., O_RDONLY), which you should pass in the flags argument to open; see parseredirs for the mode values that the shell is using and the manual page for open for the flags argument.

Make sure you print an error message if one of the system calls you are using fails.

Make sure your implementation runs correctly with the above test input. A common error is to forget to specify the permission with which the file must be created (i.e., the 3rd argument to open).

实现redircmd部分 主要思想是先关闭原来的file descriptor 然后打开新的作为输入。 进一步需要阅读linux系统函数open的实现 可以参照: 需要看一下xv6 book,里面其实有IO重定向的实现如下:

文件描述符和 fork 的交叉使用使得 I/O 重定向能够轻易实现。fork 会复制父进程的文件描述符和内存,所以子进程和父进程的文件描述符一模一样。exec 会替换调用它的进程的内存但是会保留它的文件描述符表。这种行为使得 shell 可以这样实现重定向:fork 一个进程,重新打开指定文件的文件描述符,然后执行新的程序。下面是一个简化版的 shell 执行 cat<input.txt 的代码:

char *argv[2];
argv[0] = "cat";
### argv[1] = 0;
if(fork() == 0) {
    close(0);
    open("input.txt", O_RDONLY);
    exec("cat", argv);
}

函数原型 int open(const char * pathname , int flags , mode_t mode ); 需要对mode参数做权限的设置。 man7.org/linux/man-p…

  case '>':
  case '<':
    rcmd = (struct redircmd*)cmd;
    // fprintf(stderr, "redir not implemented\n");
    // Your code here ...
    close(rcmd->fd);
    if(open(rcmd->file,rcmd->flags,S_IRWXU | S_IRWXO | S_IRWXG)<0)
    {
      fprintf(stderr,"open failed");
      exit(0);
    }
    
    runcmd(rcmd->cmd);
    break;

Implement pipes 管道实现

Implement pipes so that you can run command pipelines such as:

$ ls | sort | uniq | wc

The parser already recognizes "|", and builds a pipecmd for you, so the only code you must write is for the '|' case in runcmd. You might find the man pages for pipe, fork, close, and dup useful.

Test that you can run the above pipeline. The sort program may be in the directory /usr/bin/ and in that case you can type the absolute pathname /usr/bin/sort to run sort. (In your computer's shell you can type which sort to find out which directory in the shell's search path has an executable named "sort".)

Now you should be able to run the following command correctly:

6.828$ a.out < t.sh

Make sure you use the right absolute pathnames for the programs.

Don't forget to submit your solution to the submission web site (using the "hwN.c" naming convention as outlined above), with or without challenge solutions.

管道是一个小的内核缓冲区,它以文件描述符对的形式提供给进程,一个用于写操作,一个用于读操作。从管道的一端写的数据可以从管道的另一端读取。管道提供了一种进程间交互的方式 这一部分可以参考xv6自己的shell实现。可以看出是fork出两个子进程,分别复制pipe的两个端口到文件描述符上。一个是0(标准输入),另外一个是1(标准输出),这样就是达到了管道的一端写的数据可以从管道的另一端读取的效果。 创建管道函数pipe()

pipe() creates a pipe, a unidirectional data channel that can be
       used for interprocess communication.  The array pipefd is used to
       return two file descriptors referring to the ends of the pipe.
       pipefd[0] refers to the read end of the pipe.  pipefd[1] refers
       to the write end of the pipe.  Data written to the write end of
       the pipe is buffered by the kernel until it is read from the read
       end of the pipe.

参考案例:

 case PIPE:
    pcmd = (struct pipecmd*)cmd;
    if(pipe(p) < 0)
      panic("pipe");
    if(fork1() == 0){
      close(1);
      dup(p[1]);
      close(p[0]);
      close(p[1]);
      runcmd(pcmd->left);
    }
    if(fork1() == 0){
      close(0);
      dup(p[0]);
      close(p[0]);
      close(p[1]);
      runcmd(pcmd->right);
    }
    close(p[0]);
    close(p[1]);
    wait();
    wait();
    break;

作业中的实际案例:

  case '|':
  
    pcmd = (struct pipecmd*)cmd;
    if(pipe(p)==-1)
    {
      fprintf(stderr,"create pipe failed");
    }
    if(fork1==0)
    {
      close(0);
      dup(0);
      close(p[0]);
      close(p[1]);
      runcmd(pcmd->right);
    }
    if(fork1==0)
    {
      close(1);
      dup(1);
      close(p[0]);
      close(p[1]);
      runcmd(pcmd->left);
    }
    close(p[0]);
    close(p[1]);
    wait(&r);
    wait(&r);
    // Your code here ...
    
    break;