6.S081-Lab 2 system calls

前言

本博客为6.S081课程的第二个Lab: system calls，内容为在 xv6 操作系统中实现两个系统调用：trace 和 sysinfo，通过这次 Lab 熟悉 xv6 中系统调用的入口和执行流程。Lab链接：https://pdos.csail.mit.edu/6.828/2021/labs/syscall.html

System call tracing

添加一个用于跟踪系统调用的 trace 系统调用：它接收一个整数参数 mask，mask 的第 i 位为 1 表示需要跟踪系统调用号为 i 的系统调用（例如 trace(1 << SYS_fork) 表示跟踪 fork）。被跟踪的系统调用在返回前，内核会打印一行信息，包含进程 pid、系统调用名称和返回值。

为了解决这道题，需要清楚 xv6 中的系统调用执行的流程。

首先，user/usys.pl文件是一个脚本，用于生成系统调用汇编文件usys.S，该汇编文件的部分内容为：

# generated by usys.pl - do not edit
# User-space system call stubs. Every stub below follows one pattern:
# put the system call number in a7, trap into the kernel with ecall,
# then return to the caller.
#include "kernel/syscall.h"
.global fork
fork:
 li a7, SYS_fork   # system call number goes in a7
 ecall             # trap into the kernel
 ret               # back to the user-space caller
.global exit
exit:
 li a7, SYS_exit
 ecall
 ret
.global wait
wait:
 li a7, SYS_wait
 ecall
 ret
.global pipe
pipe:
 li a7, SYS_pipe
 ecall
 ret

可以看到，每个系统调用对应了一个汇编函数，以 fork 调用为例，首先将系统调用号 SYS_fork 放入 a7 寄存器，然后执行 ecall 指令，该指令用于陷入内核，之后执行内核中的 syscall 函数(在kernel/syscall.c中)；在 syscall 函数中，通过 myproc() 获取当前进程结构体，并获取 trapframe中的a7寄存器值，用于查表调用相关的系统调用实现。

因此，为了添加trace系统调用，首先在user/usys.pl中添加入口：

entry("trace");

在user/user.h中添加系统调用声明：

int trace(int);

在kernel/syscall.h中添加系统调用号：

#define SYS_trace 22

在kernel/syscall.c中添加系统调用实现声明以及在系统调用函数表中添加相应的项：

// Handler for the trace system call (implemented in kernel/sysproc.c).
extern uint64 sys_trace(void);

// Dispatch table indexed by system call number. syscall() looks up the
// handler for the number found in the saved a7 register and calls it.
static uint64 (*syscalls[])(void) = {
[SYS_fork]    = sys_fork,
[SYS_exit]    = sys_exit,
[SYS_wait]    = sys_wait,
[SYS_pipe]    = sys_pipe,
[SYS_read]    = sys_read,
[SYS_kill]    = sys_kill,
[SYS_exec]    = sys_exec,
[SYS_fstat]   = sys_fstat,
[SYS_chdir]   = sys_chdir,
[SYS_dup]     = sys_dup,
[SYS_getpid]  = sys_getpid,
[SYS_sbrk]    = sys_sbrk,
[SYS_sleep]   = sys_sleep,
[SYS_uptime]  = sys_uptime,
[SYS_open]    = sys_open,
[SYS_write]   = sys_write,
[SYS_mknod]   = sys_mknod,
[SYS_unlink]  = sys_unlink,
[SYS_link]    = sys_link,
[SYS_mkdir]   = sys_mkdir,
[SYS_close]   = sys_close,
[SYS_trace]   = sys_trace,    // new entry for the trace system call
};

为了根据系统调用号打印出系统调用的名称，还需要在 kernel/syscall.c 中添加一个系统调用名称表：

// Human-readable name for each system call number, used when printing
// trace output. Indexed the same way as the handler table.
static const char *syscall_names[] = {
  [SYS_fork]   = "fork",
  [SYS_exit]   = "exit",
  [SYS_wait]   = "wait",
  [SYS_pipe]   = "pipe",
  [SYS_read]   = "read",
  [SYS_kill]   = "kill",
  [SYS_exec]   = "exec",
  [SYS_fstat]  = "fstat",
  [SYS_chdir]  = "chdir",
  [SYS_dup]    = "dup",
  [SYS_getpid] = "getpid",
  [SYS_sbrk]   = "sbrk",
  [SYS_sleep]  = "sleep",
  [SYS_uptime] = "uptime",
  [SYS_open]   = "open",
  [SYS_write]  = "write",
  [SYS_mknod]  = "mknod",
  [SYS_unlink] = "unlink",
  [SYS_link]   = "link",
  [SYS_mkdir]  = "mkdir",
  [SYS_close]  = "close",
  [SYS_trace]  = "trace",
};

为了记录进程调用 trace 时的掩码mask(trace调用的参数)，需要在kernel/proc.h的 proc 结构体中添加一项 mask，这里我用int类型来表示：

// Per-process state.
// The comments on each group of fields say which lock, if any, must be
// held while accessing them.
struct proc {
  struct spinlock lock;

  // p->lock must be held when using these:
  enum procstate state;        // Process state
  void *chan;                  // If non-zero, sleeping on chan
  int killed;                  // If non-zero, have been killed
  int xstate;                  // Exit status to be returned to parent's wait
  int pid;                     // Process ID

  // wait_lock must be held when using this:
  struct proc *parent;         // Parent process

  // these are private to the process, so p->lock need not be held.
  uint64 kstack;               // Virtual address of kernel stack
  uint64 sz;                   // Size of process memory (bytes)
  pagetable_t pagetable;       // User page table
  struct trapframe *trapframe; // data page for trampoline.S
  struct context context;      // swtch() here to run process
  struct file *ofile[NOFILE];  // Open files
  struct inode *cwd;           // Current directory
  char name[16];               // Process name (debugging)
  int mask;                    // Trace mask: bit i set means syscall number i is traced
};

为了正确的跟踪 fork 系统调用，需要在 fork 系统调用实现函数中也将 mask 拷贝给子进程，修改kernel/proc.c：

// Create a new process, copying the parent.
// Sets up child kernel stack to return as if from fork() system call.
// Returns the child's pid to the caller, or -1 if no process could be
// allocated or the parent's user memory could not be copied.
int
fork(void)
{
  int i, pid;
  struct proc *np;
  struct proc *p = myproc();

  // Allocate process.
  if((np = allocproc()) == 0){
    return -1;
  }

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  // Inherit the parent's trace mask so that system calls made by the
  // child are traced with the same mask.
  np->mask = p->mask;

  // copy saved user registers.
  *(np->trapframe) = *(p->trapframe);

  // Cause fork to return 0 in the child.
  np->trapframe->a0 = 0;

  // increment reference counts on open file descriptors.
  for(i = 0; i < NOFILE; i++)
    if(p->ofile[i])
      np->ofile[i] = filedup(p->ofile[i]);
  np->cwd = idup(p->cwd);

  safestrcpy(np->name, p->name, sizeof(p->name));

  // Save the pid before np->lock is dropped.
  pid = np->pid;

  release(&np->lock);

  // The parent pointer is written while holding wait_lock (see struct proc).
  acquire(&wait_lock);
  np->parent = p;
  release(&wait_lock);

  // Mark the child runnable; state changes require np->lock.
  acquire(&np->lock);
  np->state = RUNNABLE;
  release(&np->lock);

  return pid;
}

然后在kernel/sysproc.c中添加系统调用sys_trace的具体实现，实际上就是通过argint()获取系统调用的参数mask，然后将其保存至当前进程的mask字段中：

// trace(mask) system call.
// Fetches the first system call argument as an int and records it as the
// calling process's trace mask. Returns 0 on success, or -1 if the
// argument could not be fetched.
uint64
sys_trace(void)
{
  int requested;

  if(argint(0, &requested) >= 0){
    myproc()->mask = requested;
    return 0;
  }
  return -1;
}

这里需要说明的是，系统调用实现函数都是不带参数的。用户态传入的参数放在 a0~a5 等寄存器中，陷入内核时这些寄存器被保存到当前进程的 trapframe 里，内核通过kernel/syscall.c文件中的argint,argaddr,argstr等函数从中取出参数。

最后就是修改kernel/syscall.c中的syscall()函数了，之前提过每次系统调用发生时都会先陷入内核，然后执行这个函数，通过获取进程的mask值来打印当前的系统调用信息。

// Dispatch the system call requested by the current process.
//
// The call number is read from the saved a7 register in the trapframe.
// If it names a registered handler, the handler runs and its return
// value is stored in the saved a0 register; otherwise the call is
// reported as unknown and a0 is set to -1.
//
// When the bit for the call number is set in the process's trace mask,
// one line of the form "<pid>: syscall <name> -> <return value>" is
// printed after the handler returns.
void
syscall(void)
{
  int num;
  struct proc *p = myproc();

  num = p->trapframe->a7;
  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    p->trapframe->a0 = syscalls[num]();

    // Unsigned arithmetic keeps the shift well defined up to bit 31.
    if((1U << num) & (unsigned int)p->mask) {
      // syscall_names[] is a separate table and may be shorter than
      // syscalls[] (or have gaps); never index past its end or print a
      // null string.
      const char *name = "unknown";
      if(num < NELEM(syscall_names) && syscall_names[num])
        name = syscall_names[num];

      // a0 is a 64-bit register image; convert explicitly to match %d.
      printf("%d: syscall %s -> %d\n", p->pid, name, (int)p->trapframe->a0);
    }
  } else {
    printf("%d %s: unknown sys call %d\n",
            p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}

sysinfo

这道题也是添加一个系统调用，这个系统调用传入一个 struct sysinfo* 参数用于获取当前系统的空闲内存以及进程数。添加系统调用的过程和上面类似。

首先在user/usys.pl中添加入口：

entry("sysinfo");

在user/user.h中添加系统调用声明：

struct sysinfo; // 前置声明
int sysinfo(struct sysinfo*);

在kernel/syscall.h中添加系统调用号：

#define SYS_sysinfo 23

在kernel/syscall.c中添加系统调用实现声明以及在系统调用函数表中添加相应的项（同时建议在前面的 syscall_names 名称表中加入 [SYS_sysinfo] "sysinfo" 一项，否则跟踪 sysinfo 调用时会越界访问名称表）：

// Handler for the sysinfo system call (implemented in kernel/sysproc.c).
extern uint64 sys_sysinfo(void);

// Dispatch table indexed by system call number; each slot holds the
// kernel handler for that call.
static uint64 (*syscalls[])(void) = {
  [SYS_fork]    = sys_fork,
  [SYS_exit]    = sys_exit,
  [SYS_wait]    = sys_wait,
  [SYS_pipe]    = sys_pipe,
  [SYS_read]    = sys_read,
  [SYS_kill]    = sys_kill,
  [SYS_exec]    = sys_exec,
  [SYS_fstat]   = sys_fstat,
  [SYS_chdir]   = sys_chdir,
  [SYS_dup]     = sys_dup,
  [SYS_getpid]  = sys_getpid,
  [SYS_sbrk]    = sys_sbrk,
  [SYS_sleep]   = sys_sleep,
  [SYS_uptime]  = sys_uptime,
  [SYS_open]    = sys_open,
  [SYS_write]   = sys_write,
  [SYS_mknod]   = sys_mknod,
  [SYS_unlink]  = sys_unlink,
  [SYS_link]    = sys_link,
  [SYS_mkdir]   = sys_mkdir,
  [SYS_close]   = sys_close,
  [SYS_trace]   = sys_trace,
  [SYS_sysinfo] = sys_sysinfo,  // new entry for the sysinfo system call
};

在 kernel/sysproc.c 中添加系统调用的实现（需要先在该文件中 #include "sysinfo.h"，以获得 struct sysinfo 的完整定义）：首先获取用户传递给系统调用的参数放入addr中(用户地址空间地址)，然后在内核中定义一个struct sysinfo局部变量，分别调用getfreemem()和getnproc()函数(具体实现稍后添加)填充该结构体相应字段之后，调用copyout()将这个内核中的结构体复制到用户空间的地址中：

uint64
sys_sysinfo(void) {
  uint64 addr;
  if (argaddr(0, &addr) < 0)
    return -1;
  struct sysinfo info;
  info.freemem = getfreemem();  // 获取系统空闲内存(在kernel/kalloc.c中实现)
  info.nproc = getnproc();      // 获取系统当前的进程数量(在kernel/proc.c中实现)
  struct proc *p = myproc();
  if(copyout(p->pagetable, addr, (char *)&info, sizeof(info)) < 0)
    return -1;
  return 0;
}

在kernel/kalloc.c中添加getfreemem()的实现，空闲内存是使用一个链表来维护的，为了获取系统空闲内存字节数，需要遍历空闲内存链表，每遍历一个节点就增加一个PGSIZE字节：

uint64 getfreemem(void)
{
  uint64 freemem = 0;
  acquire(&kmem.lock);
  struct run *r = kmem.freelist;
  while (r) {
    freemem += PGSIZE;
    r = r->next;
  }
  release(&kmem.lock);
  return freemem;
}

在kernel/proc.c中添加getnproc()的实现，遍历进程数组，统计其中状态不是UNUSED的进程个数：

// Count the entries of the process table whose state is not UNUSED.
uint64
getnproc(void)
{
  uint64 count = 0;

  for(int i = 0; i < NPROC; i++){
    if(proc[i].state == UNUSED)
      continue;
    count++;
  }
  return count;
}

最后不要忘记在kernel/defs.h中添加这两个函数的声明：

uint64 getfreemem(void);
uint64 getnproc(void);

测试结果

2022/6/1测试通过：

make grade

参考文献

https://pdos.csail.mit.edu/6.828/2021/schedule.html