6.828: Operating System Engineering (81)

Required reading: xv6 trapasm.S, trap.c, syscall.c, initcode.S, usys.S. Skim vectors.S, lapic.c, ioapic.c, picirq.c.

とのこと。initcode.S の exec が云々とあります。

# exec(init, argv)
.globl start
start:
  pushl $argv
  pushl $init
  pushl $0  // where caller pc would be
  movl $SYS_exec, %eax
  int $T_SYSCALL

これはおそらく init なプロセスを kickoff しとるのでしょうな。
argv とか init とかなシンボルも直下で定義されてて以下。

# char init[] = "/init\0";
init:
  .string "/init\0"

# char *argv[] = { init, 0 };
.p2align 2
argv:
  .long init
  .long 0

あるいは SYS_exec が syscall.h で定義されてて以下。

#define SYS_exec    9

T_SYSCALL が traps.h で定義されてて以下。

#define T_SYSCALL       64      // system call

64 は 16 進で 0x40 です。ええと、なぜ int $T_SYSCALL でシステムコールに入れるのか、その根拠は、ということなのですが trap.c の以下あたりが鍵かと。

void
tvinit(void)
{
  int i;

  for(i = 0; i < 256; i++)
    SETGATE(idt[i], 0, SEG_KCODE<<3, vectors[i], 0);
  SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER);
  
  initlock(&tickslock, "time");
}

void
idtinit(void)
{
  lidt(idt, sizeof(idt));
}

tvinit 手続き (idt を初期化しつつ 0x40 なベクタを設定) は main.c の mainc 手続きから呼び出されてて、idtinit 手続きは mainc 手続きから呼び出される mpmain 手続きから呼び出されております。

ちょっと気になった

init なプロセスはどこから kickoff されているのかと思ったら mainc 手続きの以下の部分と思われます。

  userinit();      // first user process
  bootothers();    // start other processors

  // Finish setting up this processor in mpmain.
  mpmain();

userinit は proc.c で定義されてます。これが又微妙な操作をナニされている模様。

// Set up first user process.
void
userinit(void)
{
  struct proc *p;
  extern char _binary_initcode_start[], _binary_initcode_size[];
  
  p = allocproc();

allocproc 手続きにより proc 構造体のオブジェクトを取得。実は allocproc 手続きは直上で定義されてます。冒頭コメントのみ引用。

// Look in the process table for an UNUSED proc.
// If found, change state to EMBRYO and initialize
// state required to run in the kernel.
// Otherwise return 0.

で、以下なあたりの処理を、なのですが

  initproc = p;
  if((p->pgdir = setupkvm()) == 0)
    panic("userinit: out of memory?");
  inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size);

xv6 の proc 構造体の定義が確認したい。proc.h で定義されてまして以下な模様。

// Per-process state
struct proc {
  uint sz;                     // Size of process memory (bytes)
  pde_t* pgdir;                // Page table
  char *kstack;                // Bottom of kernel stack for this process
  enum procstate state;        // Process state
  volatile int pid;            // Process ID
  struct proc *parent;         // Parent process
  struct trapframe *tf;        // Trap frame for current syscall
  struct context *context;     // swtch() here to run process
  void *chan;                  // If non-zero, sleeping on chan
  int killed;                  // If non-zero, have been killed
  struct file *ofile[NOFILE];  // Open files
  struct inode *cwd;           // Current directory
  char name[16];               // Process name (debugging)
};

プロセス毎に pgdir 持ってるのが分かります。setupkvm 手続きの戻りをセットしてますね。その setupkvm 手続きなのですが、pgdir な領域を確保して

//   640K..1M         : mapped direct (for IO space)
//   1M..end          : mapped direct (for the kernel's text and data)
//   end..PHYSTOP     : mapped direct (kernel heap and user pages)
//   0xfe000000..0    : mapped direct (devices such as ioapic)

なナニを mappages 手続きを使って設定してます。
そしてここでブラックホールにハマりました。mapping してるソレは以下なんですが、

  k = kmap;
  for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
    if(mappages(pgdir, k->p, k->e - k->p, (uint)k->p, k->perm) < 0)
      return 0;

kmap という配列が上記コメントにある通りの定義になってて以下 (直上で定義されてます)。

static struct kmap {
  void *p;
  void *e;
  int perm;
} kmap[] = {
  {(void*)USERTOP,    (void*)0x100000, PTE_W},  // I/O space
  {(void*)0x100000,   data,            0    },  // kernel text, rodata
  {data,              (void*)PHYSTOP,  PTE_W},  // kernel data, memory
  {(void*)0xFE000000, 0,               PTE_W},  // device mappings
};

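0 (k->e) から 0xFE000000 (k->p) 引いた値って何だよ、って言いつつ gdb でもごもごしてて暫くして 2 の補数という言葉が頭に出てきました。駄目すぎる。32 ビットの演算では 0 - 0xFE000000 は桁あふれして 0x02000000 (32MB) になるので、0xFE000000 からアドレス空間の終端 (0xFFFFFFFF) までのサイズがそのまま得られる、というわけです。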
しかしこれで丁度のサイズが計算できるんだからコンピュータ作った人って賢いよね。

閑話休題

userinit 手続きに戻ります。次は inituvm という手続きですが、冒頭コメントにあるように、initcode を Load する手続きです。

// Load the initcode into address 0 of pgdir.
// sz must be less than a page.

基本的に 1 ページ領域を確保して address 0 of pgdir に map して _binary_initcode_start から _binary_initcode_size 分その領域にコピーしている形。
で、プロセスディスクリプタの属性を適切に設定して

  p->sz = PGSIZE;
  memset(p->tf, 0, sizeof(*p->tf));
  p->tf->cs = (SEG_UCODE << 3) | DPL_USER;
  p->tf->ds = (SEG_UDATA << 3) | DPL_USER;
  p->tf->es = p->tf->ds;
  p->tf->ss = p->tf->ds;
  p->tf->eflags = FL_IF;
  p->tf->esp = PGSIZE;
  p->tf->eip = 0;  // beginning of initcode.S

  safestrcpy(p->name, "initcode", sizeof(p->name));
  p->cwd = namei("/");

  p->state = RUNNABLE;

あとはスケジューラ任せ、ということかと。eip に 0 をセットしてるのは initcode は 0 番地から始まります、ということになってるから、という理解。
で、ようやく最初に戻って initcode.S の先頭のブロックが動きはじめるのか。

割り込み

スタックに諸々を push して SYS_exec を eax レジスタに格納して割り込み発行。

  pushl $argv
  pushl $init
  pushl $0  // where caller pc would be
  movl $SYS_exec, %eax
  int $T_SYSCALL

割り込みディスクリプタテーブルは上記引用してる tvinit という手続きでセットしてます。SETGATE に渡してる vectors という配列は vectors.S で定義されてますな。vectors[64] には以下な手続きの入口なポインタが格納されてます。

.globl vector64
vector64:
  pushl $0
  pushl $64
  jmp alltraps

SETGATE の中身の確認はスルーします。マニュアルに書いてある通りのはずなので。ただ、システムコール用のゲートだけは、ユーザモードから int 命令で呼び出せるように、ゲートのディスクリプタ特権レベル (DPL) を他のベクタの 0 から DPL_USER に下げてるはず。

  SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER);

DPL_USER を渡してるあたり。
で、alltraps に jmp してるわけなのですが、trapasm.S で定義されてて以下ですな。全部引用しちゃえ。

  # vectors.S sends all traps here.
.globl alltraps
alltraps:
  # Build trap frame.
  pushl %ds
  pushl %es
  pushl %fs
  pushl %gs
  pushal
  
  # Set up data and per-cpu segments.
  movw $(SEG_KDATA<<3), %ax
  movw %ax, %ds
  movw %ax, %es
  movw $(SEG_KCPU<<3), %ax
  movw %ax, %fs
  movw %ax, %gs

  # Call trap(tf), where tf=%esp
  pushl %esp
  call trap
  addl $4, %esp

  # Return falls through to trapret...
.globl trapret
trapret:
  popal
  popl %gs
  popl %fs
  popl %es
  popl %ds
  addl $0x8, %esp  # trapno and errcode
  iret

trap という手続きを call されていらっしゃいます。引数が trapframe 構造体へのポインタということなのですが、コメントにある通り、_Call trap(tf), where tf=%esp_ ということになってます。つまり、ここまでにスタックへ push した内容がそのまま trapframe になっていて、その先頭アドレス (%esp) を引数として渡している形。
色々確認したところでは、システムコールの場合は trap 手続きの先頭部分だけをさっさと済ませて return するカンジですね。

void
trap(struct trapframe *tf)
{
  if(tf->trapno == T_SYSCALL){
    if(proc->killed)
      exit();
    proc->tf = tf;
    syscall();
    if(proc->killed)
      exit();
    return;
  }

ちなみに上記の proc というシンボルはどこで宣言されてるのかというと proc.h な模様。ちょっとここは一旦スルーした方が良さげ。
で、syscall 手続きですがこれまた微妙。

void
syscall(void)
{
  int num;
  
  num = proc->tf->eax;
  if(num >= 0 && num < NELEM(syscalls) && syscalls[num])
    proc->tf->eax = syscalls[num]();
  else {
    cprintf("%d %s: unknown sys call %d\n",
            proc->pid, proc->name, num);
    proc->tf->eax = -1;
  }
}

syscall.c にて定義されてます。proc->tf->eax からシステムコールテーブルなインデクス取得してますね。SYS_exec が入ってるソレです。こんな参照の仕方しなきゃいかんとは微妙だなぁ。ちょっと proc.h の該当部分を以下に引用しときます。

// Per-CPU variables, holding pointers to the
// current cpu and to the current process.
// The asm suffix tells gcc to use "%gs:0" to refer to cpu
// and "%gs:4" to refer to proc.  seginit sets up the
// %gs segment register so that %gs refers to the memory
// holding those two variables in the local cpu's struct cpu.
// This is similar to how thread-local variables are implemented
// in thread libraries such as Linux pthreads.
extern struct cpu *cpu asm("%gs:0");       // &cpus[cpunum()]
extern struct proc *proc asm("%gs:4");     // cpus[cpunum()].proc

マルチプロセッサ云々はとりあえずスルーします。とは言えこの情報の渡しかたはなんとなく胸にすとんと落ちないな。
このあたり、Linux はどんなだったかとか全然覚えてないので見てみます。確かこの割り込み云々のあたりは確認した覚えがあるような気がするのですが、マルチプロセッサなソレをどう処理してたか、とかって記憶に無い。