...
# now we want to move to protected mode ...
cli # no interrupts allowed !
# 因为后面我们要把原本是 BIOS 写好的中断向量表给覆盖掉,也就是给破坏掉了,写上我们自己的中断向量表,所以这个时候是不允许中断进来的。
# first we move the system to it's rightful place
mov $0x0000, %ax
cld # 'direction'=0, movs moves forward
do_move:
mov %ax, %es # destination segment
add $0x1000, %ax
cmp $0x9000, %ax
jz end_move
mov %ax, %ds # source segment
sub %di, %di
sub %si, %si
mov $0x8000, %cx
rep
movsw
jmp do_move
于是,现在的内存布局变成了:
# then we load the segment descriptors
end_move:
mov $SETUPSEG, %ax # right, forgot this at first. didn't work :-)
mov %ax, %ds
lidt idt_48 # load idt with 0,0
lgdt gdt_48 # load gdt with whatever appropriate
这里会加载idt和gdt。以gdt为例解释一下:
gdt:
.word 0,0,0,0 # dummy
.word 0x07FF # 8Mb - limit=2047 (2048*4096=8Mb),代码段描述符
.word 0x0000 # base address=0,数据段描述符
.word 0x9A00 # code read/exec
.word 0x00C0 # granularity=4096, 386
.word 0x07FF # 8Mb - limit=2047 (2048*4096=8Mb)
.word 0x0000 # base address=0
.word 0x9200 # data read/write
.word 0x00C0 # granularity=4096, 386
gdt_48: # 注意是小端序,0x800在低16位,0x9在高16位
.word 0x800 # gdt limit=2048, 256 GDT entries
.word 512+gdt, 0x9 # gdt base = 0X9xxxx,
# 512+gdt is the real gdt after setup is moved to 0x9020 * 0x10
void trap_init(void)
{
int i;
set_trap_gate(0,÷_error);
set_trap_gate(1,&debug);
set_trap_gate(2,&nmi);
set_system_gate(3,&int3); /* int3-5 can be called from all */
set_system_gate(4,&overflow);
set_system_gate(5,&bounds);
set_trap_gate(6,&invalid_op);
set_trap_gate(7,&device_not_available);
set_trap_gate(8,&double_fault);
set_trap_gate(9,&coprocessor_segment_overrun);
set_trap_gate(10,&invalid_TSS);
set_trap_gate(11,&segment_not_present);
set_trap_gate(12,&stack_segment);
set_trap_gate(13,&general_protection);
set_trap_gate(14,&page_fault); // 缺页中断
set_trap_gate(15,&reserved);
set_trap_gate(16,&coprocessor_error);
for (i=17;i<48;i++)
set_trap_gate(i,&reserved);
set_trap_gate(45,&irq13);
outb_p(inb_p(0x21)&0xfb,0x21);
outb(inb_p(0xA1)&0xdf,0xA1);
set_trap_gate(39,¶llel_interrupt);
}
tty_init:设置键盘中断的中断处理函数
sti:开启中断(set interrupt flag)
#define sti() __asm__ ("sti"::)
块设备初始化
一次读盘的请求用一个request结果来表示,使用request数组维护所有的请求。
/*
* The request-struct contains all necessary data
* to load a nr of sectors into memory
*/
struct request request[NR_REQUEST];
/*
* Ok, this is an expanded form so that we can use the same
* request for paging requests when that is implemented. In
* paging, 'bh' is NULL, and 'waiting' is used to wait for
* read/write completion.
*/
struct request {
int dev; /* 设备号,-1 表示无请求 */
int cmd; /* READ or WRITE */
int errors;
unsigned long sector; /* 起始扇区 */
unsigned long nr_sectors; /* 扇区数 */
char * buffer; /* 数据缓冲区,读盘后数据放在内存中的位置 */
struct task_struct * waiting; /* 哪个进程发起的请求 */
struct buffer_head * bh; /* 缓冲区头指针 */
struct request * next; /* 链表,指向下一个 */
};
void blk_dev_init(void)
{
int i;
for (i=0 ; i<NR_REQUEST ; i++) {
request[i].dev = -1;
request[i].next = NULL;
}
}
struct tss_struct {
long back_link; /* 16 high bits zero */
long esp0;
long ss0; /* 16 high bits zero */
long esp1;
long ss1; /* 16 high bits zero */
long esp2;
long ss2; /* 16 high bits zero */
long cr3;
long eip;
long eflags;
long eax,ecx,edx,ebx;
long esp;
long ebp;
long esi;
long edi;
long es; /* 16 high bits zero */
long cs; /* 16 high bits zero */
long ss; /* 16 high bits zero */
long ds; /* 16 high bits zero */
long fs; /* 16 high bits zero */
long gs; /* 16 high bits zero */
long ldt; /* 16 high bits zero */
long trace_bitmap; /* bits: trace 0, bitmap 16-31 */
struct i387_struct i387;
};
struct task_struct {
/* these are hardcoded - don't touch */
long state; /* -1 unrunnable, 0 runnable, >0 stopped */
long counter;
long priority;
long signal;
struct sigaction sigaction[32];
long blocked; /* bitmap of masked signals */
/* various fields */
int exit_code;
unsigned long start_code,end_code,end_data,brk,start_stack;
long pid,father,pgrp,session,leader;
unsigned short uid,euid,suid;
unsigned short gid,egid,sgid;
long alarm;
long utime,stime,cutime,cstime,start_time;
unsigned short used_math;
/* file system info */
int tty; /* -1 if no tty, so it must be signed */
unsigned short umask;
struct m_inode * pwd;
struct m_inode * root;
struct m_inode * executable;
unsigned long close_on_exec;
struct file * filp[NR_OPEN];
/* ldt for this task 0 - zero 1 - cs 2 - ds&ss */
struct desc_struct ldt[3];
/* tss for this task */
struct tss_struct tss;
};
.align 2
timer_interrupt:
push %ds # save ds,es and put kernel data space
push %es # into them. %fs is used by _system_call
push %fs
pushl %edx # we save %eax,%ecx,%edx as gcc doesn't
pushl %ecx # save those across function calls. %ebx
pushl %ebx # is saved as we use that in ret_sys_call
pushl %eax
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
movl $0x17,%eax
mov %ax,%fs
incl jiffies
movb $0x20,%al # EOI to interrupt controller #1
outb %al,$0x20
movl CS(%esp),%eax # 发生中断时处理器自动压入CS,这里读取出来,检查CPL(current privilege level)
andl $3,%eax # %eax is CPL (0 or 3, 0=supervisor)
pushl %eax # CPL 作为参数
call do_timer # 'do_timer(long CPL)' does everything from
addl $4,%esp # task switching to accounting ...
jmp ret_from_sys_call
void do_timer(long cpl)
{
extern int beepcount;
extern void sysbeepstop(void);
if (beepcount)
if (!--beepcount)
sysbeepstop();
if (cpl)
current->utime++;
else
current->stime++;
...
if (current_DOR & 0xf0)
do_floppy_timer();
if ((--current->counter)>0) return; // 时间片未到0,返回
current->counter=0;
if (!cpl) return; // 如果当前是内核态则不调度
schedule(); // 时间片到0,且为用户模式,进行调度。
}
#define FIRST_TASK task[0]
#define LAST_TASK task[NR_TASKS-1]
void schedule(void)
{
int i,next,c;
struct task_struct ** p;
/* check alarm, wake up any interruptible tasks that have got a signal */
for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
if (*p) {
if ((*p)->alarm && (*p)->alarm < jiffies) {
(*p)->signal |= (1<<(SIGALRM-1));
(*p)->alarm = 0;
}
// (*p)->signal 表示待处理的信号
// ~(_BLOCKABLE & (*p)->blocked)) 表示未被屏蔽的信号
// TASK_INTERRUPTIBLE: 处于睡眠状态,并且等待某个信号
if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
(*p)->state==TASK_INTERRUPTIBLE)
(*p)->state=TASK_RUNNING;
}
/* this is the scheduler proper: */
while (1) {
c = -1; // 所有进程剩余时间片的最大值
next = 0; // 最大剩余时间片进程的索引
i = NR_TASKS;
p = &task[NR_TASKS];
while (--i) {
if (!*--p)
continue;
if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
c = (*p)->counter, next = i;
}
if (c) break; // 如果存在一个剩余时间片不为0的任务,则break,否则设置所有任务的剩余时间片
for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
if (*p)
(*p)->counter = ((*p)->counter >> 1) +
(*p)->priority;
}
// 切换到目标进程
switch_to(next);
}
#define FIRST_TSS_ENTRY 4
#define FIRST_LDT_ENTRY (FIRST_TSS_ENTRY+1)
#define _TSS(n) ((((unsigned long) n)<<4)+(FIRST_TSS_ENTRY<<3))
// FIRST_TSS_ENTRY<<3表示左移3位,因为TI和RPL总共占3位
// n<<4,实际上索引加上 n<<1,因为一个进程占一个TSS和一个LDT
#define _LDT(n) ((((unsigned long) n)<<4)+(FIRST_LDT_ENTRY<<3))
/*
* switch_to(n) should switch tasks to task nr n, first
* checking that n isn't the current task, in which case it does nothing.
* This also clears the TS-flag if the task we switched to has used
* tha math co-processor latest.
*/
#define switch_to(n) {\
struct {long a,b;} __tmp; \
__asm__("cmpl %%ecx,current\n\t" # 先比较是不是要切换到当前任务 \
"je 1f\n\t" # 如果是就什么都不做 \
"movw %%dx,%1\n\t" # 把TSS赋给__tmp.b \
"xchgl %%ecx,current\n\t" # 交换 ecx 和 current \
"ljmp *%0\n\t" # 将__tmp.b作为段选择子 \
"cmpl %%ecx,last_task_used_math\n\t" \
"jne 1f\n\t" \
"clts\n" \
"1:" \
::"m" (*&__tmp.a),"m" (*&__tmp.b), \
"d" (_TSS(n)),"c" ((long) task[n])); \
}
/*
* Well, here is one of the most complicated functions in mm. It
* copies a range of linerar addresses by copying only the pages.
* Let's hope this is bug-free, 'cause this one I don't want to debug :-)
*
* Note! We don't copy just any chunks of memory - addresses have to
* be divisible by 4Mb (one page-directory entry), as this makes the
* function easier. It's used only by fork anyway.
*
* NOTE 2!! When from==0 we are copying kernel space for the first
* fork(). Then we DONT want to copy a full page-directory entry, as
* that would lead to some serious memory waste - we just copy the
* first 160 pages - 640kB. Even that is more than we need, but it
* doesn't take any more memory - we don't copy-on-write in the low
* 1 Mb-range, so the pages can be shared with the kernel. Thus the
* special case for nr=xxxx.
*/
int copy_page_tables(unsigned long from,unsigned long to,long size)
{
unsigned long * from_page_table;
unsigned long * to_page_table;
unsigned long this_page;
unsigned long * from_dir, * to_dir;
unsigned long nr;
if ((from&0x3fffff) || (to&0x3fffff))
panic("copy_page_tables called with wrong alignment");
from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
to_dir = (unsigned long *) ((to>>20) & 0xffc);
size = ((unsigned) (size+0x3fffff)) >> 22;
for( ; size-->0 ; from_dir++,to_dir++) {
if (1 & *to_dir)
panic("copy_page_tables: already exist");
if (!(1 & *from_dir))
continue;
from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
if (!(to_page_table = (unsigned long *) get_free_page())) // 分配一个页作为页表
return -1; /* Out of memory, see freeing */
*to_dir = ((unsigned long) to_page_table) | 7; // 页表地址填入页目录
nr = (from==0)?0xA0:1024;
for ( ; nr-- > 0 ; from_page_table++,to_page_table++) { // 从from_page_table拷贝页表项到to_page_table
this_page = *from_page_table;
if (!(1 & this_page))
continue;
this_page &= ~2; // 设置为只读,实现Copy On Write,新老进程一开始共享同一个物理内存空间,如果只有读,那就相安无事,但如果任何一方有写操作,由于页面是只读的,将触发缺页中断,然后就会分配一块新的物理内存给产生写操作的那个进程,此时这一块内存就不再共享了。
*to_page_table = this_page;
if (this_page > LOW_MEM) {
*from_page_table = this_page;
this_page -= LOW_MEM;
this_page >>= 12;
mem_map[this_page]++;
}
}
}
invalidate();
return 0;
}
void do_no_page(unsigned long error_code,unsigned long address)
{
int nr[4];
unsigned long tmp;
unsigned long page;
int block,i;
// 对齐到 4KB
address &= 0xfffff000;
// 计算相对于进程基址的偏移
tmp = address - current->start_code;
if (!current->executable || tmp >= current->end_data) {
get_empty_page(address);
return;
}
if (share_page(tmp))
return;
if (!(page = get_free_page()))
oom();
/* remember that 1 block is used for header */
// 计算这个地址在文件中的哪个数据块
block = 1 + tmp/BLOCK_SIZE;
// 计算文件中的4个块在设备中的位置
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(current->executable,block);
// 从设备连续读取4KB到page中
bread_page(page,current->executable->i_dev,nr);
i = tmp + 4096 - current->end_data;
tmp = page + 4096;
while (i-- > 0) {
tmp--;
*(char *)tmp = 0;
}
if (put_page(page,address))
return;
free_page(page);
oom();
}
/*
* This function puts a page in memory at the wanted address.
* It returns the physical address of the page gotten, 0 if
* out of memory (either when trying to access page-table or
* page.)
*/
unsigned long put_page(unsigned long page,unsigned long address)
{
unsigned long tmp, *page_table;
/* NOTE !!! This uses the fact that _pg_dir=0 */
if (page < LOW_MEM || page >= HIGH_MEMORY)
printk("Trying to put page %p at %p\n",page,address);
if (mem_map[(page-LOW_MEM)>>12] != 1)
printk("mem_map disagrees with %p at %p\n",page,address);
page_table = (unsigned long *) ((address>>20) & 0xffc); // 找到页目录项
if ((*page_table)&1)
page_table = (unsigned long *) (0xfffff000 & *page_table);
else {
if (!(tmp=get_free_page())) // 页目录不存在,分配一页作为页表
return 0;
*page_table = tmp|7; // 页表地址写入页目录项
page_table = (unsigned long *) tmp;
}
page_table[(address>>12) & 0x3ff] = page | 7; // 将新分配的页写入页表项中
/* no need for invalidate */
return page;
}