- 論壇徽章:
- 0
|
本帖最后由 cluter 于 2011-05-28 01:01 編輯
只解釋下關鍵代碼的流程,附件里是關于缺頁處理思路更清晰的一個整理,無代碼的。- //*********************************缺頁異常處理函數*******************************************
- do_page_fault(struct pt_regs *regs, unsigned long error_code)
- {
-
- // Page-fault handler (abridged listing). Start from the descriptor of the task running on this CPU (current),
- // then take that task's memory descriptor (tsk->mm).
- tsk = current;
- mm = tsk->mm;
- /* Get the faulting address: */
- // read_cr2() supplies the address that faulted
- address = read_cr2();
- /*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * This verifies that the fault happens in kernel space
- * (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 9) == 0.
- */
- // Case: the faulting address lies in kernel space
- if (unlikely(fault_in_kernel_space(address))) {
- // Check the error-code flags: none of PF_RSVD / PF_USER / PF_PROT may be set (kernel-mode, non-protection fault)
- if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
- // Access to kernel "non-contiguous" (vmalloc) memory:
- // per the author's notes, vmalloc_fault() copies the kernel page-table entry into the process's page table;
- // a missing kernel entry means a kernel bug and it returns -1 (vmalloc_fault() itself is not shown here)
- if (vmalloc_fault(address) >= 0)
- return;
- }
- // Reached when the flag check above fails (e.g. a user-mode access to a kernel address),
- // or when vmalloc_fault() returned a negative value (treated by the author as a kernel bug)
- bad_area_nosemaphore(regs, error_code, address);
- // Error handling done by bad_area_nosemaphore(), as summarized by the author (its body is not shown):
- // 1 fault from "user mode" --> the process is terminated
- // 2 fault from "kernel mode"
- // bad system-call argument ----> terminate the process / return a system-call error code
- // kernel bug ----> kernel panic
- return;
- }
- /*
- * If we're in an interrupt, have no user context or are running
- * in an atomic region then we must not take the fault:
- */
- // 1 in an interrupt there is no process context
- // 2 inside an atomic section
- // in neither case may the page fault be handled
- if (unlikely(in_atomic() || !mm)) {
- bad_area_nosemaphore(regs, error_code, address);
- return;
- }
- /*
- * When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in
- * the kernel and should generate an OOPS. Unfortunately, in the
- * case of an erroneous fault occurring in a code path which already
- * holds mmap_sem we will deadlock attempting to validate the fault
- * against the address space. Luckily the kernel only validly
- * references user space from well defined areas of code, which are
- * listed in the exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibility of a
- * deadlock. Attempt to lock the address space, if we cannot we then
- * validate the source. If this is invalid we can skip the address
- * space check, thus avoiding the deadlock:
- */
- // From here on the faulting address is known to be in "user space"
- if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
- // The fault was raised in "kernel mode": consult the exception table.
- // A legitimate kernel-mode fault on a user address must come from an instruction listed in the exception table.
- // If the instruction address is found, the fault may be demand paging or may still be an invalid access;
- // if it is not found, this is certainly a kernel error.
- if ((error_code & PF_USER) == 0 &&
- !search_exception_tables(regs->ip)) {
- // kernel bug path (the author labels this "kernel panic")
- bad_area_nosemaphore(regs, error_code, address);
- return;
- }
- down_read(&mm->mmap_sem);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case we'll have missed the might_sleep() from
- * down_read():
- */
- might_sleep();
- }
- // Look up the vma for address
- vma = find_vma(mm, address);
- // No vma at or above address: this is certainly an invalid access
- if (unlikely(!vma)) {
- bad_area(regs, error_code, address);
- return;
- }
- // 1 vma->vm_start <= address: the address is inside the vma, jump straight to the "valid access" stage
- // 2 vma->vm_start > address: the fault may still be legitimate if it was caused by the user pushing onto the stack
- if (likely(vma->vm_start <= address))
- goto good_area;
- // A stack push is only plausible if the vma is flagged as growing downwards
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, error_code, address);
- return;
- }
- // The extra stack-pointer check applies only when the fault came from "user mode"
- if (error_code & PF_USER) {
- /*
- * Accessing the stack below %sp is always a bug.
- * The large cushion allows instructions like enter
- * and pusha to work. ("enter $65535, $31" pushes
- * 32 pointers and then decrements %sp by 65535.)
- */
- // Validate the faulting address against the stack pointer sp
- if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
- bad_area(regs, error_code, address);
- return;
- }
- }
- // Grow the stack vma to cover address; a non-zero result is treated as failure
- if (unlikely(expand_stack(vma, address))) {
- bad_area(regs, error_code, address);
- return;
- }
- /*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
- good_area:
- write = error_code & PF_WRITE;
- // Check the access permissions against the vma once more
- if (unlikely(access_error(error_code, write, vma))) {
- bad_area_access_error(regs, error_code, address);
- return;
- }
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault:
- */
- // Hand the fault to handle_mm_fault() (author's note: "allocate a new page frame"); the result in 'fault' is not examined in this abridged listing
- fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
- up_read(&mm->mmap_sem);
- }
- //*******************************Access-permission check: returns 1 when the vma does not permit the faulting access, 0 when it does********************************************
- access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
- {
- // A fault caused by a write requires the vma to be writable (VM_WRITE)
- if (write) {
- /* write, present and write, not present: */
- if (unlikely(!(vma->vm_flags & VM_WRITE)))
- return 1;
- return 0;
- }
- /* read, present: */
- // PF_PROT set on a read fault: the page was present, so this is a protection violation
- // rather than a missing page, and it is reported as an access error
- if (unlikely(error_code & PF_PROT))
- return 1;
- /* read, not present: */
- // Page not present: the vma must allow at least one of read / exec / write
- if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
- return 1;
- return 0;
- }
復制代碼
do_page_fault處理流程.JPG (4.15 MB, 下載次數: 0)
下載附件
do_page_fault處理思路
2011-05-28 00:32 上傳
順便提出幾個問題:
在"內核態"訪問非連續內存時,為何要拷貝“內核頁表項”到“用戶頁表項”,為何不直接使用“內核頁表”?
對于一個4G的機器,用戶進程的頁框大部分分布在物理內存的哪個范圍?
為什么當物理內存大于1G的時候,就開始劃分出“高端內存”?
64bit的機器上不需要“高端內存”,這是為什么? |
|