diff options
Diffstat (limited to 'boot')
| -rw-r--r-- | boot/startup-x86_64.s | 445 |
1 files changed, 445 insertions, 0 deletions
diff --git a/boot/startup-x86_64.s b/boot/startup-x86_64.s new file mode 100644 index 0000000..dfc6262 --- /dev/null +++ b/boot/startup-x86_64.s @@ -0,0 +1,445 @@ +; THis code is copied from the OSC lab project OOStuBS @ TU Dresden + +;****************************************************************************** +;* Operating-System Construction * +;*----------------------------------------------------------------------------* +;* * +;* S T A R T U P . A S M * +;* * +;*----------------------------------------------------------------------------* +;* The 'startup' function is the entry point for the whole system. Switching * +;* to 32-bit Protected Mode has already been done (by a boot loader that runs * +;* before). Here we prepare everything to be able to start running C++ code * +;* in 64-bit Long Mode as quickly as possible. * +;****************************************************************************** + +; +; Constants +; + +; stack for the main function (renamed to _entry()) +STACKSIZE: equ 65536 + +; video memory base address +CGA: equ 0xB8000 + +; 512 GB maximum RAM size for page table +; DON'T MODIFY THIS UNLESS YOU UPDATE THE setup_paging accordingly +MAX_MEM: equ 512 + +; Multiboot constants +MULTIBOOT_PAGE_ALIGN equ 1<<0 +MULTIBOOT_MEMORY_INFO equ 1<<1 + +; magic number for Multiboot +MULTIBOOT_HEADER_MAGIC equ 0x1badb002 + +; Multiboot flags (ELF specific!) +MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO +MULTIBOOT_HEADER_CHKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) +MULTIBOOT_EAX_MAGIC equ 0x2badb002 + + +; +; System +; + +; exported symbols +[GLOBAL startup] +[GLOBAL idt] +[GLOBAL pml4] +[GLOBAL pdp] + +; functions from the C parts of the system +[EXTERN _entry] +[EXTERN guardian] + +; addresses provided by the compiler +[EXTERN ___BSS_START__] +[EXTERN ___BSS_END__] + +[SECTION .text] + +; +; system start, part 1 (in 32-bit Protected Mode) +; +; GDT and page-table initialization, and switch to 64-bit Long Mode +; + +[BITS 32] + + jmp startup ; jump over Multiboot header + align 4 ; 32-bit alignment for GRUB + +; +; Multiboot header for starting with GRUB or QEMU (w/o BIOS) +; + + dd MULTIBOOT_HEADER_MAGIC + dd MULTIBOOT_HEADER_FLAGS + dd MULTIBOOT_HEADER_CHKSUM + dd 0 ; header_addr (gets ignored) + dd 0 ; load_addr (gets ignored) + dd 0 ; load_end_addr (gets ignored) + dd 0 ; bss_end_addr (gets ignored) + dd 0 ; entry_addr (gets ignored) + dd 0 ; mode_type (gets ignored) + dd 0 ; width (gets ignored) + dd 0 ; height (gets ignored) + dd 0 ; depth (gets ignored) + +; +; GRUB entry point +; + +startup: + cld ; GCC-compiled code expects the direction flag to be 0 + cli ; disable interrupts + lgdt [gdt_80] ; set new segment descriptors + + ; global data segment + mov eax, 3 * 0x8 + ; 0x8 is the length of each entry + ; these registers point to 4th entry the GDT (see also the code there) + ; in x86 long mode these are dummy pointers + ; which are not actually used in addressing. (don't use segmentation at all) + ; all the addresses are physical addresses from 0. + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + + ; define stack + mov ss, ax + mov esp, init_stack+STACKSIZE + +; +; Switch to 64-bit Long Mode +; + +init_longmode: + ; activate address extension (PAE) + mov eax, cr4 + or eax, 1 << 5 + mov cr4, eax + + ; create page table (mandatory) + call setup_paging + + ; activate Long Mode (for now in compatibility mode) + mov ecx, 0x0C0000080 ; select EFER (Extended Feature Enable Register) + rdmsr + or eax, 1 << 8 ; LME (Long Mode Enable) + wrmsr + + ; activate paging + mov eax, cr0 + or eax, 1 << 31 + mov cr0, eax + + ; jump to 64-bit code segment -> full activation of Long Mode + jmp 2 * 0x8 : longmode_start + +; +; Provisional identical page mapping, using 1G huge page (therefore only 2 table +; levels needed) +; + +setup_paging: + ; PML4 (Page Map Level 4 / 1st level) + mov eax, pdp + or eax, 0xf + mov dword [pml4+0], eax + mov dword [pml4+4], 0 + ; PDPE flags + mov eax, 0x0 | 0x87 ; start-address bytes bit [30:31] + flags + mov ebx, 0 ; start-address bytes bit [32:38] + mov ecx, 0 +fill_tables2: + ; fill one single PDP table, with 1G pages, 512 PDPE maps to 512 GB + cmp ecx, MAX_MEM + je fill_tables2_done + mov dword [pdp + 8*ecx + 0], eax ; low bytes + mov dword [pdp + 8*ecx + 4], ebx ; high bytes + add eax, 0x40000000 ; 1G per page + adc ebx, 0 ; overflow? -> increment higher-order half of the address + inc ecx + ja fill_tables2 +fill_tables2_done: + ; set base pointer to PML4 + mov eax, pml4 + mov cr3, eax + ret + +; +; system start, part 2 (in 64-bit Long Mode) +; +; This code clears the BSS segment and initializes IDT and PICs. Then the +; constructors of global C++ objects are called, and finally _entry() is run. +; + +longmode_start: +[BITS 64] + ; clear BSS + mov rdi, ___BSS_START__ +clear_bss: + mov byte [rdi], 0 + inc rdi + cmp rdi, ___BSS_END__ + jne clear_bss + + ; initialize IDT and PICs + call setup_idt + call reprogram_pics + call setup_cursor + + fninit ; activate FPU + + ; init SSE + ; NOTE: must NOT use sse target features for rust compiler, if sse not enabled here. + ;mov rax, cr0 + ;and rax, ~(1 << 2) ;clear coprocessor emulation CR0.EM + ;or rax, 1 << 1 ;set coprocessor monitoring CR0.MP + ;mov cr0, rax + ;mov rax, cr4 + ;or rax, 3 << 9 ;set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time + ;mov cr4, rax + + call _entry ; call the OS kernel's C / C++ part + cli ; Usually we should not get here. + hlt + +; +; Interrupt handling +; + +; template for header for each interrupt-handling routine +; TODO: vectors should have their dedicated place +%macro wrapper 1 +wrapper_%1: + push rbp + mov rbp, rsp + push rax + mov al, %1 + jmp wrapper_body +%endmacro + +; automatic generation of 256 interrupt-handling routines, based on above macro +%assign i 0 +%rep 256 +wrapper i +%assign i i+1 +%endrep + +; common handler body +wrapper_body: + ; GCC expects the direction flag to be 0 + cld + ; save volatile registers + push rcx + push rdx + push rdi + push rsi + push r8 + push r9 + push r10 + push r11 + + ; the generated wrapper only gives us 8 bits, mask the rest + and rax, 0xff + + ; pass interrupt number as the first parameter + mov rdi, rax + ; call the interrupt handler wrapper here. + ; TODO implement it in rust then uncomment the line + call guardian + + ; restore volatile registers + pop r11 + pop r10 + pop r9 + pop r8 + pop rsi + pop rdi + pop rdx + pop rcx + + ; ... also those from the wrapper + pop rax + pop rbp + + ; done + iretq + +; +; Relocating of IDT entries and setting IDTR +; + +setup_idt: + mov rax, wrapper_0 + + ; bits 0..15 -> ax, 16..31 -> bx, 32..64 -> edx + mov rbx, rax + mov rdx, rax + shr rdx, 32 + shr rbx, 16 + + mov r10, idt ; pointer to the actual interrupt gate + mov rcx, 255 ; counter +.loop: + add [r10+0], ax + adc [r10+6], bx + adc [r10+8], edx + add r10, 16 + dec rcx + jge .loop + + lidt [idt_descr] + ret + +; +; make cursor blink (GRUB disables this) +; + +setup_cursor: + mov al, 0x0a + mov dx, 0x3d4 + out dx, al + call delay + mov dx, 0x3d5 + in al, dx + call delay + and al, 0xc0 + or al, 14 + out dx, al + call delay + mov al, 0x0b + mov dx, 0x3d4 + out dx, al + call delay + mov dx, 0x3d5 + in al, dx + call delay + and al, 0xe0 + or al, 15 + out dx, al + ret + +; +; Reprogram the PICs (programmable interrupt controllers) to have all 15 +; hardware interrupts in sequence in the IDT. +; + +reprogram_pics: + mov al, 0x11 ; ICW1: 8086 mode with ICW4 + out 0x20, al + call delay + out 0xa0, al + call delay + mov al, 0x20 ; ICW2 master: IRQ # offset (32) + out 0x21, al + call delay + mov al, 0x28 ; ICW2 slave: IRQ # offset (40) + out 0xa1, al + call delay + mov al, 0x04 ; ICW3 master: slaves with IRQs + out 0x21, al + call delay + mov al, 0x02 ; ICW3 slave: connected to master's IRQ2 + out 0xa1, al + call delay + mov al, 0x03 ; ICW4: 8086 mode and automatic EOI + out 0x21, al + call delay + out 0xa1, al + call delay + + mov al, 0xff ; Mask/disable hardware interrupts + out 0xa1, al ; in the PICs. Only interrupt #2, which + call delay ; serves for cascading both PICs, is + mov al, 0xfb ; allowed. + out 0x21, al + + ret + +; +; Short delay for in/out instructions +; + +delay: + jmp .L2 +.L2: + ret + +[SECTION .data] + +; +; Segment descriptors +; + +gdt: + dw 0,0,0,0 ; NULL descriptor + + ; 32-bit code segment descriptor + dw 0xFFFF ; 4Gb - (0x100000*0x1000 = 4Gb) + dw 0x0000 ; base address=0 + dw 0x9A00 ; code read/exec + dw 0x00CF ; granularity=4096, 386 (+5th nibble of limit) + + ; 64-bit code segment descriptor + dw 0xFFFF ; 4Gb - (0x100000*0x1000 = 4Gb) + dw 0x0000 ; base address=0 + dw 0x9A00 ; code read/exec + dw 0x00AF ; granularity=4096, 386 (+5th nibble of limit), Long-Mode + + ; data segment descriptor + dw 0xFFFF ; 4Gb - (0x100000*0x1000 = 4Gb) + dw 0x0000 ; base address=0 + dw 0x9200 ; data read/write + dw 0x00CF ; granularity=4096, 386 (+5th nibble of limit) + +gdt_80: + dw 4*8 - 1 ; GDT limit=24, 4 GDT entries - 1 + dq gdt ; GDT address + +; +; Interrupt descriptor table with 256 entries +; + +idt: +%macro idt_entry 1 + dw (wrapper_%1 - wrapper_0) & 0xffff ; offset 0 .. 15 + dw 0x0000 | 0x8 * 2 ; selector points to 64-bit code segment selector (GDT) + dw 0x8e00 ; 8 -> interrupt is present, e -> 80386 32-bit interrupt gate + dw ((wrapper_%1 - wrapper_0) & 0xffff0000) >> 16 ; offset 16 .. 31 + dd ((wrapper_%1 - wrapper_0) & 0xffffffff00000000) >> 32 ; offset 32..63 + dd 0x00000000 ; reserved +%endmacro + +%assign i 0 +%rep 256 +idt_entry i +%assign i i+1 +%endrep + +idt_descr: + dw 256*8 - 1 ; 256 entries + dq idt + +[SECTION .bss] + +global init_stack:data (init_stack.end - init_stack) +init_stack: + resb STACKSIZE +.end: + +[SECTION .global_pagetable] + + +pml4: + resb 4096 + alignb 4096 + +pdp: + resb 4096 + alignb 4096 |
