
Start chapter 5

Stephen Marz 2019-11-04 13:14:28 -05:00
parent f13948d5c0
commit 49d8d6666a
15 changed files with 2215 additions and 0 deletions

risc_v/ch5/.cargo/config Normal file

@@ -0,0 +1,5 @@
[build]
target = "riscv64gc-unknown-none-elf"
[target.riscv64gc-unknown-none-elf]
linker = "riscv64-unknown-linux-gnu-gcc"

risc_v/ch5/Cargo.toml Normal file

@@ -0,0 +1,12 @@
[package]
name = "sos"
version = "0.1.0"
authors = ["Stephen Marz <stephen.marz@utk.edu>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
crate-type = ["staticlib"]
[dependencies]

risc_v/ch5/Makefile Normal file

@@ -0,0 +1,41 @@
#####
## BUILD
#####
CC=riscv64-unknown-linux-gnu-gcc
CFLAGS=-Wall -Wextra -pedantic -O0 -g
CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions
CFLAGS+=-march=rv64gc -mabi=lp64
INCLUDES=
LINKER_SCRIPT=-Tsrc/lds/virt.lds
TYPE=debug
RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE)
LIBS=-L$(RUST_TARGET)
SOURCES_ASM=$(wildcard src/asm/*.S)
LIB=-lsos -lgcc
OUT=os.elf
#####
## QEMU
#####
QEMU=qemu-system-riscv64
MACH=virt
CPU=rv64
CPUS=4
MEM=128M
DISK=hdd.dsk
# DRIVE= -drive if=none,format=raw,file=$(DISK),id=foo -device virtio-blk-device,scsi=off,drive=foo
DRIVE=
all:
cargo build
$(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB)
run: all
$(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) $(DRIVE) -nographic -serial mon:stdio -bios none -kernel $(OUT)
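# Illustrative note (not in the original): `make run` first invokes cargo to build
# libsos.a under target/riscv64gc-unknown-none-elf/debug, links it with the assembly
# sources into os.elf using the linker script, then boots os.elf on QEMU's "virt"
# machine with 4 harts, 128M of RAM, and the UART wired to stdio.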
.PHONY: clean
clean:
cargo clean
rm -f $(OUT)

risc_v/ch5/make_hdd.sh Executable file

@@ -0,0 +1,3 @@
#!/bin/sh
dd if=/dev/zero of=hdd.dsk bs=1M count=32

risc_v/ch5/src/asm/boot.S Normal file

@@ -0,0 +1,143 @@
# boot.S
# bootloader for SoS
# Stephen Marz
# 8 February 2019
# Disable generation of compressed instructions.
.option norvc
# Define a .text.init section. The .text.init is put at the
# starting address so that the entry _start is put at the RISC-V
# address 0x8000_0000.
.section .text.init
# Execution starts here.
.global _start
_start:
# Disable linker instruction relaxation for the `la` instruction below.
# This prevents the assembler from assuming that `gp` is already initialized.
# This causes the value stored in `gp` to be calculated from `pc`.
# The job of the global pointer is to give the linker the ability to address
# memory relative to GP instead of as an absolute address.
.option push
.option norelax
la gp, _global_pointer
.option pop
# SATP should be zero, but let's make sure. Each HART has its own
# SATP register.
csrw satp, zero
# Any hardware threads (harts) that are not bootstrapping
# need to wait for an IPI
csrr t0, mhartid
bnez t0, 3f
# Set all bytes in the BSS section to zero.
la a0, _bss_start
la a1, _bss_end
bgeu a0, a1, 2f
1:
sd zero, (a0)
addi a0, a0, 8
bltu a0, a1, 1b
2:
# The stack grows downward (from higher addresses to lower), so we put the
# stack pointer at the very end (highest address) of the stack range.
la sp, _stack_end
# Setting `mstatus` register:
# 0b11 << 11: Machine's previous protection mode is 3 (MPP=3 [Machine]).
li t0, 0b11 << 11
csrw mstatus, t0
# Do not allow interrupts while running kinit
csrw mie, zero
# Machine's exception program counter (MEPC) is set to `kinit`.
la t1, kinit
csrw mepc, t1
# Set the return address so that, when kinit() returns, we land at label 2
# below and switch into supervisor mode.
la ra, 2f
# We use mret here so that the mstatus register is properly updated.
mret
2:
# We set the return address (ra above) to this label. When kinit() is finished
# in Rust, it will return here.
# Setting `mstatus` (supervisor status) register:
# 0b01 << 11 : Previous protection mode is 1 (MPP=01 [Supervisor]).
# 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled])
# 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]).
# We set the "previous" bits because the mret will write the current bits
# with the previous bits.
li t0, (0b01 << 11) | (1 << 7) | (1 << 5)
csrw mstatus, t0
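# (Illustrative, not in the original: this value works out to 0x8A0, i.e.
# MPP=01, MPIE=1, SPIE=1.)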
# Machine's trap vector base address is set to `m_trap_vector`, for
# "machine" trap vector.
la t2, m_trap_vector
csrw mtvec, t2
# Setting `stvec` (supervisor trap vector) register:
# Essentially this is a function pointer, but the last two bits can be 00 or 01
# 00 : All exceptions set pc to BASE
# 01 : Asynchronous interrupts set pc to BASE + 4 x scause
# la t3, s_trap_vector
# csrw stvec, t3
# Jump to kmain. We put the MPP = 01 for supervisor mode, so after
# mret, we will jump to kmain in supervisor mode.
la t1, kmain
csrw mepc, t1
# Setting `mie` (machine interrupt enable) register:
# 1 << 3  : Machine software interrupt enable (MSIE=1 [Enabled])
# 1 << 7  : Machine timer interrupt enable (MTIE=1 [Enabled])
# 1 << 11 : Machine external interrupt enable (MEIE=1 [Enabled])
# 0x888 = MEIE | MTIE | MSIE
li t2, 0x888
csrw mie, t2
mret
3:
# Parked harts go here. We need to set these up so that they
# awaken only when they receive a software interrupt,
# which we're going to call the SIPI (Software Intra-Processor Interrupt).
# We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT)
# Which is calculated by: base_address + hart * 4
# where base address is 0x0200_0000 (MMIO CLINT base address)
# We only use additional harts to run user-space programs, although this may
# change.
# We divide up the stack so the harts aren't clobbering one another.
la sp, _stack_end
li t0, 0x10000
csrr a0, mhartid
mul t0, t0, a0
sub sp, sp, t0
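# (Illustrative, not in the original: hart 1 ends up with sp = _stack_end - 0x10000,
# hart 2 with sp = _stack_end - 0x20000, and so on.)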
# The parked harts will be put into machine mode with interrupts enabled.
li t0, 0b11 << 11 | (1 << 7)
csrw mstatus, t0
# Allow for MSIP (Software interrupt). We will write the MSIP from hart #0 to
# awaken these parked harts.
li t3, (1 << 3)
csrw mie, t3
# Machine's exception program counter (MEPC) is set to the Rust initialization
# code and waiting loop.
la t1, kinit_hart
csrw mepc, t1
# Machine's trap vector base address is set to `m_trap_vector`, for
# "machine" trap vector. The Rust initialization routines will give each
# hart its own trap frame. We can use the same trap function and distinguish
# between each hart by looking at the trap frame.
la t2, m_trap_vector
csrw mtvec, t2
# Whenever our hart is done initializing, we want it to return to the waiting
# loop, which is just below mret.
la ra, 4f
# We use mret here so that the mstatus register is properly updated.
mret
4:
# wfi = wait for interrupt. This is a hint to the hart that it can power down
# until an interrupt arrives. However, the RISC-V specification allows wfi
# to do nothing. Either way, with QEMU, this will save some host CPU!
wfi
j 4b

risc_v/ch5/src/asm/mem.S Normal file

@@ -0,0 +1,41 @@
// mem.S
// Importation of linker symbols
.section .rodata
.global HEAP_START
HEAP_START: .dword _heap_start
.global HEAP_SIZE
HEAP_SIZE: .dword _heap_size
.global TEXT_START
TEXT_START: .dword _text_start
.global TEXT_END
TEXT_END: .dword _text_end
.global DATA_START
DATA_START: .dword _data_start
.global DATA_END
DATA_END: .dword _data_end
.global RODATA_START
RODATA_START: .dword _rodata_start
.global RODATA_END
RODATA_END: .dword _rodata_end
.global BSS_START
BSS_START: .dword _bss_start
.global BSS_END
BSS_END: .dword _bss_end
.global KERNEL_STACK_START
KERNEL_STACK_START: .dword _stack_start
.global KERNEL_STACK_END
KERNEL_STACK_END: .dword _stack_end

risc_v/ch5/src/asm/trap.S Normal file

@@ -0,0 +1,101 @@
# trap.S
# Trap handler and global context
# Steve Operating System
# Stephen Marz
# 24 February 2019
.option norvc
.altmacro
.set NUM_GP_REGS, 32 # Number of registers per context
.set NUM_FP_REGS, 32
.set REG_SIZE, 8 # Register size (in bytes)
.set MAX_CPUS, 8 # Maximum number of CPUs
# Use macros for saving and restoring multiple registers
.macro save_gp i, basereg=t6
sd x\i, ((\i)*REG_SIZE)(\basereg)
.endm
.macro load_gp i, basereg=t6
ld x\i, ((\i)*REG_SIZE)(\basereg)
.endm
.macro save_fp i, basereg=t6
fsd f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg)
.endm
.macro load_fp i, basereg=t6
fld f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg)
.endm
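# Illustrative expansion (not in the original): `save_gp 5` expands to
# `sd x5, 40(t6)`, storing register x5 at byte offset 5 * REG_SIZE = 40
# into the trap frame pointed to by t6.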
.section .text
.global m_trap_vector
# The mtvec register requires its base address to be aligned to 4 bytes,
# since the last two bits encode the trap mode rather than the address.
# (Note: .align 4 below aligns to 2^4 = 16 bytes, which satisfies this.)
.align 4
m_trap_vector:
# All registers are volatile here, we need to save them
# before we do anything.
csrrw t6, mscratch, t6
# csrrw will atomically swap t6 into mscratch and the old
# value of mscratch into t6. This is nice because we just
# switched values and didn't destroy anything -- all atomically!
# in cpu.rs we have a structure of:
# 32 gp regs 0
# 32 fp regs 256
# SATP register 512
# Trap stack 520
# CPU HARTID 528
# We use t6 as the temporary register because it is the very
# bottom register (x31)
.set i, 1
.rept 30
save_gp %i
.set i, i+1
.endr
# Save the actual t6 register, which we swapped into
# mscratch
mv t5, t6
csrr t6, mscratch
save_gp 31, t5
# Restore the kernel trap frame into mscratch
csrw mscratch, t5
# Get ready to go into Rust (trap.rs)
# We don't want to write into the user's stack or whomever
# messed with us here.
csrr a0, mepc
csrr a1, mtval
csrr a2, mcause
csrr a3, mhartid
csrr a4, mstatus
mv a5, t5
ld sp, 520(a5)
call m_trap
# When we get here, we've returned from m_trap, restore registers
# and return.
# m_trap will return the return address via a0.
csrw mepc, a0
# Now load the trap frame back into t6
csrr t6, mscratch
# Restore all GP registers
.set i, 1
.rept 31
load_gp %i
.set i, i+1
.endr
# Since we ran this loop 31 times starting with i = 1,
# the last one loaded t6 back to its original value.
mret
.global make_syscall
make_syscall:
ecall
ret

risc_v/ch5/src/cpu.rs Executable file

@@ -0,0 +1,163 @@
// cpu.rs
// CPU and CPU-related routines
// Also contains the kernel's trap frame
// Stephen Marz
// 14 October 2019
use core::ptr::null_mut;
#[repr(usize)]
pub enum SatpMode {
Off = 0,
Sv39 = 8,
Sv48 = 9,
}
#[repr(C)]
#[derive(Clone, Copy)]
pub struct TrapFrame {
pub regs: [usize; 32], // 0 - 255
pub fregs: [usize; 32], // 256 - 511
pub satp: usize, // 512 - 519
pub trap_stack: *mut u8, // 520
pub hartid: usize, // 528
}
impl TrapFrame {
pub const fn zero() -> Self {
TrapFrame { regs: [0; 32],
fregs: [0; 32],
satp: 0,
trap_stack: null_mut(),
hartid: 0, }
}
}
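// Layout note (illustrative, not in the original source): the byte offsets used
// by trap.S assume regs starts at 0, fregs at 256 (32 * 8), satp at 512,
// trap_stack at 520 and hartid at 528, so size_of::<TrapFrame>() == 536.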
pub static mut KERNEL_TRAP_FRAME: [TrapFrame; 8] =
[TrapFrame::zero(); 8];
pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize {
(mode as usize) << 60
| (asid & 0xffff) << 44
| (addr >> 12) & 0xff_ffff_ffff
}
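// Illustrative example (not in the original source): a root page table at
// physical address 0x8020_7000 with ASID 0 in Sv39 mode gives
// build_satp(SatpMode::Sv39, 0, 0x8020_7000)
//   == (8 << 60) | (0 << 44) | 0x80207 == 0x8000_0000_0008_0207.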
pub fn mhartid_read() -> usize {
unsafe {
let rval;
asm!("csrr $0, mhartid" :"=r"(rval));
rval
}
}
pub fn mstatus_write(val: usize) {
unsafe {
asm!("csrw mstatus, $0" ::"r"(val));
}
}
pub fn mstatus_read() -> usize {
unsafe {
let rval;
asm!("csrr $0, mstatus":"=r"(rval));
rval
}
}
pub fn stvec_write(val: usize) {
unsafe {
asm!("csrw stvec, $0" ::"r"(val));
}
}
pub fn stvec_read() -> usize {
unsafe {
let rval;
asm!("csrr $0, stvec" :"=r"(rval));
rval
}
}
pub fn mscratch_write(val: usize) {
unsafe {
asm!("csrw mscratch, $0" ::"r"(val));
}
}
pub fn mscratch_read() -> usize {
unsafe {
let rval;
asm!("csrr $0, mscratch" : "=r"(rval));
rval
}
}
pub fn mscratch_swap(to: usize) -> usize {
unsafe {
let from;
asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to));
from
}
}
pub fn sscratch_write(val: usize) {
unsafe {
asm!("csrw sscratch, $0" ::"r"(val));
}
}
pub fn sscratch_read() -> usize {
unsafe {
let rval;
asm!("csrr $0, sscratch" : "=r"(rval));
rval
}
}
pub fn sscratch_swap(to: usize) -> usize {
unsafe {
let from;
asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to));
from
}
}
pub fn sepc_write(val: usize) {
unsafe {
asm!("csrw sepc, $0" :: "r"(val));
}
}
pub fn sepc_read() -> usize {
unsafe {
let rval;
asm!("csrr $0, sepc" :"=r"(rval));
rval
}
}
pub fn satp_write(val: usize) {
unsafe {
asm!("csrw satp, $0" :: "r"(val));
}
}
pub fn satp_read() -> usize {
unsafe {
let rval;
asm!("csrr $0, satp" :"=r"(rval));
rval
}
}
pub fn satp_fence(vaddr: usize, asid: usize) {
unsafe {
asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid));
}
}
pub fn satp_fence_asid(asid: usize) {
unsafe {
asm!("sfence.vma zero, $0" :: "r"(asid));
}
}

risc_v/ch5/src/kmem.rs Normal file

@@ -0,0 +1,273 @@
// kmem.rs
// Sub-page level: malloc-like allocation system
// Stephen Marz
// 7 October 2019
use crate::page::{align_val, zalloc, Table, PAGE_SIZE};
use core::{mem::size_of, ptr::null_mut};
#[repr(usize)]
enum AllocListFlags {
Taken = 1 << 63,
}
impl AllocListFlags {
pub fn val(self) -> usize {
self as usize
}
}
struct AllocList {
pub flags_size: usize,
}
impl AllocList {
pub fn is_taken(&self) -> bool {
self.flags_size & AllocListFlags::Taken.val() != 0
}
pub fn is_free(&self) -> bool {
!self.is_taken()
}
pub fn set_taken(&mut self) {
self.flags_size |= AllocListFlags::Taken.val();
}
pub fn set_free(&mut self) {
self.flags_size &= !AllocListFlags::Taken.val();
}
pub fn set_size(&mut self, sz: usize) {
let k = self.is_taken();
self.flags_size = sz & !AllocListFlags::Taken.val();
if k {
self.flags_size |= AllocListFlags::Taken.val();
}
}
pub fn get_size(&self) -> usize {
self.flags_size & !AllocListFlags::Taken.val()
}
}
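// Illustrative example (not in the original source): a free 64-byte chunk stores
// flags_size == 64; after set_taken() it becomes (1 << 63) | 64. get_size()
// masks off the Taken bit, so it returns 64 in both cases.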
// This is the head of the allocation. We start here when
// we search for a free memory location.
static mut KMEM_HEAD: *mut AllocList = null_mut();
// In the future, we will have on-demand pages
// so, we need to keep track of our memory footprint to
// see if we actually need to allocate more.
static mut KMEM_ALLOC: usize = 0;
static mut KMEM_PAGE_TABLE: *mut Table = null_mut();
// These functions are safe helpers around an unsafe
// operation.
pub fn get_head() -> *mut u8 {
unsafe { KMEM_HEAD as *mut u8 }
}
pub fn get_page_table() -> *mut Table {
unsafe { KMEM_PAGE_TABLE as *mut Table }
}
pub fn get_num_allocations() -> usize {
unsafe { KMEM_ALLOC }
}
/// Initialize kernel's memory
/// This is not to be used to allocate memory
/// for user processes. If that's the case, use
/// alloc/dealloc from the page crate.
pub fn init() {
unsafe {
// Allocate kernel pages (KMEM_ALLOC)
KMEM_ALLOC = 512;
let k_alloc = zalloc(KMEM_ALLOC);
assert!(!k_alloc.is_null());
KMEM_HEAD = k_alloc as *mut AllocList;
(*KMEM_HEAD).set_free();
(*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE);
KMEM_PAGE_TABLE = zalloc(1) as *mut Table;
}
}
/// Allocate sub-page level allocation based on bytes and zero the memory
pub fn kzmalloc(sz: usize) -> *mut u8 {
let size = align_val(sz, 3);
let ret = kmalloc(size);
if !ret.is_null() {
for i in 0..size {
unsafe {
(*ret.add(i)) = 0;
}
}
}
ret
}
/// Allocate sub-page level allocation based on bytes
pub fn kmalloc(sz: usize) -> *mut u8 {
unsafe {
let size = align_val(sz, 3) + size_of::<AllocList>();
let mut head = KMEM_HEAD;
// .add() uses pointer arithmetic, so we type-cast into a u8
// so that we multiply by an absolute size (KMEM_ALLOC *
// PAGE_SIZE).
let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE)
as *mut AllocList;
while head < tail {
if (*head).is_free() && size <= (*head).get_size() {
let chunk_size = (*head).get_size();
let rem = chunk_size - size;
(*head).set_taken();
if rem > size_of::<AllocList>() {
let next = (head as *mut u8).add(size)
as *mut AllocList;
// There is space remaining here.
(*next).set_free();
(*next).set_size(rem);
(*head).set_size(size);
}
else {
// If we get here, take the entire chunk
(*head).set_size(chunk_size);
}
return head.add(1) as *mut u8;
}
else {
// If we get here, what we saw wasn't a free
// chunk, move on to the next.
head = (head as *mut u8).add((*head).get_size())
as *mut AllocList;
}
}
}
// If we get here, we didn't find any free chunks--i.e. there isn't
// enough memory for this. TODO: Add on-demand page allocation.
null_mut()
}
/// Free a sub-page level allocation
pub fn kfree(ptr: *mut u8) {
unsafe {
if !ptr.is_null() {
let p = (ptr as *mut AllocList).offset(-1);
if (*p).is_taken() {
(*p).set_free();
}
// After we free, see if we can combine adjacent free
// spots to see if we can reduce fragmentation.
coalesce();
}
}
}
/// Merge smaller chunks into a bigger chunk
pub fn coalesce() {
unsafe {
let mut head = KMEM_HEAD;
let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE)
as *mut AllocList;
while head < tail {
let next = (head as *mut u8).add((*head).get_size())
as *mut AllocList;
if (*head).get_size() == 0 {
// If this happens, then we have a bad heap
// (double free or something). However, that
// will cause an infinite loop since the next
// pointer will never move beyond the current
// location.
break;
}
else if next >= tail {
// We calculated the next by using the size
// given as get_size(), however this could push
// us past the tail. In that case, the size is
// wrong, hence we break and stop doing what we
// need to do.
break;
}
else if (*head).is_free() && (*next).is_free() {
// This means we have adjacent blocks needing to
// be freed. So, we combine them into one
// allocation.
(*head).set_size(
(*head).get_size()
+ (*next).get_size(),
);
}
// If we get here, we might've moved. Recalculate new
// head.
head = (head as *mut u8).add((*head).get_size())
as *mut AllocList;
}
}
}
/// For debugging purposes, print the kmem table
pub fn print_table() {
unsafe {
let mut head = KMEM_HEAD;
let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE)
as *mut AllocList;
while head < tail {
println!(
"{:p}: Length = {:<10} Taken = {}",
head,
(*head).get_size(),
(*head).is_taken()
);
head = (head as *mut u8).add((*head).get_size())
as *mut AllocList;
}
}
}
// ///////////////////////////////////
// / GLOBAL ALLOCATOR
// ///////////////////////////////////
// The global allocator allows us to use the data structures
// in the core library, such as a linked list or B-tree.
// We want to use these sparingly since we have a coarse-grained
// allocator.
use core::alloc::{GlobalAlloc, Layout};
// The global allocator is a static constant to a global allocator
// structure. We don't need any members because we're using this
// structure just to implement alloc and dealloc.
struct OsGlobalAlloc;
unsafe impl GlobalAlloc for OsGlobalAlloc {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
// Hand the requested size straight to kzmalloc, which rounds it
// up to an 8-byte boundary and zeroes the allocation.
kzmalloc(layout.size())
}
unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
// We ignore layout since our allocator uses ptr_start -> last
// to determine the span of an allocation.
kfree(ptr);
}
}
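// Illustrative note (not in the original source): with the #[global_allocator]
// below in place, `Box::new(100u32)` or `vec![1, 2, 3]` in kmain() is routed by
// the alloc crate to kzmalloc() and kfree() above.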
#[global_allocator]
/// Technically, we don't need the {} at the end, but it
/// reveals that we're creating a new structure and not just
/// copying a value.
static GA: OsGlobalAlloc = OsGlobalAlloc {};
#[alloc_error_handler]
/// If for some reason alloc() in the global allocator gets null_mut(),
/// then we come here. This is a divergent function, so we call panic to
/// let the tester know what's going on.
pub fn alloc_error(l: Layout) -> ! {
panic!(
"Allocator failed to allocate {} bytes with {}-byte alignment.",
l.size(),
l.align()
);
}

risc_v/ch5/src/lds/virt.lds Normal file

@@ -0,0 +1,246 @@
/*
virt.lds
Linker script for outputting to RISC-V QEMU "virt" machine.
Stephen Marz
6 October 2019
*/
/*
riscv is the name of the architecture that the linker understands
for any RISC-V target (64-bit or 32-bit).
We will further refine this by using -mabi=lp64 and -march=rv64gc
*/
OUTPUT_ARCH( "riscv" )
/*
We're setting our entry point to a symbol
called _start which is inside of boot.S. This
essentially stores the address of _start as the
"entry point", or where CPU instructions should start
executing.
In the rest of this script, we are going to place _start
right at the beginning of 0x8000_0000 because this is where
the virtual machine and many RISC-V boards will start executing.
*/
ENTRY( _start )
/*
The MEMORY section will explain that we have "ram" that contains
a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable).
We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want
our memory to be read-only, and we're stating that it is NOT initialized
at the beginning.
The ORIGIN is the memory address 0x8000_0000. If we look at the virt
spec or the specification for the RISC-V HiFive Unleashed, this is the
starting memory address for our code.
Side note: There might be other boot ROMs at different addresses, but
their job is to get to this point.
Finally, LENGTH = 128M tells the linker that we have 128 megabytes of RAM.
The linker will double check this to make sure everything can fit.
The HiFive Unleashed has a lot more RAM than this, but for the virtual
machine, I went with 128M since I think that's enough RAM for now.
We can provide other pieces of memory, such as QSPI, or ROM, but we're
telling the linker script here that we have one pool of RAM.
*/
MEMORY
{
ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M
}
/*
PHDRS is short for "program headers", which we specify three here:
text - CPU instructions (executable sections)
data - Global, initialized variables
bss - Global, uninitialized variables (all will be set to 0 by boot.S)
The command PT_LOAD tells the linker that these sections will be loaded
from the file into memory.
We can actually stuff all of these into a single program header, but by
splitting it up into three, we can actually use the other PT_* commands
such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find
additional information.
However, for our purposes, every section will be loaded from the program
headers.
*/
PHDRS
{
text PT_LOAD;
data PT_LOAD;
bss PT_LOAD;
}
/*
We are now going to organize the memory based on which
section it is in. In assembly, we can change the section
with the ".section" directive. However, in C++ and Rust,
CPU instructions go into text, global constants go into
rodata, global initialized variables go into data, and
global uninitialized variables go into bss.
*/
SECTIONS
{
/*
The first part of our RAM layout will be the text section.
Since our CPU instructions are here, and our memory starts at
0x8000_0000, we need our entry point to line up here.
*/
.text : {
/*
PROVIDE allows me to access a symbol called _text_start so
I know where the text section starts in the operating system.
This should not move, but it is here for convenience.
The period '.' tells the linker to set _text_start to the
CURRENT location ('.' = current memory location). This current
memory location moves as we add things.
*/
PROVIDE(_text_start = .);
/*
We are going to layout all text sections here, starting with
.text.init. The asterisk in front of the parentheses means to match
the .text.init section of ANY object file. Otherwise, we can specify
which object file should contain the .text.init section, for example,
boot.o(.text.init) would specifically put the .text.init section of
our bootloader here.
Because we might want to change the name of our files, we'll leave it
with a *.
Inside the parentheses is the name of the section. I created my own
called .text.init to make 100% sure that the _start is put right at the
beginning. The linker will lay this out in the order it receives it:
.text.init first
all .text sections next
any .text.* sections last
.text.* means to match anything after .text. If we didn't already specify
.text.init, this would've matched here. The assembler and linker can place
things in "special" text sections, so we match any we might come across here.
*/
*(.text.init) *(.text .text.*)
/*
Again, with PROVIDE, we're providing a readable symbol called _text_end, which is
set to the memory address AFTER .text.init, .text, and .text.*'s have been added.
*/
PROVIDE(_text_end = .);
/*
The portion after the right brace is in an odd format. However, this is telling the
linker what memory portion to put it in. We labeled our RAM, ram, with the constraints
that it is writeable, allocatable, and executable. The linker will make sure with this
that we can do all of those things.
>ram - This just tells the linker script to put this entire section (.text) into the
ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead,
it is a symbol to let the linker know we want to put this in ram.
AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final
translation of a VMA (virtual memory address). With this linker script, we're loading
everything into its physical location. We'll let the kernel copy and sort out the
virtual memory. That's why >ram and AT>ram are continually the same thing.
:text - This tells the linker script to put this into the :text program header. We've only
defined three: text, data, and bss. In this case, we're telling the linker script
to go into the text section.
*/
} >ram AT>ram :text
/*
The global pointer allows the linker to position global variables and constants into
independent positions relative to the gp (global pointer) register. The globals start
after the text sections and are only relevant to the rodata, data, and bss sections.
*/
PROVIDE(_global_pointer = .);
/*
Most compilers create a rodata (read only data) section for global constants. However,
we're going to place ours in the text section. We can actually put this in :data, but
since the .text section is read-only, we can place it there.
NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done
at link time. Instead, when we program the memory management unit (MMU), we will be
able to choose which bits (R=read, W=write, X=execute) we want each memory segment
to be able to do.
*/
.rodata : {
PROVIDE(_rodata_start = .);
*(.rodata .rodata.*)
PROVIDE(_rodata_end = .);
/*
Again, we're placing the rodata section in the memory segment "ram" and we're putting
it in the :text program header. We don't have one for rodata anyway.
*/
} >ram AT>ram :text
.data : {
/*
. = ALIGN(4096) tells the linker to align the current memory location (which is
0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging
system's resolution is 4,096 bytes or 4 KiB.
*/
. = ALIGN(4096);
PROVIDE(_data_start = .);
/*
sdata and data are essentially the same thing. However, compilers usually use the
sdata sections for shorter, quicker loading sections. So, usually critical data
is loaded there. However, we're loading all of this in one fell swoop.
So, we're looking to put all of the following sections under the umbrella .data:
.sdata
.sdata.[anything]
.data
.data.[anything]
...in that order.
*/
*(.sdata .sdata.*) *(.data .data.*)
PROVIDE(_data_end = .);
} >ram AT>ram :data
.bss : {
PROVIDE(_bss_start = .);
*(.sbss .sbss.*) *(.bss .bss.*)
PROVIDE(_bss_end = .);
} >ram AT>ram :bss
/*
The following will be helpful when we allocate the kernel stack (_stack) and
determine where the heap begins and ends (_heap_start and _heap_start + _heap_size).
When we do memory allocation, we can use these symbols.
We use the symbols instead of hard-coding an address because this is a floating target.
As we add code, the heap moves farther down the memory and gets shorter.
_memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that it will take
whatever we set the origin of ram to. Otherwise, we'd have to change it more than once
if we ever stray away from 0x8000_0000 as our entry point.
*/
PROVIDE(_memory_start = ORIGIN(ram));
/*
Our kernel stack starts at the end of the bss segment (_bss_end). However, we're allocating
0x8000 bytes (32 KiB) to our kernel stack. This should be PLENTY of space. The reason
we add to the address is that the stack grows from higher memory to lower memory.
Therefore we set the stack pointer at the very end (highest address) of its allocated slot.
When we go to allocate from the stack, we'll subtract the number of bytes we need.
*/
PROVIDE(_stack_start = _bss_end);
PROVIDE(_stack_end = _stack_start + 0x8000);
PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram));
/*
Finally, our heap starts right after the kernel stack. This heap will be used mainly
to dole out memory for user-space applications. However, in some circumstances, it will
be used for kernel memory as well.
We don't align here because we let the kernel determine how it wants to do this.
*/
PROVIDE(_heap_start = _stack_end);
PROVIDE(_heap_size = _memory_end - _heap_start);
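/*
   Illustrative numbers (not in the original): with LENGTH = 128M,
   _memory_end = 0x8800_0000. If _bss_end happened to land at 0x8005_0000,
   the stack would occupy 0x8005_0000 .. 0x8005_8000 and the heap would span
   0x8005_8000 .. _memory_end, roughly 127 MiB.
*/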
}

risc_v/ch5/src/lib.rs Executable file

@@ -0,0 +1,400 @@
// Steve Operating System
// Stephen Marz
// 21 Sep 2019
#![no_std]
#![feature(panic_info_message,
asm,
allocator_api,
alloc_error_handler,
alloc_prelude,
const_raw_ptr_to_usize_cast)]
#[macro_use]
extern crate alloc;
// This is experimental and requires alloc_prelude as a feature
use alloc::prelude::v1::*;
// ///////////////////////////////////
// / RUST MACROS
// ///////////////////////////////////
#[macro_export]
macro_rules! print
{
($($args:tt)+) => ({
use core::fmt::Write;
let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+);
});
}
#[macro_export]
macro_rules! println
{
() => ({
print!("\r\n")
});
($fmt:expr) => ({
print!(concat!($fmt, "\r\n"))
});
($fmt:expr, $($args:tt)+) => ({
print!(concat!($fmt, "\r\n"), $($args)+)
});
}
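// Illustrative usage (not in the original source):
//   println!("hart {} online", 0);
// becomes print!("hart {} online\r\n", 0), which writes through the
// core::fmt::Write impl of a Uart at MMIO address 0x1000_0000.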
// ///////////////////////////////////
// / LANGUAGE STRUCTURES / FUNCTIONS
// ///////////////////////////////////
#[no_mangle]
extern "C" fn eh_personality() {}
#[panic_handler]
fn panic(info: &core::panic::PanicInfo) -> ! {
print!("Aborting: ");
if let Some(p) = info.location() {
println!(
"line {}, file {}: {}",
p.line(),
p.file(),
info.message().unwrap()
);
}
else {
println!("no information available.");
}
abort();
}
#[no_mangle]
extern "C" fn abort() -> ! {
loop {
unsafe {
asm!("wfi"::::"volatile");
}
}
}
// ///////////////////////////////////
// / CONSTANTS
// ///////////////////////////////////
// const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m";
// const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m";
// The following symbols come from asm/mem.S. We can use
// the symbols directly, but the address of the symbols
// themselves are their values, which can cause issues.
// Instead, I created doubleword values in mem.S in the .rodata and .data
// sections.
extern "C" {
static TEXT_START: usize;
static TEXT_END: usize;
static DATA_START: usize;
static DATA_END: usize;
static RODATA_START: usize;
static RODATA_END: usize;
static BSS_START: usize;
static BSS_END: usize;
static KERNEL_STACK_START: usize;
static KERNEL_STACK_END: usize;
static HEAP_START: usize;
static HEAP_SIZE: usize;
}
/// Identity map range
/// Takes a contiguous allocation of memory and maps it using PAGE_SIZE
/// This assumes that start <= end
pub fn id_map_range(root: &mut page::Table,
start: usize,
end: usize,
bits: i64)
{
let mut memaddr = start & !(page::PAGE_SIZE - 1);
let num_kb_pages =
(page::align_val(end, 12) - memaddr) / page::PAGE_SIZE;
// I named this num_kb_pages for future expansion when
// I decide to allow for GiB (2^30) and 2MiB (2^21) page
// sizes. However, the overlapping memory regions are causing
// nightmares.
for _ in 0..num_kb_pages {
page::map(root, memaddr, memaddr, bits, 0);
memaddr += 1 << 12;
}
}
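// Illustrative example (not in the original source): the UART mapping below,
// id_map_range(&mut root, 0x1000_0000, 0x1000_0100, EntryBits::ReadWrite.val()),
// rounds 0x1000_0100 up to 0x1000_1000, so exactly one 4 KiB page is
// identity-mapped for the UART registers.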
// ///////////////////////////////////
// / ENTRY POINT
// ///////////////////////////////////
#[no_mangle]
extern "C" fn kinit() {
// We created kinit, which runs in super-duper mode 3,
// also known as "machine mode".
// The job of kinit() is to get us into supervisor mode
// as soon as possible.
// Interrupts are disabled for the duration of kinit()
uart::Uart::new(0x1000_0000).init();
page::init();
kmem::init();
// Map heap allocations
let root_ptr = kmem::get_page_table();
let root_u = root_ptr as usize;
let mut root = unsafe { root_ptr.as_mut().unwrap() };
let kheap_head = kmem::get_head() as usize;
let total_pages = kmem::get_num_allocations();
println!();
println!();
unsafe {
println!("TEXT: 0x{:x} -> 0x{:x}", TEXT_START, TEXT_END);
println!("RODATA: 0x{:x} -> 0x{:x}", RODATA_START, RODATA_END);
println!("DATA: 0x{:x} -> 0x{:x}", DATA_START, DATA_END);
println!("BSS: 0x{:x} -> 0x{:x}", BSS_START, BSS_END);
println!(
"STACK: 0x{:x} -> 0x{:x}",
KERNEL_STACK_START, KERNEL_STACK_END
);
println!(
"HEAP: 0x{:x} -> 0x{:x}",
kheap_head,
kheap_head + total_pages * page::PAGE_SIZE
);
}
id_map_range(
&mut root,
kheap_head,
kheap_head + total_pages * page::PAGE_SIZE,
page::EntryBits::ReadWrite.val(),
);
// Using statics is inherently unsafe.
unsafe {
// Map heap descriptors
let num_pages = HEAP_SIZE / page::PAGE_SIZE;
id_map_range(
&mut root,
HEAP_START,
HEAP_START + num_pages,
page::EntryBits::ReadWrite.val(),
);
// Map executable section
id_map_range(
&mut root,
TEXT_START,
TEXT_END,
page::EntryBits::ReadExecute.val(),
);
// Map rodata section
// We put the rodata section into the text section, so they can
// potentially overlap; however, we only care that it's read-only.
id_map_range(
&mut root,
RODATA_START,
RODATA_END,
page::EntryBits::ReadExecute.val(),
);
// Map data section
id_map_range(
&mut root,
DATA_START,
DATA_END,
page::EntryBits::ReadWrite.val(),
);
// Map bss section
id_map_range(
&mut root,
BSS_START,
BSS_END,
page::EntryBits::ReadWrite.val(),
);
// Map kernel stack
id_map_range(
&mut root,
KERNEL_STACK_START,
KERNEL_STACK_END,
page::EntryBits::ReadWrite.val(),
);
}
// UART
id_map_range(
&mut root,
0x1000_0000,
0x1000_0100,
page::EntryBits::ReadWrite.val(),
);
// CLINT
// -> MSIP
id_map_range(
&mut root,
0x0200_0000,
0x0200_ffff,
page::EntryBits::ReadWrite.val(),
);
// PLIC
id_map_range(
&mut root,
0x0c00_0000,
0x0c00_2000,
page::EntryBits::ReadWrite.val(),
);
id_map_range(
&mut root,
0x0c20_0000,
0x0c20_8000,
page::EntryBits::ReadWrite.val(),
);
// When we return from here, we'll go back to boot.S and switch into
// supervisor mode. We write the SATP register below before we return.
// root_u is the root page table's address. When stored into
// the SATP register, this is divided by 4 KiB (right shift by 12 bits).
// We enable the MMU by setting mode 8. Bits 63, 62, 61, 60 determine
// the mode.
// 0 = Bare (no translation)
// 8 = Sv39
// 9 = Sv48
// build_satp has these parameters: mode, asid, page table address.
let satp_value = cpu::build_satp(cpu::SatpMode::Sv39, 0, root_u);
unsafe {
// We have to store the kernel's table. The tables will be moved
// back and forth between the kernel's table and user
// applications' tables. Note that we're writing the physical address
// of the trap frame.
cpu::mscratch_write(
(&mut cpu::KERNEL_TRAP_FRAME[0]
as *mut cpu::TrapFrame)
as usize,
);
cpu::sscratch_write(cpu::mscratch_read());
cpu::KERNEL_TRAP_FRAME[0].satp = satp_value;
// Move the stack pointer to the very end (highest address) of the page.
// The stack is actually in a non-mapped page. The stack is
// decrement-before-push and increment-after-pop. Therefore, the stack
// will be allocated (decremented) before it is stored into.
cpu::KERNEL_TRAP_FRAME[0].trap_stack =
page::zalloc(1).add(page::PAGE_SIZE);
id_map_range(
&mut root,
cpu::KERNEL_TRAP_FRAME[0].trap_stack
.sub(page::PAGE_SIZE,)
as usize,
cpu::KERNEL_TRAP_FRAME[0].trap_stack as usize,
page::EntryBits::ReadWrite.val(),
);
// The trap frame itself is stored in the mscratch register.
id_map_range(
&mut root,
cpu::mscratch_read(),
cpu::mscratch_read()
+ core::mem::size_of::<cpu::TrapFrame,>(),
page::EntryBits::ReadWrite.val(),
);
page::print_page_allocations();
let p = cpu::KERNEL_TRAP_FRAME[0].trap_stack as usize - 1;
let m = page::virt_to_phys(&root, p).unwrap_or(0);
println!("Walk 0x{:x} = 0x{:x}", p, m);
}
// The following shows how we're going to walk to translate a virtual
// address into a physical address. We will use this whenever a user
// space application requires services. Since the user space application
// only knows virtual addresses, we have to translate silently behind
// the scenes.
println!("Setting 0x{:x}", satp_value);
println!("Scratch reg = 0x{:x}", cpu::mscratch_read());
cpu::satp_write(satp_value);
cpu::satp_fence_asid(0);
}
#[no_mangle]
extern "C" fn kinit_hart(hartid: usize) {
// All non-0 harts initialize here.
unsafe {
// We have to store the kernel's table. The tables will be moved
// back and forth between the kernel's table and user
// applications' tables.
cpu::mscratch_write(
(&mut cpu::KERNEL_TRAP_FRAME[hartid]
as *mut cpu::TrapFrame)
as usize,
);
// Copy the same mscratch over to the supervisor version of the
// same register.
cpu::sscratch_write(cpu::mscratch_read());
cpu::KERNEL_TRAP_FRAME[hartid].hartid = hartid;
// We can't do the following until zalloc() is locked, but we
// don't have locks, yet :( cpu::KERNEL_TRAP_FRAME[hartid].satp
// = cpu::KERNEL_TRAP_FRAME[0].satp;
// cpu::KERNEL_TRAP_FRAME[hartid].trap_stack = page::zalloc(1);
}
}
#[no_mangle]
extern "C" fn kmain() {
// kmain() starts in supervisor mode. So, we should have the trap
// vector setup and the MMU turned on when we get here.
// We initialized my_uart in machine mode under kinit for debugging
// prints, but this just grabs a pointer to it.
let mut my_uart = uart::Uart::new(0x1000_0000);
// Create a new scope so that we can test the global allocator and
// deallocator
{
// We have the global allocator, so let's see if that works!
let k = Box::<u32>::new(100);
println!("Boxed value = {}", *k);
// The following comes from the Rust documentation:
// some bytes, in a vector
let sparkle_heart = vec![240, 159, 146, 150];
// We know these bytes are valid, so we'll use `unwrap()`.
// This will MOVE the vector.
let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
println!("String = {}", sparkle_heart);
println!("\n\nAllocations of a box, vector, and string");
kmem::print_table();
}
println!("\n\nEverything should now be free:");
kmem::print_table();
unsafe {
// Set the next machine timer to fire.
let mtimecmp = 0x0200_4000 as *mut u64;
let mtime = 0x0200_bff8 as *const u64;
// The frequency given by QEMU is 10_000_000 Hz, so this sets
// the next interrupt to fire one second from now.
mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000);
// Let's cause a page fault and see what happens. This should trap
// to m_trap under trap.rs
let v = 0x0 as *mut u64;
v.write_volatile(0);
}
// If we get here, the Box, vec, and String should all be freed since
// they go out of scope. This calls their "Drop" trait.
// Now see if we can read stuff:
// Usually we can use #[test] modules in Rust, but it would convolute
// the task at hand, and it requires us to create the testing harness
// since the embedded testing system is part of the "std" library.
loop {
if let Some(c) = my_uart.get() {
match c {
8 => {
// This is a backspace, so we
// essentially have to write a space and
// backup again:
print!("{} {}", 8 as char, 8 as char);
},
10 | 13 => {
// Newline or carriage-return
println!();
},
_ => {
print!("{}", c as char);
},
}
}
}
}
// ///////////////////////////////////
// / RUST MODULES
// ///////////////////////////////////
pub mod cpu;
pub mod kmem;
pub mod page;
pub mod trap;
pub mod uart;

risc_v/ch5/src/page.rs Normal file

@@ -0,0 +1,558 @@
// page.rs
// Memory routines
// Stephen Marz
// 6 October 2019
use core::{mem::size_of, ptr::null_mut};
// ////////////////////////////////
// // Allocation routines
// ////////////////////////////////
extern "C" {
static HEAP_START: usize;
static HEAP_SIZE: usize;
}
// We will use ALLOC_START to mark the start of the actual
// memory we can dish out.
static mut ALLOC_START: usize = 0;
const PAGE_ORDER: usize = 12;
pub const PAGE_SIZE: usize = 1 << 12;
/// Align (set to a multiple of some power of two)
/// This takes an order which is the exponent to 2^order
/// Therefore, all alignments must be made as a power of two.
/// This function always rounds up.
pub const fn align_val(val: usize, order: usize) -> usize {
let o = (1usize << order) - 1;
(val + o) & !o
}
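// Illustrative example (not in the original source): align_val(5000, 12) rounds
// 5000 up to the next 4096-byte boundary: (5000 + 4095) & !4095 == 8192.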
#[repr(u8)]
pub enum PageBits {
Empty = 0,
Taken = 1 << 0,
Last = 1 << 1,
}
impl PageBits {
// We convert PageBits to a u8 a lot, so this is
// for convenience.
pub fn val(self) -> u8 {
self as u8
}
}
// Each page is described by the Page structure. Linux does this
// as well, where each 4096-byte chunk of memory has a structure
// associated with it. However, their structure is much larger.
pub struct Page {
flags: u8,
}
impl Page {
// If this page has been marked as the final allocation,
// this function returns true. Otherwise, it returns false.
pub fn is_last(&self) -> bool {
if self.flags & PageBits::Last.val() != 0 {
true
}
else {
false
}
}
// If the page is marked as being taken (allocated), then
// this function returns true. Otherwise, it returns false.
pub fn is_taken(&self) -> bool {
if self.flags & PageBits::Taken.val() != 0 {
true
}
else {
false
}
}
// This is the opposite of is_taken().
pub fn is_free(&self) -> bool {
!self.is_taken()
}
// Clear the Page structure and all associated allocations.
pub fn clear(&mut self) {
self.flags = PageBits::Empty.val();
}
// Set a certain flag. We ran into trouble here since PageBits
// is an enumeration and we haven't implemented the BitOr Trait
// on it.
pub fn set_flag(&mut self, flag: PageBits) {
self.flags |= flag.val();
}
pub fn clear_flag(&mut self, flag: PageBits) {
self.flags &= !(flag.val());
}
}
/// Initialize the allocation system. There are several ways that we can
/// implement the page allocator:
/// 1. Free list (singly linked list where it starts at the first free
/// allocation) 2. Bookkeeping list (structure contains a taken and length)
/// 3. Allocate one Page structure per 4096 bytes (this is what I chose)
/// 4. Others
pub fn init() {
unsafe {
// let desc_per_page = PAGE_SIZE / size_of::<Page>();
let num_pages = HEAP_SIZE / PAGE_SIZE;
// let num_desc_pages = num_pages / desc_per_page;
let ptr = HEAP_START as *mut Page;
// Clear all pages to make sure that they aren't accidentally
// taken
for i in 0..num_pages {
(*ptr.add(i)).clear();
}
// Determine where the actual useful memory starts. This will be
// after all Page structures. We also must align the ALLOC_START
// to a page-boundary (PAGE_SIZE = 4096). ALLOC_START =
// (HEAP_START + num_pages * size_of::<Page>() + PAGE_SIZE - 1)
// & !(PAGE_SIZE - 1);
ALLOC_START = align_val(
HEAP_START
+ num_pages * size_of::<Page>(),
PAGE_ORDER,
);
}
}
/// Allocate a page or multiple pages
/// pages: the number of PAGE_SIZE pages to allocate
pub fn alloc(pages: usize) -> *mut u8 {
// We have to find a contiguous allocation of pages
assert!(pages > 0);
unsafe {
// We create a Page structure for each page on the heap. We
// actually might have more since HEAP_SIZE moves and so does
// the size of our structure, but we'll only waste a few bytes.
let num_pages = HEAP_SIZE / PAGE_SIZE;
let ptr = HEAP_START as *mut Page;
for i in 0..num_pages - pages {
let mut found = false;
// Check to see if this Page is free. If so, we have our
// first candidate memory address.
if (*ptr.add(i)).is_free() {
// It was FREE! Yay!
found = true;
for j in i..i + pages {
// Now check to see if we have a
// contiguous allocation for all of the
// requested pages. If not, we should
// check somewhere else.
if (*ptr.add(j)).is_taken() {
found = false;
break;
}
}
}
// We've checked to see if there are enough contiguous
// pages to form what we need. If we couldn't, found
// will be false, otherwise it will be true, which means
// we've found valid memory we can allocate.
if found {
for k in i..i + pages - 1 {
(*ptr.add(k)).set_flag(PageBits::Taken);
}
// The marker for the last page is
// PageBits::Last This lets us know when we've
// hit the end of this particular allocation.
(*ptr.add(i+pages-1)).set_flag(PageBits::Taken);
(*ptr.add(i+pages-1)).set_flag(PageBits::Last);
// The Page structures themselves aren't the
// useful memory. Instead, there is 1 Page
// structure per 4096 bytes starting at
// ALLOC_START.
return (ALLOC_START + PAGE_SIZE * i)
as *mut u8;
}
}
}
// If we get here, that means that no contiguous allocation was
// found.
null_mut()
}
/// Allocate and zero a page or multiple pages
/// pages: the number of pages to allocate
/// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER
/// On RISC-V, this typically will be 4,096 bytes.
pub fn zalloc(pages: usize) -> *mut u8 {
// Allocate and zero a page.
// First, let's get the allocation
let ret = alloc(pages);
if !ret.is_null() {
let size = (PAGE_SIZE * pages) / 8;
let big_ptr = ret as *mut u64;
for i in 0..size {
// We use big_ptr so that we can force an
// sd (store doubleword) instruction rather than
// the sb. This means 8x fewer stores than before.
// Typically we have to be concerned about remaining
// bytes, but fortunately 4096 % 8 = 0, so we
// won't have any remaining bytes.
unsafe {
(*big_ptr.add(i)) = 0;
}
}
}
ret
}
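// Illustrative usage (not in the original source): `let p = zalloc(2);` returns
// a zeroed, page-aligned 8 KiB (two-page) region, or null_mut() if two
// contiguous free pages cannot be found. It is released with dealloc(p).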
/// Deallocate a page by its pointer
/// The way we've structured this, it will automatically coalesce
/// contiguous pages.
pub fn dealloc(ptr: *mut u8) {
// Make sure we don't try to free a null pointer.
assert!(!ptr.is_null());
unsafe {
let addr =
HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE;
// Make sure that the address makes sense. The address we
// calculate here is the page structure, not the HEAP address!
assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE);
let mut p = addr as *mut Page;
// Keep clearing pages until we hit the last page.
while (*p).is_taken() && !(*p).is_last() {
(*p).clear();
p = p.add(1);
}
// If the following assertion fails, it is most likely
// caused by a double-free.
assert!(
(*p).is_last() == true,
"Possible double-free detected! (Not taken found \
before last)"
);
// If we get here, we've taken care of all previous pages and
// we are on the last page.
(*p).clear();
}
}
/// Print all page allocations
/// This is mainly used for debugging.
pub fn print_page_allocations() {
unsafe {
let num_pages = (HEAP_SIZE - (ALLOC_START - HEAP_START)) / PAGE_SIZE;
let mut beg = HEAP_START as *const Page;
let end = beg.add(num_pages);
let alloc_beg = ALLOC_START;
let alloc_end = ALLOC_START + num_pages * PAGE_SIZE;
println!();
println!(
"PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \
0x{:x} -> 0x{:x}",
beg, end, alloc_beg, alloc_end
);
println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
let mut num = 0;
while beg < end {
if (*beg).is_taken() {
let start = beg as usize;
let memaddr = ALLOC_START
+ (start - HEAP_START)
* PAGE_SIZE;
print!("0x{:x} => ", memaddr);
loop {
num += 1;
if (*beg).is_last() {
let end = beg as usize;
let memaddr = ALLOC_START
+ (end
- HEAP_START)
* PAGE_SIZE
+ PAGE_SIZE - 1;
print!(
"0x{:x}: {:>3} page(s)",
memaddr,
(end - start + 1)
);
println!(".");
break;
}
beg = beg.add(1);
}
}
beg = beg.add(1);
}
println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
println!(
"Allocated: {:>6} pages ({:>10} bytes).",
num,
num * PAGE_SIZE
);
println!(
"Free : {:>6} pages ({:>10} bytes).",
num_pages - num,
(num_pages - num) * PAGE_SIZE
);
println!();
}
}
// ////////////////////////////////
// // MMU Routines
// ////////////////////////////////
// Represent (repr) our entry bits as
// signed 64-bit integers (i64).
#[repr(i64)]
#[derive(Copy, Clone)]
pub enum EntryBits {
None = 0,
Valid = 1 << 0,
Read = 1 << 1,
Write = 1 << 2,
Execute = 1 << 3,
User = 1 << 4,
Global = 1 << 5,
Access = 1 << 6,
Dirty = 1 << 7,
// Convenience combinations
ReadWrite = 1 << 1 | 1 << 2,
ReadExecute = 1 << 1 | 1 << 3,
ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3,
// User Convenience Combinations
UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4,
UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4,
UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4,
}
// Helper functions to convert the enumeration
// into an i64, which is what our page table
// entries will be.
impl EntryBits {
pub fn val(self) -> i64 {
self as i64
}
}
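// Illustrative values (not in the original source): ReadWrite.val() == 0b0110 == 6,
// ReadExecute.val() == 0b1010 == 10, and UserReadWrite.val() == 0b1_0110 == 22.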
// A single entry. We're using an i64 so that
// this will sign-extend rather than zero-extend
// since RISC-V requires that the reserved sections
// take on the most significant bit.
pub struct Entry {
pub entry: i64,
}
// The Entry structure describes one of the 512 entries per table, which is
// described in the RISC-V privileged spec Figure 4.18.
impl Entry {
pub fn is_valid(&self) -> bool {
self.get_entry() & EntryBits::Valid.val() != 0
}
// The first bit (bit index #0) is the V bit for
// valid.
pub fn is_invalid(&self) -> bool {
!self.is_valid()
}
// A leaf has one or more RWX bits set
pub fn is_leaf(&self) -> bool {
self.get_entry() & 0xe != 0
}
pub fn is_branch(&self) -> bool {
!self.is_leaf()
}
pub fn set_entry(&mut self, entry: i64) {
self.entry = entry;
}
pub fn get_entry(&self) -> i64 {
self.entry
}
}
// Table represents a single table, which contains 512 (2^9), 64-bit entries.
pub struct Table {
pub entries: [Entry; 512],
}
impl Table {
pub fn len() -> usize {
512
}
}
/// Map a virtual address to a physical address using 4096-byte page
/// size.
/// root: a mutable reference to the root Table
/// vaddr: The virtual address to map
/// paddr: The physical address to map
/// bits: An OR'd bitset containing the bits the leaf should have.
/// The bits should contain only the following:
/// Read, Write, Execute, User, and/or Global
/// The bits MUST include one or more of the following:
/// Read, Write, Execute
/// The valid bit automatically gets added.
pub fn map(root: &mut Table,
vaddr: usize,
paddr: usize,
bits: i64,
level: usize)
{
// Make sure that Read, Write, or Execute have been provided
// otherwise, we'll leak memory and always create a page fault.
assert!(bits & 0xe != 0);
// Extract out each VPN from the virtual address
// On the virtual address, each VPN is exactly 9 bits,
// which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits)
let vpn = [
// VPN[0] = vaddr[20:12]
(vaddr >> 12) & 0x1ff,
// VPN[1] = vaddr[29:21]
(vaddr >> 21) & 0x1ff,
// VPN[2] = vaddr[38:30]
(vaddr >> 30) & 0x1ff,
];
// Just like the virtual address, extract the physical address
// numbers (PPN). However, PPN[2] is different in that it stores
// 26 bits instead of 9. Therefore, we use
// 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits).
let ppn = [
// PPN[0] = paddr[20:12]
(paddr >> 12) & 0x1ff,
// PPN[1] = paddr[29:21]
(paddr >> 21) & 0x1ff,
// PPN[2] = paddr[55:30]
(paddr >> 30) & 0x3ff_ffff,
];
// We will use this as a floating reference so that we can set
// individual entries as we walk the table.
let mut v = &mut root.entries[vpn[2]];
// Now, we're going to traverse the page table and set the bits
// properly. We expect the root to be valid, however we're required to
// create anything beyond the root.
// In Rust, we create a range iterator using the .. operator.
// The .rev() will reverse the iteration since we need to start with
// VPN[2] The .. operator is inclusive on start but exclusive on end.
// So, (0..2) will iterate 0 and 1.
for i in (level..2).rev() {
if !v.is_valid() {
// Allocate a page
let page = zalloc(1);
// The page is already aligned by 4,096, so store it
// directly The page is stored in the entry shifted
// right by 2 places.
v.set_entry(
(page as i64 >> 2)
| EntryBits::Valid.val(),
);
}
let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry;
v = unsafe { entry.add(vpn[i]).as_mut().unwrap() };
}
// When we get here, we should be at VPN[0] and v should be pointing to
// our entry.
// The entry structure is Figure 4.18 in the RISC-V Privileged
// Specification
let entry = (ppn[2] << 28) as i64 | // PPN[2] = [53:28]
(ppn[1] << 19) as i64 | // PPN[1] = [27:19]
(ppn[0] << 10) as i64 | // PPN[0] = [18:10]
bits | // Specified bits, such as User, Read, Write, etc
EntryBits::Valid.val() | // Valid bit
EntryBits::Dirty.val() | // Some machines require this to =1
EntryBits::Access.val() // Just like dirty, some machines require this
;
// Set the entry. V should be set to the correct pointer by the loop
// above.
v.set_entry(entry);
}
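// Illustrative usage (not in the original source): identity-mapping the UART
// MMIO page looks like
//   map(root, 0x1000_0000, 0x1000_0000, EntryBits::ReadWrite.val(), 0);
// This walks VPN[2]=0, VPN[1]=0x80, VPN[0]=0, allocating intermediate tables
// with zalloc(1) as needed, then writes a leaf entry with V, R, W, A and D set.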
/// Unmaps and frees all memory associated with a table.
/// root: The root table to start freeing.
/// NOTE: This does NOT free root directly. This must be
/// freed manually.
/// The reason we don't free the root is because it is
/// usually embedded into the Process structure.
pub fn unmap(root: &mut Table) {
// Start with level 2
for lv2 in 0..Table::len() {
let ref entry_lv2 = root.entries[lv2];
if entry_lv2.is_valid() && entry_lv2.is_branch() {
// This is a valid entry, so drill down and free.
let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2;
let table_lv1 = unsafe {
// Make table_lv1 a mutable reference instead of
// a pointer.
(memaddr_lv1 as *mut Table).as_mut().unwrap()
};
for lv1 in 0..Table::len() {
let ref entry_lv1 = table_lv1.entries[lv1];
if entry_lv1.is_valid() && entry_lv1.is_branch()
{
let memaddr_lv0 = (entry_lv1.get_entry()
& !0x3ff) << 2;
// The next level is level 0, which
// cannot have branches, therefore,
// we free here.
dealloc(memaddr_lv0 as *mut u8);
}
}
dealloc(memaddr_lv1 as *mut u8);
}
}
}
/// Walk the page table to convert a virtual address to a
/// physical address.
/// If a page fault would occur, this returns None
/// Otherwise, it returns Some with the physical address.
pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option<usize> {
// Walk the page table pointed to by root
let vpn = [
// VPN[0] = vaddr[20:12]
(vaddr >> 12) & 0x1ff,
// VPN[1] = vaddr[29:21]
(vaddr >> 21) & 0x1ff,
// VPN[2] = vaddr[38:30]
(vaddr >> 30) & 0x1ff,
];
let mut v = &root.entries[vpn[2]];
for i in (0..=2).rev() {
if v.is_invalid() {
// This is an invalid entry, page fault.
break;
}
else if v.is_leaf() {
// According to RISC-V, a leaf can be at any level.
// The offset mask masks off the PPN. Each PPN is 9
// bits and they start at bit #12. So, our formula
// 12 + i * 9
let off_mask = (1 << (12 + i * 9)) - 1;
let vaddr_pgoff = vaddr & off_mask;
let addr = ((v.get_entry() << 2) as usize) & !off_mask;
return Some(addr | vaddr_pgoff);
}
// Set v to the next entry which is pointed to by this
// entry. However, the address was shifted right by 2 places
// when stored in the page table entry, so we shift it left
// to get it back into place.
let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry;
// We do i - 1 here, however we should get None or Some() above
// before we do 0 - 1 = -1.
v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() };
}
// If we get here, we've exhausted all valid tables and haven't
// found a leaf.
None
}
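// Illustrative usage (not in the original source): given an identity mapping of
// the UART page (see map() above), walking the same table returns it unchanged:
//   assert_eq!(virt_to_phys(&root, 0x1000_0058), Some(0x1000_0058));
// The low 12 bits (the page offset, 0x058 here) pass through untranslated.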

risc_v/ch5/src/plic.rs Normal file

@@ -0,0 +1,7 @@
// plic.rs
// Platform Interrupt Controller
// Stephen Marz
// 1 Nov 2019

risc_v/ch5/src/trap.rs Executable file

@@ -0,0 +1,100 @@
// trap.rs
// Trap routines
// Stephen Marz
// 10 October 2019
use crate::cpu::TrapFrame;
#[no_mangle]
extern "C" fn m_trap(epc: usize,
tval: usize,
cause: usize,
hart: usize,
status: usize,
frame: &mut TrapFrame)
-> usize
{
// We're going to handle all traps in machine mode. RISC-V lets
// us delegate to supervisor mode, but switching out SATP (virtual memory)
// gets hairy.
let is_async = {
if cause >> 63 & 1 == 1 {
true
}
else {
false
}
};
// The cause contains the type of trap (sync, async) as well as the cause
// number. So, here we narrow down just the cause number.
let cause_num = cause & 0xfff;
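// Illustrative examples (not in the original source): a machine timer
// interrupt arrives with mcause = 0x8000_0000_0000_0007, so is_async is
// true and cause_num == 7; a store page fault arrives with mcause = 15,
// so is_async is false and cause_num == 15.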
let mut return_pc = epc;
if is_async {
// Asynchronous trap
match cause_num {
3 => {
// Machine software
println!("Machine software interrupt CPU#{}", hart);
},
7 => unsafe {
// Machine timer
let mtimecmp = 0x0200_4000 as *mut u64;
let mtime = 0x0200_bff8 as *const u64;
// The frequency given by QEMU is 10_000_000 Hz, so this sets
// the next interrupt to fire one second from now.
mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000);
},
11 => {
// Machine external (interrupt from Platform Interrupt Controller (PLIC))
println!("Machine external interrupt CPU#{}", hart);
},
_ => {
panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num);
}
}
}
else {
// Synchronous trap
match cause_num {
2 => {
// Illegal instruction
panic!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval);
},
8 => {
// Environment (system) call from User mode
println!("E-call from User mode! CPU#{} -> 0x{:08x}", hart, epc);
return_pc += 4;
},
9 => {
// Environment (system) call from Supervisor mode
println!("E-call from Supervisor mode! CPU#{} -> 0x{:08x}", hart, epc);
return_pc += 4;
},
11 => {
// Environment (system) call from Machine mode
panic!("E-call from Machine mode! CPU#{} -> 0x{:08x}\n", hart, epc);
},
// Page faults
12 => {
// Instruction page fault
println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval);
return_pc += 4;
},
13 => {
// Load page fault
println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval);
return_pc += 4;
},
15 => {
// Store page fault
println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval);
return_pc += 4;
},
_ => {
panic!("Unhandled sync trap CPU#{} -> {}\n", hart, cause_num);
}
}
};
// Finally, return the updated program counter
return_pc
}

risc_v/ch5/src/uart.rs Executable file

@@ -0,0 +1,122 @@
// uart.rs
// UART routines and driver
use core::{convert::TryInto,
fmt::{Error, Write}};
pub struct Uart {
base_address: usize,
}
impl Write for Uart {
fn write_str(&mut self, out: &str) -> Result<(), Error> {
for c in out.bytes() {
self.put(c);
}
Ok(())
}
}
impl Uart {
pub fn new(base_address: usize) -> Self {
Uart { base_address }
}
pub fn init(&mut self) {
let ptr = self.base_address as *mut u8;
unsafe {
// First, set the word length, which is controlled by
// bits 0 and 1 of the line control register (LCR),
// which is at base_address + 3.
// We can easily write the value 3 here or 0b11, but I'm
// extending it so that it is clear we're setting two
// individual fields
// Word 0 Word 1
// ~~~~~~ ~~~~~~
let lcr: u8 = (1 << 0) | (1 << 1);
ptr.add(3).write_volatile(lcr);
// Now, enable the FIFO, which is bit index 0 of the
// FIFO control register (FCR at offset 2).
// Again, we can just write 1 here, but when we use left
// shift, it's easier to see that we're trying to write
// bit index #0.
ptr.add(2).write_volatile(1 << 0);
// Enable receiver buffer interrupts, which is at bit
// index 0 of the interrupt enable register (IER at
// offset 1).
ptr.add(1).write_volatile(1 << 0);
// If we cared about the divisor, the code below would
// set the divisor from a global clock rate of 22.729
// MHz (22,729,000 cycles per second) to a signaling
// rate of 2400 (BAUD). We usually have much faster
// signalling rates nowadays, but this demonstrates what
// the divisor actually does. The formula given in the
// NS16550A specification for calculating the divisor
// is:
// divisor = ceil( (clock_hz) / (baud_sps x 16) )
// So, we substitute our values and get:
// divisor = ceil( 22_729_000 / (2400 x 16) )
// divisor = ceil( 22_729_000 / 38_400 )
// divisor = ceil( 591.901 ) = 592
// The divisor register is two bytes (16 bits), so we
// need to split the value 592 into two bytes.
// Typically, we would calculate this based on measuring
// the clock rate, but again, for our purposes [qemu],
// this doesn't really do anything.
let divisor: u16 = 592;
let divisor_least: u8 =
(divisor & 0xff).try_into().unwrap();
let divisor_most: u8 =
(divisor >> 8).try_into().unwrap();
// Notice that the divisor register DLL (divisor latch
// least) and DLM (divisor latch most) have the same
// base address as the receiver/transmitter and the
// interrupt enable register. To change what the base
// address points to, we open the "divisor latch" by
// writing 1 into the Divisor Latch Access Bit (DLAB),
// which is bit index 7 of the Line Control Register
// (LCR) which is at base_address + 3.
ptr.add(3).write_volatile(lcr | 1 << 7);
// Now, base addresses 0 and 1 point to DLL and DLM,
// respectively. Put the lower 8 bits of the divisor
// into DLL
ptr.add(0).write_volatile(divisor_least);
ptr.add(1).write_volatile(divisor_most);
// Now that we've written the divisor, we never have to
// touch this again. In hardware, this will divide the
// global clock (22.729 MHz) into one suitable for 2,400
// signals per second. So, to once again get access to
// the RBR/THR/IER registers, we need to close the DLAB
// bit by clearing it to 0.
ptr.add(3).write_volatile(lcr);
}
}
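// Illustrative usage (not in the original source):
//   let mut uart = Uart::new(0x1000_0000);
//   uart.init();
//   uart.put(b'A');                                // transmit one byte
//   if let Some(c) = uart.get() { uart.put(c); }   // echo if data is ready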
pub fn put(&mut self, c: u8) {
let ptr = self.base_address as *mut u8;
unsafe {
ptr.add(0).write_volatile(c);
}
}
pub fn get(&mut self) -> Option<u8> {
let ptr = self.base_address as *mut u8;
unsafe {
if ptr.add(5).read_volatile() & 1 == 0 {
// The DR bit is 0, meaning no data
None
}
else {
// The DR bit is 1, meaning data!
Some(ptr.add(0).read_volatile())
}
}
}
}