From 3d69c6053e06d14ec041b01cf495e44eed3fff4e Mon Sep 17 00:00:00 2001 From: Yifan Wu Date: Wed, 21 Dec 2022 11:11:50 +0800 Subject: [PATCH] Add coroutine examples --- user/src/bin/stackful_coroutine.rs | 349 ++++++++++++++++++++++++++++ user/src/bin/stackless_coroutine.rs | 129 ++++++++++ 2 files changed, 478 insertions(+) create mode 100644 user/src/bin/stackful_coroutine.rs create mode 100644 user/src/bin/stackless_coroutine.rs diff --git a/user/src/bin/stackful_coroutine.rs b/user/src/bin/stackful_coroutine.rs new file mode 100644 index 00000000..83eceb73 --- /dev/null +++ b/user/src/bin/stackful_coroutine.rs @@ -0,0 +1,349 @@ +// we porting below codes to Rcore Tutorial v3 +// https://cfsamson.gitbook.io/green-threads-explained-in-200-lines-of-rust/ +// https://github.com/cfsamson/example-greenthreads +#![no_std] +#![no_main] +#![feature(naked_functions)] +#![feature(asm)] + +extern crate alloc; +#[macro_use] +extern crate user_lib; + +use core::arch::asm; + +#[macro_use] +use alloc::vec; +use alloc::vec::Vec; + +use user_lib::exit; + +// In our simple example we set most constraints here. +const DEFAULT_STACK_SIZE: usize = 4096; //128 got SEGFAULT, 256(1024, 4096) got right results. +const MAX_TASKS: usize = 5; +static mut RUNTIME: usize = 0; + +pub struct Runtime { + tasks: Vec, + current: usize, +} + +#[derive(PartialEq, Eq, Debug)] +enum State { + Available, + Running, + Ready, +} + +struct Task { + id: usize, + stack: Vec, + ctx: TaskContext, + state: State, +} + +#[derive(Debug, Default)] +#[repr(C)] // not strictly needed but Rust ABI is not guaranteed to be stable +pub struct TaskContext { + // 15 u64 + x1: u64, //ra: return addres + x2: u64, //sp + x8: u64, //s0,fp + x9: u64, //s1 + x18: u64, //x18-27: s2-11 + x19: u64, + x20: u64, + x21: u64, + x22: u64, + x23: u64, + x24: u64, + x25: u64, + x26: u64, + x27: u64, + nx1: u64, //new return addres +} + +impl Task { + fn new(id: usize) -> Self { + // We initialize each task here and allocate the stack. This is not neccesary, + // we can allocate memory for it later, but it keeps complexity down and lets us focus on more interesting parts + // to do it here. The important part is that once allocated it MUST NOT move in memory. + Task { + id, + stack: vec![0_u8; DEFAULT_STACK_SIZE], + ctx: TaskContext::default(), + state: State::Available, + } + } +} + +impl Runtime { + pub fn new() -> Self { + // This will be our base task, which will be initialized in the `running` state + let base_task = Task { + id: 0, + stack: vec![0_u8; DEFAULT_STACK_SIZE], + ctx: TaskContext::default(), + state: State::Running, + }; + + // We initialize the rest of our tasks. + let mut tasks = vec![base_task]; + let mut available_tasks: Vec = (1..MAX_TASKS).map(|i| Task::new(i)).collect(); + tasks.append(&mut available_tasks); + + Runtime { tasks, current: 0 } + } + + /// This is cheating a bit, but we need a pointer to our Runtime stored so we can call yield on it even if + /// we don't have a reference to it. + pub fn init(&self) { + unsafe { + let r_ptr: *const Runtime = self; + RUNTIME = r_ptr as usize; + } + } + + /// This is where we start running our runtime. If it is our base task, we call yield until + /// it returns false (which means that there are no tasks scheduled) and we are done. + pub fn run(&mut self) { + while self.t_yield() {} + println!("All tasks finished!"); + } + + /// This is our return function. The only place we use this is in our `guard` function. + /// If the current task is not our base task we set its state to Available. It means + /// we're finished with it. Then we yield which will schedule a new task to be run. + fn t_return(&mut self) { + if self.current != 0 { + self.tasks[self.current].state = State::Available; + self.t_yield(); + } + } + + /// This is the heart of our runtime. Here we go through all tasks and see if anyone is in the `Ready` state. + /// If no task is `Ready` we're all done. This is an extremely simple scheduler using only a round-robin algorithm. + /// + /// If we find a task that's ready to be run we change the state of the current task from `Running` to `Ready`. + /// Then we call switch which will save the current context (the old context) and load the new context + /// into the CPU which then resumes based on the context it was just passed. + /// + /// NOITCE: if we comment below `#[inline(never)]`, we can not get the corrent running result + #[inline(never)] + fn t_yield(&mut self) -> bool { + let mut pos = self.current; + while self.tasks[pos].state != State::Ready { + pos += 1; + if pos == self.tasks.len() { + pos = 0; + } + if pos == self.current { + return false; + } + } + + if self.tasks[self.current].state != State::Available { + self.tasks[self.current].state = State::Ready; + } + + self.tasks[pos].state = State::Running; + let old_pos = self.current; + self.current = pos; + + unsafe { + switch(&mut self.tasks[old_pos].ctx, &self.tasks[pos].ctx); + } + + // NOTE: this might look strange and it is. Normally we would just mark this as `unreachable!()` but our compiler + // is too smart for it's own good so it optimized our code away on release builds. Curiously this happens on windows + // and not on linux. This is a common problem in tests so Rust has a `black_box` function in the `test` crate that + // will "pretend" to use a value we give it to prevent the compiler from eliminating code. I'll just do this instead, + // this code will never be run anyways and if it did it would always be `true`. + self.tasks.len() > 0 + } + + /// While `yield` is the logically interesting function I think this the technically most interesting. + /// + /// When we spawn a new task we first check if there are any available tasks (tasks in `Parked` state). + /// If we run out of tasks we panic in this scenario but there are several (better) ways to handle that. + /// We keep things simple for now. + /// + /// When we find an available task we get the stack length and a pointer to our u8 bytearray. + /// + /// The next part we have to use some unsafe functions. First we write an address to our `guard` function + /// that will be called if the function we provide returns. Then we set the address to the function we + /// pass inn. + /// + /// Third, we set the value of `sp` which is the stack pointer to the address of our provided function so we start + /// executing that first when we are scheuled to run. + /// + /// Lastly we set the state as `Ready` which means we have work to do and is ready to do it. + pub fn spawn(&mut self, f: fn()) { + let available = self + .tasks + .iter_mut() + .find(|t| t.state == State::Available) + .expect("no available task."); + + let size = available.stack.len(); + unsafe { + let s_ptr = available.stack.as_mut_ptr().offset(size as isize); + + // make sure our stack itself is 8 byte aligned - it will always + // offset to a lower memory address. Since we know we're at the "high" + // memory address of our allocated space, we know that offsetting to + // a lower one will be a valid address (given that we actually allocated) + // enough space to actually get an aligned pointer in the first place). + let s_ptr = (s_ptr as usize & !7) as *mut u8; + + available.ctx.x1 = guard as u64; //ctx.x1 is old return address + available.ctx.nx1 = f as u64; //ctx.nx2 is new return address + available.ctx.x2 = s_ptr.offset(-32) as u64; //cxt.x2 is sp + } + available.state = State::Ready; + } +} + +/// This is our guard function that we place on top of the stack. All this function does is set the +/// state of our current task and then `yield` which will then schedule a new task to be run. +fn guard() { + unsafe { + let rt_ptr = RUNTIME as *mut Runtime; + (*rt_ptr).t_return(); + }; +} + +/// We know that Runtime is alive the length of the program and that we only access from one core +/// (so no datarace). We yield execution of the current task by dereferencing a pointer to our +/// Runtime and then calling `t_yield` +pub fn yield_task() { + unsafe { + let rt_ptr = RUNTIME as *mut Runtime; + (*rt_ptr).t_yield(); + }; +} + +/// So here is our inline Assembly. As you remember from our first example this is just a bit more elaborate where we first +/// read out the values of all the registers we need and then sets all the register values to the register values we +/// saved when we suspended exceution on the "new" task. +/// +/// This is essentially all we need to do to save and resume execution. +/// +/// Some details about inline assembly. +/// +/// The assembly commands in the string literal is called the assemblt template. It is preceeded by +/// zero or up to four segments indicated by ":": +/// +/// - First ":" we have our output parameters, this parameters that this function will return. +/// - Second ":" we have the input parameters which is our contexts. We only read from the "new" context +/// but we modify the "old" context saving our registers there (see volatile option below) +/// - Third ":" This our clobber list, this is information to the compiler that these registers can't be used freely +/// - Fourth ":" This is options we can pass inn, Rust has 3: "alignstack", "volatile" and "intel" +/// +/// For this to work on windows we need to use "alignstack" where the compiler adds the neccesary padding to +/// make sure our stack is aligned. Since we modify one of our inputs, our assembly has "side effects" +/// therefore we should use the `volatile` option. I **think** this is actually set for us by default +/// when there are no output parameters given (my own assumption after going through the source code) +/// for the `asm` macro, but we should make it explicit anyway. +/// +/// One last important part (it will not work without this) is the #[naked] attribute. Basically this lets us have full +/// control over the stack layout since normal functions has a prologue-and epilogue added by the +/// compiler that will cause trouble for us. We avoid this by marking the funtion as "Naked". +/// For this to work on `release` builds we also need to use the `#[inline(never)] attribute or else +/// the compiler decides to inline this function (curiously this currently only happens on Windows). +/// If the function is inlined we get a curious runtime error where it fails when switching back +/// to as saved context and in general our assembly will not work as expected. +/// +/// see: https://github.com/rust-lang/rfcs/blob/master/text/1201-naked-fns.md +/// see: https://doc.rust-lang.org/nightly/reference/inline-assembly.html +/// see: https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html +#[naked] +#[no_mangle] +unsafe fn switch(old: *mut TaskContext, new: *const TaskContext) { + // a0: _old, a1: _new + asm!( + " + sd x1, 0x00(a0) + sd x2, 0x08(a0) + sd x8, 0x10(a0) + sd x9, 0x18(a0) + sd x18, 0x20(a0) + sd x19, 0x28(a0) + sd x20, 0x30(a0) + sd x21, 0x38(a0) + sd x22, 0x40(a0) + sd x23, 0x48(a0) + sd x24, 0x50(a0) + sd x25, 0x58(a0) + sd x26, 0x60(a0) + sd x27, 0x68(a0) + sd x1, 0x70(a0) + + ld x1, 0x00(a1) + ld x2, 0x08(a1) + ld x8, 0x10(a1) + ld x9, 0x18(a1) + ld x18, 0x20(a1) + ld x19, 0x28(a1) + ld x20, 0x30(a1) + ld x21, 0x38(a1) + ld x22, 0x40(a1) + ld x23, 0x48(a1) + ld x24, 0x50(a1) + ld x25, 0x58(a1) + ld x26, 0x60(a1) + ld x27, 0x68(a1) + ld t0, 0x70(a1) + + jr t0 + ", + options(noreturn) + ); +} + +#[no_mangle] +pub fn main() { + println!("stackful_coroutine begin..."); + println!("TASK 0(Runtime) STARTING"); + let mut runtime = Runtime::new(); + runtime.init(); + runtime.spawn(|| { + println!("TASK 1 STARTING"); + let id = 1; + for i in 0..4 { + println!("task: {} counter: {}", id, i); + yield_task(); + } + println!("TASK 1 FINISHED"); + }); + runtime.spawn(|| { + println!("TASK 2 STARTING"); + let id = 2; + for i in 0..8 { + println!("task: {} counter: {}", id, i); + yield_task(); + } + println!("TASK 2 FINISHED"); + }); + runtime.spawn(|| { + println!("TASK 3 STARTING"); + let id = 3; + for i in 0..12 { + println!("task: {} counter: {}", id, i); + yield_task(); + } + println!("TASK 3 FINISHED"); + }); + runtime.spawn(|| { + println!("TASK 4 STARTING"); + let id = 4; + for i in 0..16 { + println!("task: {} counter: {}", id, i); + yield_task(); + } + println!("TASK 4 FINISHED"); + }); + runtime.run(); + println!("stackful_coroutine PASSED"); + exit(0); +} diff --git a/user/src/bin/stackless_coroutine.rs b/user/src/bin/stackless_coroutine.rs new file mode 100644 index 00000000..43aeb2de --- /dev/null +++ b/user/src/bin/stackless_coroutine.rs @@ -0,0 +1,129 @@ +// https://blog.aloni.org/posts/a-stack-less-rust-coroutine-100-loc/ +// https://github.com/chyyuu/example-coroutine-and-thread/tree/stackless-coroutine-x86 +#![no_std] +#![no_main] + +use core::future::Future; +use core::pin::Pin; +use core::task::{Context, Poll}; +use core::task::{RawWaker, RawWakerVTable, Waker}; + +extern crate alloc; +use alloc::collections::VecDeque; + +use alloc::boxed::Box; + +#[macro_use] +extern crate user_lib; + +enum State { + Halted, + Running, +} + +struct Task { + state: State, +} + +impl Task { + fn waiter<'a>(&'a mut self) -> Waiter<'a> { + Waiter { task: self } + } +} + +struct Waiter<'a> { + task: &'a mut Task, +} + +impl<'a> Future for Waiter<'a> { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, _cx: &mut Context) -> Poll { + match self.task.state { + State::Halted => { + self.task.state = State::Running; + Poll::Ready(()) + } + State::Running => { + self.task.state = State::Halted; + Poll::Pending + } + } + } +} + +struct Executor { + tasks: VecDeque>>>, +} + +impl Executor { + fn new() -> Self { + Executor { + tasks: VecDeque::new(), + } + } + + fn push(&mut self, closure: C) + where + F: Future + 'static, + C: FnOnce(Task) -> F, + { + let task = Task { + state: State::Running, + }; + self.tasks.push_back(Box::pin(closure(task))); + } + + fn run(&mut self) { + let waker = create_waker(); + let mut context = Context::from_waker(&waker); + + while let Some(mut task) = self.tasks.pop_front() { + match task.as_mut().poll(&mut context) { + Poll::Pending => { + self.tasks.push_back(task); + } + Poll::Ready(()) => {} + } + } + } +} + +pub fn create_waker() -> Waker { + // Safety: The waker points to a vtable with functions that do nothing. Doing + // nothing is memory-safe. + unsafe { Waker::from_raw(RAW_WAKER) } +} + +const RAW_WAKER: RawWaker = RawWaker::new(core::ptr::null(), &VTABLE); +const VTABLE: RawWakerVTable = RawWakerVTable::new(clone, wake, wake_by_ref, drop); + +unsafe fn clone(_: *const ()) -> RawWaker { + RAW_WAKER +} +unsafe fn wake(_: *const ()) {} +unsafe fn wake_by_ref(_: *const ()) {} +unsafe fn drop(_: *const ()) {} + +#[no_mangle] +pub fn main() -> i32 { + println!("stackless coroutine Begin.."); + let mut exec = Executor::new(); + println!(" Create futures"); + for instance in 1..=3 { + exec.push(move |mut task| async move { + println!(" Task {}: begin state", instance); + task.waiter().await; + println!(" Task {}: next state", instance); + task.waiter().await; + println!(" Task {}: end state", instance); + }); + } + + println!(" Running"); + exec.run(); + println!(" Done"); + println!("stackless coroutine PASSED"); + + 0 +}