From f88072f7ac009c89fcce09f2ce77889242ac0e37 Mon Sep 17 00:00:00 2001 From: Wynd Date: Mon, 21 Oct 2024 17:30:43 +0300 Subject: [PATCH] First iteration of the BF compiler to ASM, only supports 3 operations --- .gitignore | 3 ++ input/math.bf | 2 +- input/math2.bf | 1 + src/asm_gen.rs | 63 ++++++++++++++++++++++++++++ src/compiler.rs | 3 ++ src/interpreter.rs | 93 +++++++++++++++++++++++++++++++++++++++++ src/main.rs | 102 ++++++--------------------------------------- src/tokenizer.rs | 81 +++++++++++++++++++++++++++++++++++ 8 files changed, 258 insertions(+), 90 deletions(-) create mode 100644 input/math2.bf create mode 100644 src/asm_gen.rs create mode 100644 src/compiler.rs create mode 100644 src/interpreter.rs create mode 100644 src/tokenizer.rs diff --git a/.gitignore b/.gitignore index ea8c4bf..fb7ae50 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /target + +# asm gen testing +/asm diff --git a/input/math.bf b/input/math.bf index ad6a80d..18c6a10 100644 --- a/input/math.bf +++ b/input/math.bf @@ -1 +1 @@ -,++. ++++.--. diff --git a/input/math2.bf b/input/math2.bf new file mode 100644 index 0000000..ad6a80d --- /dev/null +++ b/input/math2.bf @@ -0,0 +1 @@ +,++. diff --git a/src/asm_gen.rs b/src/asm_gen.rs new file mode 100644 index 0000000..5ca9b69 --- /dev/null +++ b/src/asm_gen.rs @@ -0,0 +1,63 @@ +use crate::tokenizer::{Token, TokenType}; + +pub fn generate(tokens: Vec) -> String { + let mut output = String::new(); + + output.push_str( + "section .bss\n\ + result: resb 65536\n", + ); + + // global start label + output.push_str( + "global _start\n\ + section .text\n\ + _start:\n", + ); + + // NOTE: For debugging only + output.push_str("mov dl, 48\n"); + + for token in tokens { + match token.r#type { + TokenType::Plus(x) => { + output.push_str(&format!( + "add dl, {x}\n\ + mov byte [result], dl\n" + )); + } + TokenType::Minus(x) => { + output.push_str(&format!( + "sub dl, {x}\n\ + mov byte [result], dl\n" + )); + } + TokenType::MoveRight(_) => todo!(), + TokenType::MoveLeft(_) => todo!(), + TokenType::Output => { + output.push_str( + "push rdx\n\ + mov byte [result + 1], 10\n\ + mov rax, 1\n\ + mov rdi, 1\n\ + mov rsi, result\n\ + mov rdx, 4\n\ + syscall\n\ + pop rdx\n", + ); + } + TokenType::Input => todo!(), + TokenType::BracketOpen => todo!(), + TokenType::BracketClose => todo!(), + } + } + + // exit syscall + output.push_str( + "mov rax, 60\n\ + mov rdi, 0\n\ + syscall\n", + ); + + output +} diff --git a/src/compiler.rs b/src/compiler.rs new file mode 100644 index 0000000..19e153d --- /dev/null +++ b/src/compiler.rs @@ -0,0 +1,3 @@ +pub struct Compiler {} + +impl Compiler {} diff --git a/src/interpreter.rs b/src/interpreter.rs new file mode 100644 index 0000000..1d71e09 --- /dev/null +++ b/src/interpreter.rs @@ -0,0 +1,93 @@ +use std::{ + collections::{HashMap, VecDeque}, + io::{self, Write}, +}; + +pub fn run(program: Vec) { + let mut tape: Vec = vec![0]; + let mut cell_index: usize = 0; + + let mut ip: usize = 0; + let mut icount: usize = 0; + let mut user_input: Vec = vec![]; + + let mut table: HashMap = HashMap::new(); + let mut stack: VecDeque = VecDeque::new(); + for (ip, instruction) in program.iter().enumerate() { + if *instruction == b'[' { + stack.push_back(ip); + } else if *instruction == b']' { + let begin_index = stack.pop_back().unwrap(); + table.insert(begin_index, ip); + table.insert(ip, begin_index); + } + } + + while ip < program.len() { + let Some(instruction) = program.get(ip) else { + break; + }; + + match instruction { + b'+' => { + tape[cell_index] = tape[cell_index].wrapping_add(1); + icount += 1; + } + b'-' => { + tape[cell_index] = tape[cell_index].wrapping_sub(1); + icount += 1; + } + b'>' => { + cell_index = cell_index.wrapping_add(1); + if tape.len() <= cell_index { + tape.push(0); + } + icount += 1; + } + b'<' => { + cell_index = cell_index.wrapping_sub(1); + icount += 1; + } + b'.' => { + print!("{}", tape[cell_index] as char); + icount += 1; + } + b',' => { + if user_input.is_empty() { + user_input = input(); + } + tape[cell_index] = user_input.remove(0); + icount += 1; + } + b'[' => { + if tape[cell_index] == 0 { + ip = *table.get(&ip).unwrap(); + } + icount += 1; + } + b']' => { + if tape[cell_index] != 0 { + ip = *table.get(&ip).unwrap(); + } + icount += 1; + } + _ => {} + } + + ip += 1; + } + + println!(); + println!("Instructions ran: {icount}"); + // println!("{tape:?}"); +} + +fn input() -> Vec { + let mut input = String::new(); + + print!("Input: "); + let _ = io::stdout().flush(); + let _ = io::stdin().read_line(&mut input); + + input.as_bytes().to_vec() +} diff --git a/src/main.rs b/src/main.rs index 14187d1..83387ab 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,99 +1,23 @@ -use std::{ - collections::{HashMap, VecDeque}, - env, fs, - io::{self, Write}, - path::PathBuf, -}; +#![feature(let_chains)] + +use std::{env, fs, io::Write, path::PathBuf}; + +mod asm_gen; +mod compiler; +mod interpreter; +mod tokenizer; fn main() { let program: Vec = load_program(); - let mut tape: Vec = vec![0]; - let mut cell_index: usize = 0; + let tokens = tokenizer::Tokenizer::default().tokenize(program); - let mut ip: usize = 0; - let mut icount: usize = 0; - let mut user_input: Vec = vec![]; + let asm = asm_gen::generate(tokens); - let mut table: HashMap = HashMap::new(); - let mut stack: VecDeque = VecDeque::new(); - for (ip, instruction) in program.iter().enumerate() { - if *instruction == b'[' { - stack.push_back(ip); - } else if *instruction == b']' { - let begin_index = stack.pop_back().unwrap(); - table.insert(begin_index, ip); - table.insert(ip, begin_index); - } - } + let mut file = fs::File::create("./demo.asm").unwrap(); + file.write_all(asm.as_bytes()).unwrap(); - while ip < program.len() { - let Some(instruction) = program.get(ip) else { - break; - }; - - match instruction { - b'+' => { - tape[cell_index] = tape[cell_index].wrapping_add(1); - icount += 1; - } - b'-' => { - tape[cell_index] = tape[cell_index].wrapping_sub(1); - icount += 1; - } - b'>' => { - cell_index = cell_index.wrapping_add(1); - if tape.len() <= cell_index { - tape.push(0); - } - icount += 1; - } - b'<' => { - cell_index = cell_index.wrapping_sub(1); - icount += 1; - } - b'.' => { - print!("{}", tape[cell_index] as char); - icount += 1; - } - b',' => { - if user_input.is_empty() { - user_input = input(); - } - tape[cell_index] = user_input.remove(0); - icount += 1; - } - b'[' => { - if tape[cell_index] == 0 { - ip = *table.get(&ip).unwrap(); - } - icount += 1; - } - b']' => { - if tape[cell_index] != 0 { - ip = *table.get(&ip).unwrap(); - } - icount += 1; - } - _ => {} - } - - ip += 1; - } - - println!(); - println!("Instructions ran: {icount}"); - // println!("{tape:?}"); -} - -fn input() -> Vec { - let mut input = String::new(); - - print!("Input: "); - let _ = io::stdout().flush(); - let _ = io::stdin().read_line(&mut input); - - input.as_bytes().to_vec() + // interpreter::run(program); } fn load_program() -> Vec { diff --git a/src/tokenizer.rs b/src/tokenizer.rs new file mode 100644 index 0000000..059dc66 --- /dev/null +++ b/src/tokenizer.rs @@ -0,0 +1,81 @@ +#[derive(Debug)] +pub enum TokenType { + Plus(u32), + Minus(u32), + MoveRight(u32), + MoveLeft(u32), + Output, + Input, + BracketOpen, + BracketClose, +} + +#[derive(Debug)] +pub struct Token { + pub r#type: TokenType, +} + +impl Token { + fn new(r#type: TokenType) -> Self { + Self { r#type } + } +} + +#[derive(Debug, Default)] +pub struct Tokenizer { + index: u32, +} + +impl Tokenizer { + pub fn tokenize(&self, program: Vec) -> Vec { + let mut tokens = vec![]; + + let mut ip = 0; + let mut icount = 0; + while ip < program.len() { + let Some(instruction) = program.get(ip) else { + break; + }; + + let next_instruction = program.get(&ip + 1); + + ip += 1; + + match instruction { + b'+' => { + icount += 1; + if let Some(next) = next_instruction + && *next == b'+' + { + continue; + } else { + tokens.push(Token::new(TokenType::Plus(icount))); + icount = 0; + } + } + b'-' => { + icount += 1; + if let Some(next) = next_instruction + && *next == b'-' + { + continue; + } else { + tokens.push(Token::new(TokenType::Minus(icount))); + icount = 0; + } + } + b'>' => {} + b'<' => {} + b'.' => { + tokens.push(Token::new(TokenType::Output)); + } + b',' => {} + b'[' => {} + b']' => {} + _ => {} + } + } + + tokens + } +}