From 6a96ce56ee5077f2aa8eb961e2425cce6167d017 Mon Sep 17 00:00:00 2001 From: Sander Hautvast Date: Mon, 2 Oct 2023 16:03:51 +0200 Subject: [PATCH] made class reading a little smarter --- src/class.rs | 8 +-- src/classloader.rs | 149 +++++++++++++++++++-------------------------- src/io.rs | 42 ++++++++----- src/opcodes.rs | 46 +++++++------- src/vm.rs | 20 ++---- 5 files changed, 124 insertions(+), 141 deletions(-) diff --git a/src/class.rs b/src/class.rs index 2f5e731..3b5f730 100644 --- a/src/class.rs +++ b/src/class.rs @@ -217,12 +217,12 @@ pub struct Exception { } impl Exception { - pub fn read(code: &[u8], index: usize) -> Self { + pub fn read(code: &[u8], index: &mut usize) -> Self { Self { start_pc: read_u16(code, index), - end_pc: read_u16(code, index + 2), - handler_pc: read_u16(code, index + 4), - catch_type: read_u16(code, index + 6), + end_pc: read_u16(code, index), + handler_pc: read_u16(code, index), + catch_type: read_u16(code, index), } } } diff --git a/src/classloader.rs b/src/classloader.rs index 97c3c5d..783cf57 100644 --- a/src/classloader.rs +++ b/src/classloader.rs @@ -1,15 +1,17 @@ use crate::class::{AttributeType, Class, Exception, Field, Method, MethodCode}; -use crate::io::{read_f32, read_f64, read_i32, read_i64, read_u16, read_u32}; +use crate::io::{read_bytes, read_f32, read_f64, read_i32, read_i64, read_u16, read_u32, read_u8}; use anyhow::Error; use std::collections::HashMap; use std::rc::Rc; pub fn load_class(bytecode: Vec) -> Result { - check_magic(&bytecode); + let pos = &mut 0; + check_magic(&bytecode, pos); + let minor_version = read_u16(&bytecode, pos); + let major_version = read_u16(&bytecode, pos); - let constant_pool_count = read_u16(&bytecode, 8); + let constant_pool_count = read_u16(&bytecode, pos); // println!("cp count: {}", constant_pool_count); - let mut index = 10; let mut constant_pool: HashMap = HashMap::with_capacity(constant_pool_count as usize); let mut cp_index = 1; @@ -17,46 +19,40 @@ pub fn load_class(bytecode: Vec) -> Result { // println!("cp#{}", cp_index); constant_pool.insert( cp_index, - read_constant_pool_entry(&mut cp_index, &mut index, &bytecode), + read_constant_pool_entry(&mut cp_index, pos, &bytecode), ); cp_index += 1; } let constant_pool = Rc::new(constant_pool); - let access_flags = read_u16(&bytecode, index); - let this_class = read_u16(&bytecode, index + 2); - let super_class = read_u16(&bytecode, index + 4); + let access_flags = read_u16(&bytecode, pos); + let this_class = read_u16(&bytecode, pos); + let super_class = read_u16(&bytecode, pos); - let interfaces_count = read_u16(&bytecode, index + 6); - // println!("interfaces count: {}", interfaces_count); - index += 8; + let interfaces_count = read_u16(&bytecode, pos); let mut interfaces = vec![]; for _ in 0..interfaces_count { - interfaces.push(read_u16(&bytecode, index)); - index += 2; + interfaces.push(read_u16(&bytecode, pos)); } - let fields_count = read_u16(&bytecode, index); - index += 2; + let fields_count = read_u16(&bytecode, pos); let mut fields = vec![]; for _ in 0..fields_count { - fields.push(read_field(constant_pool.clone(), &mut index, &bytecode)); + fields.push(read_field(constant_pool.clone(), pos, &bytecode)); } - let methods_count = read_u16(&bytecode, index); - index += 2; + let methods_count = read_u16(&bytecode, pos); let mut methods = HashMap::new(); for _ in 0..methods_count { - let m = read_method(constant_pool.clone(), &mut index, &bytecode); + let m = read_method(constant_pool.clone(), pos, &bytecode); methods.insert(m.name(), m); } - let attributes_count = read_u16(&bytecode, index); - index += 2; + let attributes_count = read_u16(&bytecode, pos); let mut attributes = HashMap::new(); for _ in 0..attributes_count { - let some = read_attribute(constant_pool.clone(), &bytecode, &mut index); + let some = read_attribute(constant_pool.clone(), &bytecode, pos); if let Some(att) = some { attributes.insert(att.0, att.1); } else { @@ -65,8 +61,8 @@ pub fn load_class(bytecode: Vec) -> Result { } Ok(Class { - minor_version: read_u16(&bytecode, 4), - major_version: read_u16(&bytecode, 6), + minor_version, + major_version, constant_pool, access_flags, this_class, @@ -78,78 +74,67 @@ pub fn load_class(bytecode: Vec) -> Result { }) } -fn check_magic(bytecode: &[u8]) { - if bytecode[0..4] != [0xCA, 0xFE, 0xBA, 0xBE] { +fn check_magic(bytecode: &[u8], pos: &mut usize) { + if bytecode[*pos..*pos + 4] != [0xCA, 0xFE, 0xBA, 0xBE] { panic!("Invalid class file"); } + *pos += 4; } fn read_constant_pool_entry(cp_index: &mut u16, index: &mut usize, bytecode: &[u8]) -> CpEntry { - let tag = bytecode[*index]; - // println!("#{}: {}", cp_index, tag); + let tag = read_u8(bytecode, index); match tag { 1 => { - let len = read_u16(bytecode, *index + 1) as usize; - let utf: Vec = Vec::from(&bytecode[*index + 3..*index + 3 + len]); - *index += len + 3; + let len = read_u16(bytecode, index) as usize; + let utf: Vec = read_bytes(&bytecode, index, len); CpEntry::Utf8(String::from_utf8(utf).unwrap()) } 3 => { - let value = read_i32(bytecode, *index + 1); - *index += 5; + let value = read_i32(bytecode, index); CpEntry::Integer(value) } 4 => { - let value = read_f32(bytecode, *index + 1); - *index += 5; + let value = read_f32(bytecode, index); CpEntry::Float(value) } 5 => { - let value = read_i64(bytecode, *index + 1); - *index += 9; - let r = CpEntry::Long(value); + let value = read_i64(bytecode, index); + let val = CpEntry::Long(value); *cp_index += 1; - r + val } 6 => { - let value = read_f64(bytecode, *index + 1); - *index += 9; - let r = CpEntry::Double(value); + let value = read_f64(bytecode, index); + let val = CpEntry::Double(value); //TODO order can be smarter *cp_index += 1; - r + val } 7 => { - let name_index = read_u16(bytecode, *index + 1); - *index += 3; + let name_index = read_u16(bytecode, index); CpEntry::ClassRef(name_index) } 8 => { - let string_index = read_u16(bytecode, *index + 1); - *index += 3; + let string_index = read_u16(bytecode, index); CpEntry::StringRef(string_index) } 9 => { - let class_index = read_u16(bytecode, *index + 1); - let name_and_type_index = read_u16(bytecode, *index + 3); - *index += 5; + let class_index = read_u16(bytecode, index); + let name_and_type_index = read_u16(bytecode, index); CpEntry::Fieldref(class_index, name_and_type_index) } 10 => { - let class_index = read_u16(bytecode, *index + 1); - let name_and_type_index = read_u16(bytecode, *index + 3); - *index += 5; + let class_index = read_u16(bytecode, index); + let name_and_type_index = read_u16(bytecode, index); CpEntry::MethodRef(class_index, name_and_type_index) } 11 => { - let class_index = read_u16(bytecode, *index + 1); - let name_and_type_index = read_u16(bytecode, *index + 3); - *index += 5; + let class_index = read_u16(bytecode, index); + let name_and_type_index = read_u16(bytecode, index); CpEntry::InterfaceMethodref(class_index, name_and_type_index) } 12 => { - let name_index = read_u16(bytecode, *index + 1); - let descriptor_index = read_u16(bytecode, *index + 3); - *index += 5; + let name_index = read_u16(bytecode, index); + let descriptor_index = read_u16(bytecode, index); CpEntry::NameAndType(name_index, descriptor_index) } // 15 MethodHandle, @@ -167,11 +152,10 @@ fn read_field( index: &mut usize, bytecode: &[u8], ) -> Field { - let access_flags = read_u16(bytecode, *index); - let name_index = read_u16(bytecode, *index + 2); - let descriptor_index = read_u16(bytecode, *index + 4); - let attributes_count = read_u16(bytecode, *index + 6); - *index += 8; + let access_flags = read_u16(bytecode, index); + let name_index = read_u16(bytecode, index); + let descriptor_index = read_u16(bytecode, index); + let attributes_count = read_u16(bytecode, index); let mut attributes = HashMap::new(); for _ in 0..attributes_count { if let Some(att) = read_attribute(constant_pool.clone(), bytecode, index) { @@ -194,11 +178,10 @@ fn read_method( index: &mut usize, bytecode: &[u8], ) -> Method { - let access_flags = read_u16(bytecode, *index); - let name_index = read_u16(bytecode, *index + 2); - let descriptor_index = read_u16(bytecode, *index + 4); - let attributes_count = read_u16(bytecode, *index + 6); - *index += 8; + let access_flags = read_u16(bytecode, index); + let name_index = read_u16(bytecode, index); + let descriptor_index = read_u16(bytecode, index); + let attributes_count = read_u16(bytecode, index); let mut attributes = HashMap::new(); for _ in 0..attributes_count { @@ -221,10 +204,8 @@ fn read_attribute( bytecode: &[u8], index: &mut usize, ) -> Option<(String, AttributeType)> { - let attribute_name_index = read_u16(bytecode, *index); - *index += 2; - let attribute_length = read_u32(bytecode, *index) as usize; - *index += 4; + let attribute_name_index = read_u16(bytecode, index); + let attribute_length = read_u32(bytecode, index) as usize; let info: Vec = Vec::from(&bytecode[*index..*index + attribute_length]); *index += attribute_length; @@ -235,27 +216,25 @@ fn read_attribute( assert_eq!(info.len(), 2); Some(( "ConstantValue".into(), - AttributeType::ConstantValue(read_u16(&info, 0)), + AttributeType::ConstantValue(read_u16(&info, &mut 0)), )) } "Code" => { - let max_stack = read_u16(&info, 0); - let max_locals = read_u16(&info, 2); - let code_length = read_u32(&info, 4) as usize; - let code = Vec::from(&info[8..8 + code_length]); - let exception_table_length = read_u16(&info, 8 + code_length) as usize; + let ci = &mut 0; + let max_stack = read_u16(&info, ci); + let max_locals = read_u16(&info, ci); + let code_length = read_u32(&info, ci) as usize; + let code = read_bytes(&info, ci, code_length); + let exception_table_length = read_u16(&info, ci) as usize; - let mut code_index = 10 + code_length; let mut exception_table = vec![]; for _ in 0..exception_table_length { - exception_table.push(Exception::read(&info, code_index)); - code_index += 8; + exception_table.push(Exception::read(&info, ci)); } - let attribute_count = read_u16(&info, code_index); - code_index += 2; + let attribute_count = read_u16(&info, ci); let mut code_attributes = HashMap::new(); for _ in 0..attribute_count { - if let Some(att) = read_attribute(constant_pool.clone(), &info, &mut code_index) + if let Some(att) = read_attribute(constant_pool.clone(), &info, ci) { code_attributes.insert(att.0, att.1); } diff --git a/src/io.rs b/src/io.rs index 43ec56f..e32bea2 100644 --- a/src/io.rs +++ b/src/io.rs @@ -56,57 +56,71 @@ pub fn read_bytecode(name: String) -> Result, Error> { // methods to read values from big-endian binary data -pub(crate) fn read_u8(data: &[u8], pos: usize) -> u8 { +pub(crate) fn read_u8(data: &[u8], pos: &mut usize) -> u8 { + *pos += 1; u8::from_be_bytes( - data[pos..pos + 1] + data[*pos - 1..*pos] .try_into() .expect("slice with incorrect length"), ) } -pub(crate) fn read_u16(data: &[u8], pos: usize) -> u16 { +pub(crate) fn read_bytes(data: &[u8], pos: &mut usize, len: usize) -> Vec { + *pos += len; + data[*pos - len..*pos] + .try_into() + .expect("slice with incorrect length") +} + +pub(crate) fn read_u16(data: &[u8], pos: &mut usize) -> u16 { + *pos += 2; u16::from_be_bytes( - data[pos..pos + 2] + data[*pos - 2..*pos] .try_into() .expect("slice with incorrect length"), ) } -pub(crate) fn read_i32(data: &[u8], pos: usize) -> i32 { +pub(crate) fn read_i32(data: &[u8], pos: &mut usize) -> i32 { + *pos += 4; i32::from_be_bytes( - data[pos..pos + 4] + data[*pos - 4..*pos] .try_into() .expect("slice with incorrect length"), ) } -pub(crate) fn read_u32(data: &[u8], pos: usize) -> u32 { +pub(crate) fn read_u32(data: &[u8], pos: &mut usize) -> u32 { + *pos += 4; u32::from_be_bytes( - data[pos..pos + 4] + data[*pos - 4..*pos] .try_into() .expect("slice with incorrect length"), ) } -pub(crate) fn read_f32(data: &[u8], pos: usize) -> f32 { +pub(crate) fn read_f32(data: &[u8], pos: &mut usize) -> f32 { + *pos += 4; f32::from_be_bytes( - data[pos..pos + 4] + data[*pos - 4..*pos] .try_into() .expect("slice with incorrect length"), ) } -pub(crate) fn read_i64(data: &[u8], pos: usize) -> i64 { +pub(crate) fn read_i64(data: &[u8], pos: &mut usize) -> i64 { + *pos += 8; i64::from_be_bytes( - data[pos..pos + 8] + data[*pos - 8..*pos] .try_into() .expect("slice with incorrect length"), ) } -pub(crate) fn read_f64(data: &[u8], pos: usize) -> f64 { +pub(crate) fn read_f64(data: &[u8], pos: &mut usize) -> f64 { + *pos += 8; f64::from_be_bytes( - data[pos..pos + 8] + data[*pos - 8..*pos] .try_into() .expect("slice with incorrect length"), ) diff --git a/src/opcodes.rs b/src/opcodes.rs index e9474f1..ea5e48b 100644 --- a/src/opcodes.rs +++ b/src/opcodes.rs @@ -5,26 +5,26 @@ // pub const dconst_0:u8 = 14; // (0xe) push double 0 // pub const dconst_1:u8 = 15; // (0xf) push double 1 // TODO turn all into references -pub const BIPUSH: &u8 = &16; // (0x10) Push byte -pub const LDC: &u8 = &18; // (0x12) Push item from run-time pub constant pool -pub const LDC_W: &u8 = &19; // (0x13) Push item from run-time constant pool (wide index) -pub const LDC2_W: &u8 = &20; // (0x14) Push long or double from run-time constant pool (wide index) +pub const BIPUSH: u8 = 16; // (0x10) Push byte +pub const LDC: u8 = 18; // (0x12) Push item from run-time pub constant pool +pub const LDC_W: u8 = 19; // (0x13) Push item from run-time constant pool (wide index) +pub const LDC2_W: u8 = 20; // (0x14) Push long or double from run-time constant pool (wide index) // pub const fload:u8 = 23; // (0x17) Load float from local variable // pub const dload:u8 = 24; // (0x18) load double from local variable // pub const aload:u8 = 25; //0x19 // -pub const FLOAD_0: &u8 = &34; // (0x22) Load float from local variable 0 -pub const FLOAD_1: &u8 = &35; // (0x23) Load float from local variable 1 -pub const FLOAD_2: &u8 = &36; // (0x24) Load float from local variable 2 -pub const FLOAD_3: &u8 = &37; // (0x25) Load float from local variable 3 +pub const FLOAD_0: u8 = 34; // (0x22) Load float from local variable 0 +pub const FLOAD_1: u8 = 35; // (0x23) Load float from local variable 1 +pub const FLOAD_2: u8 = 36; // (0x24) Load float from local variable 2 +pub const FLOAD_3: u8 = 37; // (0x25) Load float from local variable 3 // pub const dload_0:u8 = 38; // (0x26) Load double 0 from local variable // pub const dload_1:u8 = 39; // (0x27) Load double 1 from local variable // pub const dload_2:u8 = 40; // (0x28) Load double 2 from local variable // pub const dload_3:u8 = 41; // (0x29) Load double 3 from local variable -pub const ALOAD_0: &u8 = &42; // (0x2a) -pub const ALOAD_1: &u8 = &43;// (0x2a) -pub const ALOAD_2: &u8 = &44;// (0x2b) -pub const ALOAD_3: &u8 = &45;// (0x2c) +pub const ALOAD_0: u8 = 42; // (0x2a) +pub const ALOAD_1: u8 = 43;// (0x2a) +pub const ALOAD_2: u8 = 44;// (0x2b) +pub const ALOAD_3: u8 = 45;// (0x2c) // pub const faload: u8 = 48; // (0x30) Load float from array // pub const daload:u8 = 49; // (0x31) load double from array @@ -51,8 +51,8 @@ pub const ALOAD_3: &u8 = &45;// (0x2c) // pub const bastore:u8 = 84; // (0x54) // // pub const castore:u8 = 85; // (0x55) -pub const POP: &u8 = &87; // (0x57) Pop the top operand stack value -pub const DUP: &u8 = &89; // (0x59) duplicate the top operand stack value +pub const POP: u8 = 87; // (0x57) Pop the top operand stack value +pub const DUP: u8 = 89; // (0x59) duplicate the top operand stack value // pub const dup_x1: u8 = 90; // (0x5a) Duplicate the top operand stack value and insert two values down // pub const dup_x2: u8 = 91; // (0x5b) Duplicate the top operand stack value and insert two or three values down // pub const dup2: u8 = 92; // (0x5c) Duplicate the top one or two operand stack values @@ -82,18 +82,18 @@ pub const DUP: &u8 = &89; // (0x59) duplicate the top operand stack value // pub const dcmpl:u8 = 151; // (0x97) compare double (less than) // pub const dcmpg:u8 = 152; // (0x98) compare double (greater than) // -pub const IRETURN: &u8 = &172; // (0xac) ireturn -pub const FRETURN: &u8 = &174; // (0xae) Return float from method -pub const DRETURN: &u8 = &175; // (0xaf) Return double from method +pub const IRETURN: u8 = 172; // (0xac) ireturn +pub const FRETURN: u8 = 174; // (0xae) Return float from method +pub const DRETURN: u8 = 175; // (0xaf) Return double from method // pub const areturn: u8 = 176; //(0xb0) return reference -pub const RETURN_VOID: &u8 = &177; // (0xb1) Return void from method (actually 'return' but that's a keyword) +pub const RETURN_VOID: u8 = 177; // (0xb1) Return void from method (actually 'return' but that's a keyword) // pub const getstatic: u8 = 178; // (0xb2) Get static field from class -pub const GETFIELD: &u8 = &180; // (0xb4) Fetch field from object3 -pub const PUTFIELD: &u8 = &181; // (0xb5) Set field in object -pub const INVOKEVIRTUAL: &u8 = &182; // (0xb6) Invoke instance method; dispatch based on class -pub const NEW: &u8 = &187; // (0xbb) Create new object +pub const GETFIELD: u8 = 180; // (0xb4) Fetch field from object3 +pub const PUTFIELD: u8 = 181; // (0xb5) Set field in object +pub const INVOKEVIRTUAL: u8 = 182; // (0xb6) Invoke instance method; dispatch based on class +pub const NEW: u8 = 187; // (0xbb) Create new object // -pub const INVOKESPECIAL: &u8 = &183; // (0xb7) // nvoke instance method; direct invocation of instance initialization methods and methods of the current class and its supertypes +pub const INVOKESPECIAL: u8 = 183; // (0xb7) // nvoke instance method; direct invocation of instance initialization methods and methods of the current class and its supertypes // pub const anewarray: u8 = 189; // (0xbd) // // pub const arraylength: u8 = 190; // (0xbe) diff --git a/src/vm.rs b/src/vm.rs index 6557e75..293d4ee 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -111,17 +111,15 @@ impl Vm { let stackframe = StackFrame::new(class_name, method_name); self.stack.push(stackframe); - let mut pc: usize = 0; - while pc < code.opcodes.len() { - let opcode = &code.opcodes[pc]; - pc += 1; + let mut pc= &mut 0; + while *pc < code.opcodes.len() { + let opcode = read_u8(&code.opcodes, pc); println!("opcode {} ", opcode); match opcode { BIPUSH => { println!("BISPUSH"); - let c = code.opcodes[pc] as i32; - self.local_stack().push(Arc::new(UnsafeCell::new(Value::I32(c)))); - pc += 1; + let c =read_u8(&code.opcodes, pc); + self.local_stack().push(Arc::new(UnsafeCell::new(Value::I32(c as i32)))); } LDC => { println!("LDC"); @@ -135,7 +133,6 @@ impl Vm { } _ => {} } - pc += 1; } LDC_W => { let cp_index = read_u16(&code.opcodes, pc); @@ -150,7 +147,6 @@ impl Vm { panic!("unexpected") } } - pc += 2; } LDC2_W => { let cp_index = read_u16(&code.opcodes, pc); @@ -166,7 +162,6 @@ impl Vm { } } - pc += 2; } FLOAD_0 => { self.local_stack().push(args[0].clone()); @@ -241,7 +236,6 @@ impl Vm { } } } - pc += 2; } PUTFIELD => { println!("PUTFIELD"); @@ -259,7 +253,6 @@ impl Vm { } } } - pc += 2; } INVOKEVIRTUAL => { let cp_index = read_u16(&code.opcodes, pc); @@ -279,7 +272,6 @@ impl Vm { } } } - pc += 2; } INVOKESPECIAL => { println!("INVOKESPECIAL"); @@ -300,7 +292,6 @@ impl Vm { } } } - pc += 2; } NEW => { let class_index = read_u16(&code.opcodes, pc); @@ -318,7 +309,6 @@ impl Vm { self.heap.new_object(object); } } - pc += 2; } //TODO implement all opcodes _ => {