made class reading a little smarter

This commit is contained in:
Sander Hautvast 2023-10-02 16:03:51 +02:00
parent 1f0a916b12
commit 6a96ce56ee
5 changed files with 124 additions and 141 deletions

View file

@ -217,12 +217,12 @@ pub struct Exception {
}
impl Exception {
pub fn read(code: &[u8], index: usize) -> Self {
pub fn read(code: &[u8], index: &mut usize) -> Self {
Self {
start_pc: read_u16(code, index),
end_pc: read_u16(code, index + 2),
handler_pc: read_u16(code, index + 4),
catch_type: read_u16(code, index + 6),
end_pc: read_u16(code, index),
handler_pc: read_u16(code, index),
catch_type: read_u16(code, index),
}
}
}

View file

@ -1,15 +1,17 @@
use crate::class::{AttributeType, Class, Exception, Field, Method, MethodCode};
use crate::io::{read_f32, read_f64, read_i32, read_i64, read_u16, read_u32};
use crate::io::{read_bytes, read_f32, read_f64, read_i32, read_i64, read_u16, read_u32, read_u8};
use anyhow::Error;
use std::collections::HashMap;
use std::rc::Rc;
pub fn load_class(bytecode: Vec<u8>) -> Result<Class, Error> {
check_magic(&bytecode);
let pos = &mut 0;
check_magic(&bytecode, pos);
let minor_version = read_u16(&bytecode, pos);
let major_version = read_u16(&bytecode, pos);
let constant_pool_count = read_u16(&bytecode, 8);
let constant_pool_count = read_u16(&bytecode, pos);
// println!("cp count: {}", constant_pool_count);
let mut index = 10;
let mut constant_pool: HashMap<u16, CpEntry> =
HashMap::with_capacity(constant_pool_count as usize);
let mut cp_index = 1;
@ -17,46 +19,40 @@ pub fn load_class(bytecode: Vec<u8>) -> Result<Class, Error> {
// println!("cp#{}", cp_index);
constant_pool.insert(
cp_index,
read_constant_pool_entry(&mut cp_index, &mut index, &bytecode),
read_constant_pool_entry(&mut cp_index, pos, &bytecode),
);
cp_index += 1;
}
let constant_pool = Rc::new(constant_pool);
let access_flags = read_u16(&bytecode, index);
let this_class = read_u16(&bytecode, index + 2);
let super_class = read_u16(&bytecode, index + 4);
let access_flags = read_u16(&bytecode, pos);
let this_class = read_u16(&bytecode, pos);
let super_class = read_u16(&bytecode, pos);
let interfaces_count = read_u16(&bytecode, index + 6);
// println!("interfaces count: {}", interfaces_count);
index += 8;
let interfaces_count = read_u16(&bytecode, pos);
let mut interfaces = vec![];
for _ in 0..interfaces_count {
interfaces.push(read_u16(&bytecode, index));
index += 2;
interfaces.push(read_u16(&bytecode, pos));
}
let fields_count = read_u16(&bytecode, index);
index += 2;
let fields_count = read_u16(&bytecode, pos);
let mut fields = vec![];
for _ in 0..fields_count {
fields.push(read_field(constant_pool.clone(), &mut index, &bytecode));
fields.push(read_field(constant_pool.clone(), pos, &bytecode));
}
let methods_count = read_u16(&bytecode, index);
index += 2;
let methods_count = read_u16(&bytecode, pos);
let mut methods = HashMap::new();
for _ in 0..methods_count {
let m = read_method(constant_pool.clone(), &mut index, &bytecode);
let m = read_method(constant_pool.clone(), pos, &bytecode);
methods.insert(m.name(), m);
}
let attributes_count = read_u16(&bytecode, index);
index += 2;
let attributes_count = read_u16(&bytecode, pos);
let mut attributes = HashMap::new();
for _ in 0..attributes_count {
let some = read_attribute(constant_pool.clone(), &bytecode, &mut index);
let some = read_attribute(constant_pool.clone(), &bytecode, pos);
if let Some(att) = some {
attributes.insert(att.0, att.1);
} else {
@ -65,8 +61,8 @@ pub fn load_class(bytecode: Vec<u8>) -> Result<Class, Error> {
}
Ok(Class {
minor_version: read_u16(&bytecode, 4),
major_version: read_u16(&bytecode, 6),
minor_version,
major_version,
constant_pool,
access_flags,
this_class,
@ -78,78 +74,67 @@ pub fn load_class(bytecode: Vec<u8>) -> Result<Class, Error> {
})
}
fn check_magic(bytecode: &[u8]) {
if bytecode[0..4] != [0xCA, 0xFE, 0xBA, 0xBE] {
fn check_magic(bytecode: &[u8], pos: &mut usize) {
if bytecode[*pos..*pos + 4] != [0xCA, 0xFE, 0xBA, 0xBE] {
panic!("Invalid class file");
}
*pos += 4;
}
fn read_constant_pool_entry(cp_index: &mut u16, index: &mut usize, bytecode: &[u8]) -> CpEntry {
let tag = bytecode[*index];
// println!("#{}: {}", cp_index, tag);
let tag = read_u8(bytecode, index);
match tag {
1 => {
let len = read_u16(bytecode, *index + 1) as usize;
let utf: Vec<u8> = Vec::from(&bytecode[*index + 3..*index + 3 + len]);
*index += len + 3;
let len = read_u16(bytecode, index) as usize;
let utf: Vec<u8> = read_bytes(&bytecode, index, len);
CpEntry::Utf8(String::from_utf8(utf).unwrap())
}
3 => {
let value = read_i32(bytecode, *index + 1);
*index += 5;
let value = read_i32(bytecode, index);
CpEntry::Integer(value)
}
4 => {
let value = read_f32(bytecode, *index + 1);
*index += 5;
let value = read_f32(bytecode, index);
CpEntry::Float(value)
}
5 => {
let value = read_i64(bytecode, *index + 1);
*index += 9;
let r = CpEntry::Long(value);
let value = read_i64(bytecode, index);
let val = CpEntry::Long(value);
*cp_index += 1;
r
val
}
6 => {
let value = read_f64(bytecode, *index + 1);
*index += 9;
let r = CpEntry::Double(value);
let value = read_f64(bytecode, index);
let val = CpEntry::Double(value); //TODO order can be smarter
*cp_index += 1;
r
val
}
7 => {
let name_index = read_u16(bytecode, *index + 1);
*index += 3;
let name_index = read_u16(bytecode, index);
CpEntry::ClassRef(name_index)
}
8 => {
let string_index = read_u16(bytecode, *index + 1);
*index += 3;
let string_index = read_u16(bytecode, index);
CpEntry::StringRef(string_index)
}
9 => {
let class_index = read_u16(bytecode, *index + 1);
let name_and_type_index = read_u16(bytecode, *index + 3);
*index += 5;
let class_index = read_u16(bytecode, index);
let name_and_type_index = read_u16(bytecode, index);
CpEntry::Fieldref(class_index, name_and_type_index)
}
10 => {
let class_index = read_u16(bytecode, *index + 1);
let name_and_type_index = read_u16(bytecode, *index + 3);
*index += 5;
let class_index = read_u16(bytecode, index);
let name_and_type_index = read_u16(bytecode, index);
CpEntry::MethodRef(class_index, name_and_type_index)
}
11 => {
let class_index = read_u16(bytecode, *index + 1);
let name_and_type_index = read_u16(bytecode, *index + 3);
*index += 5;
let class_index = read_u16(bytecode, index);
let name_and_type_index = read_u16(bytecode, index);
CpEntry::InterfaceMethodref(class_index, name_and_type_index)
}
12 => {
let name_index = read_u16(bytecode, *index + 1);
let descriptor_index = read_u16(bytecode, *index + 3);
*index += 5;
let name_index = read_u16(bytecode, index);
let descriptor_index = read_u16(bytecode, index);
CpEntry::NameAndType(name_index, descriptor_index)
}
// 15 MethodHandle,
@ -167,11 +152,10 @@ fn read_field(
index: &mut usize,
bytecode: &[u8],
) -> Field {
let access_flags = read_u16(bytecode, *index);
let name_index = read_u16(bytecode, *index + 2);
let descriptor_index = read_u16(bytecode, *index + 4);
let attributes_count = read_u16(bytecode, *index + 6);
*index += 8;
let access_flags = read_u16(bytecode, index);
let name_index = read_u16(bytecode, index);
let descriptor_index = read_u16(bytecode, index);
let attributes_count = read_u16(bytecode, index);
let mut attributes = HashMap::new();
for _ in 0..attributes_count {
if let Some(att) = read_attribute(constant_pool.clone(), bytecode, index) {
@ -194,11 +178,10 @@ fn read_method(
index: &mut usize,
bytecode: &[u8],
) -> Method {
let access_flags = read_u16(bytecode, *index);
let name_index = read_u16(bytecode, *index + 2);
let descriptor_index = read_u16(bytecode, *index + 4);
let attributes_count = read_u16(bytecode, *index + 6);
*index += 8;
let access_flags = read_u16(bytecode, index);
let name_index = read_u16(bytecode, index);
let descriptor_index = read_u16(bytecode, index);
let attributes_count = read_u16(bytecode, index);
let mut attributes = HashMap::new();
for _ in 0..attributes_count {
@ -221,10 +204,8 @@ fn read_attribute(
bytecode: &[u8],
index: &mut usize,
) -> Option<(String, AttributeType)> {
let attribute_name_index = read_u16(bytecode, *index);
*index += 2;
let attribute_length = read_u32(bytecode, *index) as usize;
*index += 4;
let attribute_name_index = read_u16(bytecode, index);
let attribute_length = read_u32(bytecode, index) as usize;
let info: Vec<u8> = Vec::from(&bytecode[*index..*index + attribute_length]);
*index += attribute_length;
@ -235,27 +216,25 @@ fn read_attribute(
assert_eq!(info.len(), 2);
Some((
"ConstantValue".into(),
AttributeType::ConstantValue(read_u16(&info, 0)),
AttributeType::ConstantValue(read_u16(&info, &mut 0)),
))
}
"Code" => {
let max_stack = read_u16(&info, 0);
let max_locals = read_u16(&info, 2);
let code_length = read_u32(&info, 4) as usize;
let code = Vec::from(&info[8..8 + code_length]);
let exception_table_length = read_u16(&info, 8 + code_length) as usize;
let ci = &mut 0;
let max_stack = read_u16(&info, ci);
let max_locals = read_u16(&info, ci);
let code_length = read_u32(&info, ci) as usize;
let code = read_bytes(&info, ci, code_length);
let exception_table_length = read_u16(&info, ci) as usize;
let mut code_index = 10 + code_length;
let mut exception_table = vec![];
for _ in 0..exception_table_length {
exception_table.push(Exception::read(&info, code_index));
code_index += 8;
exception_table.push(Exception::read(&info, ci));
}
let attribute_count = read_u16(&info, code_index);
code_index += 2;
let attribute_count = read_u16(&info, ci);
let mut code_attributes = HashMap::new();
for _ in 0..attribute_count {
if let Some(att) = read_attribute(constant_pool.clone(), &info, &mut code_index)
if let Some(att) = read_attribute(constant_pool.clone(), &info, ci)
{
code_attributes.insert(att.0, att.1);
}

View file

@ -56,57 +56,71 @@ pub fn read_bytecode(name: String) -> Result<Vec<u8>, Error> {
// methods to read values from big-endian binary data
pub(crate) fn read_u8(data: &[u8], pos: usize) -> u8 {
pub(crate) fn read_u8(data: &[u8], pos: &mut usize) -> u8 {
*pos += 1;
u8::from_be_bytes(
data[pos..pos + 1]
data[*pos - 1..*pos]
.try_into()
.expect("slice with incorrect length"),
)
}
pub(crate) fn read_u16(data: &[u8], pos: usize) -> u16 {
/// Copies `len` bytes starting at `*pos` out of `data` and advances `*pos` past them.
///
/// Returns the copied bytes as an owned `Vec<u8>`; a `len` of 0 yields an empty
/// vector and leaves `*pos` unchanged.
///
/// # Panics
/// Panics if the range `*pos..*pos + len` does not lie within `data`.
pub(crate) fn read_bytes(data: &[u8], pos: &mut usize, len: usize) -> Vec<u8> {
    let start = *pos;
    let end = start + len;
    // Slicing performs the bounds check; copying a slice into a Vec cannot fail,
    // so no fallible conversion (and no misleading `expect` message) is needed.
    let bytes = data[start..end].to_vec();
    // Only move the cursor once the read has succeeded.
    *pos = end;
    bytes
}
pub(crate) fn read_u16(data: &[u8], pos: &mut usize) -> u16 {
*pos += 2;
u16::from_be_bytes(
data[pos..pos + 2]
data[*pos - 2..*pos]
.try_into()
.expect("slice with incorrect length"),
)
}
pub(crate) fn read_i32(data: &[u8], pos: usize) -> i32 {
pub(crate) fn read_i32(data: &[u8], pos: &mut usize) -> i32 {
*pos += 4;
i32::from_be_bytes(
data[pos..pos + 4]
data[*pos - 4..*pos]
.try_into()
.expect("slice with incorrect length"),
)
}
pub(crate) fn read_u32(data: &[u8], pos: usize) -> u32 {
pub(crate) fn read_u32(data: &[u8], pos: &mut usize) -> u32 {
*pos += 4;
u32::from_be_bytes(
data[pos..pos + 4]
data[*pos - 4..*pos]
.try_into()
.expect("slice with incorrect length"),
)
}
pub(crate) fn read_f32(data: &[u8], pos: usize) -> f32 {
pub(crate) fn read_f32(data: &[u8], pos: &mut usize) -> f32 {
*pos += 4;
f32::from_be_bytes(
data[pos..pos + 4]
data[*pos - 4..*pos]
.try_into()
.expect("slice with incorrect length"),
)
}
pub(crate) fn read_i64(data: &[u8], pos: usize) -> i64 {
pub(crate) fn read_i64(data: &[u8], pos: &mut usize) -> i64 {
*pos += 8;
i64::from_be_bytes(
data[pos..pos + 8]
data[*pos - 8..*pos]
.try_into()
.expect("slice with incorrect length"),
)
}
pub(crate) fn read_f64(data: &[u8], pos: usize) -> f64 {
pub(crate) fn read_f64(data: &[u8], pos: &mut usize) -> f64 {
*pos += 8;
f64::from_be_bytes(
data[pos..pos + 8]
data[*pos - 8..*pos]
.try_into()
.expect("slice with incorrect length"),
)

View file

@ -5,26 +5,26 @@
// pub const dconst_0:u8 = 14; // (0xe) push double 0
// pub const dconst_1:u8 = 15; // (0xf) push double 1
// TODO turn all into references
pub const BIPUSH: &u8 = &16; // (0x10) Push byte
pub const LDC: &u8 = &18; // (0x12) Push item from run-time constant pool
pub const LDC_W: &u8 = &19; // (0x13) Push item from run-time constant pool (wide index)
pub const LDC2_W: &u8 = &20; // (0x14) Push long or double from run-time constant pool (wide index)
pub const BIPUSH: u8 = 16; // (0x10) Push byte
pub const LDC: u8 = 18; // (0x12) Push item from run-time constant pool
pub const LDC_W: u8 = 19; // (0x13) Push item from run-time constant pool (wide index)
pub const LDC2_W: u8 = 20; // (0x14) Push long or double from run-time constant pool (wide index)
// pub const fload:u8 = 23; // (0x17) Load float from local variable
// pub const dload:u8 = 24; // (0x18) load double from local variable
// pub const aload:u8 = 25; //0x19
//
pub const FLOAD_0: &u8 = &34; // (0x22) Load float from local variable 0
pub const FLOAD_1: &u8 = &35; // (0x23) Load float from local variable 1
pub const FLOAD_2: &u8 = &36; // (0x24) Load float from local variable 2
pub const FLOAD_3: &u8 = &37; // (0x25) Load float from local variable 3
pub const FLOAD_0: u8 = 34; // (0x22) Load float from local variable 0
pub const FLOAD_1: u8 = 35; // (0x23) Load float from local variable 1
pub const FLOAD_2: u8 = 36; // (0x24) Load float from local variable 2
pub const FLOAD_3: u8 = 37; // (0x25) Load float from local variable 3
// pub const dload_0:u8 = 38; // (0x26) Load double 0 from local variable
// pub const dload_1:u8 = 39; // (0x27) Load double 1 from local variable
// pub const dload_2:u8 = 40; // (0x28) Load double 2 from local variable
// pub const dload_3:u8 = 41; // (0x29) Load double 3 from local variable
pub const ALOAD_0: &u8 = &42; // (0x2a) Load reference from local variable 0
pub const ALOAD_1: &u8 = &43;// (0x2b) Load reference from local variable 1
pub const ALOAD_2: &u8 = &44;// (0x2c) Load reference from local variable 2
pub const ALOAD_3: &u8 = &45;// (0x2d) Load reference from local variable 3
pub const ALOAD_0: u8 = 42; // (0x2a) Load reference from local variable 0
pub const ALOAD_1: u8 = 43;// (0x2b) Load reference from local variable 1
pub const ALOAD_2: u8 = 44;// (0x2c) Load reference from local variable 2
pub const ALOAD_3: u8 = 45;// (0x2d) Load reference from local variable 3
// pub const faload: u8 = 48; // (0x30) Load float from array
// pub const daload:u8 = 49; // (0x31) load double from array
@ -51,8 +51,8 @@ pub const ALOAD_3: &u8 = &45;// (0x2c)
// pub const bastore:u8 = 84; // (0x54)
//
// pub const castore:u8 = 85; // (0x55)
pub const POP: &u8 = &87; // (0x57) Pop the top operand stack value
pub const DUP: &u8 = &89; // (0x59) duplicate the top operand stack value
pub const POP: u8 = 87; // (0x57) Pop the top operand stack value
pub const DUP: u8 = 89; // (0x59) duplicate the top operand stack value
// pub const dup_x1: u8 = 90; // (0x5a) Duplicate the top operand stack value and insert two values down
// pub const dup_x2: u8 = 91; // (0x5b) Duplicate the top operand stack value and insert two or three values down
// pub const dup2: u8 = 92; // (0x5c) Duplicate the top one or two operand stack values
@ -82,18 +82,18 @@ pub const DUP: &u8 = &89; // (0x59) duplicate the top operand stack value
// pub const dcmpl:u8 = 151; // (0x97) compare double (less than)
// pub const dcmpg:u8 = 152; // (0x98) compare double (greater than)
//
pub const IRETURN: &u8 = &172; // (0xac) ireturn
pub const FRETURN: &u8 = &174; // (0xae) Return float from method
pub const DRETURN: &u8 = &175; // (0xaf) Return double from method
pub const IRETURN: u8 = 172; // (0xac) ireturn
pub const FRETURN: u8 = 174; // (0xae) Return float from method
pub const DRETURN: u8 = 175; // (0xaf) Return double from method
// pub const areturn: u8 = 176; //(0xb0) return reference
pub const RETURN_VOID: &u8 = &177; // (0xb1) Return void from method (actually 'return' but that's a keyword)
pub const RETURN_VOID: u8 = 177; // (0xb1) Return void from method (actually 'return' but that's a keyword)
// pub const getstatic: u8 = 178; // (0xb2) Get static field from class
pub const GETFIELD: &u8 = &180; // (0xb4) Fetch field from object
pub const PUTFIELD: &u8 = &181; // (0xb5) Set field in object
pub const INVOKEVIRTUAL: &u8 = &182; // (0xb6) Invoke instance method; dispatch based on class
pub const NEW: &u8 = &187; // (0xbb) Create new object
pub const GETFIELD: u8 = 180; // (0xb4) Fetch field from object
pub const PUTFIELD: u8 = 181; // (0xb5) Set field in object
pub const INVOKEVIRTUAL: u8 = 182; // (0xb6) Invoke instance method; dispatch based on class
pub const NEW: u8 = 187; // (0xbb) Create new object
//
pub const INVOKESPECIAL: &u8 = &183; // (0xb7) Invoke instance method; direct invocation of instance initialization methods and methods of the current class and its supertypes
pub const INVOKESPECIAL: u8 = 183; // (0xb7) Invoke instance method; direct invocation of instance initialization methods and methods of the current class and its supertypes
// pub const anewarray: u8 = 189; // (0xbd)
//
// pub const arraylength: u8 = 190; // (0xbe)

View file

@ -111,17 +111,15 @@ impl Vm {
let stackframe = StackFrame::new(class_name, method_name);
self.stack.push(stackframe);
let mut pc: usize = 0;
while pc < code.opcodes.len() {
let opcode = &code.opcodes[pc];
pc += 1;
let mut pc= &mut 0;
while *pc < code.opcodes.len() {
let opcode = read_u8(&code.opcodes, pc);
println!("opcode {} ", opcode);
match opcode {
BIPUSH => {
println!("BISPUSH");
let c = code.opcodes[pc] as i32;
self.local_stack().push(Arc::new(UnsafeCell::new(Value::I32(c))));
pc += 1;
let c =read_u8(&code.opcodes, pc);
self.local_stack().push(Arc::new(UnsafeCell::new(Value::I32(c as i32))));
}
LDC => {
println!("LDC");
@ -135,7 +133,6 @@ impl Vm {
}
_ => {}
}
pc += 1;
}
LDC_W => {
let cp_index = read_u16(&code.opcodes, pc);
@ -150,7 +147,6 @@ impl Vm {
panic!("unexpected")
}
}
pc += 2;
}
LDC2_W => {
let cp_index = read_u16(&code.opcodes, pc);
@ -166,7 +162,6 @@ impl Vm {
}
}
pc += 2;
}
FLOAD_0 => {
self.local_stack().push(args[0].clone());
@ -241,7 +236,6 @@ impl Vm {
}
}
}
pc += 2;
}
PUTFIELD => {
println!("PUTFIELD");
@ -259,7 +253,6 @@ impl Vm {
}
}
}
pc += 2;
}
INVOKEVIRTUAL => {
let cp_index = read_u16(&code.opcodes, pc);
@ -279,7 +272,6 @@ impl Vm {
}
}
}
pc += 2;
}
INVOKESPECIAL => {
println!("INVOKESPECIAL");
@ -300,7 +292,6 @@ impl Vm {
}
}
}
pc += 2;
}
NEW => {
let class_index = read_u16(&code.opcodes, pc);
@ -318,7 +309,6 @@ impl Vm {
self.heap.new_object(object);
}
}
pc += 2;
}
//TODO implement all opcodes
_ => {