Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions compiler/codegen/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ use crate::{IndexMap, IndexSet, error::InternalError};
use rustpython_compiler_core::{
OneIndexed, SourceLocation,
bytecode::{
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label,
OpArg, PyCodeLocationInfoKind,
CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, InstrDisplayContext, Instruction,
Label, OpArg, PyCodeLocationInfoKind,
},
};

Expand Down Expand Up @@ -214,7 +214,7 @@ impl CodeInfo {
qualname: qualname.unwrap_or(obj_name),

max_stackdepth,
instructions: instructions.into_boxed_slice(),
instructions: CodeUnits::from(instructions),
locations: locations.into_boxed_slice(),
constants: constants.into_iter().collect(),
names: name_cache.into_iter().collect(),
Expand Down
73 changes: 67 additions & 6 deletions compiler/core/src/bytecode.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
//! Implements Python as a virtual machine with bytecode. This module
//! implements the bytecode structure.

use crate::{OneIndexed, SourceLocation};
use crate::{
marshal::MarshalError,
{OneIndexed, SourceLocation},
};
use bitflags::bitflags;
use itertools::Itertools;
use malachite_bigint::BigInt;
use num_complex::Complex64;
use rustpython_wtf8::{Wtf8, Wtf8Buf};
use std::{collections::BTreeSet, fmt, hash, marker::PhantomData, mem};
use std::{collections::BTreeSet, fmt, hash, marker::PhantomData, mem, ops::Deref};

#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
#[repr(i8)]
Expand Down Expand Up @@ -195,7 +198,7 @@ impl ConstantBag for BasicBag {
/// a code object. Also a module has a code object.
#[derive(Clone)]
pub struct CodeObject<C: Constant = ConstantData> {
pub instructions: Box<[CodeUnit]>,
pub instructions: CodeUnits,
pub locations: Box<[SourceLocation]>,
pub flags: CodeFlags,
/// Number of positional-only arguments
Expand Down Expand Up @@ -257,6 +260,12 @@ impl OpArgByte {
}
}

impl From<u8> for OpArgByte {
fn from(raw: u8) -> Self {
Self(raw)
}
}

impl fmt::Debug for OpArgByte {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
Expand Down Expand Up @@ -808,14 +817,14 @@ impl From<Instruction> for u8 {
}

// Fallible decoding of a raw opcode byte into an `Instruction`.
// Bytes greater than `u8::from(LAST_INSTRUCTION)` are rejected with
// `MarshalError::InvalidBytecode`.
impl TryFrom<u8> for Instruction {
type Error = crate::marshal::MarshalError;
type Error = MarshalError;

#[inline]
fn try_from(value: u8) -> Result<Self, crate::marshal::MarshalError> {
fn try_from(value: u8) -> Result<Self, MarshalError> {
if value <= u8::from(LAST_INSTRUCTION) {
// SAFETY (review note): the guard above bounds `value` by the last opcode.
// Soundness presumably relies on `Instruction` having a `u8` representation
// with contiguous discriminants starting at 0 — TODO confirm against the enum.
Ok(unsafe { std::mem::transmute::<u8, Self>(value) })
} else {
Err(crate::marshal::MarshalError::InvalidBytecode)
Err(MarshalError::InvalidBytecode)
}
}
}
Expand All @@ -835,6 +844,58 @@ impl CodeUnit {
}
}

impl TryFrom<&[u8]> for CodeUnit {
type Error = MarshalError;

fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
match value.len() {
2 => Ok(Self::new(value[0].try_into()?, value[1].into())),
_ => Err(Self::Error::InvalidBytecode),
}
}
}

/// Owned sequence of [`CodeUnit`]s, stored as a boxed slice.
#[derive(Clone)]
pub struct CodeUnits(Box<[CodeUnit]>);

impl TryFrom<&[u8]> for CodeUnits {
type Error = MarshalError;

fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
if !value.len().is_multiple_of(2) {
return Err(Self::Error::InvalidBytecode);
}

value.chunks_exact(2).map(CodeUnit::try_from).collect()
}
}

/// Builds [`CodeUnits`] from a fixed-size array by boxing it as a slice.
impl<const N: usize> From<[CodeUnit; N]> for CodeUnits {
    fn from(value: [CodeUnit; N]) -> Self {
        // Box the array, then let it unsize to a boxed slice.
        let units: Box<[CodeUnit]> = Box::new(value);
        Self(units)
    }
}

/// Builds [`CodeUnits`] from a vector by converting it into a boxed slice.
impl From<Vec<CodeUnit>> for CodeUnits {
    fn from(value: Vec<CodeUnit>) -> Self {
        let units: Box<[CodeUnit]> = value.into();
        Self(units)
    }
}

/// Collects any iterator of [`CodeUnit`]s into a [`CodeUnits`].
impl FromIterator<CodeUnit> for CodeUnits {
    fn from_iter<T: IntoIterator<Item = CodeUnit>>(iter: T) -> Self {
        let units = Box::<[CodeUnit]>::from_iter(iter);
        Self(units)
    }
}

/// Lets a [`CodeUnits`] be used wherever a `&[CodeUnit]` is expected.
impl Deref for CodeUnits {
    type Target = [CodeUnit];

    fn deref(&self) -> &[CodeUnit] {
        // Explicitly reborrow the boxed slice's contents.
        &*self.0
    }
}

use self::Instruction::*;

bitflags! {
Expand Down
17 changes: 2 additions & 15 deletions compiler/core/src/marshal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,19 +165,6 @@ impl<'a> ReadBorrowed<'a> for &'a [u8] {
}
}

/// Parses bytecode bytes into CodeUnit instructions.
/// Each instruction is 2 bytes: opcode and argument.
pub fn parse_instructions_from_bytes(bytes: &[u8]) -> Result<Box<[CodeUnit]>> {
bytes
.chunks_exact(2)
.map(|cu| {
let op = Instruction::try_from(cu[0])?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this used in new code?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let arg = OpArgByte(cu[1]);
Ok(CodeUnit { op, arg })
})
.collect()
}

pub struct Cursor<B> {
pub data: B,
pub position: usize,
Expand All @@ -197,8 +184,8 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
bag: Bag,
) -> Result<CodeObject<Bag::Constant>> {
let len = rdr.read_u32()?;
let instructions = rdr.read_slice(len * 2)?;
let instructions = parse_instructions_from_bytes(instructions)?;
let raw_instructions = rdr.read_slice(len * 2)?;
let instructions = CodeUnits::try_from(raw_instructions)?;

let len = rdr.read_u32()?;
let locations = (0..len)
Expand Down
29 changes: 5 additions & 24 deletions vm/src/builtins/code.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
/*! Infamous code object. The python class `code`

*/
//! Infamous code object. The python class `code`

use super::{PyBytesRef, PyStrRef, PyTupleRef, PyType, PyTypeRef};
use crate::{
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine,
builtins::PyStrInterned,
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, CodeUnit, Constant, ConstantBag},
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, Constant, ConstantBag},
class::{PyClassImpl, StaticType},
convert::ToPyObject,
frozen,
Expand All @@ -15,11 +13,7 @@ use crate::{
};
use malachite_bigint::BigInt;
use num_traits::Zero;
use rustpython_compiler_core::{
OneIndexed,
bytecode::PyCodeLocationInfoKind,
marshal::{MarshalError, parse_instructions_from_bytes},
};
use rustpython_compiler_core::{OneIndexed, bytecode::CodeUnits, bytecode::PyCodeLocationInfoKind};
use std::{borrow::Borrow, fmt, ops::Deref};

/// State for iterating through code address ranges
Expand Down Expand Up @@ -457,7 +451,7 @@ impl Constructor for PyCode {

// Parse and validate bytecode from bytes
let bytecode_bytes = args.co_code.as_bytes();
let instructions = parse_bytecode(bytecode_bytes)
let instructions = CodeUnits::try_from(bytecode_bytes)
.map_err(|e| vm.new_value_error(format!("invalid bytecode: {}", e)))?;

// Convert constants
Expand Down Expand Up @@ -925,7 +919,7 @@ impl PyCode {
let instructions = match co_code {
OptionalArg::Present(code_bytes) => {
// Parse and validate bytecode from bytes
parse_bytecode(code_bytes.as_bytes())
CodeUnits::try_from(code_bytes.as_bytes())
.map_err(|e| vm.new_value_error(format!("invalid bytecode: {}", e)))?
}
OptionalArg::Missing => self.code.instructions.clone(),
Expand Down Expand Up @@ -1033,19 +1027,6 @@ impl ToPyObject for bytecode::CodeObject {
}
}

/// Checks the length of raw bytecode and parses it into `CodeUnit` instructions.
///
/// Returns `MarshalError::InvalidBytecode` when the input has an odd number of
/// bytes; otherwise the result of `parse_instructions_from_bytes` is returned
/// unchanged (including any error it reports).
///
/// Note: this is not part of the marshalling API, so returning `MarshalError`
/// is not strictly necessary; it is (temporarily) reused for simplicity.
fn parse_bytecode(bytecode_bytes: &[u8]) -> Result<Box<[CodeUnit]>, MarshalError> {
    // Each instruction is two bytes, so only even-length input is well-formed.
    if bytecode_bytes.len().is_multiple_of(2) {
        parse_instructions_from_bytes(bytecode_bytes)
    } else {
        Err(MarshalError::InvalidBytecode)
    }
}

// Helper struct for reading linetable
struct LineTableReader<'a> {
data: &'a [u8],
Expand Down
Loading