Skip to content

Commit 9792001

Browse files
authored
Add newtype of CodeUnits (#6241)
1 parent d8a4a09 commit 9792001

File tree

4 files changed

+77
-48
lines changed

4 files changed

+77
-48
lines changed

compiler/codegen/src/ir.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ use crate::{IndexMap, IndexSet, error::InternalError};
44
use rustpython_compiler_core::{
55
OneIndexed, SourceLocation,
66
bytecode::{
7-
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label,
8-
OpArg, PyCodeLocationInfoKind,
7+
CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, InstrDisplayContext, Instruction,
8+
Label, OpArg, PyCodeLocationInfoKind,
99
},
1010
};
1111

@@ -214,7 +214,7 @@ impl CodeInfo {
214214
qualname: qualname.unwrap_or(obj_name),
215215

216216
max_stackdepth,
217-
instructions: instructions.into_boxed_slice(),
217+
instructions: CodeUnits::from(instructions),
218218
locations: locations.into_boxed_slice(),
219219
constants: constants.into_iter().collect(),
220220
names: name_cache.into_iter().collect(),

compiler/core/src/bytecode.rs

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
//! Implement python as a virtual machine with bytecode. This module
22
//! implements bytecode structure.
33
4-
use crate::{OneIndexed, SourceLocation};
4+
use crate::{
5+
marshal::MarshalError,
6+
{OneIndexed, SourceLocation},
7+
};
58
use bitflags::bitflags;
69
use itertools::Itertools;
710
use malachite_bigint::BigInt;
811
use num_complex::Complex64;
912
use rustpython_wtf8::{Wtf8, Wtf8Buf};
10-
use std::{collections::BTreeSet, fmt, hash, marker::PhantomData, mem};
13+
use std::{collections::BTreeSet, fmt, hash, marker::PhantomData, mem, ops::Deref};
1114

1215
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
1316
#[repr(i8)]
@@ -195,7 +198,7 @@ impl ConstantBag for BasicBag {
195198
/// a code object. Also a module has a code object.
196199
#[derive(Clone)]
197200
pub struct CodeObject<C: Constant = ConstantData> {
198-
pub instructions: Box<[CodeUnit]>,
201+
pub instructions: CodeUnits,
199202
pub locations: Box<[SourceLocation]>,
200203
pub flags: CodeFlags,
201204
/// Number of positional-only arguments
@@ -257,6 +260,12 @@ impl OpArgByte {
257260
}
258261
}
259262

263+
impl From<u8> for OpArgByte {
264+
fn from(raw: u8) -> Self {
265+
Self(raw)
266+
}
267+
}
268+
260269
impl fmt::Debug for OpArgByte {
261270
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
262271
self.0.fmt(f)
@@ -808,14 +817,14 @@ impl From<Instruction> for u8 {
808817
}
809818

810819
impl TryFrom<u8> for Instruction {
811-
type Error = crate::marshal::MarshalError;
820+
type Error = MarshalError;
812821

813822
#[inline]
814-
fn try_from(value: u8) -> Result<Self, crate::marshal::MarshalError> {
823+
fn try_from(value: u8) -> Result<Self, MarshalError> {
815824
if value <= u8::from(LAST_INSTRUCTION) {
816825
Ok(unsafe { std::mem::transmute::<u8, Self>(value) })
817826
} else {
818-
Err(crate::marshal::MarshalError::InvalidBytecode)
827+
Err(MarshalError::InvalidBytecode)
819828
}
820829
}
821830
}
@@ -835,6 +844,58 @@ impl CodeUnit {
835844
}
836845
}
837846

847+
impl TryFrom<&[u8]> for CodeUnit {
848+
type Error = MarshalError;
849+
850+
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
851+
match value.len() {
852+
2 => Ok(Self::new(value[0].try_into()?, value[1].into())),
853+
_ => Err(Self::Error::InvalidBytecode),
854+
}
855+
}
856+
}
857+
858+
#[derive(Clone)]
859+
pub struct CodeUnits(Box<[CodeUnit]>);
860+
861+
impl TryFrom<&[u8]> for CodeUnits {
862+
type Error = MarshalError;
863+
864+
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
865+
if !value.len().is_multiple_of(2) {
866+
return Err(Self::Error::InvalidBytecode);
867+
}
868+
869+
value.chunks_exact(2).map(CodeUnit::try_from).collect()
870+
}
871+
}
872+
873+
impl<const N: usize> From<[CodeUnit; N]> for CodeUnits {
874+
fn from(value: [CodeUnit; N]) -> Self {
875+
Self(Box::from(value))
876+
}
877+
}
878+
879+
impl From<Vec<CodeUnit>> for CodeUnits {
880+
fn from(value: Vec<CodeUnit>) -> Self {
881+
Self(value.into_boxed_slice())
882+
}
883+
}
884+
885+
impl FromIterator<CodeUnit> for CodeUnits {
886+
fn from_iter<T: IntoIterator<Item = CodeUnit>>(iter: T) -> Self {
887+
Self(iter.into_iter().collect())
888+
}
889+
}
890+
891+
impl Deref for CodeUnits {
892+
type Target = [CodeUnit];
893+
894+
fn deref(&self) -> &Self::Target {
895+
&self.0
896+
}
897+
}
898+
838899
use self::Instruction::*;
839900

840901
bitflags! {

compiler/core/src/marshal.rs

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -165,19 +165,6 @@ impl<'a> ReadBorrowed<'a> for &'a [u8] {
165165
}
166166
}
167167

168-
/// Parses bytecode bytes into CodeUnit instructions.
169-
/// Each instruction is 2 bytes: opcode and argument.
170-
pub fn parse_instructions_from_bytes(bytes: &[u8]) -> Result<Box<[CodeUnit]>> {
171-
bytes
172-
.chunks_exact(2)
173-
.map(|cu| {
174-
let op = Instruction::try_from(cu[0])?;
175-
let arg = OpArgByte(cu[1]);
176-
Ok(CodeUnit { op, arg })
177-
})
178-
.collect()
179-
}
180-
181168
pub struct Cursor<B> {
182169
pub data: B,
183170
pub position: usize,
@@ -197,8 +184,8 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
197184
bag: Bag,
198185
) -> Result<CodeObject<Bag::Constant>> {
199186
let len = rdr.read_u32()?;
200-
let instructions = rdr.read_slice(len * 2)?;
201-
let instructions = parse_instructions_from_bytes(instructions)?;
187+
let raw_instructions = rdr.read_slice(len * 2)?;
188+
let instructions = CodeUnits::try_from(raw_instructions)?;
202189

203190
let len = rdr.read_u32()?;
204191
let locations = (0..len)

vm/src/builtins/code.rs

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
/*! Infamous code object. The python class `code`
2-
3-
*/
1+
//! Infamous code object. The python class `code`
42
53
use super::{PyBytesRef, PyStrRef, PyTupleRef, PyType, PyTypeRef};
64
use crate::{
75
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine,
86
builtins::PyStrInterned,
9-
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, CodeUnit, Constant, ConstantBag},
7+
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, Constant, ConstantBag},
108
class::{PyClassImpl, StaticType},
119
convert::ToPyObject,
1210
frozen,
@@ -15,11 +13,7 @@ use crate::{
1513
};
1614
use malachite_bigint::BigInt;
1715
use num_traits::Zero;
18-
use rustpython_compiler_core::{
19-
OneIndexed,
20-
bytecode::PyCodeLocationInfoKind,
21-
marshal::{MarshalError, parse_instructions_from_bytes},
22-
};
16+
use rustpython_compiler_core::{OneIndexed, bytecode::CodeUnits, bytecode::PyCodeLocationInfoKind};
2317
use std::{borrow::Borrow, fmt, ops::Deref};
2418

2519
/// State for iterating through code address ranges
@@ -457,7 +451,7 @@ impl Constructor for PyCode {
457451

458452
// Parse and validate bytecode from bytes
459453
let bytecode_bytes = args.co_code.as_bytes();
460-
let instructions = parse_bytecode(bytecode_bytes)
454+
let instructions = CodeUnits::try_from(bytecode_bytes)
461455
.map_err(|e| vm.new_value_error(format!("invalid bytecode: {}", e)))?;
462456

463457
// Convert constants
@@ -925,7 +919,7 @@ impl PyCode {
925919
let instructions = match co_code {
926920
OptionalArg::Present(code_bytes) => {
927921
// Parse and validate bytecode from bytes
928-
parse_bytecode(code_bytes.as_bytes())
922+
CodeUnits::try_from(code_bytes.as_bytes())
929923
.map_err(|e| vm.new_value_error(format!("invalid bytecode: {}", e)))?
930924
}
931925
OptionalArg::Missing => self.code.instructions.clone(),
@@ -1033,19 +1027,6 @@ impl ToPyObject for bytecode::CodeObject {
10331027
}
10341028
}
10351029

1036-
/// Validates and parses bytecode bytes into CodeUnit instructions.
1037-
/// Returns MarshalError if bytecode is invalid (odd length or contains invalid opcodes).
1038-
/// Note: Returning MarshalError is not necessary at this point because this is not a part of marshalling API.
1039-
/// However, we (temporarily) reuse MarshalError for simplicity.
1040-
fn parse_bytecode(bytecode_bytes: &[u8]) -> Result<Box<[CodeUnit]>, MarshalError> {
1041-
// Bytecode must have even length (each instruction is 2 bytes)
1042-
if !bytecode_bytes.len().is_multiple_of(2) {
1043-
return Err(MarshalError::InvalidBytecode);
1044-
}
1045-
1046-
parse_instructions_from_bytes(bytecode_bytes)
1047-
}
1048-
10491030
// Helper struct for reading linetable
10501031
struct LineTableReader<'a> {
10511032
data: &'a [u8],

0 commit comments

Comments
 (0)