Skip to content

Commit 7c24e09

Browse files
author
Scott Griffiths
committed
Making the Dtype class visible.
Adding a length multiplier for dtypes so that we don't need the hacky special-case code around the bytes type. The replacement code is still a bit hacky for now, though.
1 parent c718793 commit 7c24e09

File tree

11 files changed

+95
-35
lines changed

11 files changed

+95
-35
lines changed

bitstring/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
from .methods import pack
6969
from .array_ import Array
7070
from .exceptions import Error, ReadError, InterpretError, ByteAlignError, CreationError
71-
from .dtypes import MetaDtype, Register
71+
from .dtypes import MetaDtype, Register, Dtype
7272
import types
7373
from typing import List, Tuple
7474
from .utils import initialise_constants
@@ -151,7 +151,7 @@ def lsb0(self, value: bool) -> None:
151151
MetaDtype('bits', 'a bitstring object',
152152
Bits._setbits, Bits._readbits, None, False, False, False, False, None),
153153
MetaDtype('bytes', 'a bytes object',
154-
Bits._setbytes, Bits._readbytes, Bits._getbytes,False, False, False, False, None),
154+
Bits._setbytes, Bits._readbytes, Bits._getbytes,False, False, False, False, None, 8),
155155
MetaDtype('bool', 'a bool (True or False)',
156156
Bits._setbool, Bits._readbool, Bits._getbool, True, False, False, False, 1),
157157
MetaDtype('se', 'a signed exponential-Golomb code',
@@ -197,4 +197,4 @@ def lsb0(self, value: bool) -> None:
197197

198198
__all__ = ['ConstBitStream', 'BitStream', 'BitArray', 'Array',
199199
'Bits', 'pack', 'Error', 'ReadError', 'InterpretError',
200-
'ByteAlignError', 'CreationError', 'bytealigned', 'lsb0']
200+
'ByteAlignError', 'CreationError', 'bytealigned', 'lsb0', 'Dtype']

bitstring/bits.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ class Bits:
9494

9595
__slots__ = ('_bitstore')
9696

97-
_options = None
98-
_register = None
97+
_options: Optional[Options] = None
98+
_register: Optional[Register] = None
9999

100100
@classmethod
101101
def _initialise_options(cls):
@@ -669,7 +669,7 @@ def _setbytes(self, data: Union[bytearray, bytes],
669669
def _readbytes(self, start: int, length: int) -> bytes:
670670
"""Read bytes and return them. Note that length is in bits."""
671671
assert length % 8 == 0
672-
assert start + length <= self.len
672+
# assert start + length <= self.len
673673
return self._bitstore.getslice(slice(start, start + length, None)).tobytes()
674674

675675
def _getbytes(self) -> bytes:
@@ -1198,17 +1198,17 @@ def _absolute_slice(self: TBits, start: int, end: int) -> TBits:
11981198

11991199
def _readtoken(self, name: str, pos: int, length: Optional[int]) -> Tuple[Union[float, int, str, None, Bits], int]:
12001200
"""Reads a token from the bitstring and returns the result."""
1201-
if length is not None and length > self.length - pos:
1202-
raise ReadError("Reading off the end of the data. "
1203-
f"Tried to read {length} bits when only {self.length - pos} available.")
12041201
dtype = Bits._register.get_dtype(name, length)
1202+
if dtype.bitlength is not None and dtype.bitlength > self.length - pos:
1203+
raise ReadError("Reading off the end of the data. "
1204+
f"Tried to read {dtype.bitlength} bits when only {self.length - pos} available.")
12051205
try:
12061206
val = dtype.read_fn(self, pos)
12071207
if isinstance(val, tuple):
12081208
return val
12091209
else:
12101210
assert length is not None
1211-
return val, pos + length
1211+
return val, pos + dtype.bitlength
12121212
except KeyError:
12131213
raise ValueError(f"Can't parse token {name}:{length}")
12141214

@@ -1362,8 +1362,6 @@ def convert_length_strings(length_: Optional[Union[str, int]]) -> Optional[int]:
13621362
if isinstance(length_, str):
13631363
if length_ in kwargs:
13641364
int_length = kwargs[length_]
1365-
if name == 'bytes':
1366-
int_length *= 8
13671365
else:
13681366
int_length = length_
13691367
return int_length
@@ -1398,6 +1396,10 @@ def convert_length_strings(length_: Optional[Union[str, int]]) -> Optional[int]:
13981396
for token in tokens:
13991397
name, length, _ = token
14001398
length = convert_length_strings(length)
1399+
# TODO: Ugly, but not as ugly as the reverse conversion later on. This converts a token length to a length in bits (currently this only affects bytes lengths).
1400+
lm = Bits._register.name_to_meta_dtype[name].length_multiplier
1401+
if lm is not None and length is not None:
1402+
length *= lm
14011403
if stretchy_token:
14021404
if name in Bits._register.unknowable_length_names():
14031405
raise Error(f"It's not possible to parse a variable length token ('{name}') after a 'filler' token.")
@@ -1415,7 +1417,12 @@ def convert_length_strings(length_: Optional[Union[str, int]]) -> Optional[int]:
14151417
if token is stretchy_token:
14161418
# Set length to the remaining bits
14171419
length = max(bits_left - bits_after_stretchy_token, 0)
1420+
# TODO: Very ugly. This converts our bit length back to a token length (i.e. for a bytes token, where the length is given in bytes).
1421+
lm = Bits._register.name_to_meta_dtype[name].length_multiplier
1422+
if lm is not None:
1423+
length //= lm
14181424
length = convert_length_strings(length)
1425+
14191426
value, newpos = self._readtoken(name, pos, length)
14201427
bits_left -= newpos - pos
14211428
pos = newpos

bitstring/bitstore_helpers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from bitstring.fp8 import fp143_fmt, fp152_fmt
1212
from bitstring.bitstore import BitStore
1313

14+
1415
byteorder: str = sys.byteorder
1516

1617
# The size of various caches used to improve performance

bitstring/bitstream.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from bitstring.bitarray import BitArray
55
from bitstring.utils import tokenparser
66
from bitstring.exceptions import ReadError, ByteAlignError, CreationError, InterpretError
7+
from bitstring.dtypes import Dtype
78
from typing import Union, List, Any, Optional, overload, TypeVar, Tuple
89
import copy
910
import numbers
@@ -276,7 +277,7 @@ def read(self, fmt: int) -> Bits:
276277
def read(self, fmt: str) -> Any:
277278
...
278279

279-
def read(self, fmt: Union[int, str]) -> Union[int, float, str, Bits, bool, bytes, None]:
280+
def read(self, fmt: Union[int, str, Dtype]) -> Union[int, float, str, Bits, bool, bytes, None]:
280281
"""Interpret next bits according to the format string and return result.
281282
282283
fmt -- Token string describing how to interpret the next bits.
@@ -320,6 +321,7 @@ def read(self, fmt: Union[int, str]) -> Union[int, float, str, Bits, bool, bytes
320321
bs = self._slice(self._pos, self._pos + fmt)
321322
self._pos += fmt
322323
return bs
324+
323325
p = self._pos
324326
_, token = tokenparser(fmt)
325327
if len(token) != 1:

bitstring/dtypes.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
from __future__ import annotations
22

33
import functools
4-
from bitstring.exceptions import InterpretError
4+
from bitstring.exceptions import InterpretError, CreationError
55
from bitstring.bits import Bits
66
from typing import Optional, Dict, List, Any
77
from bitstring.utils import parse_name_length_token
88

99

1010
class Dtype:
1111

12-
__slots__ = ('name', 'length', 'read_fn', 'set_fn', 'get_fn', 'is_integer', 'is_signed', 'is_float', 'is_fixed_length', 'is_unknown_length')
12+
__slots__ = ('name', 'length', 'bitlength', 'read_fn', 'set_fn', 'get_fn', 'is_integer', 'is_signed', 'is_float', 'is_fixed_length', 'is_unknown_length')
1313

1414
def __new__(cls, token: Optional[str] = None) -> Dtype:
1515
if token is not None:
@@ -20,17 +20,23 @@ def __new__(cls, token: Optional[str] = None) -> Dtype:
2020
else:
2121
return super(Dtype, cls).__new__(cls)
2222

23+
def __hash__(self) -> int:
24+
return 0 # TODO: Optimise :)
25+
2326
@classmethod
24-
def create(cls, name: str, length: Optional[int], set_fn, read_fn, get_fn, is_integer, is_float, is_signed,
25-
is_unknown_length, is_fixed_length) -> Dtype:
27+
def create(cls, name: str, length: Optional[int], set_fn, read_fn, get_fn, is_integer: bool, is_float: bool, is_signed: bool,
28+
is_unknown_length: bool, is_fixed_length: bool, length_multiplier: Optional[int]) -> Dtype:
2629
x = cls.__new__(cls)
2730
x.name = name
2831
x.length = length
29-
x.read_fn = functools.partial(read_fn, length=length)
32+
x.bitlength = length
33+
if length_multiplier is not None:
34+
x.bitlength *= length_multiplier
35+
x.read_fn = functools.partial(read_fn, length=x.bitlength)
3036
if set_fn is None:
3137
x.set_fn = None
3238
else:
33-
x.set_fn = functools.partial(set_fn, length=length)
39+
x.set_fn = functools.partial(set_fn, length=x.bitlength)
3440
x.get_fn = get_fn
3541
x.is_integer = is_integer
3642
x.is_signed = is_signed
@@ -57,7 +63,7 @@ class MetaDtype:
5763
# Represents a class of dtypes, such as uint or float, rather than a concrete dtype such as uint8.
5864

5965
def __init__(self, name: str, description: str, set_fn, read_fn, get_fn, is_integer: bool, is_float: bool, is_signed: bool,
60-
is_unknown_length: bool, length: Optional[int] = None):
66+
is_unknown_length: bool, length: Optional[int] = None, length_multiplier: Optional[int] = None):
6167
# Consistency checks
6268
if is_unknown_length and length is not None:
6369
raise ValueError("Can't set is_unknown_length and give a value for length.")
@@ -72,6 +78,7 @@ def __init__(self, name: str, description: str, set_fn, read_fn, get_fn, is_inte
7278
self.is_fixed_length = length is not None
7379
self.is_unknown_length = is_unknown_length
7480
self.length = length
81+
self.length_multiplier = length_multiplier
7582

7683
self.set_fn = set_fn
7784
self.read_fn = read_fn # With a start and usually a length
@@ -82,7 +89,7 @@ def getDtype(self, length: Optional[int] = None) -> Dtype:
8289
if not self.is_fixed_length and not self.is_unknown_length:
8390
raise ValueError(f"No length given for dtype '{self.name}', and meta type is not fixed length.")
8491
d = Dtype.create(self.name, None, self.set_fn, self.read_fn, self.get_fn, self.is_integer, self.is_float, self.is_signed,
85-
self.is_unknown_length, self.is_fixed_length)
92+
self.is_unknown_length, self.is_fixed_length, self.length_multiplier)
8693
return d
8794
if self.is_unknown_length:
8895
raise ValueError("Length shouldn't be supplied for dtypes that are variable length.")
@@ -91,7 +98,7 @@ def getDtype(self, length: Optional[int] = None) -> Dtype:
9198
raise ValueError # TODO
9299
length = self.length
93100
d = Dtype.create(self.name, length, self.set_fn, self.read_fn, self.get_fn, self.is_integer, self.is_float, self.is_signed,
94-
self.is_unknown_length, self.is_fixed_length)
101+
self.is_unknown_length, self.is_fixed_length, self.length_multiplier)
95102
return d
96103

97104

@@ -100,6 +107,7 @@ class Register:
100107
_instance: Optional[Register] = None
101108

102109
def __new__(cls) -> Register:
110+
# Singleton. Only one Register instance can ever exist.
103111
if cls._instance is None:
104112
cls._instance = super(Register, cls).__new__(cls)
105113
cls.name_to_meta_dtype: Dict[str, MetaDtype] = {}
@@ -122,11 +130,16 @@ def get_dtype(cls, name: str, length: Optional[int]) -> Dtype:
122130
d = meta_type.getDtype(length)
123131
# Test if the length makes sense by trying out the getter. # TODO: Optimise!
124132
if length != 0 and not d.is_unknown_length:
125-
temp = Bits(length)
133+
if meta_type.length_multiplier is not None:
134+
length *= meta_type.length_multiplier
135+
try:
136+
temp = Bits(length)
137+
except CreationError as e:
138+
raise ValueError(f"Invalid Dtype: {e}")
126139
try:
127140
_ = d.read_fn(temp, 0)
128141
except InterpretError as e:
129-
raise ValueError(f"Invalid Dtype: {e.msg}")
142+
raise ValueError(f"Invalid Dtype: {e}")
130143
return d
131144

132145
# TODO: This should be only calculated if the register has been altered since the last time it was called.

bitstring/methods.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ def pack(fmt: Union[str, List[str]], *values, **kwargs) -> BitStream:
6969
continue
7070
if length is not None:
7171
length = int(length)
72+
# TODO: ugly
73+
lm = Bits._register.name_to_meta_dtype[name].length_multiplier
74+
if lm is not None:
75+
length *= lm
7276
if value is None and name != 'pad':
7377
# Take the next value from the ones provided
7478
value = next(value_iter)

bitstring/utils.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def structparser(m: Match[str]) -> List[str]:
9595

9696

9797
@functools.lru_cache(CACHE_SIZE)
98-
def parse_name_length_token(fmt: str) -> Tuple[str, int]:
98+
def parse_name_length_token(fmt: str) -> Tuple[str, Optional[int]]:
9999
# Any single token with just a name and length
100100
m = SINGLE_STRUCT_PACK_RE.match(fmt)
101101
if m:
@@ -127,8 +127,8 @@ def parse_name_length_token(fmt: str) -> Tuple[str, int]:
127127
raise ValueError(f"{name} tokens can only be {token_length} bits long, not {length} bits.")
128128
length = token_length
129129

130-
if length is None:
131-
length = 0
130+
# if length is None:
131+
# length = 0
132132
return name, length
133133

134134

@@ -226,9 +226,6 @@ def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
226226
length = int(length)
227227
if length < 0:
228228
raise Error
229-
# For the 'bytes' token convert length to bits.
230-
if name == 'bytes':
231-
length *= 8
232229
except Error:
233230
raise ValueError("Can't read a token with a negative length.")
234231
except ValueError:

doc/interpretation.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@ Note that these properties can potentially be very expensive in terms of both co
1111

1212
If you're in an interactive session then the pretty-print method :meth:`~Bits.pp` can be useful as it will only convert the bitstring one chunk at a time for display.
1313

14+
Dtypes
15+
------
16+
17+
The concept of a data type (or 'dtype') is used in the bitstring module to encapsulate how to create, parse and present different bit interpretations.
18+
19+
.. class:: Dtype(token: str | None = None)
20+
21+
Creates a :class:`Dtype` object based on the `token` string.
22+
1423

1524
Short Interpretations
1625
---------------------

tests/test_bitstream.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3266,12 +3266,12 @@ def testRolErrors(self):
32663266
a.rol(-1)
32673267

32683268
def testBytesToken(self):
3269-
a = BitStream('0x010203')
3269+
a = BitStream('0x510203')
32703270
b = a.read('bytes:1')
32713271
self.assertTrue(isinstance(b, bytes))
3272-
self.assertEqual(b, b'\x01')
3272+
self.assertEqual(b, b'\x51')
32733273
x, y, z = a.unpack('uint:4, bytes:2, uint')
3274-
self.assertEqual(x, 0)
3274+
self.assertEqual(x, 5)
32753275
self.assertEqual(y, b'\x10\x20')
32763276
self.assertEqual(z, 3)
32773277
s = pack('bytes:4', b'abcd')
@@ -3770,7 +3770,7 @@ def testLengthKeywordErrors(self):
37703770
a = pack('uint:p=33', p=12)
37713771
with self.assertRaises(ValueError):
37723772
a.unpack('uint:p')
3773-
with self.assertRaises(TypeError):
3773+
with self.assertRaises(ValueError):
37743774
a.unpack('uint:p', p='a_string')
37753775

37763776

tests/test_bitstring.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class ModuleData(unittest.TestCase):
2323
def testAll(self):
2424
exported = ['ConstBitStream', 'BitStream', 'BitArray',
2525
'Bits', 'pack', 'Error', 'ReadError', 'Array',
26-
'InterpretError', 'ByteAlignError', 'CreationError', 'bytealigned', 'lsb0']
26+
'InterpretError', 'ByteAlignError', 'CreationError', 'bytealigned', 'lsb0', 'Dtype']
2727
self.assertEqual(set(bitstring.__all__), set(exported))
2828

2929
def testReverseDict(self):

0 commit comments

Comments
 (0)