-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathload-array.lisp
More file actions
101 lines (99 loc) · 5.08 KB
/
load-array.lisp
File metadata and controls
101 lines (99 loc) · 5.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
(in-package #:numpy-file-format)
(defun load-array-metadata (filename)
  "Parse the header of the Numpy file FILENAME.

Returns four values: the \"shape\" entry of the header dictionary, the
result of calling DTYPE-FROM-CODE on its \"descr\" entry, its
\"fortran_order\" entry, and the number of bits that precede the array data
(magic string, version bytes, length field and header text).

Signals an error if the file does not start with the Numpy magic string."
  (with-open-file (in filename :direction :input :element-type '(unsigned-byte 8))
    (flet ((read-little-endian (octets)
             ;; Combine the next OCTETS bytes, least significant byte first,
             ;; into one unsigned integer.
             (loop for position from 0 below octets
                   sum (ash (read-byte in) (* 8 position)))))
      ;; Every Numpy file starts with the six magic bytes \x93 N U M P Y.
      (unless (loop for expected in '(#x93 78 85 77 80 89)
                    always (eql (read-byte in) expected))
        (error "Not a Numpy file."))
      (let* (;; Two unsigned bytes follow: the major and the minor version
             ;; of the file format, e.g. \x01 \x00.
             (major-version (read-byte in))
             (minor-version (read-byte in))
             ;; HEADER_LEN is a little-endian unsigned integer that is two
             ;; bytes wide in version 1.0 and four bytes wide otherwise.
             (length-octets (if (= major-version 1) 2 4))
             (header-len (read-little-endian length-octets))
             ;; The header itself is HEADER_LEN bytes of ASCII text holding
             ;; a Python dictionary literal, terminated by a newline and
             ;; padded with spaces for alignment purposes.
             (header (make-string header-len :element-type 'base-char)))
        (declare (ignore minor-version))
        (dotimes (index header-len)
          (setf (schar header index) (code-char (read-byte in))))
        (let ((dict (read-python-object-from-string header)))
          (values
           (gethash "shape" dict)
           (dtype-from-code (gethash "descr" dict))
           (gethash "fortran_order" dict)
           ;; 6 magic bytes + 2 version bytes + length field + header text,
           ;; converted from bytes to bits.
           (* 8 (+ 6 2 length-octets header-len))))))))
(defun load-array (filename)
  "Read the Numpy file FILENAME and return its contents as a Lisp array.

The dimensions, dtype, storage order and header size are obtained from
LOAD-ARRAY-METADATA.  The returned array is created with the element type
given by DTYPE-TYPE and is filled in row-major order.

Signals an error if the file is stored in Fortran order, if the endianness
of its dtype differs from +ENDIANNESS+, or if the header size is not a whole
number of stream elements."
  ;; We actually open the file twice, once to read the metadata - one byte
  ;; at a time, and once to read the array contents with a suitable element
  ;; type (e.g. (unsigned-byte 32) for single precision floating-point
  ;; numbers).
  (multiple-value-bind (dimensions dtype fortran-order header-bits)
      (load-array-metadata filename)
    ;; Reject unsupported files before allocating the result array.
    ;; TODO Respect fortran-order and endianness.
    (when fortran-order
      (error "Reading arrays in Fortran order is not yet supported."))
    (unless (eq (dtype-endianness dtype) +endianness+)
      (error "Endianness conversion is not yet supported."))
    (let* ((element-type (dtype-type dtype))
           (array (make-array dimensions :element-type element-type))
           (total-size (array-total-size array))
           ;; The width in bits of one stream element.  A complex number is
           ;; read as two consecutive elements (real part, imaginary part),
           ;; each half as wide as the dtype.
           (chunk-size (if (subtypep element-type 'complex)
                           (/ (dtype-size dtype) 2)
                           (dtype-size dtype)))
           (stream-element-type
             ;; Floating-point data, including the components of complex
             ;; numbers, is read as unsigned bit patterns so that the
             ;; ieee-floats decoders never receive a negative integer.
             (if (or (eq element-type 'double-float)
                     (eq element-type 'single-float)
                     (subtypep element-type 'complex)
                     (subtypep element-type '(unsigned-byte *)))
                 `(unsigned-byte ,chunk-size)
                 `(signed-byte ,chunk-size))))
      (with-open-file (stream filename :element-type stream-element-type)
        ;; Skip the header.  The stream can only advance by whole elements,
        ;; so a header that is not a multiple of the element size would
        ;; leave every subsequent read misaligned.
        (multiple-value-bind (skip remainder) (floor header-bits chunk-size)
          (unless (zerop remainder)
            (error "The header size of ~D bits is not a multiple of the element size of ~D bits."
                   header-bits chunk-size))
          (loop repeat skip do (read-byte stream)))
        (etypecase array
          ((simple-array single-float)
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (ieee-floats:decode-float32 (read-byte stream)))))
          ((simple-array double-float)
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (ieee-floats:decode-float64 (read-byte stream)))))
          ((simple-array (complex single-float))
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (complex
                    (ieee-floats:decode-float32 (read-byte stream))
                    (ieee-floats:decode-float32 (read-byte stream))))))
          ((simple-array (complex double-float))
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (complex
                    (ieee-floats:decode-float64 (read-byte stream))
                    (ieee-floats:decode-float64 (read-byte stream))))))
          ;; Everything else is stored exactly as it is read from the stream.
          ((simple-array *)
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (read-byte stream))))))
      array)))