Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 119 additions & 67 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@
_setmode = None

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END, Reader, Writer) # noqa: F401

valid_seek_flags = {0, 1, 2} # Hardwired values
if hasattr(os, 'SEEK_HOLE') :
valid_seek_flags.add(os.SEEK_HOLE)
valid_seek_flags.add(os.SEEK_DATA)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)
# when the device block size is available.
DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
Expand Down Expand Up @@ -123,10 +124,10 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
the size of a fixed-size chunk buffer. When no buffering argument is
given, the default buffering policy works as follows:

* Binary files are buffered in fixed-size chunks; the size of the buffer
is chosen using a heuristic trying to determine the underlying device's
"block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
On many systems, the buffer will typically be 4096 or 8192 bytes long.
* Binary files are buffered in fixed-size chunks; the size of the buffer
is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)
when the device block size is available.
On most systems, the buffer will typically be 128 kilobytes long.

* "Interactive" text files (files for which isatty() returns True)
use line buffering. Other text files use the policy described above
Expand Down Expand Up @@ -238,18 +239,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
result = raw
try:
line_buffering = False
if buffering == 1 or buffering < 0 and raw.isatty():
if buffering == 1 or buffering < 0 and raw._isatty_open_only():
buffering = -1
line_buffering = True
if buffering < 0:
buffering = DEFAULT_BUFFER_SIZE
try:
bs = os.fstat(raw.fileno()).st_blksize
except (OSError, AttributeError):
pass
else:
if bs > 1:
buffering = bs
buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE)
if buffering < 0:
raise ValueError("invalid buffering size")
if buffering == 0:
Expand Down Expand Up @@ -941,22 +935,22 @@ def read1(self, size=-1):
return self.read(size)

def write(self, b):
if self.closed:
raise ValueError("write to closed file")
if isinstance(b, str):
raise TypeError("can't write str to binary stream")
with memoryview(b) as view:
if self.closed:
raise ValueError("write to closed file")

n = view.nbytes # Size of any bytes-like object
if n == 0:
return 0
pos = self._pos
if pos > len(self._buffer):
# Inserts null bytes between the current end of the file
# and the new write position.
padding = b'\x00' * (pos - len(self._buffer))
self._buffer += padding
self._buffer[pos:pos + n] = b
self._pos += n
if n == 0:
return 0

pos = self._pos
if pos > len(self._buffer):
# Pad buffer to pos with null bytes.
self._buffer.resize(pos)
self._buffer[pos:pos + n] = view
self._pos += n
return n

def seek(self, pos, whence=0):
Expand Down Expand Up @@ -1470,6 +1464,17 @@ def write(self, b):
return BufferedWriter.write(self, b)


def _new_buffersize(bytes_read):
    """Return an enlarged buffer size after *bytes_read* bytes were read.

    Parallels new_buffersize() in Modules/_io/fileio.c: grow by one
    eighth for large reads, by ``bytes_read + 256`` for small ones, but
    never by less than DEFAULT_BUFFER_SIZE in a single step.
    """
    if bytes_read > 65536:
        growth = bytes_read >> 3
    else:
        growth = 256 + bytes_read
    # Grow by at least DEFAULT_BUFFER_SIZE so small files don't incur
    # many tiny resizes.
    return bytes_read + max(growth, DEFAULT_BUFFER_SIZE)


class FileIO(RawIOBase):
_fd = -1
_created = False
Expand All @@ -1494,6 +1499,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
"""
if self._fd >= 0:
# Have to close the existing file first.
self._stat_atopen = None
try:
if self._closefd:
os.close(self._fd)
Expand Down Expand Up @@ -1573,18 +1579,15 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
os.set_inheritable(fd, False)

self._closefd = closefd
fdfstat = os.fstat(fd)
self._stat_atopen = os.fstat(fd)
try:
if stat.S_ISDIR(fdfstat.st_mode):
if stat.S_ISDIR(self._stat_atopen.st_mode):
raise IsADirectoryError(errno.EISDIR,
os.strerror(errno.EISDIR), file)
except AttributeError:
# Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
# don't exist.
pass
self._blksize = getattr(fdfstat, 'st_blksize', 0)
if self._blksize <= 1:
self._blksize = DEFAULT_BUFFER_SIZE

if _setmode:
# don't translate newlines (\r\n <=> \n)
Expand All @@ -1601,6 +1604,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
if e.errno != errno.ESPIPE:
raise
except:
self._stat_atopen = None
if owned_fd is not None:
os.close(owned_fd)
raise
Expand Down Expand Up @@ -1629,6 +1633,17 @@ def __repr__(self):
return ('<%s name=%r mode=%r closefd=%r>' %
(class_name, name, self.mode, self._closefd))

@property
def _blksize(self):
    """Preferred I/O block size for this file.

    Derived from the stat result captured when the file was opened;
    falls back to DEFAULT_BUFFER_SIZE when no stat result is cached or
    when the platform reports no usable st_blksize (WASI reports 0).
    """
    st = self._stat_atopen
    if st is None:
        return DEFAULT_BUFFER_SIZE
    # WASI sets st_blksize to 0; treat a missing or zero value as
    # "unknown" and use the default.
    return getattr(st, "st_blksize", 0) or DEFAULT_BUFFER_SIZE

def _checkReadable(self):
    """Raise UnsupportedOperation unless the file was opened readable."""
    if self._readable:
        return
    raise UnsupportedOperation('File not open for reading')
Expand All @@ -1640,7 +1655,13 @@ def _checkWritable(self, msg=None):
def read(self, size=None):
"""Read at most size bytes, returned as bytes.

Only makes one system call, so less data may be returned than requested
If size is less than 0, read all bytes in the file making
multiple read calls. See ``FileIO.readall``.

Attempts to make only one system call, retrying only per
PEP 475 (EINTR). This means less data may be returned than
requested.

In non-blocking mode, returns None if no data is available.
Return an empty bytes object at EOF.
"""
Expand All @@ -1656,45 +1677,57 @@ def read(self, size=None):
def readall(self):
"""Read all data from the file, returned as bytes.

In non-blocking mode, returns as much as is immediately available,
or None if no data is available. Return an empty bytes object at EOF.
Reads until either there is an error or read() returns size 0
(indicates EOF). If the file is already at EOF, returns an
empty bytes object.

In non-blocking mode, returns as much data as could be read
before EAGAIN. If no data is available (EAGAIN is returned
before bytes are read) returns None.
"""
self._checkClosed()
self._checkReadable()
bufsize = DEFAULT_BUFFER_SIZE
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
end = os.fstat(self._fd).st_size
if end >= pos:
bufsize = end - pos + 1
except OSError:
pass
if self._stat_atopen is None or self._stat_atopen.st_size <= 0:
bufsize = DEFAULT_BUFFER_SIZE
else:
# In order to detect end of file, need a read() of at least 1
# byte which returns size 0. Oversize the buffer by 1 byte so the
# I/O can be completed with two read() calls (one for all data, one
# for EOF) without needing to resize the buffer.
bufsize = self._stat_atopen.st_size + 1

result = bytearray()
while True:
if len(result) >= bufsize:
bufsize = len(result)
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
n = bufsize - len(result)
try:
chunk = os.read(self._fd, n)
except BlockingIOError:
if result:
break
if self._stat_atopen.st_size > 65536:
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
if self._stat_atopen.st_size >= pos:
bufsize = self._stat_atopen.st_size - pos + 1
except OSError:
pass

result = bytearray(bufsize)
bytes_read = 0
try:
while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
bytes_read += n
if bytes_read >= len(result):
result.resize(_new_buffersize(bytes_read))
except BlockingIOError:
if not bytes_read:
return None
if not chunk: # reached the end of the file
break
result += chunk

assert len(result) - bytes_read >= 1, \
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
result.resize(bytes_read)
return bytes(result)

def readinto(self, b):
def readinto(self, buffer):
"""Same as RawIOBase.readinto()."""
m = memoryview(b).cast('B')
data = self.read(len(m))
n = len(data)
m[:n] = data
return n
self._checkClosed()
self._checkReadable()
try:
return os.readinto(self._fd, buffer)
except BlockingIOError:
return None

def write(self, b):
"""Write bytes b to file, return number written.
Expand Down Expand Up @@ -1744,6 +1777,7 @@ def truncate(self, size=None):
if size is None:
size = self.tell()
os.ftruncate(self._fd, size)
self._stat_atopen = None
return size

def close(self):
Expand All @@ -1753,6 +1787,7 @@ def close(self):
called more than once without error.
"""
if not self.closed:
self._stat_atopen = None
try:
if self._closefd and self._fd >= 0:
os.close(self._fd)
Expand Down Expand Up @@ -1791,6 +1826,21 @@ def isatty(self):
self._checkClosed()
return os.isatty(self._fd)

def _isatty_open_only(self):
    """Checks whether the file is a TTY using an open-only optimization.

    TTYs are always character devices. If the interpreter knows a file
    is not a character device when it would call ``isatty``, it can skip
    that call. Inside ``open()`` there is a fresh stat result that
    contains that information; use the stat result to skip a system
    call. Outside of that context there are TOCTOU issues (the fd could
    be arbitrarily modified by surrounding code), so fall back to a real
    ``os.isatty`` call.
    """
    # Fast path: the cached stat from open() proves this fd is not a
    # character device, so it cannot be a TTY.
    if (self._stat_atopen is not None
            and not stat.S_ISCHR(self._stat_atopen.st_mode)):
        return False
    return os.isatty(self._fd)

@property
def closefd(self):
"""True if the file descriptor will be closed by close()."""
Expand Down Expand Up @@ -2015,8 +2065,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
raise ValueError("invalid encoding: %r" % encoding)

if not codecs.lookup(encoding)._is_text_encoding:
msg = ("%r is not a text encoding; "
"use codecs.open() to handle arbitrary codecs")
msg = "%r is not a text encoding"
raise LookupError(msg % encoding)

if errors is None:
Expand Down Expand Up @@ -2524,9 +2573,12 @@ def read(self, size=None):
size = size_index()
decoder = self._decoder or self._get_decoder()
if size < 0:
chunk = self.buffer.read()
if chunk is None:
raise BlockingIOError("Read returned None.")
# Read everything.
result = (self._get_decoded_chars() +
decoder.decode(self.buffer.read(), final=True))
decoder.decode(chunk, final=True))
if self._snapshot is not None:
self._set_decoded_chars('')
self._snapshot = None
Expand Down
Loading
Loading