Skip to content

Commit

Permalink
Merge pull request truenas#265 from truenas/NAS-129309
Browse files Browse the repository at this point in the history
NAS-129309 / None / Add support for zvol block cloning
  • Loading branch information
ixhamza authored Dec 19, 2024
2 parents d7a872f + 0382188 commit 83e855f
Show file tree
Hide file tree
Showing 7 changed files with 460 additions and 4 deletions.
32 changes: 32 additions & 0 deletions config/kernel-blkdev.m4
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T], [
])
])

dnl #
dnl # Upstream patch for blkdev copy offload support
dnl #
dnl # Registers the compile test named blkdev_copy_offload.  The test
dnl # includes linux/bio.h and linux/blkdev.h and builds a call to
dnl # blkdev_copy_offload() in its eight-argument form:
dnl #   bdev_in, pos_in, pos_out, len, endio, private, gfp, bdev_out
dnl # where endio has the signature void (*)(void *, int, ssize_t).
dnl # The outcome is evaluated by ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_COPY_OFFLOAD], [
ZFS_LINUX_TEST_SRC([blkdev_copy_offload], [
#include <linux/bio.h>
#include <linux/blkdev.h>
], [
struct block_device *bdev_in = NULL, *bdev_out = NULL;
loff_t pos_in = 0, pos_out = 0;
ssize_t ret __attribute__ ((unused));
ssize_t len = 0;
void *private = NULL;
void (*endio)(void *, int, ssize_t) = NULL;
ret = blkdev_copy_offload(bdev_in, pos_in, pos_out, len,
endio, private, GFP_KERNEL, bdev_out);
])
])

dnl #
dnl # Reports the result of the blkdev_copy_offload compile test.  When the
dnl # test built successfully, HAVE_BLKDEV_COPY_OFFLOAD is defined to 1;
dnl # otherwise nothing is defined and "no" is printed.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD], [
AC_MSG_CHECKING([whether blkdev_copy_offload exists])
ZFS_LINUX_TEST_RESULT([blkdev_copy_offload], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLKDEV_COPY_OFFLOAD, 1,
[blkdev_copy_offload exists])
], [
AC_MSG_RESULT(no)
])
])

dnl #
dnl # 2.6.38 API change,
dnl # Added blkdev_put()
Expand Down Expand Up @@ -759,6 +789,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T
ZFS_AC_KERNEL_SRC_BLKDEV_COPY_OFFLOAD
])

AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
Expand All @@ -781,4 +812,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T
ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD
])
2 changes: 2 additions & 0 deletions include/sys/zvol.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ extern int zvol_set_ro(const char *, boolean_t);
extern zvol_state_handle_t *zvol_suspend(const char *);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);
/*
 * Clone a range from one zvol into another.  The Linux request path calls
 * this as (source zvol, source offset, destination zvol, destination
 * offset, length) and passes the negated return value to zvol_end_io().
 * NOTE(review): presumably returns 0 on success and a positive errno on
 * failure -- confirm against the definition.
 */
extern int zvol_clone_range(zvol_state_handle_t *, uint64_t,
zvol_state_handle_t *, uint64_t, uint64_t);

extern int zvol_init(void);
extern void zvol_fini(void);
Expand Down
3 changes: 3 additions & 0 deletions include/sys/zvol_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
uint64_t len);
void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, boolean_t commit);
/*
 * NOTE(review): judging by the name and parameters, this records a
 * clone-range operation (offset, length, block size and an array of nbps
 * block pointers) in the given ZIL under transaction tx -- confirm against
 * the definition, which is not part of this header.
 */
void zvol_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
size_t nbps);
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio);
int zvol_init_impl(void);
Expand Down
137 changes: 137 additions & 0 deletions module/os/linux/zfs/zvol_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <sys/zvol.h>
#include <sys/zvol_impl.h>
#include <cityhash.h>
#include <sys/zfs_znode.h>

#include <linux/blkdev_compat.h>
#include <linux/task_io_accounting_ops.h>
Expand Down Expand Up @@ -68,6 +69,8 @@ static unsigned int zvol_threads = 0;
static unsigned int zvol_blk_mq_threads = 0;
static unsigned int zvol_blk_mq_actual_threads;
static boolean_t zvol_use_blk_mq = B_FALSE;
/*
 * When unset, copy-offload (clone) requests are completed with EOPNOTSUPP
 * and the queue copy limits are set to 0 at minor creation.
 */
static boolean_t zvol_bclone_enabled = B_TRUE;
/*
 * Byte limit used for the queue copy limits at minor creation (capped at
 * BLK_COPY_MAX_BYTES).  0 means the limit is computed from the volume
 * block size instead.
 */
static unsigned long zvol_max_copy_bytes = 0;

/*
* The maximum number of volblocksize blocks to process per thread. Typically,
Expand Down Expand Up @@ -496,6 +499,85 @@ zvol_read_task(void *arg)
zv_request_task_free(task);
}

#ifdef HAVE_BLKDEV_COPY_OFFLOAD
/*
 * Handle a copy-offload (block clone) request submitted to a zvol.
 *
 * zvr->zv is the source zvol.  The destination zvol and the destination
 * range are recovered from the blkdev_copy_offload_io attached to the
 * bio(s) through bi_private.  The request is always completed here via
 * zvol_end_io() with one of:
 *   - EOPNOTSUPP when zvol_bclone_enabled is unset;
 *   - EINVAL when the copy context or its destination bio is missing,
 *     when the source and destination sizes differ, or when no
 *     destination zvol was supplied;
 *   - the result of zvol_clone_range() otherwise.
 */
static void
zvol_clone_range_impl(zv_request_t *zvr)
{
	zvol_state_t *zv_src = zvr->zv, *zv_dst = NULL;
	struct request *req = zvr->rq;
	struct bio *bio = zvr->bio;
	zfs_uio_t uio_src, uio_dst;
	uint64_t len = 0;
	int error = EINVAL, seg = 1;
	struct blkdev_copy_offload_io *offload_io = NULL;

	if (!zvol_bclone_enabled) {
		zvol_end_io(bio, req, -SET_ERROR(EOPNOTSUPP));
		return;
	}

	memset(&uio_src, 0, sizeof (zfs_uio_t));
	memset(&uio_dst, 0, sizeof (zfs_uio_t));

	if (bio) {
		/*
		 * Single-Queue Request: driver_private contains the
		 * destination ZVOL.
		 */
		offload_io = bio->bi_private;

		/*
		 * The destination bio is dereferenced below, so a missing
		 * copy context (or destination bio) must be rejected here
		 * instead of being followed through a NULL pointer.
		 */
		if (offload_io == NULL || offload_io->dst_bio == NULL) {
			zvol_end_io(bio, req, -SET_ERROR(error));
			return;
		}
		if (offload_io->driver_private)
			zv_dst = offload_io->driver_private;

		/* Source and destination must describe equal lengths. */
		if (bio->bi_iter.bi_size !=
		    offload_io->dst_bio->bi_iter.bi_size) {
			zvol_end_io(bio, req, -SET_ERROR(error));
			return;
		}
		zfs_uio_bvec_init(&uio_src, bio, NULL);
		zfs_uio_bvec_init(&uio_dst, offload_io->dst_bio, NULL);
		len = bio->bi_iter.bi_size;
	} else {
		/*
		 * Multi-Queue (MQ) Request: First bio contains information
		 * about destination and the second contains information
		 * about the source
		 */
		struct bio *bio_temp;
		__rq_for_each_bio(bio_temp, req) {
			if (seg == blk_rq_nr_phys_segments(req)) {
				/* Last bio of the request: the source. */
				offload_io = bio_temp->bi_private;
				zfs_uio_bvec_init(&uio_src, bio_temp, NULL);
				if (len != bio_temp->bi_iter.bi_size) {
					zvol_end_io(bio, req,
					    -SET_ERROR(error));
					return;
				}
				if (offload_io && offload_io->driver_private)
					zv_dst = offload_io->driver_private;
			} else {
				/* Earlier bio: the destination range. */
				zfs_uio_bvec_init(&uio_dst, bio_temp, NULL);
				len = bio_temp->bi_iter.bi_size;
			}
			seg++;
		}
	}

	/* Both endpoints are required before attempting the clone. */
	if (!zv_src || !zv_dst) {
		zvol_end_io(bio, req, -SET_ERROR(error));
		return;
	}

	error = zvol_clone_range(zv_src, uio_src.uio_loffset, zv_dst,
	    uio_dst.uio_loffset, len);
	zvol_end_io(bio, req, -error);
}

static void
zvol_clone_range_task(void *arg)
{
zv_request_task_t *task = arg;
zvol_clone_range_impl(&task->zvr);
zv_request_task_free(task);
}
#endif

/*
* Process a BIO or request
Expand Down Expand Up @@ -555,6 +637,24 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
blk_mq_hw_queue);
tq_idx = taskq_hash % ztqs->tqs_cnt;

#ifdef HAVE_BLKDEV_COPY_OFFLOAD
if ((bio && op_is_copy(bio_op(bio))) ||
(rq && op_is_copy(req_op(rq)))) {
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
zvol_end_io(bio, rq, -SET_ERROR(EROFS));
goto out;
}
if (force_sync) {
zvol_clone_range_impl(&zvr);
} else {
task = zv_request_task_create(zvr);
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
zvol_clone_range_task, task, 0, &task->ent);
}
goto out;
}
#endif

if (rw == WRITE) {
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
zvol_end_io(bio, rq, -SET_ERROR(EROFS));
Expand Down Expand Up @@ -1607,6 +1707,10 @@ zvol_os_create_minor(const char *name)
uint64_t hash = zvol_name_hash(name);
uint64_t volthreading;
bool replayed_zil = B_FALSE;
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
struct queue_limits *lim;
uint64_t max_clone_blocks = 1022;
#endif

if (zvol_inhibit_dev)
return (0);
Expand Down Expand Up @@ -1693,6 +1797,33 @@ zvol_os_create_minor(const char *name)
else
replayed_zil = zil_replay(os, zv, zvol_replay_vector);
}
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
lim = &zv->zv_zso->zvo_queue->limits;
lim->max_user_copy_sectors = UINT_MAX;

/*
* When zvol_bclone_enabled is unset, blkdev_copy_offload() should
* return early and fall back to the default path. Existing zvols
* would require export/import to make this applicable.
*/
if (!zvol_bclone_enabled) {
lim->max_copy_hw_sectors = 0;
lim->max_copy_sectors = 0;
} else if (!zvol_max_copy_bytes) {
if (zv->zv_zilog)
max_clone_blocks = zil_max_log_data(zv->zv_zilog,
sizeof (lr_clone_range_t)) / sizeof (blkptr_t);
lim->max_copy_hw_sectors = MIN((doi->doi_data_block_size *
max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
lim->max_copy_sectors = MIN((doi->doi_data_block_size *
max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
} else {
lim->max_copy_hw_sectors = MIN(zvol_max_copy_bytes,
BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
lim->max_copy_sectors = MIN(zvol_max_copy_bytes,
BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
}
#endif
if (replayed_zil)
zil_close(zv->zv_zilog);
zv->zv_zilog = NULL;
Expand Down Expand Up @@ -1934,6 +2065,12 @@ module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
"Process volblocksize blocks per thread");

/*
 * Byte limit applied to the zvol queue copy limits when a minor is
 * created.  0 selects a limit computed from the volume block size.
 */
module_param(zvol_max_copy_bytes, ulong, 0644);
MODULE_PARM_DESC(zvol_max_copy_bytes, "max copy bytes for zvol block cloning");

/*
 * Nonzero (the default) enables block cloning for zvols; zero makes clone
 * requests fail with EOPNOTSUPP.  The description previously said
 * "Disable", which contradicted the parameter's polarity.
 */
module_param(zvol_bclone_enabled, uint, 0644);
MODULE_PARM_DESC(zvol_bclone_enabled, "Enable block cloning for zvols");

#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
module_param(zvol_open_timeout_ms, uint, 0644);
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ int zfs_bclone_enabled = 1;
* a copy of the file and is therefore not the default. However, in certain
* scenarios this behavior may be desirable so a tunable is provided.
*/
static int zfs_bclone_wait_dirty = 0;
int zfs_bclone_wait_dirty = 0;

/*
* Enable Direct I/O. If this setting is 0, then all I/O requests will be
Expand Down
Loading

0 comments on commit 83e855f

Please sign in to comment.