[GEDI] [PULL 11/20] block: use int64_t instead of int in driver discard handlers

Eric Blake eblake at redhat.com
Mon Sep 27 21:55:36 UTC 2021


From: Vladimir Sementsov-Ogievskiy <vsementsov at virtuozzo.com>

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.

We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).

So, convert driver discard handlers bytes parameter to int64_t.

The only caller of all updated function is bdrv_co_pdiscard in
block/io.c. It is already prepared to work with 64bit requests, but
pass at most max(bs->bl.max_pdiscard, INT_MAX) to the driver.

Let's look at all updated functions:

blkdebug: all calculations are still OK, thanks to
  bdrv_check_qiov_request().
  both rule_check and bdrv_co_pdiscard are 64bit

blklogwrites: pass to blk_loc_writes_co_log which is 64bit

blkreplay, copy-on-read, filter-compress: pass to bdrv_co_pdiscard, OK

copy-before-write: pass to bdrv_co_pdiscard which is 64bit and to
  cbw_do_copy_before_write which is 64bit

file-posix: one handler calls raw_account_discard() is 64bit and both
  handlers calls raw_do_pdiscard(). Update raw_do_pdiscard, which pass
  to RawPosixAIOData::aio_nbytes, which is 64bit (and calls
  raw_account_discard())

gluster: somehow, third argument of glfs_discard_async is size_t.
  Let's set max_pdiscard accordingly.

iscsi: iscsi_allocmap_set_invalid is 64bit,
  !is_byte_request_lun_aligned is 64bit.
  list.num is uint32_t. Let's clarify max_pdiscard and
  pdiscard_alignment.

mirror_top: pass to bdrv_mirror_top_do_write() which is
  64bit

nbd: protocol limitation. max_pdiscard is alredy set strict enough,
  keep it as is for now.

nvme: buf.nlb is uint32_t and we do shift. So, add corresponding limits
  to nvme_refresh_limits().

preallocate: pass to bdrv_co_pdiscard() which is 64bit.

rbd: pass to qemu_rbd_start_co() which is 64bit.

qcow2: calculations are still OK, thanks to bdrv_check_qiov_request(),
  qcow2_cluster_discard() is 64bit.

raw-format: raw_adjust_offset() is 64bit, bdrv_co_pdiscard too.

throttle: pass to bdrv_co_pdiscard() which is 64bit and to
  throttle_group_co_io_limits_intercept() which is 64bit as well.

test-block-iothread: bytes argument is unused

Great! Now all drivers are prepared to handle 64bit discard requests,
or else have explicit max_pdiscard limits.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov at virtuozzo.com>
Message-Id: <20210903102807.27127-11-vsementsov at virtuozzo.com>
Reviewed-by: Eric Blake <eblake at redhat.com>
Signed-off-by: Eric Blake <eblake at redhat.com>
---
 include/block/block_int.h        |  2 +-
 block/blkdebug.c                 |  2 +-
 block/blklogwrites.c             |  4 ++--
 block/blkreplay.c                |  2 +-
 block/copy-before-write.c        |  2 +-
 block/copy-on-read.c             |  2 +-
 block/file-posix.c               |  7 ++++---
 block/filter-compress.c          |  2 +-
 block/gluster.c                  |  7 +++++--
 block/iscsi.c                    | 16 +++++++++++-----
 block/mirror.c                   |  2 +-
 block/nbd.c                      |  6 ++++--
 block/nvme.c                     | 14 +++++++++++++-
 block/preallocate.c              |  2 +-
 block/qcow2.c                    |  2 +-
 block/raw-format.c               |  2 +-
 block/rbd.c                      |  4 ++--
 block/throttle.c                 |  2 +-
 tests/unit/test-block-iothread.c |  2 +-
 block/trace-events               |  4 ++--
 20 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9b4e0748bc86..ffe86068d4d5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -303,7 +303,7 @@ struct BlockDriver {
     int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
         int64_t offset, int64_t bytes, BdrvRequestFlags flags);
     int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
-        int64_t offset, int bytes);
+        int64_t offset, int64_t bytes);

     /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
      * and invoke bdrv_co_copy_range_from(child, ...), or invoke
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 742b4a3834d8..bbf294870308 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -717,7 +717,7 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int bytes)
+                                             int64_t offset, int64_t bytes)
 {
     uint32_t align = bs->bl.pdiscard_alignment;
     int err;
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index d7ae64c22d81..f7a251e91f9e 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -484,9 +484,9 @@ static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
 }

 static int coroutine_fn
-blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
+blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
-    return blk_log_writes_co_log(bs, offset, count, NULL, 0,
+    return blk_log_writes_co_log(bs, offset, bytes, NULL, 0,
                                  blk_log_writes_co_do_file_pdiscard,
                                  LOG_DISCARD_FLAG, false);
 }
diff --git a/block/blkreplay.c b/block/blkreplay.c
index 89d74a3cca29..dcbe780ddbd3 100644
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -105,7 +105,7 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
-                                              int64_t offset, int bytes)
+                                              int64_t offset, int64_t bytes)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_pdiscard(bs->file, offset, bytes);
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index d210e87a4511..c30a5ff8dea9 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -64,7 +64,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
 }

 static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int bytes)
+                                        int64_t offset, int64_t bytes)
 {
     int ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
     if (ret < 0) {
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index f83dd83f14ed..1fc7fb3333b1 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -201,7 +201,7 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,


 static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int bytes)
+                                        int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
diff --git a/block/file-posix.c b/block/file-posix.c
index f375070f25bb..c62e42743de1 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2942,7 +2942,8 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret)
 }

 static coroutine_fn int
-raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev)
+raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                bool blkdev)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -2966,7 +2967,7 @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev)
 }

 static coroutine_fn int
-raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return raw_do_pdiscard(bs, offset, bytes, false);
 }
@@ -3591,7 +3592,7 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 #endif /* linux */

 static coroutine_fn int
-hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
diff --git a/block/filter-compress.c b/block/filter-compress.c
index fb85686b6930..d5be538619ae 100644
--- a/block/filter-compress.c
+++ b/block/filter-compress.c
@@ -94,7 +94,7 @@ static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs,


 static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int bytes)
+                                             int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
diff --git a/block/gluster.c b/block/gluster.c
index 4e3c9cd14fd9..398976bc66d2 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -891,6 +891,7 @@ out:
 static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
+    bs->bl.max_pdiscard = SIZE_MAX;
 }

 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
@@ -1297,18 +1298,20 @@ error:

 #ifdef CONFIG_GLUSTERFS_DISCARD
 static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
-                                                 int64_t offset, int size)
+                                                 int64_t offset, int64_t bytes)
 {
     int ret;
     GlusterAIOCB acb;
     BDRVGlusterState *s = bs->opaque;

+    assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */
+
     acb.size = 0;
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);

-    ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
+    ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
     if (ret < 0) {
         return -errno;
     }
diff --git a/block/iscsi.c b/block/iscsi.c
index 74ff7e307ece..57aa07a40d7f 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1138,7 +1138,8 @@ iscsi_getlength(BlockDriverState *bs)
 }

 static int
-coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
+                               int64_t bytes)
 {
     IscsiLun *iscsilun = bs->opaque;
     struct IscsiTask iTask;
@@ -1154,6 +1155,12 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
         return 0;
     }

+    /*
+     * We don't want to overflow list.num which is uint32_t.
+     * We rely on our max_pdiscard.
+     */
+    assert(bytes / iscsilun->block_size <= UINT32_MAX);
+
     list.lba = offset / iscsilun->block_size;
     list.num = bytes / iscsilun->block_size;

@@ -2071,10 +2078,9 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
     }

     if (iscsilun->lbp.lbpu) {
-        if (iscsilun->bl.max_unmap < 0xffffffff / block_size) {
-            bs->bl.max_pdiscard =
-                iscsilun->bl.max_unmap * iscsilun->block_size;
-        }
+        bs->bl.max_pdiscard =
+            MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size,
+                         (uint64_t)UINT32_MAX * iscsilun->block_size);
         bs->bl.pdiscard_alignment =
             iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
     } else {
diff --git a/block/mirror.c b/block/mirror.c
index fab75087b0ba..c962e8b47128 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1508,7 +1508,7 @@ static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
-    int64_t offset, int bytes)
+    int64_t offset, int64_t bytes)
 {
     return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
                                     NULL, 0);
diff --git a/block/nbd.c b/block/nbd.c
index c0c479abe9ce..a66b2c282dc3 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1457,15 +1457,17 @@ static int nbd_client_co_flush(BlockDriverState *bs)
 }

 static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
-                                  int bytes)
+                                  int64_t bytes)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
         .type = NBD_CMD_TRIM,
         .from = offset,
-        .len = bytes,
+        .len = bytes, /* len is uint32_t */
     };

+    assert(bytes <= UINT32_MAX); /* rely on max_pdiscard */
+
     assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
     if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
         return 0;
diff --git a/block/nvme.c b/block/nvme.c
index 2e0fd9e76a2d..1cc7b62bb4b6 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1360,7 +1360,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,

 static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
                                          int64_t offset,
-                                         int bytes)
+                                         int64_t bytes)
 {
     BDRVNVMeState *s = bs->opaque;
     NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
@@ -1387,6 +1387,14 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,

     assert(s->queue_count > 1);

+    /*
+     * Filling the @buf requires @offset and @bytes to satisfy restrictions
+     * defined in nvme_refresh_limits().
+     */
+    assert(QEMU_IS_ALIGNED(bytes, 1UL << s->blkshift));
+    assert(QEMU_IS_ALIGNED(offset, 1UL << s->blkshift));
+    assert((bytes >> s->blkshift) <= UINT32_MAX);
+
     buf = qemu_try_memalign(s->page_size, s->page_size);
     if (!buf) {
         return -ENOMEM;
@@ -1490,6 +1498,10 @@ static void nvme_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.max_pwrite_zeroes = 1ULL << (s->blkshift + 16);
     bs->bl.pwrite_zeroes_alignment = MAX(bs->bl.request_alignment,
                                          1UL << s->blkshift);
+
+    bs->bl.max_pdiscard = (uint64_t)UINT32_MAX << s->blkshift;
+    bs->bl.pdiscard_alignment = MAX(bs->bl.request_alignment,
+                                    1UL << s->blkshift);
 }

 static void nvme_detach_aio_context(BlockDriverState *bs)
diff --git a/block/preallocate.c b/block/preallocate.c
index 99e28d9f086e..1d4233f73008 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -235,7 +235,7 @@ static coroutine_fn int preallocate_co_preadv_part(
 }

 static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs,
-                                               int64_t offset, int bytes)
+                                               int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index 4b2e8694952f..d50901675699 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3996,7 +3996,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
-                                          int64_t offset, int bytes)
+                                          int64_t offset, int64_t bytes)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
diff --git a/block/raw-format.c b/block/raw-format.c
index a2485926b8cd..bda757fd1954 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -302,7 +302,7 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int bytes)
+                                        int64_t offset, int64_t bytes)
 {
     int ret;

diff --git a/block/rbd.c b/block/rbd.c
index 053eb8e48f44..701fbf2b0cf0 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1197,9 +1197,9 @@ static int coroutine_fn qemu_rbd_co_flush(BlockDriverState *bs)
 }

 static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int count)
+                                             int64_t offset, int64_t bytes)
 {
-    return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
+    return qemu_rbd_start_co(bs, offset, bytes, NULL, 0, RBD_AIO_DISCARD);
 }

 #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
diff --git a/block/throttle.c b/block/throttle.c
index c13fe9067f4e..6e8d52fa2451 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -145,7 +145,7 @@ static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int bytes)
+                                             int64_t offset, int64_t bytes)
 {
     ThrottleGroupMember *tgm = bs->opaque;
     throttle_group_co_io_limits_intercept(tgm, bytes, true);
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index c86954c7baa1..aea660aeed86 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -48,7 +48,7 @@ static int coroutine_fn bdrv_test_co_pwritev(BlockDriverState *bs,
 }

 static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs,
-                                              int64_t offset, int bytes)
+                                              int64_t offset, int64_t bytes)
 {
     return 0;
 }
diff --git a/block/trace-events b/block/trace-events
index d8a08563f1ca..f2d0a9b62a7e 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -152,8 +152,8 @@ nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p off
 nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
 nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d"
 nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d"
-nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
-nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
+nvme_dsm(void *s, int64_t offset, int64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
+nvme_dsm_done(void *s, int64_t offset, int64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
 nvme_dma_map_flush(void *s) "s %p"
 nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
 nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d"
-- 
2.31.1



More information about the integration mailing list