From e439ee83c1316b58d480148c9c851f42c1458305 Mon Sep 17 00:00:00 2001
From: Christian Schwarz <me@cschwarz.com>
Date: Wed, 3 Mar 2021 17:15:28 +0100
Subject: [PATCH 37/38] linux: zvol: avoid heap allocation for
 zvol_request_sync=1

The spl_kmem_alloc() call showed up in flamegraphs of a
single-threaded 4k sync write workload at 85k IOPS on an
Intel(R) Xeon(R) Silver 4215 CPU @ 2.50GHz.
Certainly not a huge win, but I believe the change is clean and
easy to maintain down the road.
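
With this change, zvol_request() builds the zv_request_t on its own
stack and heap-allocates only when dispatching to the taskq: the new
zv_request_task_t bundles the request with its taskq_ent_t, and the
taskq callback frees it via zv_request_task_free() once the request
has been handled. A condensed sketch of the dispatch pattern, taken
from the write path below (the read and discard paths follow the
same shape):

    zv_request_t zvr = {
            .zv = zv,
            .bio = bio,
    };

    if (zvol_request_sync) {
            /* Handle the bio inline; nothing is allocated. */
            zvol_write(&zvr);
    } else {
            /* Copy the request by value into a heap-allocated task. */
            zv_request_task_t *task = zv_request_task_create(zvr);
            taskq_dispatch_ent(zvol_taskq,
                zvol_write_task, task, 0, &task->ent);
    }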

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Christian Schwarz <me@cschwarz.com>
Closes #11666
---
 module/os/linux/zfs/zvol_os.c | 93 ++++++++++++++++++++++++-----------
 1 file changed, 64 insertions(+), 29 deletions(-)

Index: zfs/module/os/linux/zfs/zvol_os.c
===================================================================
--- zfs.orig/module/os/linux/zfs/zvol_os.c
+++ zfs/module/os/linux/zfs/zvol_os.c
@@ -56,12 +56,32 @@ struct zvol_state_os {
 taskq_t *zvol_taskq;
 static struct ida zvol_ida;
 
-typedef struct zv_request {
+typedef struct zv_request_stack {
 	zvol_state_t	*zv;
 	struct bio	*bio;
-	taskq_ent_t	ent;
 } zv_request_t;
 
+typedef struct zv_request_task {
+	zv_request_t zvr;
+	taskq_ent_t	ent;
+} zv_request_task_t;
+
+static zv_request_task_t *
+zv_request_task_create(zv_request_t zvr)
+{
+	zv_request_task_t *task;
+	task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
+	taskq_init_ent(&task->ent);
+	task->zvr = zvr;
+	return (task);
+}
+
+static void
+zv_request_task_free(zv_request_task_t *task)
+{
+	kmem_free(task, sizeof (*task));
+}
+
 /*
  * Given a path, return TRUE if path is a ZVOL.
  */
@@ -80,9 +100,8 @@ zvol_is_zvol_impl(const char *path)
 }
 
 static void
-zvol_write(void *arg)
+zvol_write(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	uio_t uio;
@@ -102,7 +121,6 @@ zvol_write(void *arg)
 	if (uio.uio_resid == 0) {
 		rw_exit(&zv->zv_suspend_lock);
 		BIO_END_IO(bio, 0);
-		kmem_free(zvr, sizeof (zv_request_t));
 		return;
 	}
 
@@ -162,13 +180,19 @@ zvol_write(void *arg)
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_discard(void *arg)
+zvol_write_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_write(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_discard(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	zvol_state_t *zv = zvr->zv;
 	uint64_t start = BIO_BI_SECTOR(bio) << 9;
@@ -238,13 +262,19 @@ unlock:
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_read(void *arg)
+zvol_discard_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_discard(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_read(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	uio_t uio;
@@ -295,7 +325,14 @@ zvol_read(void *arg)
 		blk_generic_end_io_acct(q, disk, READ, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
+}
+
+static void
+zvol_read_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_read(&task->zvr);
+	zv_request_task_free(task);
 }
 
 #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
@@ -314,7 +351,6 @@ zvol_request(struct request_queue *q, st
 	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
 	uint64_t size = BIO_BI_SIZE(bio);
 	int rw = bio_data_dir(bio);
-	zv_request_t *zvr;
 
 	if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
 		printk(KERN_INFO
@@ -327,6 +363,12 @@ zvol_request(struct request_queue *q, st
 		goto out;
 	}
 
+	zv_request_t zvr = {
+		.zv = zv,
+		.bio = bio,
+	};
+	zv_request_task_t *task;
+
 	if (rw == WRITE) {
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
 			BIO_END_IO(bio, -SET_ERROR(EROFS));
@@ -357,11 +399,6 @@ zvol_request(struct request_queue *q, st
 			rw_downgrade(&zv->zv_suspend_lock);
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		/*
 		 * We don't want this thread to be blocked waiting for i/o to
 		 * complete, so we instead wait from a taskq callback. The
@@ -394,17 +431,19 @@ zvol_request(struct request_queue *q, st
 		 */
 		if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
 			if (zvol_request_sync) {
-				zvol_discard(zvr);
+				zvol_discard(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_discard, zvr, 0, &zvr->ent);
+				    zvol_discard_task, task, 0, &task->ent);
 			}
 		} else {
 			if (zvol_request_sync) {
-				zvol_write(zvr);
+				zvol_write(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_write, zvr, 0, &zvr->ent);
+				    zvol_write_task, task, 0, &task->ent);
 			}
 		}
 	} else {
@@ -418,19 +457,15 @@ zvol_request(struct request_queue *q, st
 			goto out;
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
 		/* See comment in WRITE case above. */
 		if (zvol_request_sync) {
-			zvol_read(zvr);
+			zvol_read(&zvr);
 		} else {
+			task = zv_request_task_create(zvr);
 			taskq_dispatch_ent(zvol_taskq,
-			    zvol_read, zvr, 0, &zvr->ent);
+			    zvol_read_task, task, 0, &task->ent);
 		}
 	}
 
