Skip to content

Commit

Permalink
drbd: introduce WRITE_SAME support
Browse files Browse the repository at this point in the history
We will support WRITE_SAME, if
 * all peers support WRITE_SAME (both in kernel and DRBD version),
 * all peer devices support WRITE_SAME
 * logical_block_size is identical on all peers.

We may at some point introduce a fallback on the receiving side
for devices/kernels that do not support WRITE_SAME,
by open-coding a submit loop. But not yet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
  • Loading branch information
Lars Ellenberg authored and Jens Axboe committed Jun 14, 2016
1 parent 60bac04 commit 9104d31
Show file tree
Hide file tree
Showing 10 changed files with 360 additions and 80 deletions.
9 changes: 8 additions & 1 deletion drivers/block/drbd/drbd_actlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,13 @@ static int update_sync_bits(struct drbd_device *device,
return count;
}

static bool plausible_request_size(int size)
{
return size > 0
&& size <= DRBD_MAX_BATCH_BIO_SIZE
&& IS_ALIGNED(size, 512);
}

/* clear the bit corresponding to the piece of storage in question:
* size byte of data starting from sector. Only clear a bits of the affected
* one ore more _aligned_ BM_BLOCK_SIZE blocks.
Expand All @@ -859,7 +866,7 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
if ((mode == SET_OUT_OF_SYNC) && size == 0)
return 0;

if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
if (!plausible_request_size(size)) {
drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
drbd_change_sync_fname[mode],
(unsigned long long)sector, size);
Expand Down
11 changes: 3 additions & 8 deletions drivers/block/drbd/drbd_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,14 +237,9 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");

if (f & EE_IS_TRIM) {
seq_putc(m, sep);
sep = '|';
if (f & EE_IS_TRIM_USE_ZEROOUT)
seq_puts(m, "zero-out");
else
seq_puts(m, "trim");
}
if (f & EE_IS_TRIM)
__seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim");
seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
seq_putc(m, '\n');
}

Expand Down
13 changes: 9 additions & 4 deletions drivers/block/drbd/drbd_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,9 @@ enum {
/* this is/was a write request */
__EE_WRITE,

/* this is/was a write same request */
__EE_WRITE_SAME,

/* this originates from application on peer
* (not some resync or verify or other DRBD internal request) */
__EE_APPLICATION,
Expand All @@ -487,6 +490,7 @@ enum {
#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE)
#define EE_SUBMITTED (1<<__EE_SUBMITTED)
#define EE_WRITE (1<<__EE_WRITE)
#define EE_WRITE_SAME (1<<__EE_WRITE_SAME)
#define EE_APPLICATION (1<<__EE_APPLICATION)
#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ)

Expand Down Expand Up @@ -1350,8 +1354,8 @@ struct bm_extent {
/* For now, don't allow more than half of what we can "activate" in one
* activity log transaction to be discarded in one go. We may need to rework
* drbd_al_begin_io() to allow for even larger discard ranges */
#define DRBD_MAX_DISCARD_SIZE (AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE)
#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
#define DRBD_MAX_BATCH_BIO_SIZE (AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE)
#define DRBD_MAX_BBIO_SECTORS (DRBD_MAX_BATCH_BIO_SIZE >> 9)

extern int drbd_bm_init(struct drbd_device *device);
extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
Expand Down Expand Up @@ -1488,7 +1492,8 @@ enum determine_dev_size {
extern enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
extern void resync_after_online_grow(struct drbd_device *);
extern void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev);
extern void drbd_reconsider_queue_parameters(struct drbd_device *device,
struct drbd_backing_dev *bdev, struct o_qlim *o);
extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
enum drbd_role new_role,
int force);
Expand Down Expand Up @@ -1569,7 +1574,7 @@ extern int drbd_submit_peer_request(struct drbd_device *,
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
sector_t, unsigned int,
bool,
unsigned int,
gfp_t) __must_hold(local);
extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
int);
Expand Down
82 changes: 72 additions & 10 deletions drivers/block/drbd/drbd_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,31 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
}
}

/* communicated if (agreed_features & DRBD_FF_WSAME) */
void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q)
{
if (q) {
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
p->qlim->io_min = cpu_to_be32(queue_io_min(q));
p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
p->qlim->discard_enabled = blk_queue_discard(q);
p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q);
p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
} else {
q = device->rq_queue;
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
p->qlim->alignment_offset = 0;
p->qlim->io_min = cpu_to_be32(queue_io_min(q));
p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
p->qlim->discard_enabled = 0;
p->qlim->discard_zeroes_data = 0;
p->qlim->write_same_capable = 0;
}
}

int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
{
struct drbd_device *device = peer_device->device;
Expand All @@ -928,29 +953,37 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
sector_t d_size, u_size;
int q_order_type;
unsigned int max_bio_size;
unsigned int packet_size;

sock = &peer_device->connection->data;
p = drbd_prepare_command(peer_device, sock);
if (!p)
return -EIO;

packet_size = sizeof(*p);
if (peer_device->connection->agreed_features & DRBD_FF_WSAME)
packet_size += sizeof(p->qlim[0]);

memset(p, 0, packet_size);
if (get_ldev_if_state(device, D_NEGOTIATING)) {
D_ASSERT(device, device->ldev->backing_bdev);
struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
d_size = drbd_get_max_capacity(device->ldev);
rcu_read_lock();
u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
rcu_read_unlock();
q_order_type = drbd_queue_order_type(device);
max_bio_size = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
max_bio_size = queue_max_hw_sectors(q) << 9;
max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
assign_p_sizes_qlim(device, p, q);
put_ldev(device);
} else {
d_size = 0;
u_size = 0;
q_order_type = QUEUE_ORDERED_NONE;
max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
assign_p_sizes_qlim(device, p, NULL);
}

sock = &peer_device->connection->data;
p = drbd_prepare_command(peer_device, sock);
if (!p)
return -EIO;

if (peer_device->connection->agreed_pro_version <= 94)
max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
else if (peer_device->connection->agreed_pro_version < 100)
Expand All @@ -962,7 +995,8 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
p->max_bio_size = cpu_to_be32(max_bio_size);
p->queue_order_type = cpu_to_be16(q_order_type);
p->dds_flags = cpu_to_be16(flags);
return drbd_send_command(peer_device, sock, P_SIZES, sizeof(*p), NULL, 0);

return drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0);
}

/**
Expand Down Expand Up @@ -1577,6 +1611,9 @@ static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio)
? 0 : MSG_MORE);
if (err)
return err;
/* REQ_OP_WRITE_SAME has only one segment */
if (bio_op(bio) == REQ_OP_WRITE_SAME)
break;
}
return 0;
}
Expand All @@ -1595,6 +1632,9 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b
bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
if (err)
return err;
/* REQ_OP_WRITE_SAME has only one segment */
if (bio_op(bio) == REQ_OP_WRITE_SAME)
break;
}
return 0;
}
Expand Down Expand Up @@ -1626,6 +1666,7 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
return (bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
(bio->bi_rw & REQ_FUA ? DP_FUA : 0) |
(bio->bi_rw & REQ_PREFLUSH ? DP_FLUSH : 0) |
(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
else
return bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Expand All @@ -1639,6 +1680,8 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
struct drbd_device *device = peer_device->device;
struct drbd_socket *sock;
struct p_data *p;
struct p_wsame *wsame = NULL;
void *digest_out;
unsigned int dp_flags = 0;
int digest_size;
int err;
Expand Down Expand Up @@ -1674,12 +1717,29 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
goto out;
}
if (dp_flags & DP_WSAME) {
/* this will only work if DRBD_FF_WSAME is set AND the
* handshake agreed that all nodes and backend devices are
* WRITE_SAME capable and agree on logical_block_size */
wsame = (struct p_wsame*)p;
digest_out = wsame + 1;
wsame->size = cpu_to_be32(req->i.size);
} else
digest_out = p + 1;

/* our digest is still only over the payload.
* TRIM does not carry any payload. */
if (digest_size)
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + digest_size, NULL, req->i.size);
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out);
if (wsame) {
err =
__send_command(peer_device->connection, device->vnr, sock, P_WSAME,
sizeof(*wsame) + digest_size, NULL,
bio_iovec(req->master_bio).bv_len);
} else
err =
__send_command(peer_device->connection, device->vnr, sock, P_DATA,
sizeof(*p) + digest_size, NULL, req->i.size);
if (!err) {
/* For protocol A, we have to memcpy the payload into
* socket buffers, as we may complete right away
Expand Down Expand Up @@ -3660,6 +3720,8 @@ const char *cmdname(enum drbd_packet cmd)
* one PRO_VERSION */
static const char *cmdnames[] = {
[P_DATA] = "Data",
[P_WSAME] = "WriteSame",
[P_TRIM] = "Trim",
[P_DATA_REPLY] = "DataReply",
[P_RS_DATA_REPLY] = "RSDataReply",
[P_BARRIER] = "Barrier",
Expand Down
Loading

0 comments on commit 9104d31

Please sign in to comment.