linux/block/blk-integrity.c
Christoph Hellwig 3c3e85ddff block: bypass the STABLE_WRITES flag for protection information
Currently registering a checksum-enabled (aka PI) integrity profile sets
the QUEUE_FLAG_STABLE_WRITE flag, and unregistering it clears the flag.
This can incorrectly clear the flag when the driver requires stable
writes even without PI, e.g. in case of iSCSI or NVMe/TCP with data
digest enabled.

Fix this by looking at the csum_type directly in bdev_stable_writes and
not setting the queue flag.  Also remove the blk_queue_stable_writes
helper as the only user in nvme wants to only look at the actual
QUEUE_FLAG_STABLE_WRITE flag as it inherits the integrity configuration
by other means.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240613084839.1044015-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-06-14 10:20:06 -06:00

407 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* blk-integrity.c - Block layer data integrity extensions
*
* Copyright (C) 2007, 2008 Oracle Corporation
* Written by: Martin K. Petersen <martin.petersen@oracle.com>
*/
#include <linux/blk-integrity.h>
#include <linux/backing-dev.h>
#include <linux/mempool.h>
#include <linux/bio.h>
#include <linux/scatterlist.h>
#include <linux/export.h>
#include <linux/slab.h>
#include "blk.h"
/**
* blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
* @q: request queue
* @bio: bio with integrity metadata attached
*
* Description: Returns the number of elements required in a
* scatterlist corresponding to the integrity metadata in a bio.
*/
int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
{
struct bio_vec iv, ivprv = { NULL };
unsigned int segments = 0;
unsigned int seg_size = 0;
struct bvec_iter iter;
int prev = 0;
bio_for_each_integrity_vec(iv, bio, iter) {
if (prev) {
if (!biovec_phys_mergeable(q, &ivprv, &iv))
goto new_segment;
if (seg_size + iv.bv_len > queue_max_segment_size(q))
goto new_segment;
seg_size += iv.bv_len;
} else {
new_segment:
segments++;
seg_size = iv.bv_len;
}
prev = 1;
ivprv = iv;
}
return segments;
}
EXPORT_SYMBOL(blk_rq_count_integrity_sg);
/**
* blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
* @q: request queue
* @bio: bio with integrity metadata attached
* @sglist: target scatterlist
*
* Description: Map the integrity vectors in request into a
* scatterlist. The scatterlist must be big enough to hold all
* elements. I.e. sized using blk_rq_count_integrity_sg().
*/
int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
struct scatterlist *sglist)
{
struct bio_vec iv, ivprv = { NULL };
struct scatterlist *sg = NULL;
unsigned int segments = 0;
struct bvec_iter iter;
int prev = 0;
bio_for_each_integrity_vec(iv, bio, iter) {
if (prev) {
if (!biovec_phys_mergeable(q, &ivprv, &iv))
goto new_segment;
if (sg->length + iv.bv_len > queue_max_segment_size(q))
goto new_segment;
sg->length += iv.bv_len;
} else {
new_segment:
if (!sg)
sg = sglist;
else {
sg_unmark_end(sg);
sg = sg_next(sg);
}
sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset);
segments++;
}
prev = 1;
ivprv = iv;
}
if (sg)
sg_mark_end(sg);
return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
/**
* blk_integrity_compare - Compare integrity profile of two disks
* @gd1: Disk to compare
* @gd2: Disk to compare
*
* Description: Meta-devices like DM and MD need to verify that all
* sub-devices use the same integrity format before advertising to
* upper layers that they can send/receive integrity metadata. This
* function can be used to check whether two gendisk devices have
* compatible integrity formats.
*/
int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
{
struct blk_integrity *b1 = &gd1->queue->integrity;
struct blk_integrity *b2 = &gd2->queue->integrity;
if (!b1->tuple_size && !b2->tuple_size)
return 0;
if (!b1->tuple_size || !b2->tuple_size)
return -1;
if (b1->interval_exp != b2->interval_exp) {
pr_err("%s: %s/%s protection interval %u != %u\n",
__func__, gd1->disk_name, gd2->disk_name,
1 << b1->interval_exp, 1 << b2->interval_exp);
return -1;
}
if (b1->tuple_size != b2->tuple_size) {
pr_err("%s: %s/%s tuple sz %u != %u\n", __func__,
gd1->disk_name, gd2->disk_name,
b1->tuple_size, b2->tuple_size);
return -1;
}
if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
pr_err("%s: %s/%s tag sz %u != %u\n", __func__,
gd1->disk_name, gd2->disk_name,
b1->tag_size, b2->tag_size);
return -1;
}
if (b1->csum_type != b2->csum_type ||
(b1->flags & BLK_INTEGRITY_REF_TAG) !=
(b2->flags & BLK_INTEGRITY_REF_TAG)) {
pr_err("%s: %s/%s type %s != %s\n", __func__,
gd1->disk_name, gd2->disk_name,
blk_integrity_profile_name(b1),
blk_integrity_profile_name(b2));
return -1;
}
return 0;
}
EXPORT_SYMBOL(blk_integrity_compare);
bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
struct request *next)
{
if (blk_integrity_rq(req) == 0 && blk_integrity_rq(next) == 0)
return true;
if (blk_integrity_rq(req) == 0 || blk_integrity_rq(next) == 0)
return false;
if (bio_integrity(req->bio)->bip_flags !=
bio_integrity(next->bio)->bip_flags)
return false;
if (req->nr_integrity_segments + next->nr_integrity_segments >
q->limits.max_integrity_segments)
return false;
if (integrity_req_gap_back_merge(req, next->bio))
return false;
return true;
}
bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
struct bio *bio)
{
int nr_integrity_segs;
struct bio *next = bio->bi_next;
if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL)
return true;
if (blk_integrity_rq(req) == 0 || bio_integrity(bio) == NULL)
return false;
if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags)
return false;
bio->bi_next = NULL;
nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
bio->bi_next = next;
if (req->nr_integrity_segments + nr_integrity_segs >
q->limits.max_integrity_segments)
return false;
req->nr_integrity_segments += nr_integrity_segs;
return true;
}
static inline struct blk_integrity *dev_to_bi(struct device *dev)
{
return &dev_to_disk(dev)->queue->integrity;
}
const char *blk_integrity_profile_name(struct blk_integrity *bi)
{
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_IP:
if (bi->flags & BLK_INTEGRITY_REF_TAG)
return "T10-DIF-TYPE1-IP";
return "T10-DIF-TYPE3-IP";
case BLK_INTEGRITY_CSUM_CRC:
if (bi->flags & BLK_INTEGRITY_REF_TAG)
return "T10-DIF-TYPE1-CRC";
return "T10-DIF-TYPE3-CRC";
case BLK_INTEGRITY_CSUM_CRC64:
if (bi->flags & BLK_INTEGRITY_REF_TAG)
return "EXT-DIF-TYPE1-CRC64";
return "EXT-DIF-TYPE3-CRC64";
case BLK_INTEGRITY_CSUM_NONE:
break;
}
return "nop";
}
EXPORT_SYMBOL_GPL(blk_integrity_profile_name);
static ssize_t flag_store(struct device *dev, struct device_attribute *attr,
const char *page, size_t count, unsigned char flag)
{
struct blk_integrity *bi = dev_to_bi(dev);
unsigned long val;
int err;
err = kstrtoul(page, 10, &val);
if (err)
return err;
if (val)
bi->flags |= flag;
else
bi->flags &= ~flag;
return count;
}
static ssize_t flag_show(struct device *dev, struct device_attribute *attr,
char *page, unsigned char flag)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%d\n", !!(bi->flags & flag));
}
static ssize_t format_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
if (!bi->tuple_size)
return sysfs_emit(page, "none\n");
return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi));
}
static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%u\n", bi->tag_size);
}
static ssize_t protection_interval_bytes_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%u\n",
bi->interval_exp ? 1 << bi->interval_exp : 0);
}
static ssize_t read_verify_store(struct device *dev,
struct device_attribute *attr,
const char *page, size_t count)
{
return flag_store(dev, attr, page, count, BLK_INTEGRITY_VERIFY);
}
static ssize_t read_verify_show(struct device *dev,
struct device_attribute *attr, char *page)
{
return flag_show(dev, attr, page, BLK_INTEGRITY_VERIFY);
}
static ssize_t write_generate_store(struct device *dev,
struct device_attribute *attr,
const char *page, size_t count)
{
return flag_store(dev, attr, page, count, BLK_INTEGRITY_GENERATE);
}
static ssize_t write_generate_show(struct device *dev,
struct device_attribute *attr, char *page)
{
return flag_show(dev, attr, page, BLK_INTEGRITY_GENERATE);
}
static ssize_t device_is_integrity_capable_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%u\n",
!!(bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE));
}
static DEVICE_ATTR_RO(format);
static DEVICE_ATTR_RO(tag_size);
static DEVICE_ATTR_RO(protection_interval_bytes);
static DEVICE_ATTR_RW(read_verify);
static DEVICE_ATTR_RW(write_generate);
static DEVICE_ATTR_RO(device_is_integrity_capable);
static struct attribute *integrity_attrs[] = {
&dev_attr_format.attr,
&dev_attr_tag_size.attr,
&dev_attr_protection_interval_bytes.attr,
&dev_attr_read_verify.attr,
&dev_attr_write_generate.attr,
&dev_attr_device_is_integrity_capable.attr,
NULL
};
const struct attribute_group blk_integrity_attr_group = {
.name = "integrity",
.attrs = integrity_attrs,
};
/**
* blk_integrity_register - Register a gendisk as being integrity-capable
* @disk: struct gendisk pointer to make integrity-aware
* @template: block integrity profile to register
*
* Description: When a device needs to advertise itself as being able to
* send/receive integrity metadata it must use this function to register
* the capability with the block layer. The template is a blk_integrity
* struct with values appropriate for the underlying hardware. See
* Documentation/block/data-integrity.rst.
*/
void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
{
struct blk_integrity *bi = &disk->queue->integrity;
bi->csum_type = template->csum_type;
bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE |
template->flags;
bi->interval_exp = template->interval_exp ? :
ilog2(queue_logical_block_size(disk->queue));
bi->tuple_size = template->tuple_size;
bi->tag_size = template->tag_size;
bi->pi_offset = template->pi_offset;
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
if (disk->queue->crypto_profile) {
pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
disk->queue->crypto_profile = NULL;
}
#endif
}
EXPORT_SYMBOL(blk_integrity_register);
/**
* blk_integrity_unregister - Unregister block integrity profile
* @disk: disk whose integrity profile to unregister
*
* Description: This function unregisters the integrity capability from
* a block device.
*/
void blk_integrity_unregister(struct gendisk *disk)
{
struct blk_integrity *bi = &disk->queue->integrity;
if (!bi->tuple_size)
return;
memset(bi, 0, sizeof(*bi));
}
EXPORT_SYMBOL(blk_integrity_unregister);