vfs-6.17-rc1.integrity

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaINCngAKCRCRxhvAZXjc
 ogAMAP9LqNHFf7JfDIvF/PJBxzYa0ToWwPsWACERknwkvtBRCwEAhkmscIcIMQ4t
 LPGLGha17dfpaE4RurRhBYgS9x2/1Ao=
 =jSnJ
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.17-rc1.integrity' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs 'protection info' updates from Christian Brauner:
 "This adds the new FS_IOC_GETLBMD_CAP ioctl() to query metadata and
  protection info (PI) capabilities. This ioctl returns information
  about the file's integrity profile. This is useful for userspace
  applications to understand a file's end-to-end data protection support
  and configure the I/O accordingly.

  For now this interface is only supported by block devices. However, the
  design and placement of this ioctl in generic FS ioctl space allow us
  to extend it to work over files as well. This may be useful when
  filesystems start supporting PI-aware layouts.

  A new structure struct logical_block_metadata_cap is introduced, which
  contains the following fields:

   - lbmd_flags:
     bitmask of logical block metadata capability flags

   - lbmd_interval:
     the amount of data described by each unit of logical block metadata

   - lbmd_size:
     size in bytes of the logical block metadata associated with each
     interval

   - lbmd_opaque_size:
     size in bytes of the opaque block tag associated with each interval

   - lbmd_opaque_offset:
     offset in bytes of the opaque block tag within the logical block
     metadata

   - lbmd_pi_size:
     size in bytes of the T10 PI tuple associated with each interval

   - lbmd_pi_offset:
     offset in bytes of T10 PI tuple within the logical block metadata

   - lbmd_guard_tag_type:
     T10 PI guard tag type

   - lbmd_app_tag_size:
     size in bytes of the T10 PI application tag

   - lbmd_ref_tag_size:
     size in bytes of the T10 PI reference tag

   - lbmd_storage_tag_size:
     size in bytes of the T10 PI storage tag

  The internal logic to fetch the capability is encapsulated in a helper
  function blk_get_meta_cap(), which uses the blk_integrity profile
  associated with the device. The ioctl returns -EOPNOTSUPP if
  CONFIG_BLK_DEV_INTEGRITY is not enabled."

* tag 'vfs-6.17-rc1.integrity' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  block: fix lbmd_guard_tag_type assignment in FS_IOC_GETLBMD_CAP
  block: fix FS_IOC_GETLBMD_CAP parsing in blkdev_common_ioctl()
  fs: add ioctl to query metadata and protection info capabilities
  nvme: set pi_offset only when checksum type is not BLK_INTEGRITY_CSUM_NONE
  block: introduce pi_tuple_size field in blk_integrity
  block: rename tuple_size field in blk_integrity to metadata_size
This commit is contained in:
Linus Torvalds 2025-07-28 15:12:00 -07:00
commit cec40a7c80
14 changed files with 209 additions and 31 deletions

View file

@ -54,10 +54,10 @@ static bool bi_offload_capable(struct blk_integrity *bi)
{
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_CRC64:
return bi->tuple_size == sizeof(struct crc64_pi_tuple);
return bi->metadata_size == sizeof(struct crc64_pi_tuple);
case BLK_INTEGRITY_CSUM_CRC:
case BLK_INTEGRITY_CSUM_IP:
return bi->tuple_size == sizeof(struct t10_pi_tuple);
return bi->metadata_size == sizeof(struct t10_pi_tuple);
default:
pr_warn_once("%s: unknown integrity checksum type:%d\n",
__func__, bi->csum_type);

View file

@ -13,6 +13,7 @@
#include <linux/scatterlist.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/t10-pi.h>
#include "blk.h"
@ -54,6 +55,73 @@ new_segment:
return segments;
}
int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
struct logical_block_metadata_cap __user *argp)
{
struct blk_integrity *bi = blk_get_integrity(bdev->bd_disk);
struct logical_block_metadata_cap meta_cap = {};
size_t usize = _IOC_SIZE(cmd);
if (_IOC_DIR(cmd) != _IOC_DIR(FS_IOC_GETLBMD_CAP) ||
_IOC_TYPE(cmd) != _IOC_TYPE(FS_IOC_GETLBMD_CAP) ||
_IOC_NR(cmd) != _IOC_NR(FS_IOC_GETLBMD_CAP) ||
_IOC_SIZE(cmd) < LBMD_SIZE_VER0)
return -ENOIOCTLCMD;
if (!bi)
goto out;
if (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE)
meta_cap.lbmd_flags |= LBMD_PI_CAP_INTEGRITY;
if (bi->flags & BLK_INTEGRITY_REF_TAG)
meta_cap.lbmd_flags |= LBMD_PI_CAP_REFTAG;
meta_cap.lbmd_interval = 1 << bi->interval_exp;
meta_cap.lbmd_size = bi->metadata_size;
meta_cap.lbmd_pi_size = bi->pi_tuple_size;
meta_cap.lbmd_pi_offset = bi->pi_offset;
meta_cap.lbmd_opaque_size = bi->metadata_size - bi->pi_tuple_size;
if (meta_cap.lbmd_opaque_size && !bi->pi_offset)
meta_cap.lbmd_opaque_offset = bi->pi_tuple_size;
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_NONE:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_NONE;
break;
case BLK_INTEGRITY_CSUM_IP:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_IP;
break;
case BLK_INTEGRITY_CSUM_CRC:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC16_T10DIF;
break;
case BLK_INTEGRITY_CSUM_CRC64:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC64_NVME;
break;
}
if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE)
meta_cap.lbmd_app_tag_size = 2;
if (bi->flags & BLK_INTEGRITY_REF_TAG) {
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_CRC64:
meta_cap.lbmd_ref_tag_size =
sizeof_field(struct crc64_pi_tuple, ref_tag);
break;
case BLK_INTEGRITY_CSUM_CRC:
case BLK_INTEGRITY_CSUM_IP:
meta_cap.lbmd_ref_tag_size =
sizeof_field(struct t10_pi_tuple, ref_tag);
break;
default:
break;
}
}
out:
return copy_struct_to_user(argp, usize, &meta_cap, sizeof(meta_cap),
NULL);
}
/**
* blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
* @rq: request to map
@ -239,7 +307,7 @@ static ssize_t format_show(struct device *dev, struct device_attribute *attr,
{
struct blk_integrity *bi = dev_to_bi(dev);
if (!bi->tuple_size)
if (!bi->metadata_size)
return sysfs_emit(page, "none\n");
return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi));
}

View file

@ -14,6 +14,8 @@
#include <linux/jiffies.h>
#include <linux/gfp.h>
#include <linux/dma-mapping.h>
#include <linux/t10-pi.h>
#include <linux/crc64.h>
#include "blk.h"
#include "blk-rq-qos.h"
@ -116,7 +118,7 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
{
struct blk_integrity *bi = &lim->integrity;
if (!bi->tuple_size) {
if (!bi->metadata_size) {
if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE ||
bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) {
pr_warn("invalid PI settings.\n");
@ -137,6 +139,42 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
return -EINVAL;
}
if (bi->pi_tuple_size > bi->metadata_size) {
pr_warn("pi_tuple_size (%u) exceeds metadata_size (%u)\n",
bi->pi_tuple_size,
bi->metadata_size);
return -EINVAL;
}
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_NONE:
if (bi->pi_tuple_size) {
pr_warn("pi_tuple_size must be 0 when checksum type \
is none\n");
return -EINVAL;
}
break;
case BLK_INTEGRITY_CSUM_CRC:
case BLK_INTEGRITY_CSUM_IP:
if (bi->pi_tuple_size != sizeof(struct t10_pi_tuple)) {
pr_warn("pi_tuple_size mismatch for T10 PI: expected \
%zu, got %u\n",
sizeof(struct t10_pi_tuple),
bi->pi_tuple_size);
return -EINVAL;
}
break;
case BLK_INTEGRITY_CSUM_CRC64:
if (bi->pi_tuple_size != sizeof(struct crc64_pi_tuple)) {
pr_warn("pi_tuple_size mismatch for CRC64 PI: \
expected %zu, got %u\n",
sizeof(struct crc64_pi_tuple),
bi->pi_tuple_size);
return -EINVAL;
}
break;
}
if (!bi->interval_exp)
bi->interval_exp = ilog2(lim->logical_block_size);
@ -891,7 +929,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
return true;
if (ti->flags & BLK_INTEGRITY_STACKED) {
if (ti->tuple_size != bi->tuple_size)
if (ti->metadata_size != bi->metadata_size)
goto incompatible;
if (ti->interval_exp != bi->interval_exp)
goto incompatible;
@ -907,7 +945,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) |
(bi->flags & BLK_INTEGRITY_REF_TAG);
ti->csum_type = bi->csum_type;
ti->tuple_size = bi->tuple_size;
ti->metadata_size = bi->metadata_size;
ti->pi_offset = bi->pi_offset;
ti->interval_exp = bi->interval_exp;
ti->tag_size = bi->tag_size;

View file

@ -13,6 +13,7 @@
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/io_uring/cmd.h>
#include <linux/blk-integrity.h>
#include <uapi/linux/blkdev.h>
#include "blk.h"
#include "blk-crypto-internal.h"
@ -644,7 +645,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
case IOC_PR_CLEAR:
return blkdev_pr_clear(bdev, mode, argp);
default:
return -ENOIOCTLCMD;
return blk_get_meta_cap(bdev, cmd, argp);
}
}

View file

@ -56,7 +56,7 @@ static void t10_pi_generate(struct blk_integrity_iter *iter,
pi->ref_tag = 0;
iter->data_buf += iter->interval;
iter->prot_buf += bi->tuple_size;
iter->prot_buf += bi->metadata_size;
iter->seed++;
}
}
@ -105,7 +105,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
next:
iter->data_buf += iter->interval;
iter->prot_buf += bi->tuple_size;
iter->prot_buf += bi->metadata_size;
iter->seed++;
}
@ -125,7 +125,7 @@ next:
static void t10_pi_type1_prepare(struct request *rq)
{
struct blk_integrity *bi = &rq->q->limits.integrity;
const int tuple_sz = bi->tuple_size;
const int tuple_sz = bi->metadata_size;
u32 ref_tag = t10_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
struct bio *bio;
@ -177,7 +177,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
struct blk_integrity *bi = &rq->q->limits.integrity;
unsigned intervals = nr_bytes >> bi->interval_exp;
const int tuple_sz = bi->tuple_size;
const int tuple_sz = bi->metadata_size;
u32 ref_tag = t10_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
struct bio *bio;
@ -234,7 +234,7 @@ static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
put_unaligned_be48(0ULL, pi->ref_tag);
iter->data_buf += iter->interval;
iter->prot_buf += bi->tuple_size;
iter->prot_buf += bi->metadata_size;
iter->seed++;
}
}
@ -289,7 +289,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
next:
iter->data_buf += iter->interval;
iter->prot_buf += bi->tuple_size;
iter->prot_buf += bi->metadata_size;
iter->seed++;
}
@ -299,7 +299,7 @@ next:
static void ext_pi_type1_prepare(struct request *rq)
{
struct blk_integrity *bi = &rq->q->limits.integrity;
const int tuple_sz = bi->tuple_size;
const int tuple_sz = bi->metadata_size;
u64 ref_tag = ext_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
struct bio *bio;
@ -340,7 +340,7 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
struct blk_integrity *bi = &rq->q->limits.integrity;
unsigned intervals = nr_bytes >> bi->interval_exp;
const int tuple_sz = bi->tuple_size;
const int tuple_sz = bi->metadata_size;
u64 ref_tag = ext_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
struct bio *bio;

View file

@ -1192,11 +1192,11 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
return -EINVAL;
}
if (bi->tuple_size < cc->used_tag_size) {
if (bi->metadata_size < cc->used_tag_size) {
ti->error = "Integrity profile tag size mismatch.";
return -EINVAL;
}
cc->tuple_size = bi->tuple_size;
cc->tuple_size = bi->metadata_size;
if (1 << bi->interval_exp != cc->sector_size) {
ti->error = "Integrity profile sector size mismatch.";
return -EINVAL;

View file

@ -3906,8 +3906,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
struct blk_integrity *bi = &limits->integrity;
memset(bi, 0, sizeof(*bi));
bi->tuple_size = ic->tag_size;
bi->tag_size = bi->tuple_size;
bi->metadata_size = ic->tag_size;
bi->tag_size = bi->metadata_size;
bi->interval_exp =
ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
}
@ -4746,18 +4746,18 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
ti->error = "Integrity profile not supported";
goto bad;
}
/*printk("tag_size: %u, tuple_size: %u\n", bi->tag_size, bi->tuple_size);*/
if (bi->tuple_size < ic->tag_size) {
/*printk("tag_size: %u, metadata_size: %u\n", bi->tag_size, bi->metadata_size);*/
if (bi->metadata_size < ic->tag_size) {
r = -EINVAL;
ti->error = "The integrity profile is smaller than tag size";
goto bad;
}
if ((unsigned long)bi->tuple_size > PAGE_SIZE / 2) {
if ((unsigned long)bi->metadata_size > PAGE_SIZE / 2) {
r = -EINVAL;
ti->error = "Too big tuple size";
goto bad;
}
ic->tuple_size = bi->tuple_size;
ic->tuple_size = bi->metadata_size;
if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) {
r = -EINVAL;
ti->error = "Integrity profile sector size mismatch";

View file

@ -1506,7 +1506,7 @@ static int btt_blk_init(struct btt *btt)
int rc;
if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
lim.integrity.tuple_size = btt_meta_size(btt);
lim.integrity.metadata_size = btt_meta_size(btt);
lim.integrity.tag_size = btt_meta_size(btt);
}

View file

@ -1870,8 +1870,11 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
break;
}
bi->tuple_size = head->ms;
bi->pi_offset = info->pi_offset;
bi->metadata_size = head->ms;
if (bi->csum_type) {
bi->pi_tuple_size = head->pi_size;
bi->pi_offset = info->pi_offset;
}
return true;
}

View file

@ -69,7 +69,7 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
return;
if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) {
ns->metadata_size = bi->tuple_size;
ns->metadata_size = bi->metadata_size;
if (bi->flags & BLK_INTEGRITY_REF_TAG)
ns->pi_type = NVME_NS_DPS_PI_TYPE1;
else

View file

@ -52,7 +52,8 @@ void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim)
if (type != T10_PI_TYPE3_PROTECTION)
bi->flags |= BLK_INTEGRITY_REF_TAG;
bi->tuple_size = sizeof(struct t10_pi_tuple);
bi->metadata_size = sizeof(struct t10_pi_tuple);
bi->pi_tuple_size = bi->metadata_size;
if (dif && type) {
bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;

View file

@ -29,11 +29,13 @@ int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes);
int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
struct logical_block_metadata_cap __user *argp);
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
{
return q->limits.integrity.tuple_size;
return q->limits.integrity.metadata_size;
}
static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
@ -74,7 +76,7 @@ static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
unsigned int sectors)
{
return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
return bio_integrity_intervals(bi, sectors) * bi->metadata_size;
}
static inline bool blk_integrity_rq(struct request *rq)
@ -92,6 +94,11 @@ static inline struct bio_vec rq_integrity_vec(struct request *rq)
rq->bio->bi_integrity->bip_iter);
}
#else /* CONFIG_BLK_DEV_INTEGRITY */
/*
 * Stub for builds without CONFIG_BLK_DEV_INTEGRITY: the arguments are
 * ignored and the query always fails with -EOPNOTSUPP.
 */
static inline int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
struct logical_block_metadata_cap __user *argp)
{
return -EOPNOTSUPP;
}
static inline int blk_rq_count_integrity_sg(struct request_queue *q,
struct bio *b)
{

View file

@ -116,10 +116,11 @@ enum blk_integrity_checksum {
struct blk_integrity {
unsigned char flags;
enum blk_integrity_checksum csum_type;
unsigned char tuple_size;
unsigned char metadata_size;
unsigned char pi_offset;
unsigned char interval_exp;
unsigned char tag_size;
unsigned char pi_tuple_size;
};
typedef unsigned int __bitwise blk_mode_t;

View file

@ -102,6 +102,63 @@ struct fs_sysfs_path {
__u8 name[128];
};
/*
 * Protection info capability flags, reported as a bitmask in
 * logical_block_metadata_cap.lbmd_flags
 */
#define LBMD_PI_CAP_INTEGRITY (1 << 0)
#define LBMD_PI_CAP_REFTAG (1 << 1)
/*
 * Checksum types for Protection Information, reported in
 * logical_block_metadata_cap.lbmd_guard_tag_type
 */
#define LBMD_PI_CSUM_NONE 0
#define LBMD_PI_CSUM_IP 1
#define LBMD_PI_CSUM_CRC16_T10DIF 2
#define LBMD_PI_CSUM_CRC64_NVME 4
/*
 * sizeof first published struct; FS_IOC_GETLBMD_CAP requests whose encoded
 * argument size is smaller than this are not accepted
 */
#define LBMD_SIZE_VER0 16
/*
 * Logical block metadata capability descriptor
 * If the device does not support metadata, all the fields will be zero.
 * Applications must check lbmd_flags to determine whether metadata is
 * supported or not.
 *
 * As laid out here (one __u32, one __u16 and ten __u8 members) the
 * structure occupies 16 bytes, which is the value of LBMD_SIZE_VER0.
 */
struct logical_block_metadata_cap {
/* Bitmask of logical block metadata capability flags (LBMD_PI_CAP_*) */
__u32 lbmd_flags;
/*
* The amount of data described by each unit of logical block
* metadata
*/
__u16 lbmd_interval;
/*
* Size in bytes of the logical block metadata associated with each
* interval
*/
__u8 lbmd_size;
/*
* Size in bytes of the opaque block tag associated with each
* interval
*/
__u8 lbmd_opaque_size;
/*
* Offset in bytes of the opaque block tag within the logical block
* metadata
*/
__u8 lbmd_opaque_offset;
/* Size in bytes of the T10 PI tuple associated with each interval */
__u8 lbmd_pi_size;
/* Offset in bytes of T10 PI tuple within the logical block metadata */
__u8 lbmd_pi_offset;
/* T10 PI guard tag type (one of the LBMD_PI_CSUM_* values) */
__u8 lbmd_guard_tag_type;
/* Size in bytes of the T10 PI application tag */
__u8 lbmd_app_tag_size;
/* Size in bytes of the T10 PI reference tag */
__u8 lbmd_ref_tag_size;
/* Size in bytes of the T10 PI storage tag */
__u8 lbmd_storage_tag_size;
/* Explicit padding; brings the structure to LBMD_SIZE_VER0 bytes */
__u8 pad;
};
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@ -258,6 +315,8 @@ struct fsxattr {
* also /sys/kernel/debug/ for filesystems with debugfs exports
*/
#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
/* Get logical block metadata capability details */
#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)