for-6.15/block-20250322

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmfe8BkQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpvTqD/4pOeGi/QfLyocn4TcJcidRGZAvBxecTVuM
upeyr+dCyCi9Wk+EJKeAFooGe15upzxDxKj06HhCixaLx4etDK78uGV4FMM1Z4oa
2dtchz1Zd0HyBPgQIUY8OuOgbS7tstMS/KdvL+gr5IjfapeTF+54WVLCD8eVyvO/
vUIppgJBhrqy2qui4xF2lw4t2COt+/PqinGQuYALn4V4Po9NWA7lSh3ZI4F/byj1
v68jXyt2fqCAyxwkzRDv4GxhN8c6W+TPJpzivrEAuSkLacovESKztinOrafrBnLR
zdyO4n0V0yGOXbAcxRbADVA4HUsqhLl4JRnvE5P5zIaD7rkE0UqggF7vrSeCvVA1
hsi1BhkAMNimKX7CZMnT3dJpxRQj1eDJxpwUAusLHWjMyQbNFhV7WAtthMtVJon8
lAS4e5+xzjqKhF15GpVg5Lzy8SAwdqgNXwwq2zbM8OaPKG0FpajG8DXAqqcj4fpy
WXnwg72KZDmRcSNJhVZK6B9xSAwIMXPgH4ClCMP9/xlw8EDpM38MDmzrs35TAVtI
HGE3Qv9CjFjVj/OG3el+bTGIQJFVgYEVPV5TYfNCpKoxpj5cLn5OQY5u6MJawtgK
HeDgKv3jw3lHatDALMVfwJqqVlUht0R6SIxtP9WHV+CcFrqN1LJKmdhDQbm7b4XK
EbbawIsdxw==
=Ci5m
-----END PGP SIGNATURE-----

Merge tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - Fixes for integrity handling

 - NVMe pull request via Keith:
      - Secure concatenation for TCP transport (Hannes)
      - Multipath sysfs visibility (Nilay)
      - Various cleanups (Qasim, Baruch, Wang, Chen, Mike, Damien, Li)
      - Correct use of 64-bit BARs for pci-epf target (Niklas)
      - Socket fix for selinux when used in containers (Peijie)

 - MD pull request via Yu:
      - fix recovery can preempt resync (Li Nan)
      - fix md-bitmap IO limit (Su Yue)
      - fix raid10 discard with REQ_NOWAIT (Xiao Ni)
      - fix raid1 memory leak (Zheng Qixing)
      - fix mddev uaf (Yu Kuai)
      - fix raid1,raid10 IO flags (Yu Kuai)
      - some refactor and cleanup (Yu Kuai)

 - Series cleaning up and fixing bugs in the bad block handling code

 - Improve support for write failure simulation in null_blk

 - Various lock ordering fixes

 - Fixes for locking for debugfs attributes

 - Various ublk related fixes and improvements

 - Cleanups for blk-rq-qos wait handling

 - blk-throttle fixes

 - Fixes for loop dio and sync handling

 - Fixes and cleanups for the auto-PI code

 - Block side support for hardware encryption keys in blk-crypto

 - Various cleanups and fixes

* tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux: (105 commits)
  nvmet: replace max(a, min(b, c)) by clamp(val, lo, hi)
  nvme-tcp: fix selinux denied when calling sock_sendmsg
  nvmet: pci-epf: Always configure BAR0 as 64-bit
  nvmet: Remove duplicate uuid_copy
  nvme: zns: Simplify nvme_zone_parse_entry()
  nvmet: pci-epf: Remove redundant 'flush_workqueue()' calls
  nvmet-fc: Remove unused functions
  nvme-pci: remove stale comment
  nvme-fc: Utilise min3() to simplify queue count calculation
  nvme-multipath: Add visibility for queue-depth io-policy
  nvme-multipath: Add visibility for numa io-policy
  nvme-multipath: Add visibility for round-robin io-policy
  nvmet: add tls_concat and tls_key debugfs entries
  nvmet-tcp: support secure channel concatenation
  nvmet: Add 'sq' argument to alloc_ctrl_args
  nvme-fabrics: reset admin connection for secure concatenation
  nvme-tcp: request secure channel concatenation
  nvme-keyring: add nvme_tls_psk_refresh()
  nvme: add nvme_auth_derive_tls_psk()
  nvme: add nvme_auth_generate_digest()
  ...
commit 9b960d8cd6
128 changed files with 4069 additions and 1571 deletions
@@ -109,6 +109,10 @@ Contact:	Martin K. Petersen <martin.petersen@oracle.com>
Description:
		Indicates whether a storage device is capable of storing
		integrity metadata. Set if the device is T10 PI-capable.
		This flag is set to 1 if the storage media is formatted
		with T10 Protection Information. If the storage media is
		not formatted with T10 Protection Information, this flag
		is set to 0.


What:		/sys/block/<disk>/integrity/format

@@ -117,6 +121,13 @@ Contact:	Martin K. Petersen <martin.petersen@oracle.com>
Description:
		Metadata format for integrity capable block device.
		E.g. T10-DIF-TYPE1-CRC.
		This field describes the type of T10 Protection Information
		that the block device can send and receive.
		If the device can store application integrity metadata but
		no T10 Protection Information profile is used, this field
		contains "nop".
		If the device does not support integrity metadata, this
		field contains "none".


What:		/sys/block/<disk>/integrity/protection_interval_bytes

@@ -142,7 +153,17 @@ Date:		June 2008
Contact:	Martin K. Petersen <martin.petersen@oracle.com>
Description:
		Number of bytes of integrity tag space available per
		512 bytes of data.
		protection_interval_bytes, which is typically
		the device's logical block size.
		This field describes the size of the application tag
		if the storage device is formatted with T10 Protection
		Information and permits use of the application tag.
		The tag_size is reported in bytes and indicates the
		space available for adding an opaque tag to each block
		(protection_interval_bytes).
		If the device does not support T10 Protection Information
		(even if the device provides application integrity
		metadata space), this field is set to 0.


What:		/sys/block/<disk>/integrity/write_generate

@@ -229,6 +250,17 @@ Description:
		encryption, refer to Documentation/block/inline-encryption.rst.


What:		/sys/block/<disk>/queue/crypto/hw_wrapped_keys
Date:		February 2025
Contact:	linux-block@vger.kernel.org
Description:
		[RO] The presence of this file indicates that the device
		supports hardware-wrapped inline encryption keys, i.e. key blobs
		that can only be unwrapped and used by dedicated hardware. For
		more information about hardware-wrapped inline encryption keys,
		see Documentation/block/inline-encryption.rst.


What:		/sys/block/<disk>/queue/crypto/max_dun_bits
Date:		February 2022
Contact:	linux-block@vger.kernel.org

@@ -267,6 +299,15 @@ Description:
		use with inline encryption.


What:		/sys/block/<disk>/queue/crypto/raw_keys
Date:		February 2025
Contact:	linux-block@vger.kernel.org
Description:
		[RO] The presence of this file indicates that the device
		supports raw inline encryption keys, i.e. keys that are managed
		in raw, plaintext form in software.


What:		/sys/block/<disk>/queue/dax
Date:		June 2016
Contact:	linux-block@vger.kernel.org
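These attributes are ordinary sysfs files, so feature detection from userspace reduces to checking whether a file exists and reading its contents. A minimal C sketch of that check follows; the disk name "sda" and the buffer sizes are illustrative assumptions, not part of the ABI:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Report a disk's integrity format and whether its queue advertises
     * hardware-wrapped inline encryption keys, using the sysfs paths from
     * the ABI entries above. "sda" is just an example disk name. */
    int main(void)
    {
            const char *disk = "sda";
            char path[256], fmt[64] = "";
            FILE *f;

            snprintf(path, sizeof(path), "/sys/block/%s/integrity/format", disk);
            f = fopen(path, "r");
            if (f) {
                    if (fgets(fmt, sizeof(fmt), f))
                            fmt[strcspn(fmt, "\n")] = '\0';
                    fclose(f);
            }
            printf("integrity format: %s\n", fmt[0] ? fmt : "none");

            snprintf(path, sizeof(path),
                     "/sys/block/%s/queue/crypto/hw_wrapped_keys", disk);
            printf("hw-wrapped keys: %s\n",
                   access(path, F_OK) == 0 ? "supported" : "not supported");
            return 0;
    }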
@@ -77,10 +77,10 @@ Basic design
============

We introduce ``struct blk_crypto_key`` to represent an inline encryption key and
how it will be used. This includes the actual bytes of the key; the size of the
key; the algorithm and data unit size the key will be used with; and the number
of bytes needed to represent the maximum data unit number the key will be used
with.
how it will be used. This includes the type of the key (raw or
hardware-wrapped); the actual bytes of the key; the size of the key; the
algorithm and data unit size the key will be used with; and the number of bytes
needed to represent the maximum data unit number the key will be used with.

We introduce ``struct bio_crypt_ctx`` to represent an encryption context. It
contains a data unit number and a pointer to a blk_crypto_key. We add pointers

@@ -301,3 +301,250 @@ kernel will pretend that the device does not support hardware inline encryption
When the crypto API fallback is enabled, this means that all bios with an
encryption context will use the fallback, and IO will complete as usual. When
the fallback is disabled, a bio with an encryption context will be failed.

.. _hardware_wrapped_keys:

Hardware-wrapped keys
=====================

Motivation and threat model
---------------------------

Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally
relies on the raw encryption key(s) being present in kernel memory so that the
encryption can be performed. This traditionally isn't seen as a problem because
the key(s) won't be present during an offline attack, which is the main type of
attack that storage encryption is intended to protect from.

However, there is an increasing desire to also protect users' data from other
types of attacks (to the extent possible), including:

- Cold boot attacks, where an attacker with physical access to a system suddenly
  powers it off, then immediately dumps the system memory to extract recently
  in-use encryption keys, then uses these keys to decrypt user data on-disk.

- Online attacks where the attacker is able to read kernel memory without fully
  compromising the system, followed by an offline attack where any extracted
  keys can be used to decrypt user data on-disk. An example of such an online
  attack would be if the attacker is able to run some code on the system that
  exploits a Meltdown-like vulnerability but is unable to escalate privileges.

- Online attacks where the attacker fully compromises the system, but their data
  exfiltration is significantly time-limited and/or bandwidth-limited, so in
  order to completely exfiltrate the data they need to extract the encryption
  keys to use in a later offline attack.

Hardware-wrapped keys are a feature of inline encryption hardware that is
designed to protect users' data from the above attacks (to the extent possible),
without introducing limitations such as a maximum number of keys.

Note that it is impossible to **fully** protect users' data from these attacks.
Even in the attacks where the attacker "just" gets read access to kernel memory,
they can still extract any user data that is present in memory, including
plaintext pagecache pages of encrypted files. The focus here is just on
protecting the encryption keys, as those instantly give access to **all** user
data in any following offline attack, rather than just some of it (where which
data is included in that "some" might not be controlled by the attacker).

Solution overview
-----------------

Inline encryption hardware typically has "keyslots" into which software can
program keys for the hardware to use; the contents of keyslots typically can't
be read back by software. As such, the above security goals could be achieved
if the kernel simply erased its copy of the key(s) after programming them into
keyslot(s) and thereafter only referred to them via keyslot number.

However, that naive approach runs into a couple of problems:

- It limits the number of unlocked keys to the number of keyslots, which
  typically is a small number. In cases where there is only one encryption key
  system-wide (e.g., a full-disk encryption key), that can be tolerable.
  However, in general there can be many logged-in users with many different
  keys, and/or many running applications with application-specific encrypted
  storage areas. This is especially true if file-based encryption (e.g.
  fscrypt) is being used.

- Inline crypto engines typically lose the contents of their keyslots if the
  storage controller (usually UFS or eMMC) is reset. Resetting the storage
  controller is a standard error recovery procedure that is executed if certain
  types of storage errors occur, and such errors can occur at any time.
  Therefore, when inline crypto is being used, the operating system must always
  be ready to reprogram the keyslots without user intervention.

Thus, it is important for the kernel to still have a way to "remind" the
hardware about a key, without actually having the raw key itself.

Somewhat less importantly, it is also desirable that the raw keys are never
visible to software at all, even while being initially unlocked. This would
ensure that a read-only compromise of system memory will never allow a key to be
extracted to be used off-system, even if it occurs when a key is being unlocked.

To solve all these problems, some vendors of inline encryption hardware have
made their hardware support *hardware-wrapped keys*. Hardware-wrapped keys
are encrypted keys that can only be unwrapped (decrypted) and used by hardware
-- either by the inline encryption hardware itself, or by a dedicated hardware
block that can directly provision keys to the inline encryption hardware.

(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys"
to add some clarity in cases where there could be other types of wrapped keys,
such as in file-based encryption. Key wrapping is a commonly used technique.)

The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
that is never exposed to software; it is either a persistent key (a "long-term
wrapping key") or a per-boot key (an "ephemeral wrapping key"). The long-term
wrapped form of the key is what is initially unlocked, but it is erased from
memory as soon as it is converted into an ephemerally-wrapped key. In-use
hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.

As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
the hardware also includes a level of indirection; it doesn't use the unwrapped
key directly for inline encryption, but rather derives both an inline encryption
key and a "software secret" from it. Software can use the "software secret" for
tasks that can't use the inline encryption hardware, such as filenames
encryption. The software secret is not protected from memory compromise.

Key hierarchy
-------------

Here is the key hierarchy for a hardware-wrapped key::

                       Hardware-wrapped key
                                |
                                |
                          <Hardware KDF>
                                |
                  -----------------------------
                  |                           |
        Inline encryption key         Software secret

The components are:

- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation
  Function), in ephemerally-wrapped form. The key wrapping algorithm is a
  hardware implementation detail that doesn't impact kernel operation, but a
  strong authenticated encryption algorithm such as AES-256-GCM is recommended.

- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to
  derive subkeys after unwrapping the wrapped key. The hardware's choice of KDF
  doesn't impact kernel operation, but it does need to be known for testing
  purposes, and it's also assumed to have at least a 256-bit security strength.
  All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC,
  with a particular choice of labels and contexts; new hardware should use this
  already-vetted KDF.

- *Inline encryption key*: a derived key which the hardware directly provisions
  to a keyslot of the inline encryption hardware, without exposing it to
  software. In all known hardware, this will always be an AES-256-XTS key.
  However, in principle other encryption algorithms could be supported too.
  Hardware must derive distinct subkeys for each supported encryption algorithm.

- *Software secret*: a derived key which the hardware returns to software so
  that software can use it for cryptographic tasks that can't use inline
  encryption. This value is cryptographically isolated from the inline
  encryption key, i.e. knowing one doesn't reveal the other. (The KDF ensures
  this.) Currently, the software secret is always 32 bytes and thus is suitable
  for cryptographic applications that require up to a 256-bit security strength.
  Some use cases (e.g. full-disk encryption) won't require the software secret.

Example: in the case of fscrypt, the fscrypt master key (the key that protects a
particular set of encrypted directories) is made hardware-wrapped. The inline
encryption key is used as the file contents encryption key, while the software
secret (rather than the master key directly) is used to key fscrypt's KDF
(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.

Note that currently this design assumes a single inline encryption key per
hardware-wrapped key, without any further key derivation. Thus, in the case of
fscrypt, currently hardware-wrapped keys are only compatible with the "inline
encryption optimized" settings, which use one file contents encryption key per
encryption policy rather than one per file. This design could be extended to
make the hardware derive per-file keys using per-file nonces passed down the
storage stack, and in fact some hardware already supports this; future work is
planned to remove this limitation by adding the corresponding kernel support.

Kernel support
--------------

The inline encryption support of the kernel's block layer ("blk-crypto") has
been extended to support hardware-wrapped keys as an alternative to raw keys,
when hardware support is available. This works in the following way:

- A ``key_types_supported`` field is added to the crypto capabilities in
  ``struct blk_crypto_profile``. This allows device drivers to declare that
  they support raw keys, hardware-wrapped keys, or both.

- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an
  alternative to a raw key; a ``key_type`` field is added to
  ``struct blk_crypto_config`` to distinguish between the different key types.
  This allows users of blk-crypto to en/decrypt data using a hardware-wrapped
  key in a way very similar to using a raw key.

- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added. Device drivers
  that support hardware-wrapped keys must implement this method. Users of
  blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method
  (a driver-side sketch follows this list).

- The programming and eviction of hardware-wrapped keys happens via
  ``blk_crypto_ll_ops::keyslot_program`` and
  ``blk_crypto_ll_ops::keyslot_evict``, just like it does for raw keys. If a
  driver supports hardware-wrapped keys, then it must handle hardware-wrapped
  keys being passed to these methods.
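To make the driver-facing side of this concrete, here is a minimal sketch. It
is not taken from the patch: the ``BLK_CRYPTO_KEY_TYPE_*`` flag names,
``BLK_CRYPTO_SW_SECRET_SIZE``, the exact ``derive_sw_secret`` prototype, and
every ``my_*`` symbol are assumptions layered on the description above, so
check ``include/linux/blk-crypto-profile.h`` for the authoritative
definitions::

    /* Hypothetical driver glue, not from the patch. */
    #include <linux/blk-crypto-profile.h>

    /* Vendor-specific hardware call; hypothetical, provided elsewhere. */
    int my_hw_derive_sw_secret(const u8 *eph_key, size_t eph_key_size,
                               u8 *sw_secret);

    static int my_derive_sw_secret(struct blk_crypto_profile *profile,
                                   const u8 *eph_key, size_t eph_key_size,
                                   u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
    {
            /* Ask the hardware to unwrap the ephemerally-wrapped key and
             * hand back only the derived software secret; the unwrapped
             * key itself never reaches kernel memory. */
            return my_hw_derive_sw_secret(eph_key, eph_key_size, sw_secret);
    }

    static void my_init_crypto_profile(struct blk_crypto_profile *profile)
    {
            /* Declare which key types the hardware accepts ... */
            profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW |
                                           BLK_CRYPTO_KEY_TYPE_HW_WRAPPED;
            /* ... and hook up the new method next to the existing
             * keyslot_program/keyslot_evict operations. */
            profile->ll_ops.derive_sw_secret = my_derive_sw_secret;
    }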

blk-crypto-fallback doesn't support hardware-wrapped keys. Therefore,
hardware-wrapped keys can only be used with actual inline encryption hardware.

All the above deals with hardware-wrapped keys in ephemerally-wrapped form only.
To get such keys in the first place, new block device ioctls have been added to
provide a generic interface to creating and preparing such keys:

- ``BLKCRYPTOIMPORTKEY`` converts a raw key to long-term wrapped form. It takes
  in a pointer to a ``struct blk_crypto_import_key_arg``. The caller must set
  ``raw_key_ptr`` and ``raw_key_size`` to the pointer and size (in bytes) of the
  raw key to import. On success, ``BLKCRYPTOIMPORTKEY`` returns 0 and writes
  the resulting long-term wrapped key blob to the buffer pointed to by
  ``lt_key_ptr``, which is of maximum size ``lt_key_size``. It also updates
  ``lt_key_size`` to be the actual size of the key. On failure, it returns -1
  and sets errno. An errno of ``EOPNOTSUPP`` indicates that the block device
  does not support hardware-wrapped keys. An errno of ``EOVERFLOW`` indicates
  that the output buffer did not have enough space for the key blob.

- ``BLKCRYPTOGENERATEKEY`` is like ``BLKCRYPTOIMPORTKEY``, but it has the
  hardware generate the key instead of importing one. It takes in a pointer to
  a ``struct blk_crypto_generate_key_arg``.

- ``BLKCRYPTOPREPAREKEY`` converts a key from long-term wrapped form to
  ephemerally-wrapped form. It takes in a pointer to a ``struct
  blk_crypto_prepare_key_arg``. The caller must set ``lt_key_ptr`` and
  ``lt_key_size`` to the pointer and size (in bytes) of the long-term wrapped
  key blob to convert. On success, ``BLKCRYPTOPREPAREKEY`` returns 0 and writes
  the resulting ephemerally-wrapped key blob to the buffer pointed to by
  ``eph_key_ptr``, which is of maximum size ``eph_key_size``. It also updates
  ``eph_key_size`` to be the actual size of the key. On failure, it returns -1
  and sets errno. Errno values of ``EOPNOTSUPP`` and ``EOVERFLOW`` mean the
  same as they do for ``BLKCRYPTOIMPORTKEY``. An errno of ``EBADMSG`` indicates
  that the long-term wrapped key is invalid.

Userspace needs to use either ``BLKCRYPTOIMPORTKEY`` or ``BLKCRYPTOGENERATEKEY``
once to create a key, and then ``BLKCRYPTOPREPAREKEY`` each time the key is
unlocked and added to the kernel. Note that these ioctls have no relevance for
raw keys; they are only for hardware-wrapped keys.
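As an illustration of that create-then-prepare flow, here is a hedged userspace
sketch. Only the ioctl names and the struct field names are taken from the
description above; the uapi header name and the exact field types are
assumptions, so treat this as a sketch rather than a verified program::

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/blk-crypto.h>   /* assumed uapi header for BLKCRYPTO* */

    int main(void)
    {
            unsigned char raw_key[32] = { 0 };      /* key material */
            unsigned char lt_key[128], eph_key[128];
            struct blk_crypto_import_key_arg import = { 0 };
            struct blk_crypto_prepare_key_arg prepare = { 0 };
            int fd = open("/dev/sda", O_RDONLY);    /* example device */

            if (fd < 0)
                    return 1;

            /* One-time step: turn the raw key into a long-term wrapped blob. */
            import.raw_key_ptr = (unsigned long)raw_key;
            import.raw_key_size = sizeof(raw_key);
            import.lt_key_ptr = (unsigned long)lt_key;
            import.lt_key_size = sizeof(lt_key);
            if (ioctl(fd, BLKCRYPTOIMPORTKEY, &import) != 0) {
                    perror("BLKCRYPTOIMPORTKEY"); /* EOPNOTSUPP, EOVERFLOW, ... */
                    return 1;
            }

            /* Per-unlock step: convert to ephemerally-wrapped form. */
            prepare.lt_key_ptr = (unsigned long)lt_key;
            prepare.lt_key_size = import.lt_key_size;  /* actual blob size */
            prepare.eph_key_ptr = (unsigned long)eph_key;
            prepare.eph_key_size = sizeof(eph_key);
            if (ioctl(fd, BLKCRYPTOPREPAREKEY, &prepare) != 0) {
                    perror("BLKCRYPTOPREPAREKEY"); /* EBADMSG: bad lt blob */
                    return 1;
            }

            /* prepare.eph_key_size bytes at eph_key can now be handed to the
             * in-kernel user of blk-crypto (e.g. fscrypt or dm). */
            close(fd);
            return 0;
    }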

Testability
-----------

Both the hardware KDF and the inline encryption itself are well-defined
algorithms that don't depend on any secrets other than the unwrapped key.
Therefore, if the unwrapped key is known to software, these algorithms can be
reproduced in software in order to verify the ciphertext that is written to disk
by the inline encryption hardware.

However, the unwrapped key will only be known to software for testing if the
"import" functionality is used. Proper testing is not possible in the
"generate" case where the hardware generates the key itself. The correct
operation of the "generate" mode thus relies on the security and correctness of
the hardware RNG and its use to generate the key, as well as the testing of the
"import" mode as that should cover all parts other than the key generation.

For an example of a test that verifies the ciphertext written to disk in the
"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
`Android's vts_kernel_encryption_test
<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/main/encryption/>`_.

@@ -85,6 +85,8 @@ Code  Seq#    Include File                                           Comments
0x10  20-2F  arch/s390/include/uapi/asm/hypfs.h
0x12  all    linux/fs.h                                              BLK* ioctls
             linux/blkpg.h
             linux/blkzoned.h
             linux/blk-crypto.h
0x15  all    linux/fs.h                                              FS_IOC_* ioctls
0x1b  all    InfiniBand Subsystem
             <http://infiniband.sourceforge.net/>

@@ -26,7 +26,8 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o

obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= bio-integrity.o blk-integrity.o t10-pi.o \
					bio-integrity-auto.o
obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
@ -527,51 +527,6 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 'true' if the range indicated by 'bad' can be backward merged
|
||||
* with the bad range (from the bad table) index by 'behind'.
|
||||
*/
|
||||
static bool can_merge_behind(struct badblocks *bb,
|
||||
struct badblocks_context *bad, int behind)
|
||||
{
|
||||
sector_t sectors = bad->len;
|
||||
sector_t s = bad->start;
|
||||
u64 *p = bb->page;
|
||||
|
||||
if ((s < BB_OFFSET(p[behind])) &&
|
||||
((s + sectors) >= BB_OFFSET(p[behind])) &&
|
||||
((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
|
||||
BB_ACK(p[behind]) == bad->ack)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do backward merge for range indicated by 'bad' and the bad range
|
||||
* (from the bad table) indexed by 'behind'. The return value is merged
|
||||
* sectors from bad->len.
|
||||
*/
|
||||
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
|
||||
int behind)
|
||||
{
|
||||
sector_t sectors = bad->len;
|
||||
sector_t s = bad->start;
|
||||
u64 *p = bb->page;
|
||||
int merged = 0;
|
||||
|
||||
WARN_ON(s >= BB_OFFSET(p[behind]));
|
||||
WARN_ON((s + sectors) < BB_OFFSET(p[behind]));
|
||||
|
||||
if (s < BB_OFFSET(p[behind])) {
|
||||
merged = BB_OFFSET(p[behind]) - s;
|
||||
p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);
|
||||
|
||||
WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 'true' if the range indicated by 'bad' can be forward
|
||||
* merged with the bad range (from the bad table) indexed by 'prev'.
|
||||
|
@ -745,7 +700,7 @@ static bool can_front_overwrite(struct badblocks *bb, int prev,
|
|||
*extra = 2;
|
||||
}
|
||||
|
||||
if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
|
||||
if ((bb->count + (*extra)) > MAX_BADBLOCKS)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -855,40 +810,60 @@ static void badblocks_update_acked(struct badblocks *bb)
|
|||
bb->unacked_exist = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 'true' if the range indicated by 'bad' is exactly backward
|
||||
* overlapped with the bad range (from bad table) indexed by 'behind'.
|
||||
*/
|
||||
static bool try_adjacent_combine(struct badblocks *bb, int prev)
|
||||
{
|
||||
u64 *p = bb->page;
|
||||
|
||||
if (prev >= 0 && (prev + 1) < bb->count &&
|
||||
BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
|
||||
(BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
|
||||
BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
|
||||
p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
|
||||
BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
|
||||
BB_ACK(p[prev]));
|
||||
|
||||
if ((prev + 2) < bb->count)
|
||||
memmove(p + prev + 1, p + prev + 2,
|
||||
(bb->count - (prev + 2)) * 8);
|
||||
bb->count--;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Do exact work to set bad block range into the bad block table */
|
||||
static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
|
||||
static bool _badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
|
||||
int acknowledged)
|
||||
{
|
||||
int retried = 0, space_desired = 0;
|
||||
int orig_len, len = 0, added = 0;
|
||||
int len = 0, added = 0;
|
||||
struct badblocks_context bad;
|
||||
int prev = -1, hint = -1;
|
||||
sector_t orig_start;
|
||||
unsigned long flags;
|
||||
int rv = 0;
|
||||
u64 *p;
|
||||
|
||||
if (bb->shift < 0)
|
||||
/* badblocks are disabled */
|
||||
return 1;
|
||||
return false;
|
||||
|
||||
if (sectors == 0)
|
||||
/* Invalid sectors number */
|
||||
return 1;
|
||||
return false;
|
||||
|
||||
if (bb->shift) {
|
||||
/* round the start down, and the end up */
|
||||
sector_t next = s + sectors;
|
||||
|
||||
rounddown(s, bb->shift);
|
||||
roundup(next, bb->shift);
|
||||
rounddown(s, 1 << bb->shift);
|
||||
roundup(next, 1 << bb->shift);
|
||||
sectors = next - s;
|
||||
}
|
||||
|
||||
write_seqlock_irqsave(&bb->lock, flags);
|
||||
|
||||
orig_start = s;
|
||||
orig_len = sectors;
|
||||
bad.ack = acknowledged;
|
||||
p = bb->page;
|
||||
|
||||
|
@ -897,6 +872,9 @@ re_insert:
|
|||
bad.len = sectors;
|
||||
len = 0;
|
||||
|
||||
if (badblocks_full(bb))
|
||||
goto out;
|
||||
|
||||
if (badblocks_empty(bb)) {
|
||||
len = insert_at(bb, 0, &bad);
|
||||
bb->count++;
|
||||
|
@ -908,34 +886,16 @@ re_insert:
|
|||
|
||||
/* start before all badblocks */
|
||||
if (prev < 0) {
|
||||
if (!badblocks_full(bb)) {
|
||||
/* insert on the first */
|
||||
if (bad.len > (BB_OFFSET(p[0]) - bad.start))
|
||||
bad.len = BB_OFFSET(p[0]) - bad.start;
|
||||
len = insert_at(bb, 0, &bad);
|
||||
bb->count++;
|
||||
added++;
|
||||
hint = 0;
|
||||
hint = ++prev;
|
||||
goto update_sectors;
|
||||
}
|
||||
|
||||
/* No space, try to merge */
|
||||
if (overlap_behind(bb, &bad, 0)) {
|
||||
if (can_merge_behind(bb, &bad, 0)) {
|
||||
len = behind_merge(bb, &bad, 0);
|
||||
added++;
|
||||
} else {
|
||||
len = BB_OFFSET(p[0]) - s;
|
||||
space_desired = 1;
|
||||
}
|
||||
hint = 0;
|
||||
goto update_sectors;
|
||||
}
|
||||
|
||||
/* no table space and give up */
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* in case p[prev-1] can be merged with p[prev] */
|
||||
if (can_combine_front(bb, prev, &bad)) {
|
||||
front_combine(bb, prev);
|
||||
|
@ -945,14 +905,20 @@ re_insert:
|
|||
goto update_sectors;
|
||||
}
|
||||
|
||||
if (overlap_front(bb, prev, &bad)) {
|
||||
if (can_merge_front(bb, prev, &bad)) {
|
||||
len = front_merge(bb, prev, &bad);
|
||||
added++;
|
||||
} else {
|
||||
hint = prev;
|
||||
goto update_sectors;
|
||||
}
|
||||
|
||||
if (overlap_front(bb, prev, &bad)) {
|
||||
int extra = 0;
|
||||
|
||||
if (!can_front_overwrite(bb, prev, &bad, &extra)) {
|
||||
if (extra > 0)
|
||||
goto out;
|
||||
|
||||
len = min_t(sector_t,
|
||||
BB_END(p[prev]) - s, sectors);
|
||||
hint = prev;
|
||||
|
@ -967,36 +933,11 @@ re_insert:
|
|||
front_combine(bb, prev);
|
||||
bb->count--;
|
||||
}
|
||||
}
|
||||
|
||||
hint = prev;
|
||||
goto update_sectors;
|
||||
}
|
||||
|
||||
if (can_merge_front(bb, prev, &bad)) {
|
||||
len = front_merge(bb, prev, &bad);
|
||||
added++;
|
||||
hint = prev;
|
||||
goto update_sectors;
|
||||
}
|
||||
|
||||
/* if no space in table, still try to merge in the covered range */
|
||||
if (badblocks_full(bb)) {
|
||||
/* skip the cannot-merge range */
|
||||
if (((prev + 1) < bb->count) &&
|
||||
overlap_behind(bb, &bad, prev + 1) &&
|
||||
((s + sectors) >= BB_END(p[prev + 1]))) {
|
||||
len = BB_END(p[prev + 1]) - s;
|
||||
hint = prev + 1;
|
||||
goto update_sectors;
|
||||
}
|
||||
|
||||
/* no retry any more */
|
||||
len = sectors;
|
||||
space_desired = 1;
|
||||
hint = -1;
|
||||
goto update_sectors;
|
||||
}
|
||||
|
||||
/* cannot merge and there is space in bad table */
|
||||
if ((prev + 1) < bb->count &&
|
||||
overlap_behind(bb, &bad, prev + 1))
|
||||
|
@ -1006,7 +947,7 @@ re_insert:
|
|||
len = insert_at(bb, prev + 1, &bad);
|
||||
bb->count++;
|
||||
added++;
|
||||
hint = prev + 1;
|
||||
hint = ++prev;
|
||||
|
||||
update_sectors:
|
||||
s += len;
|
||||
|
@ -1015,35 +956,12 @@ update_sectors:
|
|||
if (sectors > 0)
|
||||
goto re_insert;
|
||||
|
||||
WARN_ON(sectors < 0);
|
||||
|
||||
/*
|
||||
* Check whether the following already set range can be
|
||||
* merged. (prev < 0) condition is not handled here,
|
||||
* because it's already complicated enough.
|
||||
*/
|
||||
if (prev >= 0 &&
|
||||
(prev + 1) < bb->count &&
|
||||
BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
|
||||
(BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
|
||||
BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
|
||||
p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
|
||||
BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
|
||||
BB_ACK(p[prev]));
|
||||
|
||||
if ((prev + 2) < bb->count)
|
||||
memmove(p + prev + 1, p + prev + 2,
|
||||
(bb->count - (prev + 2)) * 8);
|
||||
bb->count--;
|
||||
}
|
||||
|
||||
if (space_desired && !badblocks_full(bb)) {
|
||||
s = orig_start;
|
||||
sectors = orig_len;
|
||||
space_desired = 0;
|
||||
if (retried++ < 3)
|
||||
goto re_insert;
|
||||
}
|
||||
try_adjacent_combine(bb, prev);
|
||||
|
||||
out:
|
||||
if (added) {
|
||||
|
@ -1057,10 +975,7 @@ out:
|
|||
|
||||
write_sequnlock_irqrestore(&bb->lock, flags);
|
||||
|
||||
if (!added)
|
||||
rv = 1;
|
||||
|
||||
return rv;
|
||||
return sectors == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1131,21 +1046,20 @@ static int front_splitting_clear(struct badblocks *bb, int prev,
|
|||
}
|
||||
|
||||
/* Do the exact work to clear bad block range from the bad block table */
|
||||
static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
|
||||
static bool _badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
|
||||
{
|
||||
struct badblocks_context bad;
|
||||
int prev = -1, hint = -1;
|
||||
int len = 0, cleared = 0;
|
||||
int rv = 0;
|
||||
u64 *p;
|
||||
|
||||
if (bb->shift < 0)
|
||||
/* badblocks are disabled */
|
||||
return 1;
|
||||
return false;
|
||||
|
||||
if (sectors == 0)
|
||||
/* Invalid sectors number */
|
||||
return 1;
|
||||
return false;
|
||||
|
||||
if (bb->shift) {
|
||||
sector_t target;
|
||||
|
@ -1157,8 +1071,8 @@ static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
|
|||
* isn't than to think a block is not bad when it is.
|
||||
*/
|
||||
target = s + sectors;
|
||||
roundup(s, bb->shift);
|
||||
rounddown(target, bb->shift);
|
||||
roundup(s, 1 << bb->shift);
|
||||
rounddown(target, 1 << bb->shift);
|
||||
sectors = target - s;
|
||||
}
|
||||
|
||||
|
@ -1214,7 +1128,7 @@ re_clear:
|
|||
if ((BB_OFFSET(p[prev]) < bad.start) &&
|
||||
(BB_END(p[prev]) > (bad.start + bad.len))) {
|
||||
/* Splitting */
|
||||
if ((bb->count + 1) < MAX_BADBLOCKS) {
|
||||
if ((bb->count + 1) <= MAX_BADBLOCKS) {
|
||||
len = front_splitting_clear(bb, prev, &bad);
|
||||
bb->count += 1;
|
||||
cleared++;
|
||||
|
@ -1255,8 +1169,6 @@ update_sectors:
|
|||
if (sectors > 0)
|
||||
goto re_clear;
|
||||
|
||||
WARN_ON(sectors < 0);
|
||||
|
||||
if (cleared) {
|
||||
badblocks_update_acked(bb);
|
||||
set_changed(bb);
|
||||
|
@ -1265,40 +1177,21 @@ update_sectors:
|
|||
write_sequnlock_irq(&bb->lock);
|
||||
|
||||
if (!cleared)
|
||||
rv = 1;
|
||||
return false;
|
||||
|
||||
return rv;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Do the exact work to check bad blocks range from the bad block table */
|
||||
static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
|
||||
sector_t *first_bad, int *bad_sectors)
|
||||
static int _badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
|
||||
sector_t *first_bad, sector_t *bad_sectors)
|
||||
{
|
||||
int unacked_badblocks, acked_badblocks;
|
||||
int prev = -1, hint = -1, set = 0;
|
||||
struct badblocks_context bad;
|
||||
unsigned int seq;
|
||||
int unacked_badblocks = 0;
|
||||
int acked_badblocks = 0;
|
||||
u64 *p = bb->page;
|
||||
int len, rv;
|
||||
u64 *p;
|
||||
|
||||
WARN_ON(bb->shift < 0 || sectors == 0);
|
||||
|
||||
if (bb->shift > 0) {
|
||||
sector_t target;
|
||||
|
||||
/* round the start down, and the end up */
|
||||
target = s + sectors;
|
||||
rounddown(s, bb->shift);
|
||||
roundup(target, bb->shift);
|
||||
sectors = target - s;
|
||||
}
|
||||
|
||||
retry:
|
||||
seq = read_seqbegin(&bb->lock);
|
||||
|
||||
p = bb->page;
|
||||
unacked_badblocks = 0;
|
||||
acked_badblocks = 0;
|
||||
|
||||
re_check:
|
||||
bad.start = s;
|
||||
|
@ -1349,14 +1242,15 @@ re_check:
|
|||
len = sectors;
|
||||
|
||||
update_sectors:
|
||||
/* This situation should never happen */
|
||||
WARN_ON(sectors < len);
|
||||
|
||||
s += len;
|
||||
sectors -= len;
|
||||
|
||||
if (sectors > 0)
|
||||
goto re_check;
|
||||
|
||||
WARN_ON(sectors < 0);
|
||||
|
||||
if (unacked_badblocks > 0)
|
||||
rv = -1;
|
||||
else if (acked_badblocks > 0)
|
||||
|
@ -1364,9 +1258,6 @@ update_sectors:
|
|||
else
|
||||
rv = 0;
|
||||
|
||||
if (read_seqretry(&bb->lock, seq))
|
||||
goto retry;
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
@ -1404,10 +1295,30 @@ update_sectors:
|
|||
* -1: there are bad blocks which have not yet been acknowledged in metadata.
|
||||
* plus the start/length of the first bad section we overlap.
|
||||
*/
|
||||
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
|
||||
sector_t *first_bad, int *bad_sectors)
|
||||
int badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
|
||||
sector_t *first_bad, sector_t *bad_sectors)
|
||||
{
|
||||
return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
|
||||
unsigned int seq;
|
||||
int rv;
|
||||
|
||||
WARN_ON(bb->shift < 0 || sectors == 0);
|
||||
|
||||
if (bb->shift > 0) {
|
||||
/* round the start down, and the end up */
|
||||
sector_t target = s + sectors;
|
||||
|
||||
rounddown(s, 1 << bb->shift);
|
||||
roundup(target, 1 << bb->shift);
|
||||
sectors = target - s;
|
||||
}
|
||||
|
||||
retry:
|
||||
seq = read_seqbegin(&bb->lock);
|
||||
rv = _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
|
||||
if (read_seqretry(&bb->lock, seq))
|
||||
goto retry;
|
||||
|
||||
return rv;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(badblocks_check);
|
||||
|
||||
|
@ -1423,10 +1334,11 @@ EXPORT_SYMBOL_GPL(badblocks_check);
|
|||
* decide how best to handle it.
|
||||
*
|
||||
* Return:
|
||||
* 0: success
|
||||
* 1: failed to set badblocks (out of space)
|
||||
* true: success
|
||||
* false: failed to set badblocks (out of space). Partial setting will be
|
||||
* treated as failure.
|
||||
*/
|
||||
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
|
||||
bool badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
|
||||
int acknowledged)
|
||||
{
|
||||
return _badblocks_set(bb, s, sectors, acknowledged);
|
||||
|
@ -1444,10 +1356,10 @@ EXPORT_SYMBOL_GPL(badblocks_set);
|
|||
* drop the remove request.
|
||||
*
|
||||
* Return:
|
||||
* 0: success
|
||||
* 1: failed to clear badblocks
|
||||
* true: success
|
||||
* false: failed to clear badblocks
|
||||
*/
|
||||
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
|
||||
bool badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
|
||||
{
|
||||
return _badblocks_clear(bb, s, sectors);
|
||||
}
|
||||
|
@ -1479,6 +1391,11 @@ void ack_all_badblocks(struct badblocks *bb)
|
|||
p[i] = BB_MAKE(start, len, 1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < bb->count ; i++)
|
||||
while (try_adjacent_combine(bb, i))
|
||||
;
|
||||
|
||||
bb->unacked_exist = 0;
|
||||
}
|
||||
write_sequnlock_irq(&bb->lock);
|
||||
|
@ -1564,9 +1481,9 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (badblocks_set(bb, sector, length, !unack))
|
||||
if (!badblocks_set(bb, sector, length, !unack))
|
||||
return -ENOSPC;
|
||||
else
|
||||
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(badblocks_store);
|
||||

block/bio-integrity-auto.c (new file)

@@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
 *
 * Automatically generate and verify integrity data on PI capable devices if the
 * bio submitter didn't provide PI itself. This ensures that kernel verifies
 * data integrity even if the file system (or other user of the block device) is
 * not aware of PI.
 */
#include <linux/blk-integrity.h>
#include <linux/workqueue.h>
#include "blk.h"

struct bio_integrity_data {
	struct bio			*bio;
	struct bvec_iter		saved_bio_iter;
	struct work_struct		work;
	struct bio_integrity_payload	bip;
	struct bio_vec			bvec;
};

static struct kmem_cache *bid_slab;
static mempool_t bid_pool;
static struct workqueue_struct *kintegrityd_wq;

static void bio_integrity_finish(struct bio_integrity_data *bid)
{
	bid->bio->bi_integrity = NULL;
	bid->bio->bi_opf &= ~REQ_INTEGRITY;
	kfree(bvec_virt(bid->bip.bip_vec));
	mempool_free(bid, &bid_pool);
}

static void bio_integrity_verify_fn(struct work_struct *work)
{
	struct bio_integrity_data *bid =
		container_of(work, struct bio_integrity_data, work);
	struct bio *bio = bid->bio;

	blk_integrity_verify_iter(bio, &bid->saved_bio_iter);
	bio_integrity_finish(bid);
	bio_endio(bio);
}

/**
 * __bio_integrity_endio - Integrity I/O completion function
 * @bio:	Protected bio
 *
 * Normally I/O completion is done in interrupt context. However, verifying I/O
 * integrity is a time-consuming task which must be run in process context.
 *
 * This function postpones completion accordingly.
 */
bool __bio_integrity_endio(struct bio *bio)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct bio_integrity_data *bid =
		container_of(bip, struct bio_integrity_data, bip);

	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
		INIT_WORK(&bid->work, bio_integrity_verify_fn);
		queue_work(kintegrityd_wq, &bid->work);
		return false;
	}

	bio_integrity_finish(bid);
	return true;
}

/**
 * bio_integrity_prep - Prepare bio for integrity I/O
 * @bio:	bio to prepare
 *
 * Checks if the bio already has an integrity payload attached. If it does, the
 * payload has been generated by another kernel subsystem, and we just pass it
 * through.
 * Otherwise allocates integrity payload and for writes the integrity metadata
 * will be generated. For reads, the completion handler will verify the
 * metadata.
 */
bool bio_integrity_prep(struct bio *bio)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_data *bid;
	gfp_t gfp = GFP_NOIO;
	unsigned int len;
	void *buf;

	if (!bi)
		return true;

	if (!bio_sectors(bio))
		return true;

	/* Already protected? */
	if (bio_integrity(bio))
		return true;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		if (bi->flags & BLK_INTEGRITY_NOVERIFY)
			return true;
		break;
	case REQ_OP_WRITE:
		if (bi->flags & BLK_INTEGRITY_NOGENERATE)
			return true;

		/*
		 * Zero the memory allocated to not leak uninitialized kernel
		 * memory to disk for non-integrity metadata where nothing else
		 * initializes the memory.
		 */
		if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
			gfp |= __GFP_ZERO;
		break;
	default:
		return true;
	}

	if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
		return true;

	/* Allocate kernel buffer for protection data */
	len = bio_integrity_bytes(bi, bio_sectors(bio));
	buf = kmalloc(len, gfp);
	if (!buf)
		goto err_end_io;
	bid = mempool_alloc(&bid_pool, GFP_NOIO);
	if (!bid)
		goto err_free_buf;
	bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);

	bid->bio = bio;

	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);

	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
		bid->bip.bip_flags |= BIP_IP_CHECKSUM;
	if (bi->csum_type)
		bid->bip.bip_flags |= BIP_CHECK_GUARD;
	if (bi->flags & BLK_INTEGRITY_REF_TAG)
		bid->bip.bip_flags |= BIP_CHECK_REFTAG;

	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
				   offset_in_page(buf)) < len)
		goto err_end_io;

	/* Auto-generate integrity metadata if this is a write */
	if (bio_data_dir(bio) == WRITE)
		blk_integrity_generate(bio);
	else
		bid->saved_bio_iter = bio->bi_iter;
	return true;

err_free_buf:
	kfree(buf);
err_end_io:
	bio->bi_status = BLK_STS_RESOURCE;
	bio_endio(bio);
	return false;
}
EXPORT_SYMBOL(bio_integrity_prep);

void blk_flush_integrity(void)
{
	flush_workqueue(kintegrityd_wq);
}

static int __init blk_integrity_auto_init(void)
{
	bid_slab = kmem_cache_create("bio_integrity_data",
				     sizeof(struct bio_integrity_data), 0,
				     SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	if (mempool_init_slab_pool(&bid_pool, BIO_POOL_SIZE, bid_slab))
		panic("bio: can't create integrity pool\n");

	/*
	 * kintegrityd won't block much but may burn a lot of CPU cycles.
	 * Make it highpri CPU intensive wq with max concurrency of 1.
	 */
	kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
					 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
	if (!kintegrityd_wq)
		panic("Failed to create kintegrityd\n");
	return 0;
}
subsys_initcall(blk_integrity_auto_init);
|
@ -7,20 +7,12 @@
|
|||
*/
|
||||
|
||||
#include <linux/blk-integrity.h>
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/slab.h>
|
||||
#include "blk.h"
|
||||
|
||||
static struct kmem_cache *bip_slab;
|
||||
static struct workqueue_struct *kintegrityd_wq;
|
||||
|
||||
void blk_flush_integrity(void)
|
||||
{
|
||||
flush_workqueue(kintegrityd_wq);
|
||||
}
|
||||
struct bio_integrity_alloc {
|
||||
struct bio_integrity_payload bip;
|
||||
struct bio_vec bvecs[];
|
||||
};
|
||||
|
||||
/**
|
||||
* bio_integrity_free - Free bio integrity payload
|
||||
|
@ -30,21 +22,23 @@ void blk_flush_integrity(void)
|
|||
*/
|
||||
void bio_integrity_free(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct bio_set *bs = bio->bi_pool;
|
||||
|
||||
if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
|
||||
if (bip->bip_vec)
|
||||
bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
|
||||
bip->bip_max_vcnt);
|
||||
mempool_free(bip, &bs->bio_integrity_pool);
|
||||
} else {
|
||||
kfree(bip);
|
||||
}
|
||||
kfree(bio_integrity(bio));
|
||||
bio->bi_integrity = NULL;
|
||||
bio->bi_opf &= ~REQ_INTEGRITY;
|
||||
}
|
||||
|
||||
void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip,
|
||||
struct bio_vec *bvecs, unsigned int nr_vecs)
|
||||
{
|
||||
memset(bip, 0, sizeof(*bip));
|
||||
bip->bip_max_vcnt = nr_vecs;
|
||||
if (nr_vecs)
|
||||
bip->bip_vec = bvecs;
|
||||
|
||||
bio->bi_integrity = bip;
|
||||
bio->bi_opf |= REQ_INTEGRITY;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_integrity_alloc - Allocate integrity payload and attach it to bio
|
||||
* @bio: bio to attach integrity metadata to
|
||||
|
@ -59,48 +53,16 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
|||
gfp_t gfp_mask,
|
||||
unsigned int nr_vecs)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
struct bio_set *bs = bio->bi_pool;
|
||||
unsigned inline_vecs;
|
||||
struct bio_integrity_alloc *bia;
|
||||
|
||||
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
|
||||
bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
|
||||
inline_vecs = nr_vecs;
|
||||
} else {
|
||||
bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
|
||||
inline_vecs = BIO_INLINE_VECS;
|
||||
}
|
||||
|
||||
if (unlikely(!bip))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
memset(bip, 0, sizeof(*bip));
|
||||
|
||||
/* always report as many vecs as asked explicitly, not inline vecs */
|
||||
bip->bip_max_vcnt = nr_vecs;
|
||||
if (nr_vecs > inline_vecs) {
|
||||
bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
|
||||
&bip->bip_max_vcnt, gfp_mask);
|
||||
if (!bip->bip_vec)
|
||||
goto err;
|
||||
} else if (nr_vecs) {
|
||||
bip->bip_vec = bip->bip_inline_vecs;
|
||||
}
|
||||
|
||||
bip->bip_bio = bio;
|
||||
bio->bi_integrity = bip;
|
||||
bio->bi_opf |= REQ_INTEGRITY;
|
||||
|
||||
return bip;
|
||||
err:
|
||||
if (bs && mempool_initialized(&bs->bio_integrity_pool))
|
||||
mempool_free(bip, &bs->bio_integrity_pool);
|
||||
else
|
||||
kfree(bip);
|
||||
bia = kmalloc(struct_size(bia, bvecs, nr_vecs), gfp_mask);
|
||||
if (unlikely(!bia))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
bio_integrity_init(bio, &bia->bip, bia->bvecs, nr_vecs);
|
||||
return &bia->bip;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_integrity_alloc);
|
||||
|
||||
|
@ -413,149 +375,6 @@ int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_integrity_prep - Prepare bio for integrity I/O
|
||||
* @bio: bio to prepare
|
||||
*
|
||||
* Description: Checks if the bio already has an integrity payload attached.
|
||||
* If it does, the payload has been generated by another kernel subsystem,
|
||||
* and we just pass it through. Otherwise allocates integrity payload.
|
||||
* The bio must have data direction, target device and start sector set prior
|
||||
* to calling. In the WRITE case, integrity metadata will be generated using
|
||||
* the block device's integrity function. In the READ case, the buffer
|
||||
* will be prepared for DMA and a suitable end_io handler set up.
|
||||
*/
|
||||
bool bio_integrity_prep(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
unsigned int len;
|
||||
void *buf;
|
||||
gfp_t gfp = GFP_NOIO;
|
||||
|
||||
if (!bi)
|
||||
return true;
|
||||
|
||||
if (!bio_sectors(bio))
|
||||
return true;
|
||||
|
||||
/* Already protected? */
|
||||
if (bio_integrity(bio))
|
||||
return true;
|
||||
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_READ:
|
||||
if (bi->flags & BLK_INTEGRITY_NOVERIFY)
|
||||
return true;
|
||||
break;
|
||||
case REQ_OP_WRITE:
|
||||
if (bi->flags & BLK_INTEGRITY_NOGENERATE)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Zero the memory allocated to not leak uninitialized kernel
|
||||
* memory to disk for non-integrity metadata where nothing else
|
||||
* initializes the memory.
|
||||
*/
|
||||
if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
|
||||
gfp |= __GFP_ZERO;
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Allocate kernel buffer for protection data */
|
||||
len = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
buf = kmalloc(len, gfp);
|
||||
if (unlikely(buf == NULL)) {
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
|
||||
if (IS_ERR(bip)) {
|
||||
kfree(buf);
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
|
||||
bip_set_seed(bip, bio->bi_iter.bi_sector);
|
||||
|
||||
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
|
||||
bip->bip_flags |= BIP_IP_CHECKSUM;
|
||||
|
||||
/* describe what tags to check in payload */
|
||||
if (bi->csum_type)
|
||||
bip->bip_flags |= BIP_CHECK_GUARD;
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG)
|
||||
bip->bip_flags |= BIP_CHECK_REFTAG;
|
||||
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
|
||||
offset_in_page(buf)) < len) {
|
||||
printk(KERN_ERR "could not attach integrity payload\n");
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
/* Auto-generate integrity metadata if this is a write */
|
||||
if (bio_data_dir(bio) == WRITE)
|
||||
blk_integrity_generate(bio);
|
||||
else
|
||||
bip->bio_iter = bio->bi_iter;
|
||||
return true;
|
||||
|
||||
err_end_io:
|
||||
bio->bi_status = BLK_STS_RESOURCE;
|
||||
bio_endio(bio);
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_integrity_prep);
|
||||
|
||||
/**
|
||||
* bio_integrity_verify_fn - Integrity I/O completion worker
|
||||
* @work: Work struct stored in bio to be verified
|
||||
*
|
||||
* Description: This workqueue function is called to complete a READ
|
||||
* request. The function verifies the transferred integrity metadata
|
||||
* and then calls the original bio end_io function.
|
||||
*/
|
||||
static void bio_integrity_verify_fn(struct work_struct *work)
|
||||
{
|
||||
struct bio_integrity_payload *bip =
|
||||
container_of(work, struct bio_integrity_payload, bip_work);
|
||||
struct bio *bio = bip->bip_bio;
|
||||
|
||||
blk_integrity_verify(bio);
|
||||
|
||||
kfree(bvec_virt(bip->bip_vec));
|
||||
bio_integrity_free(bio);
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
/**
|
||||
* __bio_integrity_endio - Integrity I/O completion function
|
||||
* @bio: Protected bio
|
||||
*
|
||||
* Description: Completion for integrity I/O
|
||||
*
|
||||
* Normally I/O completion is done in interrupt context. However,
|
||||
* verifying I/O integrity is a time-consuming task which must be run
|
||||
* in process context. This function postpones completion
|
||||
* accordingly.
|
||||
*/
|
||||
bool __bio_integrity_endio(struct bio *bio)
|
||||
{
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
|
||||
INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
|
||||
queue_work(kintegrityd_wq, &bip->bip_work);
|
||||
return false;
|
||||
}
|
||||
|
||||
kfree(bvec_virt(bip->bip_vec));
|
||||
bio_integrity_free(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_integrity_advance - Advance integrity vector
|
||||
* @bio: bio whose integrity vector to update
|
||||
|
@ -617,44 +436,3 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bioset_integrity_create(struct bio_set *bs, int pool_size)
|
||||
{
|
||||
if (mempool_initialized(&bs->bio_integrity_pool))
|
||||
return 0;
|
||||
|
||||
if (mempool_init_slab_pool(&bs->bio_integrity_pool,
|
||||
pool_size, bip_slab))
|
||||
return -1;
|
||||
|
||||
if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
|
||||
mempool_exit(&bs->bio_integrity_pool);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(bioset_integrity_create);
|
||||
|
||||
void bioset_integrity_free(struct bio_set *bs)
|
||||
{
|
||||
mempool_exit(&bs->bio_integrity_pool);
|
||||
mempool_exit(&bs->bvec_integrity_pool);
|
||||
}
|
||||
|
||||
void __init bio_integrity_init(void)
|
||||
{
|
||||
/*
|
||||
* kintegrityd won't block much but may burn a lot of CPU cycles.
|
||||
* Make it highpri CPU intensive wq with max concurrency of 1.
|
||||
*/
|
||||
kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
|
||||
WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
|
||||
if (!kintegrityd_wq)
|
||||
panic("Failed to create kintegrityd\n");
|
||||
|
||||
bip_slab = kmem_cache_create("bio_integrity_payload",
|
||||
sizeof(struct bio_integrity_payload) +
|
||||
sizeof(struct bio_vec) * BIO_INLINE_VECS,
|
||||
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
}
|
||||

block/bio.c

@@ -1026,9 +1026,10 @@ EXPORT_SYMBOL(bio_add_page);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
			  size_t off)
{
	unsigned long nr = off / PAGE_SIZE;

	WARN_ON_ONCE(len > UINT_MAX);
	WARN_ON_ONCE(off > UINT_MAX);
	__bio_add_page(bio, &folio->page, len, off);
	__bio_add_page(bio, folio_page(folio, nr), len, off % PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(bio_add_folio_nofail);

@@ -1049,9 +1050,11 @@ EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
		   size_t off)
{
	if (len > UINT_MAX || off > UINT_MAX)
	unsigned long nr = off / PAGE_SIZE;

	if (len > UINT_MAX)
		return false;
	return bio_add_page(bio, &folio->page, len, off) > 0;
	return bio_add_page(bio, folio_page(folio, nr), len, off % PAGE_SIZE) > 0;
}
EXPORT_SYMBOL(bio_add_folio);
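The point of the new "off / PAGE_SIZE" and "off % PAGE_SIZE" split above is that folio offsets past the first page now land on the correct sub-page. A small illustrative fragment (not a standalone program; the bio and folio are assumed to exist in the caller, and the numbers are made up):

    /*
     * With a multi-page folio, an offset beyond the first page now resolves
     * to the right sub-page: off = 2 * PAGE_SIZE + 512 computes nr = 2 and
     * becomes folio_page(folio, 2) at in-page offset 512, where the old code
     * always started from &folio->page.
     */
    bool added = bio_add_folio(bio, folio, 4096, 2 * PAGE_SIZE + 512);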
|
@@ -1657,7 +1660,6 @@ void bioset_exit(struct bio_set *bs)
        mempool_exit(&bs->bio_pool);
        mempool_exit(&bs->bvec_pool);

        bioset_integrity_free(bs);
        if (bs->bio_slab)
                bio_put_slab(bs);
        bs->bio_slab = NULL;
@@ -1737,8 +1739,6 @@ static int __init init_bio(void)

        BUILD_BUG_ON(BIO_FLAG_LAST > 8 * sizeof_field(struct bio, bi_flags));

        bio_integrity_init();

        for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) {
                struct biovec_slab *bvs = bvec_slabs + i;

@@ -1754,9 +1754,6 @@ static int __init init_bio(void)
                        BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE))
                panic("bio: can't allocate bios\n");

        if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
                panic("bio: can't create integrity pool\n");

        return 0;
}
subsys_initcall(init_bio);

block/blk-cgroup.c

@@ -816,6 +816,41 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
        ctx->bdev = bdev;
        return 0;
}

/*
 * Similar to blkg_conf_open_bdev, but additionally freezes the queue,
 * acquires q->elevator_lock, and ensures the correct locking order
 * between q->elevator_lock and q->rq_qos_mutex.
 *
 * This function returns negative error on failure. On success it returns
 * memflags which must be saved and later passed to blkg_conf_exit_frozen
 * for restoring the memalloc scope.
 */
unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx)
{
        int ret;
        unsigned long memflags;

        if (ctx->bdev)
                return -EINVAL;

        ret = blkg_conf_open_bdev(ctx);
        if (ret < 0)
                return ret;
        /*
         * At this point, we haven't started protecting anything related to
         * QoS, so we release q->rq_qos_mutex here, which was first acquired
         * in blkg_conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after
         * freezing the queue and acquiring q->elevator_lock to maintain the
         * correct locking order.
         */
        mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);

        memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue);
        mutex_lock(&ctx->bdev->bd_queue->elevator_lock);
        mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex);

        return memflags;
}

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
@@ -972,6 +1007,22 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
}
EXPORT_SYMBOL_GPL(blkg_conf_exit);

/*
 * Similar to blkg_conf_exit, but also unfreezes the queue and releases
 * q->elevator_lock. Should be used when blkg_conf_open_bdev_frozen
 * is used to open the bdev.
 */
void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags)
{
        if (ctx->bdev) {
                struct request_queue *q = ctx->bdev->bd_queue;

                blkg_conf_exit(ctx);
                mutex_unlock(&q->elevator_lock);
                blk_mq_unfreeze_queue(q, memflags);
        }
}
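
Rough sketch of how the frozen open/exit pair is meant to be used; the in-tree caller added by this series is ioc_qos_write() further down in this diff, so the function below is only a hypothetical illustration (its name, the "apply configuration" placeholder, and the assumption that blkg_conf_init() already ran are not from the kernel source).

/* Hypothetical attribute writer; ctx is assumed to have been set up with
 * blkg_conf_init() by the caller. */
static ssize_t demo_qos_write(struct blkg_conf_ctx *ctx)
{
        unsigned long memflags;
        ssize_t ret = 0;

        memflags = blkg_conf_open_bdev_frozen(ctx);
        if (IS_ERR_VALUE(memflags))
                return memflags;        /* negative errno encoded in the value */

        /*
         * The queue is frozen and both q->elevator_lock and q->rq_qos_mutex
         * are held here, so rq-qos configuration can be applied safely.
         */

        blkg_conf_exit_frozen(ctx, memflags);   /* unlock, unfreeze, restore memalloc scope */
        return ret;
}
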

static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
{
        int i;

@ -1728,26 +1779,26 @@ int blkcg_policy_register(struct blkcg_policy *pol)
|
|||
struct blkcg *blkcg;
|
||||
int i, ret;
|
||||
|
||||
mutex_lock(&blkcg_pol_register_mutex);
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
|
||||
/* find an empty slot */
|
||||
ret = -ENOSPC;
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++)
|
||||
if (!blkcg_policy[i])
|
||||
break;
|
||||
if (i >= BLKCG_MAX_POLS) {
|
||||
pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n");
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs, and policy
|
||||
* without pd_alloc_fn/pd_free_fn can't be activated.
|
||||
*/
|
||||
if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) ||
|
||||
(!pol->pd_alloc_fn ^ !pol->pd_free_fn))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&blkcg_pol_register_mutex);
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
|
||||
/* find an empty slot */
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++)
|
||||
if (!blkcg_policy[i])
|
||||
break;
|
||||
if (i >= BLKCG_MAX_POLS) {
|
||||
pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n");
|
||||
ret = -ENOSPC;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
/* register @pol */
|
||||
pol->plid = i;
|
||||
|
@ -1759,8 +1810,10 @@ int blkcg_policy_register(struct blkcg_policy *pol)
|
|||
struct blkcg_policy_data *cpd;
|
||||
|
||||
cpd = pol->cpd_alloc_fn(GFP_KERNEL);
|
||||
if (!cpd)
|
||||
if (!cpd) {
|
||||
ret = -ENOMEM;
|
||||
goto err_free_cpds;
|
||||
}
|
||||
|
||||
blkcg->cpd[pol->plid] = cpd;
|
||||
cpd->blkcg = blkcg;
|
||||
|
|
|
@ -219,9 +219,11 @@ struct blkg_conf_ctx {
|
|||
|
||||
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
|
||||
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
|
||||
unsigned long blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx);
|
||||
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags);
|
||||
|
||||
/**
|
||||
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
|
||||
|
|
|
@ -429,6 +429,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
|
|||
|
||||
refcount_set(&q->refs, 1);
|
||||
mutex_init(&q->debugfs_mutex);
|
||||
mutex_init(&q->elevator_lock);
|
||||
mutex_init(&q->sysfs_lock);
|
||||
mutex_init(&q->limits_lock);
|
||||
mutex_init(&q->rq_qos_mutex);
|
||||
|
@ -455,6 +456,12 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
|
|||
lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)",
|
||||
&q->q_lock_cls_key, 0);
|
||||
|
||||
/* Teach lockdep about lock ordering (reclaim WRT queue freeze lock). */
|
||||
fs_reclaim_acquire(GFP_KERNEL);
|
||||
rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
|
||||
rwsem_release(&q->io_lockdep_map, _RET_IP_);
|
||||
fs_reclaim_release(GFP_KERNEL);
|
||||
|
||||
q->nr_requests = BLKDEV_DEFAULT_RQ;
|
||||
|
||||
return q;
|
||||
|
|
|
@ -87,7 +87,7 @@ static struct bio_set crypto_bio_split;
|
|||
* This is the key we set when evicting a keyslot. This *should* be the all 0's
|
||||
* key, but AES-XTS rejects that key, so we use some random bytes instead.
|
||||
*/
|
||||
static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE];
|
||||
static u8 blank_key[BLK_CRYPTO_MAX_RAW_KEY_SIZE];
|
||||
|
||||
static void blk_crypto_fallback_evict_keyslot(unsigned int slot)
|
||||
{
|
||||
|
@ -119,7 +119,7 @@ blk_crypto_fallback_keyslot_program(struct blk_crypto_profile *profile,
|
|||
blk_crypto_fallback_evict_keyslot(slot);
|
||||
|
||||
slotp->crypto_mode = crypto_mode;
|
||||
err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->raw,
|
||||
err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->bytes,
|
||||
key->size);
|
||||
if (err) {
|
||||
blk_crypto_fallback_evict_keyslot(slot);
|
||||
|
@ -539,7 +539,7 @@ static int blk_crypto_fallback_init(void)
|
|||
if (blk_crypto_fallback_inited)
|
||||
return 0;
|
||||
|
||||
get_random_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE);
|
||||
get_random_bytes(blank_key, sizeof(blank_key));
|
||||
|
||||
err = bioset_init(&crypto_bio_split, 64, 0, 0);
|
||||
if (err)
|
||||
|
@ -561,6 +561,7 @@ static int blk_crypto_fallback_init(void)
|
|||
|
||||
blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops;
|
||||
blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
|
||||
blk_crypto_fallback_profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW;
|
||||
|
||||
/* All blk-crypto modes have a crypto API fallback. */
|
||||
for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
|
||||
|
|
|
@ -14,6 +14,7 @@ struct blk_crypto_mode {
|
|||
const char *name; /* name of this mode, shown in sysfs */
|
||||
const char *cipher_str; /* crypto API name (for fallback case) */
|
||||
unsigned int keysize; /* key size in bytes */
|
||||
unsigned int security_strength; /* security strength in bytes */
|
||||
unsigned int ivsize; /* iv size in bytes */
|
||||
};
|
||||
|
||||
|
@ -82,6 +83,9 @@ int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
|
|||
bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
|
||||
const struct blk_crypto_config *cfg);
|
||||
|
||||
int blk_crypto_ioctl(struct block_device *bdev, unsigned int cmd,
|
||||
void __user *argp);
|
||||
|
||||
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
|
||||
|
||||
static inline int blk_crypto_sysfs_register(struct gendisk *disk)
|
||||
|
@ -129,6 +133,12 @@ static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline int blk_crypto_ioctl(struct block_device *bdev, unsigned int cmd,
|
||||
void __user *argp)
|
||||
{
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
|
||||
|
||||
void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
|
||||
|
|
|
@ -352,6 +352,8 @@ bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
|
|||
return false;
|
||||
if (profile->max_dun_bytes_supported < cfg->dun_bytes)
|
||||
return false;
|
||||
if (!(profile->key_types_supported & cfg->key_type))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -462,6 +464,99 @@ bool blk_crypto_register(struct blk_crypto_profile *profile,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_register);
|
||||
|
||||
/**
|
||||
* blk_crypto_derive_sw_secret() - Derive software secret from wrapped key
|
||||
* @bdev: a block device that supports hardware-wrapped keys
|
||||
* @eph_key: a hardware-wrapped key in ephemerally-wrapped form
|
||||
* @eph_key_size: size of @eph_key in bytes
|
||||
* @sw_secret: (output) the software secret
|
||||
*
|
||||
* Given a hardware-wrapped key in ephemerally-wrapped form (the same form that
|
||||
* it is used for I/O), ask the hardware to derive the secret which software can
|
||||
* use for cryptographic tasks other than inline encryption. This secret is
|
||||
* guaranteed to be cryptographically isolated from the inline encryption key,
|
||||
* i.e. derived with a different KDF context.
|
||||
*
|
||||
* Return: 0 on success, -EOPNOTSUPP if the block device doesn't support
|
||||
* hardware-wrapped keys, -EBADMSG if the key isn't a valid
|
||||
* ephemerally-wrapped key, or another -errno code.
|
||||
*/
|
||||
int blk_crypto_derive_sw_secret(struct block_device *bdev,
|
||||
const u8 *eph_key, size_t eph_key_size,
|
||||
u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
|
||||
{
|
||||
struct blk_crypto_profile *profile =
|
||||
bdev_get_queue(bdev)->crypto_profile;
|
||||
int err;
|
||||
|
||||
if (!profile)
|
||||
return -EOPNOTSUPP;
|
||||
if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED))
|
||||
return -EOPNOTSUPP;
|
||||
if (!profile->ll_ops.derive_sw_secret)
|
||||
return -EOPNOTSUPP;
|
||||
blk_crypto_hw_enter(profile);
|
||||
err = profile->ll_ops.derive_sw_secret(profile, eph_key, eph_key_size,
|
||||
sw_secret);
|
||||
blk_crypto_hw_exit(profile);
|
||||
return err;
|
||||
}
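
For context, an upper layer that holds an ephemerally-wrapped key could call the new helper roughly as sketched below. The caller, its surroundings and the way the secret is consumed are assumptions for illustration; only blk_crypto_derive_sw_secret() and BLK_CRYPTO_SW_SECRET_SIZE come from the code above.

/* Hypothetical caller of blk_crypto_derive_sw_secret(). */
static int demo_use_sw_secret(struct block_device *bdev,
                              const u8 *eph_key, size_t eph_key_size)
{
        u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE];
        int err;

        err = blk_crypto_derive_sw_secret(bdev, eph_key, eph_key_size,
                                          sw_secret);
        if (err)
                return err;     /* e.g. -EOPNOTSUPP without HW-wrapped key support */

        /* ... feed sw_secret into a KDF for non-inline crypto uses ... */

        memzero_explicit(sw_secret, sizeof(sw_secret));
        return 0;
}
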
|
||||
|
||||
int blk_crypto_import_key(struct blk_crypto_profile *profile,
|
||||
const u8 *raw_key, size_t raw_key_size,
|
||||
u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!profile)
|
||||
return -EOPNOTSUPP;
|
||||
if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED))
|
||||
return -EOPNOTSUPP;
|
||||
if (!profile->ll_ops.import_key)
|
||||
return -EOPNOTSUPP;
|
||||
blk_crypto_hw_enter(profile);
|
||||
ret = profile->ll_ops.import_key(profile, raw_key, raw_key_size,
|
||||
lt_key);
|
||||
blk_crypto_hw_exit(profile);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int blk_crypto_generate_key(struct blk_crypto_profile *profile,
|
||||
u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!profile)
|
||||
return -EOPNOTSUPP;
|
||||
if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED))
|
||||
return -EOPNOTSUPP;
|
||||
if (!profile->ll_ops.generate_key)
|
||||
return -EOPNOTSUPP;
|
||||
blk_crypto_hw_enter(profile);
|
||||
ret = profile->ll_ops.generate_key(profile, lt_key);
|
||||
blk_crypto_hw_exit(profile);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int blk_crypto_prepare_key(struct blk_crypto_profile *profile,
|
||||
const u8 *lt_key, size_t lt_key_size,
|
||||
u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!profile)
|
||||
return -EOPNOTSUPP;
|
||||
if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED))
|
||||
return -EOPNOTSUPP;
|
||||
if (!profile->ll_ops.prepare_key)
|
||||
return -EOPNOTSUPP;
|
||||
blk_crypto_hw_enter(profile);
|
||||
ret = profile->ll_ops.prepare_key(profile, lt_key, lt_key_size,
|
||||
eph_key);
|
||||
blk_crypto_hw_exit(profile);
|
||||
return ret;
|
||||
}
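
All four wrappers above funnel into the profile's low-level ops, so a controller driver that advertises BLK_CRYPTO_KEY_TYPE_HW_WRAPPED would supply callbacks along the lines of this sketch. Everything prefixed demo_ is hypothetical, the demo_hw_*() helpers stand in for real firmware interfaces, and the exact struct blk_crypto_ll_ops layout should be taken from blk-crypto-profile.h rather than from this sketch.

/* Provided elsewhere by the hypothetical hardware layer. */
int demo_hw_derive_secret(const u8 *eph_key, size_t eph_key_size, u8 *out);
int demo_hw_wrap_key(const u8 *raw_key, size_t raw_key_size, u8 *lt_key);

static int demo_derive_sw_secret(struct blk_crypto_profile *profile,
                                 const u8 *eph_key, size_t eph_key_size,
                                 u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
{
        return demo_hw_derive_secret(eph_key, eph_key_size, sw_secret);
}

static int demo_import_key(struct blk_crypto_profile *profile,
                           const u8 *raw_key, size_t raw_key_size,
                           u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
{
        /* Positive return is the number of long-term wrapped bytes written,
         * matching how blk_crypto_import_key() interprets the result. */
        return demo_hw_wrap_key(raw_key, raw_key_size, lt_key);
}

static const struct blk_crypto_ll_ops demo_crypto_ll_ops = {
        .derive_sw_secret       = demo_derive_sw_secret,
        .import_key             = demo_import_key,
        /* .generate_key and .prepare_key follow the same pattern, alongside
         * the usual keyslot_program/keyslot_evict hooks. */
};
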
|
||||
|
||||
/**
|
||||
* blk_crypto_intersect_capabilities() - restrict supported crypto capabilities
|
||||
* by child device
|
||||
|
@ -485,10 +580,12 @@ void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent,
|
|||
child->max_dun_bytes_supported);
|
||||
for (i = 0; i < ARRAY_SIZE(child->modes_supported); i++)
|
||||
parent->modes_supported[i] &= child->modes_supported[i];
|
||||
parent->key_types_supported &= child->key_types_supported;
|
||||
} else {
|
||||
parent->max_dun_bytes_supported = 0;
|
||||
memset(parent->modes_supported, 0,
|
||||
sizeof(parent->modes_supported));
|
||||
parent->key_types_supported = 0;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_intersect_capabilities);
|
||||
|
@ -521,6 +618,9 @@ bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target,
|
|||
target->max_dun_bytes_supported)
|
||||
return false;
|
||||
|
||||
if (reference->key_types_supported & ~target->key_types_supported)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_has_capabilities);
|
||||
|
@ -555,5 +655,6 @@ void blk_crypto_update_capabilities(struct blk_crypto_profile *dst,
|
|||
sizeof(dst->modes_supported));
|
||||
|
||||
dst->max_dun_bytes_supported = src->max_dun_bytes_supported;
|
||||
dst->key_types_supported = src->key_types_supported;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_update_capabilities);
|
||||
|
|
|
@ -31,6 +31,13 @@ static struct blk_crypto_attr *attr_to_crypto_attr(struct attribute *attr)
|
|||
return container_of(attr, struct blk_crypto_attr, attr);
|
||||
}
|
||||
|
||||
static ssize_t hw_wrapped_keys_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
/* Always show supported, since the file doesn't exist otherwise. */
|
||||
return sysfs_emit(page, "supported\n");
|
||||
}
|
||||
|
||||
static ssize_t max_dun_bits_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
|
@ -43,20 +50,48 @@ static ssize_t num_keyslots_show(struct blk_crypto_profile *profile,
|
|||
return sysfs_emit(page, "%u\n", profile->num_slots);
|
||||
}
|
||||
|
||||
static ssize_t raw_keys_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
/* Always show supported, since the file doesn't exist otherwise. */
|
||||
return sysfs_emit(page, "supported\n");
|
||||
}
|
||||
|
||||
#define BLK_CRYPTO_RO_ATTR(_name) \
|
||||
static struct blk_crypto_attr _name##_attr = __ATTR_RO(_name)
|
||||
|
||||
BLK_CRYPTO_RO_ATTR(hw_wrapped_keys);
|
||||
BLK_CRYPTO_RO_ATTR(max_dun_bits);
|
||||
BLK_CRYPTO_RO_ATTR(num_keyslots);
|
||||
BLK_CRYPTO_RO_ATTR(raw_keys);
|
||||
|
||||
static umode_t blk_crypto_is_visible(struct kobject *kobj,
|
||||
struct attribute *attr, int n)
|
||||
{
|
||||
struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
|
||||
struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
|
||||
|
||||
if (a == &hw_wrapped_keys_attr &&
|
||||
!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED))
|
||||
return 0;
|
||||
if (a == &raw_keys_attr &&
|
||||
!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_RAW))
|
||||
return 0;
|
||||
|
||||
return 0444;
|
||||
}
|
||||
|
||||
static struct attribute *blk_crypto_attrs[] = {
|
||||
&hw_wrapped_keys_attr.attr,
|
||||
&max_dun_bits_attr.attr,
|
||||
&num_keyslots_attr.attr,
|
||||
&raw_keys_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group blk_crypto_attr_group = {
|
||||
.attrs = blk_crypto_attrs,
|
||||
.is_visible = blk_crypto_is_visible,
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -23,24 +23,28 @@ const struct blk_crypto_mode blk_crypto_modes[] = {
|
|||
.name = "AES-256-XTS",
|
||||
.cipher_str = "xts(aes)",
|
||||
.keysize = 64,
|
||||
.security_strength = 32,
|
||||
.ivsize = 16,
|
||||
},
|
||||
[BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = {
|
||||
.name = "AES-128-CBC-ESSIV",
|
||||
.cipher_str = "essiv(cbc(aes),sha256)",
|
||||
.keysize = 16,
|
||||
.security_strength = 16,
|
||||
.ivsize = 16,
|
||||
},
|
||||
[BLK_ENCRYPTION_MODE_ADIANTUM] = {
|
||||
.name = "Adiantum",
|
||||
.cipher_str = "adiantum(xchacha12,aes)",
|
||||
.keysize = 32,
|
||||
.security_strength = 32,
|
||||
.ivsize = 32,
|
||||
},
|
||||
[BLK_ENCRYPTION_MODE_SM4_XTS] = {
|
||||
.name = "SM4-XTS",
|
||||
.cipher_str = "xts(sm4)",
|
||||
.keysize = 32,
|
||||
.security_strength = 16,
|
||||
.ivsize = 16,
|
||||
},
|
||||
};
|
||||
|
@ -76,9 +80,15 @@ static int __init bio_crypt_ctx_init(void)
|
|||
/* This is assumed in various places. */
|
||||
BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
|
||||
|
||||
/* Sanity check that no algorithm exceeds the defined limits. */
|
||||
/*
|
||||
* Validate the crypto mode properties. This ideally would be done with
|
||||
* static assertions, but boot-time checks are the next best thing.
|
||||
*/
|
||||
for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) {
|
||||
BUG_ON(blk_crypto_modes[i].keysize > BLK_CRYPTO_MAX_KEY_SIZE);
|
||||
BUG_ON(blk_crypto_modes[i].keysize >
|
||||
BLK_CRYPTO_MAX_RAW_KEY_SIZE);
|
||||
BUG_ON(blk_crypto_modes[i].security_strength >
|
||||
blk_crypto_modes[i].keysize);
|
||||
BUG_ON(blk_crypto_modes[i].ivsize > BLK_CRYPTO_MAX_IV_SIZE);
|
||||
}
|
||||
|
||||
|
@ -315,17 +325,20 @@ int __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
|
|||
/**
|
||||
* blk_crypto_init_key() - Prepare a key for use with blk-crypto
|
||||
* @blk_key: Pointer to the blk_crypto_key to initialize.
|
||||
* @raw_key: Pointer to the raw key. Must be the correct length for the chosen
|
||||
* @crypto_mode; see blk_crypto_modes[].
|
||||
* @key_bytes: the bytes of the key
|
||||
* @key_size: size of the key in bytes
|
||||
* @key_type: type of the key -- either raw or hardware-wrapped
|
||||
* @crypto_mode: identifier for the encryption algorithm to use
|
||||
* @dun_bytes: number of bytes that will be used to specify the DUN when this
|
||||
* key is used
|
||||
* @data_unit_size: the data unit size to use for en/decryption
|
||||
*
|
||||
* Return: 0 on success, -errno on failure. The caller is responsible for
|
||||
* zeroizing both blk_key and raw_key when done with them.
|
||||
* zeroizing both blk_key and key_bytes when done with them.
|
||||
*/
|
||||
int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
|
||||
int blk_crypto_init_key(struct blk_crypto_key *blk_key,
|
||||
const u8 *key_bytes, size_t key_size,
|
||||
enum blk_crypto_key_type key_type,
|
||||
enum blk_crypto_mode_num crypto_mode,
|
||||
unsigned int dun_bytes,
|
||||
unsigned int data_unit_size)
|
||||
|
@ -338,8 +351,19 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
|
|||
return -EINVAL;
|
||||
|
||||
mode = &blk_crypto_modes[crypto_mode];
|
||||
if (mode->keysize == 0)
|
||||
switch (key_type) {
|
||||
case BLK_CRYPTO_KEY_TYPE_RAW:
|
||||
if (key_size != mode->keysize)
|
||||
return -EINVAL;
|
||||
break;
|
||||
case BLK_CRYPTO_KEY_TYPE_HW_WRAPPED:
|
||||
if (key_size < mode->security_strength ||
|
||||
key_size > BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE)
|
||||
return -EINVAL;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (dun_bytes == 0 || dun_bytes > mode->ivsize)
|
||||
return -EINVAL;
|
||||
|
@ -350,9 +374,10 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
|
|||
blk_key->crypto_cfg.crypto_mode = crypto_mode;
|
||||
blk_key->crypto_cfg.dun_bytes = dun_bytes;
|
||||
blk_key->crypto_cfg.data_unit_size = data_unit_size;
|
||||
blk_key->crypto_cfg.key_type = key_type;
|
||||
blk_key->data_unit_size_bits = ilog2(data_unit_size);
|
||||
blk_key->size = mode->keysize;
|
||||
memcpy(blk_key->raw, raw_key, mode->keysize);
|
||||
blk_key->size = key_size;
|
||||
memcpy(blk_key->bytes, key_bytes, key_size);
|
||||
|
||||
return 0;
|
||||
}
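
A minimal caller-side sketch of the updated prototype, using a raw (non-wrapped) AES-256-XTS key; the wrapper function, the 16-byte DUN width and the 512-byte data unit size are assumptions chosen for the example, while the constants themselves are existing blk-crypto identifiers.

/* Illustrative only: build a blk_crypto_key from a 64-byte raw key with the
 * new (key_bytes, key_size, key_type) argument list. */
static int demo_setup_raw_key(struct blk_crypto_key *bkey, const u8 raw[64])
{
        return blk_crypto_init_key(bkey, raw, 64,
                                   BLK_CRYPTO_KEY_TYPE_RAW,
                                   BLK_ENCRYPTION_MODE_AES_256_XTS,
                                   16,          /* dun_bytes, example value */
                                   512);        /* data_unit_size, example value */
}
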
|
||||
|
@ -372,8 +397,10 @@ bool blk_crypto_config_supported_natively(struct block_device *bdev,
|
|||
bool blk_crypto_config_supported(struct block_device *bdev,
|
||||
const struct blk_crypto_config *cfg)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
|
||||
blk_crypto_config_supported_natively(bdev, cfg);
|
||||
if (IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) &&
|
||||
cfg->key_type == BLK_CRYPTO_KEY_TYPE_RAW)
|
||||
return true;
|
||||
return blk_crypto_config_supported_natively(bdev, cfg);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -387,15 +414,21 @@ bool blk_crypto_config_supported(struct block_device *bdev,
|
|||
* an skcipher, and *should not* be called from the data path, since that might
|
||||
* cause a deadlock
|
||||
*
|
||||
* Return: 0 on success; -ENOPKG if the hardware doesn't support the key and
|
||||
* blk-crypto-fallback is either disabled or the needed algorithm
|
||||
* is disabled in the crypto API; or another -errno code.
|
||||
* Return: 0 on success; -EOPNOTSUPP if the key is wrapped but the hardware does
|
||||
* not support wrapped keys; -ENOPKG if the key is a raw key but the
|
||||
* hardware does not support raw keys and blk-crypto-fallback is either
|
||||
* disabled or the needed algorithm is disabled in the crypto API; or
|
||||
* another -errno code if something else went wrong.
|
||||
*/
|
||||
int blk_crypto_start_using_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key)
|
||||
{
|
||||
if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg))
|
||||
return 0;
|
||||
if (key->crypto_cfg.key_type != BLK_CRYPTO_KEY_TYPE_RAW) {
|
||||
pr_warn_ratelimited("%pg: no support for wrapped keys\n", bdev);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode);
|
||||
}
|
||||
|
||||
|
@ -436,3 +469,146 @@ void blk_crypto_evict_key(struct block_device *bdev,
|
|||
pr_warn_ratelimited("%pg: error %d evicting key\n", bdev, err);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_evict_key);
|
||||
|
||||
static int blk_crypto_ioctl_import_key(struct blk_crypto_profile *profile,
|
||||
void __user *argp)
|
||||
{
|
||||
struct blk_crypto_import_key_arg arg;
|
||||
u8 raw_key[BLK_CRYPTO_MAX_RAW_KEY_SIZE];
|
||||
u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE];
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&arg, argp, sizeof(arg)))
|
||||
return -EFAULT;
|
||||
|
||||
if (memchr_inv(arg.reserved, 0, sizeof(arg.reserved)))
|
||||
return -EINVAL;
|
||||
|
||||
if (arg.raw_key_size < 16 || arg.raw_key_size > sizeof(raw_key))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(raw_key, u64_to_user_ptr(arg.raw_key_ptr),
|
||||
arg.raw_key_size)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
ret = blk_crypto_import_key(profile, raw_key, arg.raw_key_size, lt_key);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > arg.lt_key_size) {
|
||||
ret = -EOVERFLOW;
|
||||
goto out;
|
||||
}
|
||||
arg.lt_key_size = ret;
|
||||
if (copy_to_user(u64_to_user_ptr(arg.lt_key_ptr), lt_key,
|
||||
arg.lt_key_size) ||
|
||||
copy_to_user(argp, &arg, sizeof(arg))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
memzero_explicit(raw_key, sizeof(raw_key));
|
||||
memzero_explicit(lt_key, sizeof(lt_key));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int blk_crypto_ioctl_generate_key(struct blk_crypto_profile *profile,
|
||||
void __user *argp)
|
||||
{
|
||||
struct blk_crypto_generate_key_arg arg;
|
||||
u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE];
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&arg, argp, sizeof(arg)))
|
||||
return -EFAULT;
|
||||
|
||||
if (memchr_inv(arg.reserved, 0, sizeof(arg.reserved)))
|
||||
return -EINVAL;
|
||||
|
||||
ret = blk_crypto_generate_key(profile, lt_key);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > arg.lt_key_size) {
|
||||
ret = -EOVERFLOW;
|
||||
goto out;
|
||||
}
|
||||
arg.lt_key_size = ret;
|
||||
if (copy_to_user(u64_to_user_ptr(arg.lt_key_ptr), lt_key,
|
||||
arg.lt_key_size) ||
|
||||
copy_to_user(argp, &arg, sizeof(arg))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
memzero_explicit(lt_key, sizeof(lt_key));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int blk_crypto_ioctl_prepare_key(struct blk_crypto_profile *profile,
|
||||
void __user *argp)
|
||||
{
|
||||
struct blk_crypto_prepare_key_arg arg;
|
||||
u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE];
|
||||
u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE];
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&arg, argp, sizeof(arg)))
|
||||
return -EFAULT;
|
||||
|
||||
if (memchr_inv(arg.reserved, 0, sizeof(arg.reserved)))
|
||||
return -EINVAL;
|
||||
|
||||
if (arg.lt_key_size > sizeof(lt_key))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(lt_key, u64_to_user_ptr(arg.lt_key_ptr),
|
||||
arg.lt_key_size)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
ret = blk_crypto_prepare_key(profile, lt_key, arg.lt_key_size, eph_key);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > arg.eph_key_size) {
|
||||
ret = -EOVERFLOW;
|
||||
goto out;
|
||||
}
|
||||
arg.eph_key_size = ret;
|
||||
if (copy_to_user(u64_to_user_ptr(arg.eph_key_ptr), eph_key,
|
||||
arg.eph_key_size) ||
|
||||
copy_to_user(argp, &arg, sizeof(arg))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
memzero_explicit(lt_key, sizeof(lt_key));
|
||||
memzero_explicit(eph_key, sizeof(eph_key));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int blk_crypto_ioctl(struct block_device *bdev, unsigned int cmd,
|
||||
void __user *argp)
|
||||
{
|
||||
struct blk_crypto_profile *profile =
|
||||
bdev_get_queue(bdev)->crypto_profile;
|
||||
|
||||
if (!profile)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (cmd) {
|
||||
case BLKCRYPTOIMPORTKEY:
|
||||
return blk_crypto_ioctl_import_key(profile, argp);
|
||||
case BLKCRYPTOGENERATEKEY:
|
||||
return blk_crypto_ioctl_generate_key(profile, argp);
|
||||
case BLKCRYPTOPREPAREKEY:
|
||||
return blk_crypto_ioctl_prepare_key(profile, argp);
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
}
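
The intended consumers of these handlers are userspace key-management tools; a rough sketch of importing a raw key and reading back the long-term wrapped blob is below. The uapi header location, device path, buffer size and error handling are assumptions; the struct fields and ioctl name match the copy_from_user()/copy_to_user() usage above.

/* Userspace sketch of BLKCRYPTOIMPORTKEY. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/blk-crypto.h>           /* assumed location of the uapi definitions */

static int demo_import_key(const char *devpath,
                           const unsigned char *raw, unsigned long long raw_len)
{
        unsigned char lt_key[128];      /* assumed to be >= the wrapped key size */
        struct blk_crypto_import_key_arg arg;
        int fd, ret;

        memset(&arg, 0, sizeof(arg));   /* arg.reserved[] must stay zero */
        arg.raw_key_ptr = (uintptr_t)raw;
        arg.raw_key_size = raw_len;
        arg.lt_key_ptr = (uintptr_t)lt_key;
        arg.lt_key_size = sizeof(lt_key);

        fd = open(devpath, O_RDONLY);
        if (fd < 0)
                return -1;
        ret = ioctl(fd, BLKCRYPTOIMPORTKEY, &arg);
        close(fd);
        if (ret == 0)
                printf("long-term wrapped key: %llu bytes\n",
                       (unsigned long long)arg.lt_key_size);
        return ret;
}

The returned blob would then typically be stored and fed back through BLKCRYPTOPREPAREKEY to obtain the ephemerally-wrapped form used for I/O.
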
|
||||
|
|
|
@ -95,9 +95,9 @@ static void blk_kick_flush(struct request_queue *q,
|
|||
struct blk_flush_queue *fq, blk_opf_t flags);
|
||||
|
||||
static inline struct blk_flush_queue *
|
||||
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
|
||||
blk_get_flush_queue(struct blk_mq_ctx *ctx)
|
||||
{
|
||||
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
|
||||
return blk_mq_map_queue(REQ_OP_FLUSH, ctx)->fq;
|
||||
}
|
||||
|
||||
static unsigned int blk_flush_cur_seq(struct request *rq)
|
||||
|
@ -205,7 +205,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
|
|||
struct list_head *running;
|
||||
struct request *rq, *n;
|
||||
unsigned long flags = 0;
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(flush_rq->mq_ctx);
|
||||
|
||||
/* release the tag's ownership to the req cloned from */
|
||||
spin_lock_irqsave(&fq->mq_flush_lock, flags);
|
||||
|
@ -341,7 +341,7 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
|
|||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
unsigned long flags;
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(ctx);
|
||||
|
||||
if (q->elevator) {
|
||||
WARN_ON(rq->tag < 0);
|
||||
|
@ -382,7 +382,7 @@ static void blk_rq_init_flush(struct request *rq)
|
|||
bool blk_insert_flush(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(rq->mq_ctx);
|
||||
bool supports_fua = q->limits.features & BLK_FEAT_FUA;
|
||||
unsigned int policy = 0;
|
||||
|
||||
|
|
|
@ -2718,8 +2718,7 @@ retry_lock:
|
|||
* All waiters are on iocg->waitq and the wait states are
|
||||
* synchronized using waitq.lock.
|
||||
*/
|
||||
init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
|
||||
wait.wait.private = current;
|
||||
init_wait_func(&wait.wait, iocg_wake_fn);
|
||||
wait.bio = bio;
|
||||
wait.abs_cost = abs_cost;
|
||||
wait.committed = false; /* will be set true by waker */
|
||||
|
@ -3223,14 +3222,16 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||
u32 qos[NR_QOS_PARAMS];
|
||||
bool enable, user;
|
||||
char *body, *p;
|
||||
unsigned int memflags;
|
||||
unsigned long memflags;
|
||||
int ret;
|
||||
|
||||
blkg_conf_init(&ctx, input);
|
||||
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
memflags = blkg_conf_open_bdev_frozen(&ctx);
|
||||
if (IS_ERR_VALUE(memflags)) {
|
||||
ret = memflags;
|
||||
goto err;
|
||||
}
|
||||
|
||||
body = ctx.body;
|
||||
disk = ctx.bdev->bd_disk;
|
||||
|
@ -3247,7 +3248,6 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||
ioc = q_to_ioc(disk->queue);
|
||||
}
|
||||
|
||||
memflags = blk_mq_freeze_queue(disk->queue);
|
||||
blk_mq_quiesce_queue(disk->queue);
|
||||
|
||||
spin_lock_irq(&ioc->lock);
|
||||
|
@ -3347,19 +3347,15 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||
wbt_enable_default(disk);
|
||||
|
||||
blk_mq_unquiesce_queue(disk->queue);
|
||||
blk_mq_unfreeze_queue(disk->queue, memflags);
|
||||
|
||||
blkg_conf_exit(&ctx);
|
||||
blkg_conf_exit_frozen(&ctx, memflags);
|
||||
return nbytes;
|
||||
einval:
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
|
||||
blk_mq_unquiesce_queue(disk->queue);
|
||||
blk_mq_unfreeze_queue(disk->queue, memflags);
|
||||
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkg_conf_exit(&ctx);
|
||||
blkg_conf_exit_frozen(&ctx, memflags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -551,8 +551,8 @@ static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
|
|||
* Map a request to scatterlist, return number of sg entries setup. Caller
|
||||
* must make sure sg can hold rq->nr_phys_segments entries.
|
||||
*/
|
||||
int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
|
||||
struct scatterlist *sglist, struct scatterlist **last_sg)
|
||||
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
|
||||
struct scatterlist **last_sg)
|
||||
{
|
||||
struct req_iterator iter = {
|
||||
.bio = rq->bio,
|
||||
|
|
|
@ -347,9 +347,14 @@ static int hctx_busy_show(void *data, struct seq_file *m)
|
|||
{
|
||||
struct blk_mq_hw_ctx *hctx = data;
|
||||
struct show_busy_params params = { .m = m, .hctx = hctx };
|
||||
int res;
|
||||
|
||||
res = mutex_lock_interruptible(&hctx->queue->elevator_lock);
|
||||
if (res)
|
||||
return res;
|
||||
blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
|
||||
&params);
|
||||
mutex_unlock(&hctx->queue->elevator_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -400,15 +405,14 @@ static int hctx_tags_show(void *data, struct seq_file *m)
|
|||
struct request_queue *q = hctx->queue;
|
||||
int res;
|
||||
|
||||
res = mutex_lock_interruptible(&q->sysfs_lock);
|
||||
res = mutex_lock_interruptible(&q->elevator_lock);
|
||||
if (res)
|
||||
goto out;
|
||||
return res;
|
||||
if (hctx->tags)
|
||||
blk_mq_debugfs_tags_show(m, hctx->tags);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
out:
|
||||
return res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hctx_tags_bitmap_show(void *data, struct seq_file *m)
|
||||
|
@ -417,15 +421,14 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m)
|
|||
struct request_queue *q = hctx->queue;
|
||||
int res;
|
||||
|
||||
res = mutex_lock_interruptible(&q->sysfs_lock);
|
||||
res = mutex_lock_interruptible(&q->elevator_lock);
|
||||
if (res)
|
||||
goto out;
|
||||
return res;
|
||||
if (hctx->tags)
|
||||
sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
out:
|
||||
return res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hctx_sched_tags_show(void *data, struct seq_file *m)
|
||||
|
@ -434,15 +437,14 @@ static int hctx_sched_tags_show(void *data, struct seq_file *m)
|
|||
struct request_queue *q = hctx->queue;
|
||||
int res;
|
||||
|
||||
res = mutex_lock_interruptible(&q->sysfs_lock);
|
||||
res = mutex_lock_interruptible(&q->elevator_lock);
|
||||
if (res)
|
||||
goto out;
|
||||
return res;
|
||||
if (hctx->sched_tags)
|
||||
blk_mq_debugfs_tags_show(m, hctx->sched_tags);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
out:
|
||||
return res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m)
|
||||
|
@ -451,15 +453,14 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m)
|
|||
struct request_queue *q = hctx->queue;
|
||||
int res;
|
||||
|
||||
res = mutex_lock_interruptible(&q->sysfs_lock);
|
||||
res = mutex_lock_interruptible(&q->elevator_lock);
|
||||
if (res)
|
||||
goto out;
|
||||
return res;
|
||||
if (hctx->sched_tags)
|
||||
sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
out:
|
||||
return res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hctx_active_show(void *data, struct seq_file *m)
|
||||
|
|
|
@ -349,7 +349,7 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
|
|||
}
|
||||
|
||||
ctx = blk_mq_get_ctx(q);
|
||||
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
|
||||
hctx = blk_mq_map_queue(bio->bi_opf, ctx);
|
||||
type = hctx->type;
|
||||
if (list_empty_careful(&ctx->rq_lists[type]))
|
||||
goto out_put;
|
||||
|
|
|
@ -61,9 +61,9 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
|
|||
if (!entry->show)
|
||||
return -EIO;
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
res = entry->show(hctx, page);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
|
@ -190,8 +190,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
|||
sbitmap_finish_wait(bt, ws, &wait);
|
||||
|
||||
data->ctx = blk_mq_get_ctx(data->q);
|
||||
data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
|
||||
data->ctx);
|
||||
data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
|
||||
tags = blk_mq_tags_from_data(data);
|
||||
if (data->flags & BLK_MQ_REQ_RESERVED)
|
||||
bt = &tags->breserved_tags;
|
||||
|
|
|
@ -508,7 +508,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
|
|||
|
||||
retry:
|
||||
data->ctx = blk_mq_get_ctx(q);
|
||||
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
|
||||
data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
|
||||
|
||||
if (q->elevator) {
|
||||
/*
|
||||
|
@ -3314,6 +3314,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
|
|||
rq->special_vec = rq_src->special_vec;
|
||||
}
|
||||
rq->nr_phys_segments = rq_src->nr_phys_segments;
|
||||
rq->nr_integrity_segments = rq_src->nr_integrity_segments;
|
||||
|
||||
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
|
||||
goto free_and_out;
|
||||
|
@ -4094,6 +4095,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
|||
struct blk_mq_ctx *ctx;
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
|
||||
mutex_lock(&q->elevator_lock);
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
cpumask_clear(hctx->cpumask);
|
||||
hctx->nr_ctx = 0;
|
||||
|
@ -4198,6 +4201,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
|||
hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
|
||||
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
|
||||
}
|
||||
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4467,7 +4472,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
|||
unsigned long i, j;
|
||||
|
||||
/* protect against switching io scheduler */
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
for (i = 0; i < set->nr_hw_queues; i++) {
|
||||
int old_node;
|
||||
int node = blk_mq_get_hctx_node(set, i);
|
||||
|
@ -4500,7 +4505,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
|||
|
||||
xa_for_each_start(&q->hctx_table, j, hctx, j)
|
||||
blk_mq_exit_hctx(q, set, hctx, j);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
/* unregister cpuhp callbacks for exited hctxs */
|
||||
blk_mq_remove_hw_queues_cpuhp(q);
|
||||
|
@ -4933,10 +4938,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
|
|||
if (!qe)
|
||||
return false;
|
||||
|
||||
/* q->elevator needs protection from ->sysfs_lock */
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
/* Accessing q->elevator needs protection from ->elevator_lock. */
|
||||
mutex_lock(&q->elevator_lock);
|
||||
|
||||
/* the check has to be done with holding sysfs_lock */
|
||||
if (!q->elevator) {
|
||||
kfree(qe);
|
||||
goto unlock;
|
||||
|
@ -4950,7 +4954,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
|
|||
list_add(&qe->node, head);
|
||||
elevator_disable(q);
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -4980,11 +4984,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
|
|||
list_del(&qe->node);
|
||||
kfree(qe);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
elevator_switch(q, t);
|
||||
/* drop the reference acquired in blk_mq_elv_switch_none */
|
||||
elevator_put(t);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
}
|
||||
|
||||
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||
|
|
|
@ -100,12 +100,10 @@ static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf)
|
|||
|
||||
/*
|
||||
* blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
|
||||
* @q: request queue
|
||||
* @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED).
|
||||
* @ctx: software queue cpu ctx
|
||||
*/
|
||||
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
|
||||
blk_opf_t opf,
|
||||
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(blk_opf_t opf,
|
||||
struct blk_mq_ctx *ctx)
|
||||
{
|
||||
return ctx->hctxs[blk_mq_get_hctx_type(opf)];
|
||||
|
|
|
@ -196,7 +196,6 @@ bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
|
|||
|
||||
struct rq_qos_wait_data {
|
||||
struct wait_queue_entry wq;
|
||||
struct task_struct *task;
|
||||
struct rq_wait *rqw;
|
||||
acquire_inflight_cb_t *cb;
|
||||
void *private_data;
|
||||
|
@ -218,7 +217,20 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
|
|||
return -1;
|
||||
|
||||
data->got_token = true;
|
||||
wake_up_process(data->task);
|
||||
/*
|
||||
* autoremove_wake_function() removes the wait entry only when it
|
||||
* actually changed the task state. We want the wait always removed.
|
||||
* Remove explicitly and use default_wake_function().
|
||||
*/
|
||||
default_wake_function(curr, mode, wake_flags, key);
|
||||
/*
|
||||
* Note that the order of operations is important as finish_wait()
|
||||
* tests whether @curr is removed without grabbing the lock. This
|
||||
* should be the last thing to do to make sure we will not have a
|
||||
* UAF access to @data. And the semantics of memory barrier in it
|
||||
* also make sure the waiter will see the latest @data->got_token
|
||||
* once list_empty_careful() in finish_wait() returns true.
|
||||
*/
|
||||
list_del_init_careful(&curr->entry);
|
||||
return 1;
|
||||
}
|
||||
|
@ -244,41 +256,55 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
|
|||
cleanup_cb_t *cleanup_cb)
|
||||
{
|
||||
struct rq_qos_wait_data data = {
|
||||
.wq = {
|
||||
.func = rq_qos_wake_function,
|
||||
.entry = LIST_HEAD_INIT(data.wq.entry),
|
||||
},
|
||||
.task = current,
|
||||
.rqw = rqw,
|
||||
.cb = acquire_inflight_cb,
|
||||
.private_data = private_data,
|
||||
.got_token = false,
|
||||
};
|
||||
bool has_sleeper;
|
||||
bool first_waiter;
|
||||
|
||||
has_sleeper = wq_has_sleeper(&rqw->wait);
|
||||
if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
|
||||
/*
|
||||
* If there are no waiters in the waiting queue, try to increase the
|
||||
* inflight counter if we can. Otherwise, prepare for adding ourselves
|
||||
* to the waiting queue.
|
||||
*/
|
||||
if (!waitqueue_active(&rqw->wait) && acquire_inflight_cb(rqw, private_data))
|
||||
return;
|
||||
|
||||
has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
|
||||
init_wait_func(&data.wq, rq_qos_wake_function);
|
||||
first_waiter = prepare_to_wait_exclusive(&rqw->wait, &data.wq,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
do {
|
||||
/* The memory barrier in set_current_state saves us here. */
|
||||
if (data.got_token)
|
||||
break;
|
||||
if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
|
||||
/*
|
||||
* Make sure there is at least one inflight process; otherwise, waiters
|
||||
* will never be woken up. Since there may be no inflight process before
|
||||
* adding ourselves to the waiting queue above, we need to try to
|
||||
* increase the inflight counter for ourselves. And it is sufficient to
|
||||
* guarantee that at least the first waiter to enter the waiting queue
|
||||
* will re-check the waiting condition before going to sleep, thus
|
||||
* ensuring forward progress.
|
||||
*/
|
||||
if (!data.got_token && first_waiter && acquire_inflight_cb(rqw, private_data)) {
|
||||
finish_wait(&rqw->wait, &data.wq);
|
||||
|
||||
/*
|
||||
* We raced with rq_qos_wake_function() getting a token,
|
||||
* which means we now have two. Put our local token
|
||||
* and wake anyone else potentially waiting for one.
|
||||
*
|
||||
* Enough memory barrier in list_empty_careful() in
|
||||
* finish_wait() is paired with list_del_init_careful()
|
||||
* in rq_qos_wake_function() to make sure we will see
|
||||
* the latest @data->got_token.
|
||||
*/
|
||||
if (data.got_token)
|
||||
cleanup_cb(rqw, private_data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* we are now relying on the waker to increase our inflight counter. */
|
||||
do {
|
||||
if (data.got_token)
|
||||
break;
|
||||
io_schedule();
|
||||
has_sleeper = true;
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
} while (1);
|
||||
finish_wait(&rqw->wait, &data.wq);
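
For orientation, the two callbacks that rq_qos_wait() takes have roughly the shape sketched below: a hypothetical throttler that limits in-flight requests through the rq_wait counter. The demo_ names and the fixed limit are assumptions for illustration; wbt and iocost carry the real implementations.

/* Hypothetical inflight-limited throttler built on rq_qos_wait(). */
struct demo_throttle {
        struct rq_wait rqw;
        int limit;
};

/* acquire_inflight_cb: return true only if a slot was actually taken. */
static bool demo_inflight_cb(struct rq_wait *rqw, void *private_data)
{
        struct demo_throttle *dt = private_data;
        int cur = atomic_read(&rqw->inflight);

        do {
                if (cur >= dt->limit)
                        return false;
        } while (!atomic_try_cmpxchg(&rqw->inflight, &cur, cur + 1));

        return true;
}

/* cleanup_cb: give back a slot we were handed but no longer need. */
static void demo_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
        atomic_dec(&rqw->inflight);
        wake_up(&rqw->wait);    /* a slot freed up; let the next waiter retry */
}

/* Submission path: sleeps until a slot is accounted to this caller.
 * The completion path would atomic_dec() and wake the waitqueue as well. */
static void demo_throttle_wait(struct demo_throttle *dt)
{
        rq_qos_wait(&dt->rqw, dt, demo_inflight_cb, demo_cleanup_cb);
}
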
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
|
||||
{
|
||||
q->rq_timeout = timeout;
|
||||
WRITE_ONCE(q->rq_timeout, timeout);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
|
||||
|
||||
|
@ -114,9 +114,15 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
|
|||
pr_warn("invalid PI settings.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
bi->flags |= BLK_INTEGRITY_NOGENERATE | BLK_INTEGRITY_NOVERIFY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (lim->features & BLK_FEAT_BOUNCE_HIGH) {
|
||||
pr_warn("no bounce buffer support for integrity metadata\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
|
||||
pr_warn("integrity support disabled.\n");
|
||||
return -EINVAL;
|
||||
|
@ -867,22 +873,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
|
|||
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
|
||||
return true;
|
||||
|
||||
if (!ti->tuple_size) {
|
||||
/* inherit the settings from the first underlying device */
|
||||
if (!(ti->flags & BLK_INTEGRITY_STACKED)) {
|
||||
ti->flags = BLK_INTEGRITY_DEVICE_CAPABLE |
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG);
|
||||
ti->csum_type = bi->csum_type;
|
||||
ti->tuple_size = bi->tuple_size;
|
||||
ti->pi_offset = bi->pi_offset;
|
||||
ti->interval_exp = bi->interval_exp;
|
||||
ti->tag_size = bi->tag_size;
|
||||
goto done;
|
||||
}
|
||||
if (!bi->tuple_size)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (ti->flags & BLK_INTEGRITY_STACKED) {
|
||||
if (ti->tuple_size != bi->tuple_size)
|
||||
goto incompatible;
|
||||
if (ti->interval_exp != bi->interval_exp)
|
||||
|
@ -894,9 +885,16 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
|
|||
if ((ti->flags & BLK_INTEGRITY_REF_TAG) !=
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG))
|
||||
goto incompatible;
|
||||
|
||||
done:
|
||||
ti->flags |= BLK_INTEGRITY_STACKED;
|
||||
} else {
|
||||
ti->flags = BLK_INTEGRITY_STACKED;
|
||||
ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) |
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG);
|
||||
ti->csum_type = bi->csum_type;
|
||||
ti->tuple_size = bi->tuple_size;
|
||||
ti->pi_offset = bi->pi_offset;
|
||||
ti->interval_exp = bi->interval_exp;
|
||||
ti->tag_size = bi->tag_size;
|
||||
}
|
||||
return true;
|
||||
|
||||
incompatible:
|
||||
|
|
|
@ -23,10 +23,11 @@
|
|||
struct queue_sysfs_entry {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct gendisk *disk, char *page);
|
||||
ssize_t (*show_limit)(struct gendisk *disk, char *page);
|
||||
|
||||
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
|
||||
int (*store_limit)(struct gendisk *disk, const char *page,
|
||||
size_t count, struct queue_limits *lim);
|
||||
void (*load_module)(struct gendisk *disk, const char *page, size_t count);
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
|
@ -52,7 +53,12 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
|
|||
|
||||
static ssize_t queue_requests_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show(disk->queue->nr_requests, page);
|
||||
ssize_t ret;
|
||||
|
||||
mutex_lock(&disk->queue->elevator_lock);
|
||||
ret = queue_var_show(disk->queue->nr_requests, page);
|
||||
mutex_unlock(&disk->queue->elevator_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
|
@ -60,27 +66,38 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count)
|
|||
{
|
||||
unsigned long nr;
|
||||
int ret, err;
|
||||
unsigned int memflags;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
if (!queue_is_mq(disk->queue))
|
||||
if (!queue_is_mq(q))
|
||||
return -EINVAL;
|
||||
|
||||
ret = queue_var_store(&nr, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
if (nr < BLKDEV_MIN_RQ)
|
||||
nr = BLKDEV_MIN_RQ;
|
||||
|
||||
err = blk_mq_update_nr_requests(disk->queue, nr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ret = err;
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t queue_ra_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
|
||||
ssize_t ret;
|
||||
|
||||
mutex_lock(&disk->queue->limits_lock);
|
||||
ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
|
||||
mutex_unlock(&disk->queue->limits_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
|
@ -88,11 +105,22 @@ queue_ra_store(struct gendisk *disk, const char *page, size_t count)
|
|||
{
|
||||
unsigned long ra_kb;
|
||||
ssize_t ret;
|
||||
unsigned int memflags;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
ret = queue_var_store(&ra_kb, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/*
|
||||
* ->ra_pages is protected by ->limits_lock because it is usually
|
||||
* calculated from the queue limits by queue_limits_commit_update.
|
||||
*/
|
||||
mutex_lock(&q->limits_lock);
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
|
||||
mutex_unlock(&q->limits_lock);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -238,6 +266,7 @@ static ssize_t queue_poll_show(struct gendisk *disk, char *page)
|
|||
{
|
||||
if (queue_is_mq(disk->queue))
|
||||
return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
|
||||
|
||||
return sysfs_emit(page, "%u\n",
|
||||
!!(disk->queue->limits.features & BLK_FEAT_POLL));
|
||||
}
|
||||
|
@ -286,17 +315,21 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
|
|||
size_t count)
|
||||
{
|
||||
unsigned long nm;
|
||||
unsigned int memflags;
|
||||
struct request_queue *q = disk->queue;
|
||||
ssize_t ret = queue_var_store(&nm, page, count);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, disk->queue);
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
|
||||
if (nm == 2)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
|
||||
else if (nm)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -316,11 +349,19 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
|
|||
#ifdef CONFIG_SMP
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned long val;
|
||||
unsigned int memflags;
|
||||
|
||||
ret = queue_var_store(&val, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Here we update two queue flags each using atomic bitops, although
|
||||
* updating two flags isn't atomic it should be harmless as those flags
|
||||
* are accessed individually using atomic test_bit operation. So we
|
||||
* don't grab any lock while updating these flags.
|
||||
*/
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
if (val == 2) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
|
||||
|
@ -331,6 +372,7 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
|
|||
blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
|
||||
}
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
@ -344,29 +386,43 @@ static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
|
|||
static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
|
||||
size_t count)
|
||||
{
|
||||
if (!(disk->queue->limits.features & BLK_FEAT_POLL))
|
||||
return -EINVAL;
|
||||
unsigned int memflags;
|
||||
ssize_t ret = count;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
if (!(q->limits.features & BLK_FEAT_POLL)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
|
||||
pr_info_ratelimited("please use driver specific parameters instead.\n");
|
||||
return count;
|
||||
out:
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return sysfs_emit(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
|
||||
return sysfs_emit(page, "%u\n",
|
||||
jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
|
||||
}
|
||||
|
||||
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
|
||||
size_t count)
|
||||
{
|
||||
unsigned int val;
|
||||
unsigned int val, memflags;
|
||||
int err;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
err = kstrtou32(page, 10, &val);
|
||||
if (err || val == 0)
|
||||
return -EINVAL;
|
||||
|
||||
blk_queue_rq_timeout(disk->queue, msecs_to_jiffies(val));
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
blk_queue_rq_timeout(q, msecs_to_jiffies(val));
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@ -412,57 +468,55 @@ static struct queue_sysfs_entry _prefix##_entry = { \
|
|||
.store = _prefix##_store, \
|
||||
};
|
||||
|
||||
#define QUEUE_LIM_RO_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0444 }, \
|
||||
.show_limit = _prefix##_show, \
|
||||
}
|
||||
|
||||
#define QUEUE_LIM_RW_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0644 }, \
|
||||
.show = _prefix##_show, \
|
||||
.show_limit = _prefix##_show, \
|
||||
.store_limit = _prefix##_store, \
|
||||
}
|
||||
|
||||
#define QUEUE_RW_LOAD_MODULE_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0644 }, \
|
||||
.show = _prefix##_show, \
|
||||
.load_module = _prefix##_load_module, \
|
||||
.store = _prefix##_store, \
|
||||
}
|
||||
|
||||
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
QUEUE_RW_LOAD_MODULE_ENTRY(elv_iosched, "scheduler");
QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size");
QUEUE_RW_ENTRY(elv_iosched, "scheduler");

QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
QUEUE_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
QUEUE_RO_ENTRY(queue_io_min, "minimum_io_size");
QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");
QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size");
QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size");
QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size");

QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity");
QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");

QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_boundary_sectors,
QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors,
		"atomic_write_boundary_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");

QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
QUEUE_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");

QUEUE_RO_ENTRY(queue_zoned, "zoned");
QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");

QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");

@@ -470,16 +524,16 @@ QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
QUEUE_RO_ENTRY(queue_fua, "fua");
QUEUE_RO_ENTRY(queue_dax, "dax");
QUEUE_LIM_RO_ENTRY(queue_fua, "fua");
QUEUE_LIM_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");

/* legacy alias for logical_block_size: */
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
	.attr = {.name = "hw_sector_size", .mode = 0444 },
	.show = queue_logical_block_size_show,
	.show_limit = queue_logical_block_size_show,
};

QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
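
All of the entries above surface as files under /sys/block/<disk>/queue/. As a rough userspace sketch (the device name, attribute choice and required privileges are assumptions, not part of this change), reading one limit and switching the I/O scheduler looks like:

/* cc queue_sysfs_demo.c */
#include <stdio.h>
#include <string.h>

/* Read a queue attribute such as max_hw_sectors_kb into buf. */
static int read_queue_attr(const char *disk, const char *attr,
			   char *buf, size_t len)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", disk, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, (int)len, f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	buf[strcspn(buf, "\n")] = '\0';
	return 0;
}

/* Write a queue attribute such as nr_requests or scheduler. */
static int write_queue_attr(const char *disk, const char *attr, const char *val)
{
	char path[256];
	FILE *f;
	int ret;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", disk, attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	ret = (fputs(val, f) < 0) ? -1 : 0;
	if (fclose(f) != 0)
		ret = -1;
	return ret;
}

int main(void)
{
	char val[128];

	/* "sda" is only an example device name. */
	if (read_queue_attr("sda", "max_hw_sectors_kb", val, sizeof(val)) == 0)
		printf("max_hw_sectors_kb = %s\n", val);
	if (write_queue_attr("sda", "scheduler", "mq-deadline") != 0)
		perror("switching scheduler");
	return 0;
}

Writable entries go through queue_attr_store() further down; read-only entries only provide a show (or show_limit) method.
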
@ -503,14 +557,24 @@ static ssize_t queue_var_store64(s64 *var, const char *page)
|
|||
|
||||
static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
if (!wbt_rq_qos(disk->queue))
|
||||
return -EINVAL;
|
||||
ssize_t ret;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
if (wbt_disabled(disk->queue))
|
||||
return sysfs_emit(page, "0\n");
|
||||
mutex_lock(&q->elevator_lock);
|
||||
if (!wbt_rq_qos(q)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return sysfs_emit(page, "%llu\n",
|
||||
div_u64(wbt_get_min_lat(disk->queue), 1000));
|
||||
if (wbt_disabled(q)) {
|
||||
ret = sysfs_emit(page, "0\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
|
||||
out:
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
|
||||
|
@ -520,6 +584,7 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
|
|||
struct rq_qos *rqos;
|
||||
ssize_t ret;
|
||||
s64 val;
|
||||
unsigned int memflags;
|
||||
|
||||
ret = queue_var_store64(&val, page);
|
||||
if (ret < 0)
|
||||
|
@ -527,20 +592,24 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
|
|||
if (val < -1)
|
||||
return -EINVAL;
|
||||
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
|
||||
rqos = wbt_rq_qos(q);
|
||||
if (!rqos) {
|
||||
ret = wbt_init(disk);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = count;
|
||||
if (val == -1)
|
||||
val = wbt_default_latency_nsec(q);
|
||||
else if (val >= 0)
|
||||
val *= 1000ULL;
|
||||
|
||||
if (wbt_get_min_lat(q) == val)
|
||||
return count;
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Ensure that the queue is idled, in case the latency update
|
||||
|
@ -552,8 +621,11 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
|
|||
wbt_set_min_lat(q, val);
|
||||
|
||||
blk_mq_unquiesce_queue(q);
|
||||
out:
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
|
||||
return count;
|
||||
return ret;
|
||||
}
|
||||
|
||||
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
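
For reference, the wbt_lat_usec store path above rejects anything below -1, treats -1 as "reset to the device default", and converts a non-negative value from microseconds to nanoseconds before handing it to wbt_set_min_lat(). A small standalone sketch of just that value handling (the default latency below is a made-up placeholder, not the kernel's rotational/non-rotational heuristic):

#include <stdio.h>

/* Placeholder for wbt_default_latency_nsec(); the real value depends on
 * whether the device is rotational, not on a fixed constant. */
#define FAKE_DEFAULT_LAT_NSEC	(75LL * 1000 * 1000)

static long long wbt_lat_usec_to_nsec(long long val)
{
	if (val < -1)
		return -1;			/* kernel returns -EINVAL */
	if (val == -1)
		return FAKE_DEFAULT_LAT_NSEC;	/* reset to device default */
	return val * 1000LL;			/* usec -> nsec */
}

int main(void)
{
	printf("%lld\n", wbt_lat_usec_to_nsec(2000));	/* 2000000 */
	printf("%lld\n", wbt_lat_usec_to_nsec(-1));	/* placeholder default */
	printf("%lld\n", wbt_lat_usec_to_nsec(0));	/* 0 disables wbt */
	return 0;
}
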
|
||||
|
@ -561,7 +633,9 @@ QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
|
|||
|
||||
/* Common attributes for bio-based and request-based queues. */
|
||||
static struct attribute *queue_attrs[] = {
|
||||
&queue_ra_entry.attr,
|
||||
/*
|
||||
* Attributes which are protected with q->limits_lock.
|
||||
*/
|
||||
&queue_max_hw_sectors_entry.attr,
|
||||
&queue_max_sectors_entry.attr,
|
||||
&queue_max_segments_entry.attr,
|
||||
|
@ -577,44 +651,58 @@ static struct attribute *queue_attrs[] = {
|
|||
&queue_discard_granularity_entry.attr,
|
||||
&queue_max_discard_sectors_entry.attr,
|
||||
&queue_max_hw_discard_sectors_entry.attr,
|
||||
&queue_discard_zeroes_data_entry.attr,
|
||||
&queue_atomic_write_max_sectors_entry.attr,
|
||||
&queue_atomic_write_boundary_sectors_entry.attr,
|
||||
&queue_atomic_write_unit_min_entry.attr,
|
||||
&queue_atomic_write_unit_max_entry.attr,
|
||||
&queue_write_same_max_entry.attr,
|
||||
&queue_max_write_zeroes_sectors_entry.attr,
|
||||
&queue_max_zone_append_sectors_entry.attr,
|
||||
&queue_zone_write_granularity_entry.attr,
|
||||
&queue_rotational_entry.attr,
|
||||
&queue_zoned_entry.attr,
|
||||
&queue_nr_zones_entry.attr,
|
||||
&queue_max_open_zones_entry.attr,
|
||||
&queue_max_active_zones_entry.attr,
|
||||
&queue_nomerges_entry.attr,
|
||||
&queue_iostats_passthrough_entry.attr,
|
||||
&queue_iostats_entry.attr,
|
||||
&queue_stable_writes_entry.attr,
|
||||
&queue_add_random_entry.attr,
|
||||
&queue_poll_entry.attr,
|
||||
&queue_wc_entry.attr,
|
||||
&queue_fua_entry.attr,
|
||||
&queue_dax_entry.attr,
|
||||
&queue_poll_delay_entry.attr,
|
||||
&queue_virt_boundary_mask_entry.attr,
|
||||
&queue_dma_alignment_entry.attr,
|
||||
&queue_ra_entry.attr,
|
||||
|
||||
/*
|
||||
* Attributes which don't require locking.
|
||||
*/
|
||||
&queue_discard_zeroes_data_entry.attr,
|
||||
&queue_write_same_max_entry.attr,
|
||||
&queue_nr_zones_entry.attr,
|
||||
&queue_nomerges_entry.attr,
|
||||
&queue_poll_entry.attr,
|
||||
&queue_poll_delay_entry.attr,
|
||||
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* Request-based queue attributes that are not relevant for bio-based queues. */
|
||||
static struct attribute *blk_mq_queue_attrs[] = {
|
||||
&queue_requests_entry.attr,
|
||||
/*
|
||||
* Attributes which require some form of locking other than
|
||||
* q->sysfs_lock.
|
||||
*/
|
||||
&elv_iosched_entry.attr,
|
||||
&queue_rq_affinity_entry.attr,
|
||||
&queue_io_timeout_entry.attr,
|
||||
&queue_requests_entry.attr,
|
||||
#ifdef CONFIG_BLK_WBT
|
||||
&queue_wb_lat_entry.attr,
|
||||
#endif
|
||||
/*
|
||||
* Attributes which don't require locking.
|
||||
*/
|
||||
&queue_rq_affinity_entry.attr,
|
||||
&queue_io_timeout_entry.attr,
|
||||
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -664,14 +752,20 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
|
|||
{
|
||||
struct queue_sysfs_entry *entry = to_queue(attr);
|
||||
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
|
||||
|
||||
if (!entry->show && !entry->show_limit)
|
||||
return -EIO;
|
||||
|
||||
if (entry->show_limit) {
|
||||
ssize_t res;
|
||||
|
||||
if (!entry->show)
|
||||
return -EIO;
|
||||
mutex_lock(&disk->queue->sysfs_lock);
|
||||
res = entry->show(disk, page);
|
||||
mutex_unlock(&disk->queue->sysfs_lock);
|
||||
mutex_lock(&disk->queue->limits_lock);
|
||||
res = entry->show_limit(disk, page);
|
||||
mutex_unlock(&disk->queue->limits_lock);
|
||||
return res;
|
||||
}
|
||||
|
||||
return entry->show(disk, page);
|
||||
}
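
The show path now dispatches on whether an attribute declared show or show_limit, taking limits_lock only for the latter. A hedged, userspace-only illustration of that dispatch pattern (fake types, with a pthread mutex standing in for q->limits_lock):

/* cc -pthread attr_show_demo.c */
#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for the real request_queue and queue_sysfs_entry. */
struct fake_queue {
	pthread_mutex_t limits_lock;	/* like q->limits_lock */
	int max_sectors_kb;
};

struct fake_attr {
	const char *name;
	/* at most one of these is set, mirroring ->show vs ->show_limit */
	int (*show)(struct fake_queue *q);
	int (*show_limit)(struct fake_queue *q);
};

static int show_max_sectors_kb(struct fake_queue *q)
{
	return q->max_sectors_kb;
}

/* Same shape as queue_attr_show(): limit attributes are read under the
 * limits lock, everything else is left to lock for itself. */
static int attr_show(struct fake_queue *q, const struct fake_attr *attr)
{
	int v;

	if (attr->show_limit) {
		pthread_mutex_lock(&q->limits_lock);
		v = attr->show_limit(q);
		pthread_mutex_unlock(&q->limits_lock);
		return v;
	}
	return attr->show(q);
}

int main(void)
{
	struct fake_queue q = {
		.limits_lock = PTHREAD_MUTEX_INITIALIZER,
		.max_sectors_kb = 1280,
	};
	const struct fake_attr a = {
		.name = "max_sectors_kb",
		.show_limit = show_max_sectors_kb,
	};

	printf("%s = %d\n", a.name, attr_show(&q, &a));
	return 0;
}
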
|
||||
|
||||
static ssize_t
|
||||
|
@ -681,21 +775,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
|
|||
struct queue_sysfs_entry *entry = to_queue(attr);
|
||||
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned int memflags;
|
||||
ssize_t res;
|
||||
|
||||
if (!entry->store_limit && !entry->store)
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* If the attribute needs to load a module, do it before freezing the
|
||||
* queue to ensure that the module file can be read when the request
|
||||
* queue is the one for the device storing the module file.
|
||||
*/
|
||||
if (entry->load_module)
|
||||
entry->load_module(disk, page, length);
|
||||
|
||||
if (entry->store_limit) {
|
||||
ssize_t res;
|
||||
|
||||
struct queue_limits lim = queue_limits_start_update(q);
|
||||
|
||||
res = entry->store_limit(disk, page, length, &lim);
|
||||
|
@ -710,12 +796,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
|
|||
return length;
|
||||
}
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
res = entry->store(disk, page, length);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
return entry->store(disk, page, length);
|
||||
}
|
||||
|
||||
static const struct sysfs_ops queue_sysfs_ops = {
|
||||
|
@ -784,18 +865,22 @@ int blk_register_queue(struct gendisk *disk)
|
|||
if (ret)
|
||||
goto out_debugfs_remove;
|
||||
|
||||
if (q->elevator) {
|
||||
ret = elv_register_queue(q, false);
|
||||
if (ret)
|
||||
goto out_unregister_ia_ranges;
|
||||
}
|
||||
|
||||
ret = blk_crypto_sysfs_register(disk);
|
||||
if (ret)
|
||||
goto out_elv_unregister;
|
||||
goto out_unregister_ia_ranges;
|
||||
|
||||
mutex_lock(&q->elevator_lock);
|
||||
if (q->elevator) {
|
||||
ret = elv_register_queue(q, false);
|
||||
if (ret) {
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
goto out_crypto_sysfs_unregister;
|
||||
}
|
||||
}
|
||||
wbt_enable_default(disk);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
|
||||
wbt_enable_default(disk);
|
||||
|
||||
/* Now everything is ready and send out KOBJ_ADD uevent */
|
||||
kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
|
||||
|
@ -817,8 +902,8 @@ int blk_register_queue(struct gendisk *disk)
|
|||
|
||||
return ret;
|
||||
|
||||
out_elv_unregister:
|
||||
elv_unregister_queue(q);
|
||||
out_crypto_sysfs_unregister:
|
||||
blk_crypto_sysfs_unregister(disk);
|
||||
out_unregister_ia_ranges:
|
||||
disk_unregister_independent_access_ranges(disk);
|
||||
out_debugfs_remove:
|
||||
|
@ -864,8 +949,11 @@ void blk_unregister_queue(struct gendisk *disk)
|
|||
blk_mq_sysfs_unregister(disk);
|
||||
blk_crypto_sysfs_unregister(disk);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
elv_unregister_queue(q);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
disk_unregister_independent_access_ranges(disk);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
|
|
|
@ -478,8 +478,6 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
|
|||
{
|
||||
tg->bytes_disp[rw] = 0;
|
||||
tg->io_disp[rw] = 0;
|
||||
tg->carryover_bytes[rw] = 0;
|
||||
tg->carryover_ios[rw] = 0;
|
||||
|
||||
/*
|
||||
* Previous slice has expired. We must have trimmed it after last
|
||||
|
@ -498,16 +496,14 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
|
|||
}
|
||||
|
||||
static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw,
|
||||
bool clear_carryover)
|
||||
bool clear)
|
||||
{
|
||||
if (clear) {
|
||||
tg->bytes_disp[rw] = 0;
|
||||
tg->io_disp[rw] = 0;
|
||||
}
|
||||
tg->slice_start[rw] = jiffies;
|
||||
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
|
||||
if (clear_carryover) {
|
||||
tg->carryover_bytes[rw] = 0;
|
||||
tg->carryover_ios[rw] = 0;
|
||||
}
|
||||
|
||||
throtl_log(&tg->service_queue,
|
||||
"[%c] new slice start=%lu end=%lu jiffies=%lu",
|
||||
|
@ -599,29 +595,34 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
|
|||
* sooner, then we need to reduce slice_end. A high bogus slice_end
|
||||
* is bad because it does not allow new slice to start.
|
||||
*/
|
||||
|
||||
throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
|
||||
|
||||
time_elapsed = rounddown(jiffies - tg->slice_start[rw],
|
||||
tg->td->throtl_slice);
|
||||
if (!time_elapsed)
|
||||
/* Don't trim slice until at least 2 slices are used */
|
||||
if (time_elapsed < tg->td->throtl_slice * 2)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The bio submission time may be a few jiffies more than the expected
|
||||
* waiting time, due to 'extra_bytes' can't be divided in
|
||||
* tg_within_bps_limit(), and also due to timer wakeup delay. In this
|
||||
* case, adjust slice_start will discard the extra wait time, causing
|
||||
* lower rate than expected. Therefore, other than the above rounddown,
|
||||
* one extra slice is preserved for deviation.
|
||||
*/
|
||||
time_elapsed -= tg->td->throtl_slice;
|
||||
bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
|
||||
time_elapsed) +
|
||||
tg->carryover_bytes[rw];
|
||||
io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed) +
|
||||
tg->carryover_ios[rw];
|
||||
time_elapsed);
|
||||
io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed);
|
||||
if (bytes_trim <= 0 && io_trim <= 0)
|
||||
return;
|
||||
|
||||
tg->carryover_bytes[rw] = 0;
|
||||
if ((long long)tg->bytes_disp[rw] >= bytes_trim)
|
||||
tg->bytes_disp[rw] -= bytes_trim;
|
||||
else
|
||||
tg->bytes_disp[rw] = 0;
|
||||
|
||||
tg->carryover_ios[rw] = 0;
|
||||
if ((int)tg->io_disp[rw] >= io_trim)
|
||||
tg->io_disp[rw] -= io_trim;
|
||||
else
|
||||
|
@ -636,7 +637,8 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
|
|||
jiffies);
|
||||
}
|
||||
|
||||
static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
|
||||
static void __tg_update_carryover(struct throtl_grp *tg, bool rw,
|
||||
long long *bytes, int *ios)
|
||||
{
|
||||
unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
|
||||
u64 bps_limit = tg_bps_limit(tg, rw);
|
||||
|
@ -649,26 +651,28 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
|
|||
* configuration.
|
||||
*/
|
||||
if (bps_limit != U64_MAX)
|
||||
tg->carryover_bytes[rw] +=
|
||||
calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
|
||||
*bytes = calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
|
||||
tg->bytes_disp[rw];
|
||||
if (iops_limit != UINT_MAX)
|
||||
tg->carryover_ios[rw] +=
|
||||
calculate_io_allowed(iops_limit, jiffy_elapsed) -
|
||||
*ios = calculate_io_allowed(iops_limit, jiffy_elapsed) -
|
||||
tg->io_disp[rw];
|
||||
tg->bytes_disp[rw] -= *bytes;
|
||||
tg->io_disp[rw] -= *ios;
|
||||
}
|
||||
|
||||
static void tg_update_carryover(struct throtl_grp *tg)
|
||||
{
|
||||
long long bytes[2] = {0};
|
||||
int ios[2] = {0};
|
||||
|
||||
if (tg->service_queue.nr_queued[READ])
|
||||
__tg_update_carryover(tg, READ);
|
||||
__tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]);
|
||||
if (tg->service_queue.nr_queued[WRITE])
|
||||
__tg_update_carryover(tg, WRITE);
|
||||
__tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]);
|
||||
|
||||
/* see comments in struct throtl_grp for meaning of these fields. */
|
||||
throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
|
||||
tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
|
||||
tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
|
||||
bytes[READ], bytes[WRITE], ios[READ], ios[WRITE]);
|
||||
}
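
__tg_update_carryover() compares what the limits would have allowed over the elapsed part of the slice with what was actually dispatched, and folds the difference back into bytes_disp/io_disp. A back-of-the-envelope sketch of that bookkeeping (the helper below only approximates calculate_bytes_allowed() as limit * elapsed / HZ; HZ and all the numbers are assumptions):

#include <stdint.h>
#include <stdio.h>

#define HZ	1000UL	/* assumption: 1000 scheduler ticks per second */

/* Rough stand-in for calculate_bytes_allowed(): bps_limit scaled by the
 * elapsed fraction of a second. */
static uint64_t bytes_allowed(uint64_t bps_limit, unsigned long jiffy_elapsed)
{
	return bps_limit * jiffy_elapsed / HZ;
}

int main(void)
{
	uint64_t bps_limit = 10ULL * 1024 * 1024;	/* cgroup limit: 10 MiB/s */
	unsigned long elapsed = 250;			/* 250 ms into the slice */
	uint64_t dispatched = 1ULL * 1024 * 1024;	/* 1 MiB actually sent */
	long long carry = (long long)(bytes_allowed(bps_limit, elapsed) - dispatched);

	/* A positive carry is budget that was earned but not used; subtracting
	 * it from bytes_disp lets the next slice dispatch that much extra. */
	printf("carryover: %lld bytes\n", carry);
	return 0;
}
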
|
||||
|
||||
static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
|
||||
|
@ -686,8 +690,7 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
|
|||
|
||||
/* Round up to the next throttle slice, wait time must be nonzero */
|
||||
jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
|
||||
io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
|
||||
tg->carryover_ios[rw];
|
||||
io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd);
|
||||
if (io_allowed > 0 && tg->io_disp[rw] + 1 <= io_allowed)
|
||||
return 0;
|
||||
|
||||
|
@ -720,8 +723,7 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
|
|||
jiffy_elapsed_rnd = tg->td->throtl_slice;
|
||||
|
||||
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
|
||||
bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
|
||||
tg->carryover_bytes[rw];
|
||||
bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd);
|
||||
if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
|
||||
return 0;
|
||||
|
||||
|
@ -810,13 +812,10 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
|
|||
unsigned int bio_size = throtl_bio_data_size(bio);
|
||||
|
||||
/* Charge the bio to the group */
|
||||
if (!bio_flagged(bio, BIO_BPS_THROTTLED)) {
|
||||
if (!bio_flagged(bio, BIO_BPS_THROTTLED))
|
||||
tg->bytes_disp[rw] += bio_size;
|
||||
tg->last_bytes_disp[rw] += bio_size;
|
||||
}
|
||||
|
||||
tg->io_disp[rw]++;
|
||||
tg->last_io_disp[rw]++;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1614,13 +1613,6 @@ static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw)
|
|||
return tg_may_dispatch(tg, bio, NULL);
|
||||
}
|
||||
|
||||
static void tg_dispatch_in_debt(struct throtl_grp *tg, struct bio *bio, bool rw)
|
||||
{
|
||||
if (!bio_flagged(bio, BIO_BPS_THROTTLED))
|
||||
tg->carryover_bytes[rw] -= throtl_bio_data_size(bio);
|
||||
tg->carryover_ios[rw]--;
|
||||
}
|
||||
|
||||
bool __blk_throtl_bio(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
|
@ -1657,10 +1649,12 @@ bool __blk_throtl_bio(struct bio *bio)
|
|||
			/*
			 * IOs which may cause priority inversions are
			 * dispatched directly, even if they're over limit.
			 * Debts are handled by carryover_bytes/ios while
			 * calculating wait time.
			 *
			 * Charge and dispatch directly. The throttle control
			 * algorithm is adaptive, so the extra IO bytes will be
			 * throttled to pay back the debt.
			 */
			tg_dispatch_in_debt(tg, bio, rw);
			throtl_charge_bio(tg, bio);
		} else {
			/* if above limits, break to queue */
			break;
|
||||
|
|
|
@ -102,12 +102,9 @@ struct throtl_grp {
|
|||
unsigned int iops[2];
|
||||
|
||||
/* Number of bytes dispatched in current slice */
|
||||
uint64_t bytes_disp[2];
|
||||
int64_t bytes_disp[2];
|
||||
/* Number of bio's dispatched in current slice */
|
||||
unsigned int io_disp[2];
|
||||
|
||||
uint64_t last_bytes_disp[2];
|
||||
unsigned int last_io_disp[2];
|
||||
int io_disp[2];
|
||||
|
||||
/*
|
||||
* The following two fields are updated when new configuration is
|
||||
|
|
|
@ -136,8 +136,9 @@ enum {
|
|||
RWB_MIN_WRITE_SAMPLES = 3,
|
||||
|
||||
/*
|
||||
* If we have this number of consecutive windows with not enough
|
||||
* information to scale up or down, scale up.
|
||||
* If we have this number of consecutive windows without enough
|
||||
* information to scale up or down, slowly return to center state
|
||||
* (step == 0).
|
||||
*/
|
||||
RWB_UNKNOWN_BUMP = 5,
|
||||
};
|
||||
|
@ -446,9 +447,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
|
|||
break;
|
||||
case LAT_UNKNOWN_WRITES:
|
||||
/*
|
||||
* We started a the center step, but don't have a valid
|
||||
* read/write sample, but we do have writes going on.
|
||||
* Allow step to go negative, to increase write perf.
|
||||
* We don't have a valid read/write sample, but we do have
|
||||
* writes going on. Allow step to go negative, to increase
|
||||
* write performance.
|
||||
*/
|
||||
scale_up(rwb);
|
||||
break;
|
||||
|
@ -638,11 +639,7 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
|
|||
__wbt_done(rqos, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* May sleep, if we have exceeded the writeback limits. Caller can pass
|
||||
* in an irq held spinlock, if it holds one when calling this function.
|
||||
* If we do sleep, we'll release and re-grab it.
|
||||
*/
|
||||
/* May sleep, if we have exceeded the writeback limits. */
|
||||
static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
|
||||
{
|
||||
struct rq_wb *rwb = RQWB(rqos);
|
||||
|
|
|
@ -715,7 +715,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
|
|||
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
|
||||
|
||||
void blk_integrity_generate(struct bio *bio);
|
||||
void blk_integrity_verify(struct bio *bio);
|
||||
void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter);
|
||||
void blk_integrity_prepare(struct request *rq);
|
||||
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
|
||||
|
||||
|
|
|
@ -41,8 +41,6 @@ static void init_bounce_bioset(void)
|
|||
|
||||
ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
BUG_ON(ret);
|
||||
if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
|
||||
BUG_ON(1);
|
||||
|
||||
ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
|
||||
BUG_ON(ret);
|
||||
|
|
|
@ -219,7 +219,7 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
|
|||
if (!buf->sg_list)
|
||||
return -ENOMEM;
|
||||
sg_init_table(buf->sg_list, req->nr_phys_segments);
|
||||
buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
|
||||
buf->sg_cnt = blk_rq_map_sg(req, buf->sg_list);
|
||||
buf->payload_len = blk_rq_bytes(req);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -457,7 +457,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
|
|||
struct elevator_queue *e = q->elevator;
|
||||
int error;
|
||||
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->elevator_lock);
|
||||
|
||||
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
|
||||
if (!error) {
|
||||
|
@ -481,7 +481,7 @@ void elv_unregister_queue(struct request_queue *q)
|
|||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->elevator_lock);
|
||||
|
||||
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
|
||||
kobject_uevent(&e->kobj, KOBJ_REMOVE);
|
||||
|
@ -618,7 +618,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
|
|||
unsigned int memflags;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->elevator_lock);
|
||||
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
|
@ -655,7 +655,7 @@ void elevator_disable(struct request_queue *q)
|
|||
{
|
||||
unsigned int memflags;
|
||||
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->elevator_lock);
|
||||
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
|
@ -700,34 +700,44 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
|
|||
return ret;
|
||||
}
|
||||
|
||||
void elv_iosched_load_module(struct gendisk *disk, const char *buf,
|
||||
size_t count)
|
||||
static void elv_iosched_load_module(char *elevator_name)
|
||||
{
|
||||
char elevator_name[ELV_NAME_MAX];
|
||||
struct elevator_type *found;
|
||||
const char *name;
|
||||
|
||||
strscpy(elevator_name, buf, sizeof(elevator_name));
|
||||
name = strstrip(elevator_name);
|
||||
|
||||
spin_lock(&elv_list_lock);
|
||||
found = __elevator_find(name);
|
||||
found = __elevator_find(elevator_name);
|
||||
spin_unlock(&elv_list_lock);
|
||||
|
||||
if (!found)
|
||||
request_module("%s-iosched", name);
|
||||
request_module("%s-iosched", elevator_name);
|
||||
}
|
||||
|
||||
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
char elevator_name[ELV_NAME_MAX];
|
||||
char *name;
|
||||
int ret;
|
||||
unsigned int memflags;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
/*
|
||||
* If the attribute needs to load a module, do it before freezing the
|
||||
* queue to ensure that the module file can be read when the request
|
||||
* queue is the one for the device storing the module file.
|
||||
*/
|
||||
strscpy(elevator_name, buf, sizeof(elevator_name));
|
||||
ret = elevator_change(disk->queue, strstrip(elevator_name));
|
||||
name = strstrip(elevator_name);
|
||||
|
||||
elv_iosched_load_module(name);
|
||||
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
ret = elevator_change(q, name);
|
||||
if (!ret)
|
||||
return count;
|
||||
ret = count;
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -738,6 +748,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
|
|||
struct elevator_type *cur = NULL, *e;
|
||||
int len = 0;
|
||||
|
||||
mutex_lock(&q->elevator_lock);
|
||||
if (!q->elevator) {
|
||||
len += sprintf(name+len, "[none] ");
|
||||
} else {
|
||||
|
@ -755,6 +766,8 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
|
|||
spin_unlock(&elv_list_lock);
|
||||
|
||||
len += sprintf(name+len, "\n");
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
|
||||
return len;
|
||||
}
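
elv_iosched_show() prints every registered scheduler on one line and brackets the active one, e.g. "none [mq-deadline] kyber bfq". A small userspace helper (illustrative only) that extracts the active name from such a line:

#include <stdio.h>
#include <string.h>

/* Extract the bracketed (active) scheduler from a line such as
 * "none [mq-deadline] kyber bfq". */
static int active_scheduler(const char *line, char *out, size_t len)
{
	const char *l = strchr(line, '[');
	const char *r = l ? strchr(l, ']') : NULL;

	if (!l || !r || (size_t)(r - l) > len)
		return -1;
	memcpy(out, l + 1, (size_t)(r - l - 1));
	out[r - l - 1] = '\0';
	return 0;
}

int main(void)
{
	char cur[32];

	if (active_scheduler("none [mq-deadline] kyber bfq", cur, sizeof(cur)) == 0)
		printf("active: %s\n", cur);
	return 0;
}
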
|
||||
|
||||
|
|
|
@ -148,8 +148,6 @@ extern void elv_unregister(struct elevator_type *);
|
|||
* io scheduler sysfs switching
|
||||
*/
|
||||
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
|
||||
void elv_iosched_load_module(struct gendisk *disk, const char *page,
|
||||
size_t count);
|
||||
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
|
||||
|
||||
extern bool elv_bio_merge_ok(struct request *, struct bio *);
|
||||
|
|
|
@ -565,8 +565,11 @@ out_free_ext_minor:
|
|||
if (disk->major == BLOCK_EXT_MAJOR)
|
||||
blk_free_ext_minor(disk->first_minor);
|
||||
out_exit_elevator:
|
||||
if (disk->queue->elevator)
|
||||
if (disk->queue->elevator) {
|
||||
mutex_lock(&disk->queue->elevator_lock);
|
||||
elevator_exit(disk->queue);
|
||||
mutex_unlock(&disk->queue->elevator_lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(add_disk_fwnode);
|
||||
|
@ -742,9 +745,9 @@ void del_gendisk(struct gendisk *disk)
|
|||
|
||||
blk_mq_quiesce_queue(q);
|
||||
if (q->elevator) {
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->elevator_lock);
|
||||
elevator_exit(q);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->elevator_lock);
|
||||
}
|
||||
rq_qos_exit(q);
|
||||
blk_mq_unquiesce_queue(q);
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <linux/io_uring/cmd.h>
|
||||
#include <uapi/linux/blkdev.h>
|
||||
#include "blk.h"
|
||||
#include "blk-crypto-internal.h"
|
||||
|
||||
static int blkpg_do_ioctl(struct block_device *bdev,
|
||||
struct blkpg_partition __user *upart, int op)
|
||||
|
@ -620,6 +621,10 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
|
|||
case BLKTRACESTOP:
|
||||
case BLKTRACETEARDOWN:
|
||||
return blk_trace_ioctl(bdev, cmd, argp);
|
||||
case BLKCRYPTOIMPORTKEY:
|
||||
case BLKCRYPTOGENERATEKEY:
|
||||
case BLKCRYPTOPREPAREKEY:
|
||||
return blk_crypto_ioctl(bdev, cmd, argp);
|
||||
case IOC_PR_REGISTER:
|
||||
return blkdev_pr_register(bdev, mode, argp);
|
||||
case IOC_PR_RESERVE:
|
||||
|
|
|
@ -568,7 +568,7 @@ static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
|
|||
unsigned int nr_segs)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(bio->bi_opf, ctx);
|
||||
struct kyber_hctx_data *khd = hctx->sched_data;
|
||||
struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
|
||||
unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
|
||||
|
|
|
@ -50,8 +50,6 @@ int sgi_partition(struct parsed_partitions *state)
|
|||
p = &label->partitions[0];
|
||||
magic = label->magic_mushroom;
|
||||
if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) {
|
||||
/*printk("Dev %s SGI disklabel: bad magic %08x\n",
|
||||
state->disk->disk_name, be32_to_cpu(magic));*/
|
||||
put_dev_sector(sect);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -74,8 +74,6 @@ int sun_partition(struct parsed_partitions *state)
|
|||
|
||||
p = label->partitions;
|
||||
if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) {
|
||||
/* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n",
|
||||
state->disk->disk_name, be16_to_cpu(label->magic)); */
|
||||
put_dev_sector(sect);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -404,7 +404,7 @@ void blk_integrity_generate(struct bio *bio)
|
|||
}
|
||||
}
|
||||
|
||||
void blk_integrity_verify(struct bio *bio)
|
||||
void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter)
|
||||
{
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
|
@ -418,9 +418,9 @@ void blk_integrity_verify(struct bio *bio)
|
|||
*/
|
||||
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
|
||||
iter.interval = 1 << bi->interval_exp;
|
||||
iter.seed = bip->bio_iter.bi_sector;
|
||||
iter.seed = saved_iter->bi_sector;
|
||||
iter.prot_buf = bvec_virt(bip->bip_vec);
|
||||
__bio_for_each_segment(bv, bio, bviter, bip->bio_iter) {
|
||||
__bio_for_each_segment(bv, bio, bviter, *saved_iter) {
|
||||
void *kaddr = bvec_kmap_local(&bv);
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
|
|
|
@ -141,6 +141,12 @@ config CRYPTO_ACOMP
|
|||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_ACOMP2
|
||||
|
||||
config CRYPTO_HKDF
|
||||
tristate
|
||||
select CRYPTO_SHA256 if !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
|
||||
select CRYPTO_SHA512 if !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
|
||||
select CRYPTO_HASH2
|
||||
|
||||
config CRYPTO_MANAGER
|
||||
tristate "Cryptographic algorithm manager"
|
||||
select CRYPTO_MANAGER2
|
||||
|
|
|
@ -34,6 +34,7 @@ obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
|
|||
obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o
|
||||
obj-$(CONFIG_CRYPTO_SIG2) += sig.o
|
||||
obj-$(CONFIG_CRYPTO_KPP2) += kpp.o
|
||||
obj-$(CONFIG_CRYPTO_HKDF) += hkdf.o
|
||||
|
||||
dh_generic-y := dh.o
|
||||
dh_generic-y += dh_helper.o
|
||||
|
|
crypto/hkdf.c (new file, 573 lines)
|
@ -0,0 +1,573 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Implementation of HKDF ("HMAC-based Extract-and-Expand Key Derivation
|
||||
* Function"), aka RFC 5869. See also the original paper (Krawczyk 2010):
|
||||
* "Cryptographic Extraction and Key Derivation: The HKDF Scheme".
|
||||
*
|
||||
* Copyright 2019 Google LLC
|
||||
*/
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/sha2.h>
|
||||
#include <crypto/hkdf.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
|
||||
* HKDF consists of two steps:
|
||||
*
|
||||
* 1. HKDF-Extract: extract a pseudorandom key from the input keying material
|
||||
* and optional salt.
|
||||
* 2. HKDF-Expand: expand the pseudorandom key into output keying material of
|
||||
* any length, parameterized by an application-specific info string.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* hkdf_extract - HKDF-Extract (RFC 5869 section 2.2)
|
||||
* @hmac_tfm: an HMAC transform using the hash function desired for HKDF. The
|
||||
* caller is responsible for setting the @prk afterwards.
|
||||
* @ikm: input keying material
|
||||
* @ikmlen: length of @ikm
|
||||
* @salt: input salt value
|
||||
* @saltlen: length of @salt
|
||||
* @prk: resulting pseudorandom key
|
||||
*
|
||||
* Extracts a pseudorandom key @prk from the input keying material
|
||||
* @ikm with length @ikmlen and salt @salt with length @saltlen.
|
||||
* The length of @prk is given by the digest size of @hmac_tfm.
|
||||
* For an 'unsalted' version of HKDF-Extract @salt must be set
|
||||
* to all zeroes and @saltlen must be set to the length of @prk.
|
||||
*
|
||||
* Returns 0 on success with the pseudorandom key stored in @prk,
|
||||
* or a negative errno value otherwise.
|
||||
*/
|
||||
int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm,
|
||||
unsigned int ikmlen, const u8 *salt, unsigned int saltlen,
|
||||
u8 *prk)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = crypto_shash_setkey(hmac_tfm, salt, saltlen);
|
||||
if (!err)
|
||||
err = crypto_shash_tfm_digest(hmac_tfm, ikm, ikmlen, prk);
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hkdf_extract);
|
||||
|
||||
/**
|
||||
* hkdf_expand - HKDF-Expand (RFC 5869 section 2.3)
|
||||
* @hmac_tfm: hash context keyed with pseudorandom key
|
||||
* @info: application-specific information
|
||||
* @infolen: length of @info
|
||||
* @okm: output keying material
|
||||
* @okmlen: length of @okm
|
||||
*
|
||||
* This expands the pseudorandom key, which was already keyed into @hmac_tfm,
|
||||
* into @okmlen bytes of output keying material parameterized by the
|
||||
* application-specific @info of length @infolen bytes.
|
||||
* This is thread-safe and may be called by multiple threads in parallel.
|
||||
*
|
||||
* Returns 0 on success with output keying material stored in @okm,
|
||||
* or a negative errno value otherwise.
|
||||
*/
|
||||
int hkdf_expand(struct crypto_shash *hmac_tfm,
|
||||
const u8 *info, unsigned int infolen,
|
||||
u8 *okm, unsigned int okmlen)
|
||||
{
|
||||
SHASH_DESC_ON_STACK(desc, hmac_tfm);
|
||||
unsigned int i, hashlen = crypto_shash_digestsize(hmac_tfm);
|
||||
int err;
|
||||
const u8 *prev = NULL;
|
||||
u8 counter = 1;
|
||||
u8 tmp[HASH_MAX_DIGESTSIZE] = {};
|
||||
|
||||
if (WARN_ON(okmlen > 255 * hashlen))
|
||||
return -EINVAL;
|
||||
|
||||
desc->tfm = hmac_tfm;
|
||||
|
||||
for (i = 0; i < okmlen; i += hashlen) {
|
||||
err = crypto_shash_init(desc);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (prev) {
|
||||
err = crypto_shash_update(desc, prev, hashlen);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (infolen) {
|
||||
err = crypto_shash_update(desc, info, infolen);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(sizeof(counter) != 1);
|
||||
if (okmlen - i < hashlen) {
|
||||
err = crypto_shash_finup(desc, &counter, 1, tmp);
|
||||
if (err)
|
||||
goto out;
|
||||
memcpy(&okm[i], tmp, okmlen - i);
|
||||
memzero_explicit(tmp, sizeof(tmp));
|
||||
} else {
|
||||
err = crypto_shash_finup(desc, &counter, 1, &okm[i]);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
counter++;
|
||||
prev = &okm[i];
|
||||
}
|
||||
err = 0;
|
||||
out:
|
||||
if (unlikely(err))
|
||||
memzero_explicit(okm, okmlen); /* so caller doesn't need to */
|
||||
shash_desc_zero(desc);
|
||||
memzero_explicit(tmp, HASH_MAX_DIGESTSIZE);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hkdf_expand);
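
The hkdf_test() code further down exercises these two helpers against the RFC 5869 vectors. For a quick cross-check outside the kernel, the same extract-and-expand construction can be reproduced with OpenSSL's one-shot HMAC(); this is an independent sketch of RFC 5869, not the kernel API, and the fixed buffer sizes assume short info strings:

/* cc hkdf_demo.c -lcrypto */
#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <stdio.h>
#include <string.h>

/* HKDF-Extract: PRK = HMAC-Hash(salt, IKM) */
static void extract(const EVP_MD *md, const unsigned char *salt, size_t saltlen,
		    const unsigned char *ikm, size_t ikmlen,
		    unsigned char *prk, unsigned int *prklen)
{
	HMAC(md, salt, (int)saltlen, ikm, ikmlen, prk, prklen);
}

/* HKDF-Expand: T(i) = HMAC(PRK, T(i-1) | info | i), OKM = T(1) | T(2) | ... */
static void expand(const EVP_MD *md, const unsigned char *prk, unsigned int prklen,
		   const unsigned char *info, size_t infolen,
		   unsigned char *okm, size_t okmlen)
{
	unsigned char t[EVP_MAX_MD_SIZE], block[EVP_MAX_MD_SIZE + 256 + 1];
	unsigned int tlen = 0, hashlen = (unsigned int)EVP_MD_size(md);
	unsigned char counter = 1;
	size_t done = 0;

	while (done < okmlen) {		/* assumes infolen <= 256 */
		size_t blen = 0;

		memcpy(block, t, tlen);
		blen += tlen;
		memcpy(block + blen, info, infolen);
		blen += infolen;
		block[blen++] = counter++;
		HMAC(md, prk, (int)prklen, block, blen, t, &tlen);
		memcpy(okm + done, t,
		       okmlen - done < hashlen ? okmlen - done : hashlen);
		done += hashlen;
	}
}

int main(void)
{
	/* Same inputs as the first SHA-256 vector (hkdf_sha256_tv[0]) below. */
	unsigned char ikm[22], salt[13], info[10], prk[EVP_MAX_MD_SIZE], okm[42];
	unsigned int prklen, i;

	memset(ikm, 0x0b, sizeof(ikm));
	for (i = 0; i < sizeof(salt); i++)
		salt[i] = (unsigned char)i;
	for (i = 0; i < sizeof(info); i++)
		info[i] = (unsigned char)(0xf0 + i);

	extract(EVP_sha256(), salt, sizeof(salt), ikm, sizeof(ikm), prk, &prklen);
	expand(EVP_sha256(), prk, prklen, info, sizeof(info), okm, sizeof(okm));

	for (i = 0; i < sizeof(okm); i++)
		printf("%02x", okm[i]);
	printf("\n");	/* should start with 3cb25f25faacd57a... */
	return 0;
}
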
|
||||
|
||||
struct hkdf_testvec {
|
||||
const char *test;
|
||||
const u8 *ikm;
|
||||
const u8 *salt;
|
||||
const u8 *info;
|
||||
const u8 *prk;
|
||||
const u8 *okm;
|
||||
u16 ikm_size;
|
||||
u16 salt_size;
|
||||
u16 info_size;
|
||||
u16 prk_size;
|
||||
u16 okm_size;
|
||||
};
|
||||
|
||||
/*
|
||||
* HKDF test vectors from RFC5869
|
||||
*
|
||||
* Additional HKDF test vectors from
|
||||
* https://github.com/brycx/Test-Vector-Generation/blob/master/HKDF/hkdf-hmac-sha2-test-vectors.md
|
||||
*/
|
||||
static const struct hkdf_testvec hkdf_sha256_tv[] = {
|
||||
{
|
||||
.test = "basic hdkf test",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x07\x77\x09\x36\x2c\x2e\x32\xdf\x0d\xdc\x3f\x0d\xc4\x7b\xba\x63"
|
||||
"\x90\xb6\xc7\x3b\xb5\x0f\x9c\x31\x22\xec\x84\x4a\xd7\xc2\xb3\xe5",
|
||||
.prk_size = 32,
|
||||
.okm = "\x3c\xb2\x5f\x25\xfa\xac\xd5\x7a\x90\x43\x4f\x64\xd0\x36\x2f\x2a"
|
||||
"\x2d\x2d\x0a\x90\xcf\x1a\x5a\x4c\x5d\xb0\x2d\x56\xec\xc4\xc5\xbf"
|
||||
"\x34\x00\x72\x08\xd5\xb8\x87\x18\x58\x65",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with long input",
|
||||
.ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
|
||||
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
|
||||
"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f",
|
||||
.ikm_size = 80,
|
||||
.salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
|
||||
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
|
||||
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
||||
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
||||
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf",
|
||||
.salt_size = 80,
|
||||
.info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
|
||||
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
|
||||
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
|
||||
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
|
||||
.info_size = 80,
|
||||
.prk = "\x06\xa6\xb8\x8c\x58\x53\x36\x1a\x06\x10\x4c\x9c\xeb\x35\xb4\x5c"
|
||||
"\xef\x76\x00\x14\x90\x46\x71\x01\x4a\x19\x3f\x40\xc1\x5f\xc2\x44",
|
||||
.prk_size = 32,
|
||||
.okm = "\xb1\x1e\x39\x8d\xc8\x03\x27\xa1\xc8\xe7\xf7\x8c\x59\x6a\x49\x34"
|
||||
"\x4f\x01\x2e\xda\x2d\x4e\xfa\xd8\xa0\x50\xcc\x4c\x19\xaf\xa9\x7c"
|
||||
"\x59\x04\x5a\x99\xca\xc7\x82\x72\x71\xcb\x41\xc6\x5e\x59\x0e\x09"
|
||||
"\xda\x32\x75\x60\x0c\x2f\x09\xb8\x36\x77\x93\xa9\xac\xa3\xdb\x71"
|
||||
"\xcc\x30\xc5\x81\x79\xec\x3e\x87\xc1\x4c\x01\xd5\xc1\xf3\x43\x4f"
|
||||
"\x1d\x87",
|
||||
.okm_size = 82,
|
||||
}, {
|
||||
.test = "hkdf test with zero salt and info",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = NULL,
|
||||
.salt_size = 0,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\x19\xef\x24\xa3\x2c\x71\x7b\x16\x7f\x33\xa9\x1d\x6f\x64\x8b\xdf"
|
||||
"\x96\x59\x67\x76\xaf\xdb\x63\x77\xac\x43\x4c\x1c\x29\x3c\xcb\x04",
|
||||
.prk_size = 32,
|
||||
.okm = "\x8d\xa4\xe7\x75\xa5\x63\xc1\x8f\x71\x5f\x80\x2a\x06\x3c\x5a\x31"
|
||||
"\xb8\xa1\x1f\x5c\x5e\xe1\x87\x9e\xc3\x45\x4e\x5f\x3c\x73\x8d\x2d"
|
||||
"\x9d\x20\x13\x95\xfa\xa4\xb6\x1a\x96\xc8",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with short input",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 11,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x82\x65\xf6\x9d\x7f\xf7\xe5\x01\x37\x93\x01\x5c\xa0\xef\x92\x0c"
|
||||
"\xb1\x68\x21\x99\xc8\xbc\x3a\x00\xda\x0c\xab\x47\xb7\xb0\x0f\xdf",
|
||||
.prk_size = 32,
|
||||
.okm = "\x58\xdc\xe1\x0d\x58\x01\xcd\xfd\xa8\x31\x72\x6b\xfe\xbc\xb7\x43"
|
||||
"\xd1\x4a\x7e\xe8\x3a\xa0\x57\xa9\x3d\x59\xb0\xa1\x31\x7f\xf0\x9d"
|
||||
"\x10\x5c\xce\xcf\x53\x56\x92\xb1\x4d\xd5",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "unsalted hkdf test with zero info",
|
||||
.ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
|
||||
"\x0c\x0c\x0c\x0c\x0c\x0c",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
.salt_size = 32,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\xaa\x84\x1e\x1f\x35\x74\xf3\x2d\x13\xfb\xa8\x00\x5f\xcd\x9b\x8d"
|
||||
"\x77\x67\x82\xa5\xdf\xa1\x92\x38\x92\xfd\x8b\x63\x5d\x3a\x89\xdf",
|
||||
.prk_size = 32,
|
||||
.okm = "\x59\x68\x99\x17\x9a\xb1\xbc\x00\xa7\xc0\x37\x86\xff\x43\xee\x53"
|
||||
"\x50\x04\xbe\x2b\xb9\xbe\x68\xbc\x14\x06\x63\x6f\x54\xbd\x33\x8a"
|
||||
"\x66\xa2\x37\xba\x2a\xcb\xce\xe3\xc9\xa7",
|
||||
.okm_size = 42,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct hkdf_testvec hkdf_sha384_tv[] = {
|
||||
{
|
||||
.test = "basic hkdf test",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x70\x4b\x39\x99\x07\x79\xce\x1d\xc5\x48\x05\x2c\x7d\xc3\x9f\x30"
|
||||
"\x35\x70\xdd\x13\xfb\x39\xf7\xac\xc5\x64\x68\x0b\xef\x80\xe8\xde"
|
||||
"\xc7\x0e\xe9\xa7\xe1\xf3\xe2\x93\xef\x68\xec\xeb\x07\x2a\x5a\xde",
|
||||
.prk_size = 48,
|
||||
.okm = "\x9b\x50\x97\xa8\x60\x38\xb8\x05\x30\x90\x76\xa4\x4b\x3a\x9f\x38"
|
||||
"\x06\x3e\x25\xb5\x16\xdc\xbf\x36\x9f\x39\x4c\xfa\xb4\x36\x85\xf7"
|
||||
"\x48\xb6\x45\x77\x63\xe4\xf0\x20\x4f\xc5",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with long input",
|
||||
.ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
|
||||
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
|
||||
"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f",
|
||||
.ikm_size = 80,
|
||||
.salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
|
||||
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
|
||||
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
||||
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
||||
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf",
|
||||
.salt_size = 80,
|
||||
.info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
|
||||
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
|
||||
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
|
||||
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
|
||||
.info_size = 80,
|
||||
.prk = "\xb3\x19\xf6\x83\x1d\xff\x93\x14\xef\xb6\x43\xba\xa2\x92\x63\xb3"
|
||||
"\x0e\x4a\x8d\x77\x9f\xe3\x1e\x9c\x90\x1e\xfd\x7d\xe7\x37\xc8\x5b"
|
||||
"\x62\xe6\x76\xd4\xdc\x87\xb0\x89\x5c\x6a\x7d\xc9\x7b\x52\xce\xbb",
|
||||
.prk_size = 48,
|
||||
.okm = "\x48\x4c\xa0\x52\xb8\xcc\x72\x4f\xd1\xc4\xec\x64\xd5\x7b\x4e\x81"
|
||||
"\x8c\x7e\x25\xa8\xe0\xf4\x56\x9e\xd7\x2a\x6a\x05\xfe\x06\x49\xee"
|
||||
"\xbf\x69\xf8\xd5\xc8\x32\x85\x6b\xf4\xe4\xfb\xc1\x79\x67\xd5\x49"
|
||||
"\x75\x32\x4a\x94\x98\x7f\x7f\x41\x83\x58\x17\xd8\x99\x4f\xdb\xd6"
|
||||
"\xf4\xc0\x9c\x55\x00\xdc\xa2\x4a\x56\x22\x2f\xea\x53\xd8\x96\x7a"
|
||||
"\x8b\x2e",
|
||||
.okm_size = 82,
|
||||
}, {
|
||||
.test = "hkdf test with zero salt and info",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = NULL,
|
||||
.salt_size = 0,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\x10\xe4\x0c\xf0\x72\xa4\xc5\x62\x6e\x43\xdd\x22\xc1\xcf\x72\x7d"
|
||||
"\x4b\xb1\x40\x97\x5c\x9a\xd0\xcb\xc8\xe4\x5b\x40\x06\x8f\x8f\x0b"
|
||||
"\xa5\x7c\xdb\x59\x8a\xf9\xdf\xa6\x96\x3a\x96\x89\x9a\xf0\x47\xe5",
|
||||
.prk_size = 48,
|
||||
.okm = "\xc8\xc9\x6e\x71\x0f\x89\xb0\xd7\x99\x0b\xca\x68\xbc\xde\xc8\xcf"
|
||||
"\x85\x40\x62\xe5\x4c\x73\xa7\xab\xc7\x43\xfa\xde\x9b\x24\x2d\xaa"
|
||||
"\xcc\x1c\xea\x56\x70\x41\x5b\x52\x84\x9c",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with short input",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 11,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x6d\x31\x69\x98\x28\x79\x80\x88\xb3\x59\xda\xd5\x0b\x8f\x01\xb0"
|
||||
"\x15\xf1\x7a\xa3\xbd\x4e\x27\xa6\xe9\xf8\x73\xb7\x15\x85\xca\x6a"
|
||||
"\x00\xd1\xf0\x82\x12\x8a\xdb\x3c\xf0\x53\x0b\x57\xc0\xf9\xac\x72",
|
||||
.prk_size = 48,
|
||||
.okm = "\xfb\x7e\x67\x43\xeb\x42\xcd\xe9\x6f\x1b\x70\x77\x89\x52\xab\x75"
|
||||
"\x48\xca\xfe\x53\x24\x9f\x7f\xfe\x14\x97\xa1\x63\x5b\x20\x1f\xf1"
|
||||
"\x85\xb9\x3e\x95\x19\x92\xd8\x58\xf1\x1a",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "unsalted hkdf test with zero info",
|
||||
.ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
|
||||
"\x0c\x0c\x0c\x0c\x0c\x0c",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
.salt_size = 48,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\x9d\x2d\xa5\x06\x6f\x05\xd1\x6c\x59\xfe\xdf\x6c\x5f\x32\xc7\x5e"
|
||||
"\xda\x9a\x47\xa7\x9c\x93\x6a\xa4\x4c\xb7\x63\xa8\xe2\x2f\xfb\xfc"
|
||||
"\xd8\xfe\x55\x43\x58\x53\x47\x21\x90\x39\xd1\x68\x28\x36\x33\xf5",
|
||||
.prk_size = 48,
|
||||
.okm = "\x6a\xd7\xc7\x26\xc8\x40\x09\x54\x6a\x76\xe0\x54\x5d\xf2\x66\x78"
|
||||
"\x7e\x2b\x2c\xd6\xca\x43\x73\xa1\xf3\x14\x50\xa7\xbd\xf9\x48\x2b"
|
||||
"\xfa\xb8\x11\xf5\x54\x20\x0e\xad\x8f\x53",
|
||||
.okm_size = 42,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct hkdf_testvec hkdf_sha512_tv[] = {
|
||||
{
|
||||
.test = "basic hkdf test",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x66\x57\x99\x82\x37\x37\xde\xd0\x4a\x88\xe4\x7e\x54\xa5\x89\x0b"
|
||||
"\xb2\xc3\xd2\x47\xc7\xa4\x25\x4a\x8e\x61\x35\x07\x23\x59\x0a\x26"
|
||||
"\xc3\x62\x38\x12\x7d\x86\x61\xb8\x8c\xf8\x0e\xf8\x02\xd5\x7e\x2f"
|
||||
"\x7c\xeb\xcf\x1e\x00\xe0\x83\x84\x8b\xe1\x99\x29\xc6\x1b\x42\x37",
|
||||
.prk_size = 64,
|
||||
.okm = "\x83\x23\x90\x08\x6c\xda\x71\xfb\x47\x62\x5b\xb5\xce\xb1\x68\xe4"
|
||||
"\xc8\xe2\x6a\x1a\x16\xed\x34\xd9\xfc\x7f\xe9\x2c\x14\x81\x57\x93"
|
||||
"\x38\xda\x36\x2c\xb8\xd9\xf9\x25\xd7\xcb",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with long input",
|
||||
.ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
|
||||
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
|
||||
"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f",
|
||||
.ikm_size = 80,
|
||||
.salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
|
||||
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
|
||||
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
||||
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
||||
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf",
|
||||
.salt_size = 80,
|
||||
.info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
|
||||
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
|
||||
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
|
||||
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
|
||||
.info_size = 80,
|
||||
.prk = "\x35\x67\x25\x42\x90\x7d\x4e\x14\x2c\x00\xe8\x44\x99\xe7\x4e\x1d"
|
||||
"\xe0\x8b\xe8\x65\x35\xf9\x24\xe0\x22\x80\x4a\xd7\x75\xdd\xe2\x7e"
|
||||
"\xc8\x6c\xd1\xe5\xb7\xd1\x78\xc7\x44\x89\xbd\xbe\xb3\x07\x12\xbe"
|
||||
"\xb8\x2d\x4f\x97\x41\x6c\x5a\x94\xea\x81\xeb\xdf\x3e\x62\x9e\x4a",
|
||||
.prk_size = 64,
|
||||
.okm = "\xce\x6c\x97\x19\x28\x05\xb3\x46\xe6\x16\x1e\x82\x1e\xd1\x65\x67"
|
||||
"\x3b\x84\xf4\x00\xa2\xb5\x14\xb2\xfe\x23\xd8\x4c\xd1\x89\xdd\xf1"
|
||||
"\xb6\x95\xb4\x8c\xbd\x1c\x83\x88\x44\x11\x37\xb3\xce\x28\xf1\x6a"
|
||||
"\xa6\x4b\xa3\x3b\xa4\x66\xb2\x4d\xf6\xcf\xcb\x02\x1e\xcf\xf2\x35"
|
||||
"\xf6\xa2\x05\x6c\xe3\xaf\x1d\xe4\x4d\x57\x20\x97\xa8\x50\x5d\x9e"
|
||||
"\x7a\x93",
|
||||
.okm_size = 82,
|
||||
}, {
|
||||
.test = "hkdf test with zero salt and info",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = NULL,
|
||||
.salt_size = 0,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\xfd\x20\x0c\x49\x87\xac\x49\x13\x13\xbd\x4a\x2a\x13\x28\x71\x21"
|
||||
"\x24\x72\x39\xe1\x1c\x9e\xf8\x28\x02\x04\x4b\x66\xef\x35\x7e\x5b"
|
||||
"\x19\x44\x98\xd0\x68\x26\x11\x38\x23\x48\x57\x2a\x7b\x16\x11\xde"
|
||||
"\x54\x76\x40\x94\x28\x63\x20\x57\x8a\x86\x3f\x36\x56\x2b\x0d\xf6",
|
||||
.prk_size = 64,
|
||||
.okm = "\xf5\xfa\x02\xb1\x82\x98\xa7\x2a\x8c\x23\x89\x8a\x87\x03\x47\x2c"
|
||||
"\x6e\xb1\x79\xdc\x20\x4c\x03\x42\x5c\x97\x0e\x3b\x16\x4b\xf9\x0f"
|
||||
"\xff\x22\xd0\x48\x36\xd0\xe2\x34\x3b\xac",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with short input",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 11,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x67\x40\x9c\x9c\xac\x28\xb5\x2e\xe9\xfa\xd9\x1c\x2f\xda\x99\x9f"
|
||||
"\x7c\xa2\x2e\x34\x34\xf0\xae\x77\x28\x63\x83\x65\x68\xad\x6a\x7f"
|
||||
"\x10\xcf\x11\x3b\xfd\xdd\x56\x01\x29\xa5\x94\xa8\xf5\x23\x85\xc2"
|
||||
"\xd6\x61\xd7\x85\xd2\x9c\xe9\x3a\x11\x40\x0c\x92\x06\x83\x18\x1d",
|
||||
.prk_size = 64,
|
||||
.okm = "\x74\x13\xe8\x99\x7e\x02\x06\x10\xfb\xf6\x82\x3f\x2c\xe1\x4b\xff"
|
||||
"\x01\x87\x5d\xb1\xca\x55\xf6\x8c\xfc\xf3\x95\x4d\xc8\xaf\xf5\x35"
|
||||
"\x59\xbd\x5e\x30\x28\xb0\x80\xf7\xc0\x68",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "unsalted hkdf test with zero info",
|
||||
.ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
|
||||
"\x0c\x0c\x0c\x0c\x0c\x0c",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
.salt_size = 64,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\x53\x46\xb3\x76\xbf\x3a\xa9\xf8\x4f\x8f\x6e\xd5\xb1\xc4\xf4\x89"
|
||||
"\x17\x2e\x24\x4d\xac\x30\x3d\x12\xf6\x8e\xcc\x76\x6e\xa6\x00\xaa"
|
||||
"\x88\x49\x5e\x7f\xb6\x05\x80\x31\x22\xfa\x13\x69\x24\xa8\x40\xb1"
|
||||
"\xf0\x71\x9d\x2d\x5f\x68\xe2\x9b\x24\x22\x99\xd7\x58\xed\x68\x0c",
|
||||
.prk_size = 64,
|
||||
.okm = "\x14\x07\xd4\x60\x13\xd9\x8b\xc6\xde\xce\xfc\xfe\xe5\x5f\x0f\x90"
|
||||
"\xb0\xc7\xf6\x3d\x68\xeb\x1a\x80\xea\xf0\x7e\x95\x3c\xfc\x0a\x3a"
|
||||
"\x52\x40\xa1\x55\xd6\xe4\xda\xa9\x65\xbb",
|
||||
.okm_size = 42,
|
||||
}
|
||||
};
|
||||
|
||||
static int hkdf_test(const char *shash, const struct hkdf_testvec *tv)
|
||||
{
	struct crypto_shash *tfm = NULL;
|
||||
u8 *prk = NULL, *okm = NULL;
|
||||
unsigned int prk_size;
|
||||
const char *driver;
|
||||
int err;
|
||||
|
||||
tfm = crypto_alloc_shash(shash, 0, 0);
|
||||
if (IS_ERR(tfm)) {
|
||||
pr_err("%s(%s): failed to allocate transform: %ld\n",
|
||||
tv->test, shash, PTR_ERR(tfm));
|
||||
return PTR_ERR(tfm);
|
||||
}
|
||||
driver = crypto_shash_driver_name(tfm);
|
||||
|
||||
prk_size = crypto_shash_digestsize(tfm);
|
||||
prk = kzalloc(prk_size, GFP_KERNEL);
|
||||
if (!prk) {
|
||||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (tv->prk_size != prk_size) {
|
||||
pr_err("%s(%s): prk size mismatch (vec %u, digest %u\n",
|
||||
tv->test, driver, tv->prk_size, prk_size);
|
||||
err = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
err = hkdf_extract(tfm, tv->ikm, tv->ikm_size,
|
||||
tv->salt, tv->salt_size, prk);
|
||||
if (err) {
|
||||
pr_err("%s(%s): hkdf_extract failed with %d\n",
|
||||
tv->test, driver, err);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (memcmp(prk, tv->prk, tv->prk_size)) {
|
||||
pr_err("%s(%s): hkdf_extract prk mismatch\n",
|
||||
tv->test, driver);
|
||||
print_hex_dump(KERN_ERR, "prk: ", DUMP_PREFIX_NONE,
|
||||
16, 1, prk, tv->prk_size, false);
|
||||
err = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
okm = kzalloc(tv->okm_size, GFP_KERNEL);
|
||||
if (!okm) {
|
||||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
err = crypto_shash_setkey(tfm, tv->prk, tv->prk_size);
|
||||
if (err) {
|
||||
pr_err("%s(%s): failed to set prk, error %d\n",
|
||||
tv->test, driver, err);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
err = hkdf_expand(tfm, tv->info, tv->info_size,
|
||||
okm, tv->okm_size);
|
||||
if (err) {
|
||||
pr_err("%s(%s): hkdf_expand() failed with %d\n",
|
||||
tv->test, driver, err);
|
||||
} else if (memcmp(okm, tv->okm, tv->okm_size)) {
|
||||
pr_err("%s(%s): hkdf_expand() okm mismatch\n",
|
||||
tv->test, driver);
|
||||
print_hex_dump(KERN_ERR, "okm: ", DUMP_PREFIX_NONE,
|
||||
16, 1, okm, tv->okm_size, false);
|
||||
err = -EINVAL;
|
||||
}
|
||||
out_free:
|
||||
kfree(okm);
|
||||
kfree(prk);
|
||||
crypto_free_shash(tfm);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int __init crypto_hkdf_module_init(void)
|
||||
{
|
||||
int ret = 0, i;
|
||||
|
||||
if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(hkdf_sha256_tv); i++) {
|
||||
ret = hkdf_test("hmac(sha256)", &hkdf_sha256_tv[i]);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
for (i = 0; i < ARRAY_SIZE(hkdf_sha384_tv); i++) {
|
||||
ret = hkdf_test("hmac(sha384)", &hkdf_sha384_tv[i]);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
for (i = 0; i < ARRAY_SIZE(hkdf_sha512_tv); i++) {
|
||||
ret = hkdf_test("hmac(sha512)", &hkdf_sha512_tv[i]);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit crypto_hkdf_module_exit(void) {}
|
||||
|
||||
module_init(crypto_hkdf_module_init);
|
||||
module_exit(crypto_hkdf_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("HMAC-based Key Derivation Function (HKDF)");
|
|
@@ -45,8 +45,6 @@ enum {
    Lo_deleting,
};

struct loop_func_table;

struct loop_device {
    int lo_number;
    loff_t lo_offset;

@@ -54,7 +52,8 @@ struct loop_device {
    int lo_flags;
    char lo_file_name[LO_NAME_SIZE];

    struct file * lo_backing_file;
    struct file *lo_backing_file;
    unsigned int lo_min_dio_size;
    struct block_device *lo_device;

    gfp_t old_gfp_mask;

@@ -169,29 +168,14 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
 * of backing device, and the logical block size of loop is bigger than that of
 * the backing device.
 */
static bool lo_bdev_can_use_dio(struct loop_device *lo,
        struct block_device *backing_bdev)
{
    unsigned int sb_bsize = bdev_logical_block_size(backing_bdev);

    if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
        return false;
    if (lo->lo_offset & (sb_bsize - 1))
        return false;
    return true;
}

static bool lo_can_use_dio(struct loop_device *lo)
{
    struct inode *inode = lo->lo_backing_file->f_mapping->host;

    if (!(lo->lo_backing_file->f_mode & FMODE_CAN_ODIRECT))
        return false;

    if (S_ISBLK(inode->i_mode))
        return lo_bdev_can_use_dio(lo, I_BDEV(inode));
    if (inode->i_sb->s_bdev)
        return lo_bdev_can_use_dio(lo, inode->i_sb->s_bdev);
    if (queue_logical_block_size(lo->lo_queue) < lo->lo_min_dio_size)
        return false;
    if (lo->lo_offset & (lo->lo_min_dio_size - 1))
        return false;
    return true;
}

@@ -205,20 +189,12 @@ static bool lo_can_use_dio(struct loop_device *lo)
 */
static inline void loop_update_dio(struct loop_device *lo)
{
    bool dio_in_use = lo->lo_flags & LO_FLAGS_DIRECT_IO;

    lockdep_assert_held(&lo->lo_mutex);
    WARN_ON_ONCE(lo->lo_state == Lo_bound &&
                 lo->lo_queue->mq_freeze_depth == 0);

    if (lo->lo_backing_file->f_flags & O_DIRECT)
        lo->lo_flags |= LO_FLAGS_DIRECT_IO;
    if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !lo_can_use_dio(lo))
        lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;

    /* flush dirty pages before starting to issue direct I/O */
    if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !dio_in_use)
        vfs_fsync(lo->lo_backing_file, 0);
}

/**

@@ -541,6 +517,28 @@ static void loop_reread_partitions(struct loop_device *lo)
            __func__, lo->lo_number, lo->lo_file_name, rc);
}

static unsigned int loop_query_min_dio_size(struct loop_device *lo)
{
    struct file *file = lo->lo_backing_file;
    struct block_device *sb_bdev = file->f_mapping->host->i_sb->s_bdev;
    struct kstat st;

    /*
     * Use the minimal dio alignment of the file system if provided.
     */
    if (!vfs_getattr(&file->f_path, &st, STATX_DIOALIGN, 0) &&
        (st.result_mask & STATX_DIOALIGN))
        return st.dio_offset_align;

    /*
     * In a perfect world this wouldn't be needed, but as of Linux 6.13 only
     * a handful of file systems support the STATX_DIOALIGN flag.
     */
    if (sb_bdev)
        return bdev_logical_block_size(sb_bdev);
    return SECTOR_SIZE;
}

static inline int is_loop_device(struct file *file)
{
    struct inode *i = file->f_mapping->host;

@@ -573,6 +571,17 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
    return 0;
}

static void loop_assign_backing_file(struct loop_device *lo, struct file *file)
{
    lo->lo_backing_file = file;
    lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
    mapping_set_gfp_mask(file->f_mapping,
            lo->old_gfp_mask & ~(__GFP_IO | __GFP_FS));
    if (lo->lo_backing_file->f_flags & O_DIRECT)
        lo->lo_flags |= LO_FLAGS_DIRECT_IO;
    lo->lo_min_dio_size = loop_query_min_dio_size(lo);
}

/*
 * loop_change_fd switched the backing store of a loopback device to
 * a new file. This is useful for operating system installers to free up

@@ -622,14 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
    if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
        goto out_err;

    /*
     * We might switch to direct I/O mode for the loop device, write back
     * all dirty data the page cache now that so that the individual I/O
     * operations don't have to do that.
     */
    vfs_fsync(file, 0);

    /* and ... switch */
    disk_force_media_change(lo->lo_disk);
    memflags = blk_mq_freeze_queue(lo->lo_queue);
    mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
    lo->lo_backing_file = file;
    lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
    mapping_set_gfp_mask(file->f_mapping,
                         lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
    loop_assign_backing_file(lo, file);
    loop_update_dio(lo);
    blk_mq_unfreeze_queue(lo->lo_queue, memflags);
    partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;

@@ -971,12 +984,11 @@ loop_set_status_from_info(struct loop_device *lo,
    return 0;
}

static unsigned int loop_default_blocksize(struct loop_device *lo,
        struct block_device *backing_bdev)
static unsigned int loop_default_blocksize(struct loop_device *lo)
{
    /* In case of direct I/O, match underlying block size */
    if ((lo->lo_backing_file->f_flags & O_DIRECT) && backing_bdev)
        return bdev_logical_block_size(backing_bdev);
    /* In case of direct I/O, match underlying minimum I/O size */
    if (lo->lo_flags & LO_FLAGS_DIRECT_IO)
        return lo->lo_min_dio_size;
    return SECTOR_SIZE;
}

@@ -994,7 +1006,7 @@ static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim,
        backing_bdev = inode->i_sb->s_bdev;

    if (!bsize)
        bsize = loop_default_blocksize(lo, backing_bdev);
        bsize = loop_default_blocksize(lo);

    loop_get_discard_config(lo, &granularity, &max_discard_sectors);

@@ -1019,7 +1031,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
                          const struct loop_config *config)
{
    struct file *file = fget(config->fd);
    struct address_space *mapping;
    struct queue_limits lim;
    int error;
    loff_t size;

@@ -1055,8 +1066,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
    if (error)
        goto out_unlock;

    mapping = file->f_mapping;

    if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
        error = -EINVAL;
        goto out_unlock;

@@ -1088,9 +1097,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
    set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);

    lo->lo_device = bdev;
    lo->lo_backing_file = file;
    lo->old_gfp_mask = mapping_gfp_mask(mapping);
    mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
    loop_assign_backing_file(lo, file);

    lim = queue_limits_start_update(lo->lo_queue);
    loop_update_limits(lo, &lim, config->block_size);

@@ -1099,6 +1106,13 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
    if (error)
        goto out_unlock;

    /*
     * We might switch to direct I/O mode for the loop device, write back
     * all dirty data the page cache now that so that the individual I/O
     * operations don't have to do that.
     */
    vfs_fsync(file, 0);

    loop_update_dio(lo);
    loop_sysfs_init(lo);
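The new loop_query_min_dio_size() above leans on the same interface that userspace can use to discover direct-I/O constraints: statx() with STATX_DIOALIGN, available since Linux 6.1 on file systems that implement it. A minimal sketch, assuming a kernel and headers recent enough to define STATX_DIOALIGN:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
    struct statx stx;

    if (argc < 2)
        return 1;
    /* Ask only for the direct-I/O alignment fields. */
    if (statx(AT_FDCWD, argv[1], 0, STATX_DIOALIGN, &stx) != 0) {
        perror("statx");
        return 1;
    }
    if (stx.stx_mask & STATX_DIOALIGN)
        printf("dio mem align: %u, dio offset align: %u\n",
               stx.stx_dio_mem_align, stx.stx_dio_offset_align);
    else
        printf("file system does not report STATX_DIOALIGN\n");
    return 0;
}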
|
||||
|
||||
|
|
|
@@ -2056,7 +2056,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
    unsigned int nents;

    /* Map the scatter list for DMA access */
    nents = blk_rq_map_sg(hctx->queue, rq, command->sg);
    nents = blk_rq_map_sg(rq, command->sg);
    nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);

    prefetch(&port->flags);
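The mtip32xx hunk above, and the rnbd, sunvdc, virtio_blk and xen-blkfront hunks further down, are the same mechanical change: blk_rq_map_sg() drops its request_queue argument, presumably because the queue is reachable from the request itself. A hedged sketch of a driver mapping path after the change; my_dev and my_cmd are placeholders, not a real driver:

/* Sketch only: my_dev/my_cmd are illustrative types. */
static int my_driver_map_data(struct my_dev *dev, struct request *rq,
                              struct my_cmd *cmd)
{
    int nents;

    /* Old form was: blk_rq_map_sg(rq->q, rq, cmd->sg); */
    nents = blk_rq_map_sg(rq, cmd->sg);
    if (!nents)
        return -EIO;

    return dma_map_sg(dev->dma_dev, cmd->sg, nents, rq_dma_dir(rq));
}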
|
||||
|
|
|
@ -473,6 +473,8 @@ NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
|
|||
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(fua, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(rotational, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(badblocks_once, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(badblocks_partial_io, bool, NULL);
|
||||
|
||||
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
|
||||
{
|
||||
|
@ -559,14 +561,14 @@ static ssize_t nullb_device_badblocks_store(struct config_item *item,
|
|||
goto out;
|
||||
/* enable badblocks */
|
||||
cmpxchg(&t_dev->badblocks.shift, -1, 0);
|
||||
if (buf[0] == '+')
|
||||
ret = badblocks_set(&t_dev->badblocks, start,
|
||||
end - start + 1, 1);
|
||||
else
|
||||
ret = badblocks_clear(&t_dev->badblocks, start,
|
||||
end - start + 1);
|
||||
if (ret == 0)
|
||||
if (buf[0] == '+') {
|
||||
if (badblocks_set(&t_dev->badblocks, start,
|
||||
end - start + 1, 1))
|
||||
ret = count;
|
||||
} else if (badblocks_clear(&t_dev->badblocks, start,
|
||||
end - start + 1)) {
|
||||
ret = count;
|
||||
}
|
||||
out:
|
||||
kfree(orig);
|
||||
return ret;
|
||||
|
@ -592,41 +594,43 @@ static ssize_t nullb_device_zone_offline_store(struct config_item *item,
|
|||
CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
|
||||
|
||||
static struct configfs_attribute *nullb_device_attrs[] = {
|
||||
&nullb_device_attr_size,
|
||||
&nullb_device_attr_completion_nsec,
|
||||
&nullb_device_attr_submit_queues,
|
||||
&nullb_device_attr_poll_queues,
|
||||
&nullb_device_attr_home_node,
|
||||
&nullb_device_attr_queue_mode,
|
||||
&nullb_device_attr_badblocks,
|
||||
&nullb_device_attr_badblocks_once,
|
||||
&nullb_device_attr_badblocks_partial_io,
|
||||
&nullb_device_attr_blocking,
|
||||
&nullb_device_attr_blocksize,
|
||||
&nullb_device_attr_max_sectors,
|
||||
&nullb_device_attr_irqmode,
|
||||
&nullb_device_attr_cache_size,
|
||||
&nullb_device_attr_completion_nsec,
|
||||
&nullb_device_attr_discard,
|
||||
&nullb_device_attr_fua,
|
||||
&nullb_device_attr_home_node,
|
||||
&nullb_device_attr_hw_queue_depth,
|
||||
&nullb_device_attr_index,
|
||||
&nullb_device_attr_blocking,
|
||||
&nullb_device_attr_use_per_node_hctx,
|
||||
&nullb_device_attr_power,
|
||||
&nullb_device_attr_memory_backed,
|
||||
&nullb_device_attr_discard,
|
||||
&nullb_device_attr_irqmode,
|
||||
&nullb_device_attr_max_sectors,
|
||||
&nullb_device_attr_mbps,
|
||||
&nullb_device_attr_cache_size,
|
||||
&nullb_device_attr_badblocks,
|
||||
&nullb_device_attr_zoned,
|
||||
&nullb_device_attr_zone_size,
|
||||
&nullb_device_attr_zone_capacity,
|
||||
&nullb_device_attr_zone_nr_conv,
|
||||
&nullb_device_attr_zone_max_open,
|
||||
&nullb_device_attr_zone_max_active,
|
||||
&nullb_device_attr_zone_append_max_sectors,
|
||||
&nullb_device_attr_zone_readonly,
|
||||
&nullb_device_attr_zone_offline,
|
||||
&nullb_device_attr_zone_full,
|
||||
&nullb_device_attr_virt_boundary,
|
||||
&nullb_device_attr_memory_backed,
|
||||
&nullb_device_attr_no_sched,
|
||||
&nullb_device_attr_shared_tags,
|
||||
&nullb_device_attr_shared_tag_bitmap,
|
||||
&nullb_device_attr_fua,
|
||||
&nullb_device_attr_poll_queues,
|
||||
&nullb_device_attr_power,
|
||||
&nullb_device_attr_queue_mode,
|
||||
&nullb_device_attr_rotational,
|
||||
&nullb_device_attr_shared_tag_bitmap,
|
||||
&nullb_device_attr_shared_tags,
|
||||
&nullb_device_attr_size,
|
||||
&nullb_device_attr_submit_queues,
|
||||
&nullb_device_attr_use_per_node_hctx,
|
||||
&nullb_device_attr_virt_boundary,
|
||||
&nullb_device_attr_zone_append_max_sectors,
|
||||
&nullb_device_attr_zone_capacity,
|
||||
&nullb_device_attr_zone_full,
|
||||
&nullb_device_attr_zone_max_active,
|
||||
&nullb_device_attr_zone_max_open,
|
||||
&nullb_device_attr_zone_nr_conv,
|
||||
&nullb_device_attr_zone_offline,
|
||||
&nullb_device_attr_zone_readonly,
|
||||
&nullb_device_attr_zone_size,
|
||||
&nullb_device_attr_zoned,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -704,16 +708,28 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
|
|||
|
||||
static ssize_t memb_group_features_show(struct config_item *item, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE,
|
||||
"badblocks,blocking,blocksize,cache_size,fua,"
|
||||
"completion_nsec,discard,home_node,hw_queue_depth,"
|
||||
"irqmode,max_sectors,mbps,memory_backed,no_sched,"
|
||||
"poll_queues,power,queue_mode,shared_tag_bitmap,"
|
||||
"shared_tags,size,submit_queues,use_per_node_hctx,"
|
||||
"virt_boundary,zoned,zone_capacity,zone_max_active,"
|
||||
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
|
||||
"zone_size,zone_append_max_sectors,zone_full,"
|
||||
"rotational\n");
|
||||
|
||||
struct configfs_attribute **entry;
|
||||
char delimiter = ',';
|
||||
size_t left = PAGE_SIZE;
|
||||
size_t written = 0;
|
||||
int ret;
|
||||
|
||||
for (entry = &nullb_device_attrs[0]; *entry && left > 0; entry++) {
|
||||
if (!*(entry + 1))
|
||||
delimiter = '\n';
|
||||
ret = snprintf(page + written, left, "%s%c", (*entry)->ca_name,
|
||||
delimiter);
|
||||
if (ret >= left) {
|
||||
WARN_ONCE(1, "Too many null_blk features to print\n");
|
||||
memzero_explicit(page, PAGE_SIZE);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
left -= ret;
|
||||
written += ret;
|
||||
}
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
CONFIGFS_ATTR_RO(memb_group_, features);
|
||||
|
@ -1249,25 +1265,37 @@ static int null_transfer(struct nullb *nullb, struct page *page,
|
|||
return err;
|
||||
}
|
||||
|
||||
static blk_status_t null_handle_rq(struct nullb_cmd *cmd)
|
||||
/*
|
||||
* Transfer data for the given request. The transfer size is capped with the
|
||||
* nr_sectors argument.
|
||||
*/
|
||||
static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd,
|
||||
sector_t nr_sectors)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
struct nullb *nullb = cmd->nq->dev->nullb;
|
||||
int err = 0;
|
||||
unsigned int len;
|
||||
sector_t sector = blk_rq_pos(rq);
|
||||
unsigned int max_bytes = nr_sectors << SECTOR_SHIFT;
|
||||
unsigned int transferred_bytes = 0;
|
||||
struct req_iterator iter;
|
||||
struct bio_vec bvec;
|
||||
|
||||
spin_lock_irq(&nullb->lock);
|
||||
rq_for_each_segment(bvec, rq, iter) {
|
||||
len = bvec.bv_len;
|
||||
if (transferred_bytes + len > max_bytes)
|
||||
len = max_bytes - transferred_bytes;
|
||||
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
|
||||
op_is_write(req_op(rq)), sector,
|
||||
rq->cmd_flags & REQ_FUA);
|
||||
if (err)
|
||||
break;
|
||||
sector += len >> SECTOR_SHIFT;
|
||||
transferred_bytes += len;
|
||||
if (transferred_bytes >= max_bytes)
|
||||
break;
|
||||
}
|
||||
spin_unlock_irq(&nullb->lock);
|
||||
|
||||
|
@ -1295,31 +1323,51 @@ static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
|
|||
return sts;
|
||||
}
|
||||
|
||||
static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
|
||||
sector_t sector,
|
||||
sector_t nr_sectors)
|
||||
/*
|
||||
* Check if the command should fail for the badblocks. If so, return
|
||||
* BLK_STS_IOERR and return number of partial I/O sectors to be written or read,
|
||||
* which may be less than the requested number of sectors.
|
||||
*
|
||||
* @cmd: The command to handle.
|
||||
* @sector: The start sector for I/O.
|
||||
* @nr_sectors: Specifies number of sectors to write or read, and returns the
|
||||
* number of sectors to be written or read.
|
||||
*/
|
||||
blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
|
||||
unsigned int *nr_sectors)
|
||||
{
|
||||
struct badblocks *bb = &cmd->nq->dev->badblocks;
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
|
||||
if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
|
||||
return BLK_STS_IOERR;
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
unsigned int block_sectors = dev->blocksize >> SECTOR_SHIFT;
|
||||
sector_t first_bad, bad_sectors;
|
||||
unsigned int partial_io_sectors = 0;
|
||||
|
||||
if (!badblocks_check(bb, sector, *nr_sectors, &first_bad, &bad_sectors))
|
||||
return BLK_STS_OK;
|
||||
|
||||
if (cmd->nq->dev->badblocks_once)
|
||||
badblocks_clear(bb, first_bad, bad_sectors);
|
||||
|
||||
if (cmd->nq->dev->badblocks_partial_io) {
|
||||
if (!IS_ALIGNED(first_bad, block_sectors))
|
||||
first_bad = ALIGN_DOWN(first_bad, block_sectors);
|
||||
if (sector < first_bad)
|
||||
partial_io_sectors = first_bad - sector;
|
||||
}
|
||||
*nr_sectors = partial_io_sectors;
|
||||
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
|
||||
enum req_op op,
|
||||
sector_t sector,
|
||||
sector_t nr_sectors)
|
||||
blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
|
||||
sector_t sector, sector_t nr_sectors)
|
||||
{
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
|
||||
if (op == REQ_OP_DISCARD)
|
||||
return null_handle_discard(dev, sector, nr_sectors);
|
||||
|
||||
return null_handle_rq(cmd);
|
||||
return null_handle_data_transfer(cmd, nr_sectors);
|
||||
}
|
||||
|
||||
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
|
||||
|
@ -1366,18 +1414,19 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
|
|||
sector_t sector, unsigned int nr_sectors)
|
||||
{
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
blk_status_t badblocks_ret = BLK_STS_OK;
|
||||
blk_status_t ret;
|
||||
|
||||
if (dev->badblocks.shift != -1) {
|
||||
ret = null_handle_badblocks(cmd, sector, nr_sectors);
|
||||
if (dev->badblocks.shift != -1)
|
||||
badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
|
||||
|
||||
if (dev->memory_backed && nr_sectors) {
|
||||
ret = null_handle_memory_backed(cmd, op, sector, nr_sectors);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (dev->memory_backed)
|
||||
return null_handle_memory_backed(cmd, op, sector, nr_sectors);
|
||||
|
||||
return BLK_STS_OK;
|
||||
return badblocks_ret;
|
||||
}
|
||||
|
||||
static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
|
||||
|
|
|
@@ -63,6 +63,8 @@ struct nullb_device {
    unsigned long flags; /* device flags */
    unsigned int curr_cache;
    struct badblocks badblocks;
    bool badblocks_once;
    bool badblocks_partial_io;

    unsigned int nr_zones;
    unsigned int nr_zones_imp_open;

@@ -131,6 +133,10 @@ blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
                                 sector_t nr_sectors);
blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
                              sector_t sector, unsigned int nr_sectors);
blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
                                   unsigned int *nr_sectors);
blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
                                       sector_t sector, sector_t nr_sectors);

#ifdef CONFIG_BLK_DEV_ZONED
int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
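The reworked null_handle_badblocks() prototype above now reports a bad-block hit through its return value while trimming *nr_sectors down to the leading good range that may still be transferred when badblocks_partial_io is set. A condensed sketch of the calling convention, mirroring how null_process_cmd() uses it elsewhere in this series; this is illustrative, not a drop-in replacement:

/* Sketch of the caller side only. */
static blk_status_t handle_cmd_sketch(struct nullb_cmd *cmd, enum req_op op,
                                      sector_t sector, unsigned int nr_sectors)
{
    struct nullb_device *dev = cmd->nq->dev;
    blk_status_t badblocks_ret = BLK_STS_OK;
    blk_status_t ret;

    if (dev->badblocks.shift != -1)
        badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);

    /* nr_sectors may now be smaller: only the good prefix is transferred. */
    if (dev->memory_backed && nr_sectors) {
        ret = null_handle_memory_backed(cmd, op, sector, nr_sectors);
        if (ret != BLK_STS_OK)
            return ret;
    }

    /* Report the bad-block error even if a partial transfer succeeded. */
    return badblocks_ret;
}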
|
||||
|
|
|
@ -353,6 +353,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
|||
struct nullb_device *dev = cmd->nq->dev;
|
||||
unsigned int zno = null_zone_no(dev, sector);
|
||||
struct nullb_zone *zone = &dev->zones[zno];
|
||||
blk_status_t badblocks_ret = BLK_STS_OK;
|
||||
blk_status_t ret;
|
||||
|
||||
trace_nullb_zone_op(cmd, zno, zone->cond);
|
||||
|
@ -412,9 +413,20 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
|||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
}
|
||||
|
||||
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
|
||||
if (dev->badblocks.shift != -1) {
|
||||
badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
|
||||
if (badblocks_ret != BLK_STS_OK && !nr_sectors) {
|
||||
ret = badblocks_ret;
|
||||
goto unlock_zone;
|
||||
}
|
||||
}
|
||||
|
||||
if (dev->memory_backed) {
|
||||
ret = null_handle_memory_backed(cmd, REQ_OP_WRITE, sector,
|
||||
nr_sectors);
|
||||
if (ret != BLK_STS_OK)
|
||||
goto unlock_zone;
|
||||
}
|
||||
|
||||
zone->wp += nr_sectors;
|
||||
if (zone->wp == zone->start + zone->capacity) {
|
||||
|
@ -429,7 +441,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
|||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
}
|
||||
|
||||
ret = BLK_STS_OK;
|
||||
ret = badblocks_ret;
|
||||
|
||||
unlock_zone:
|
||||
null_unlock_zone(dev, zone);
|
||||
|
|
|
@@ -1010,7 +1010,7 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev,
     * See queue limits.
     */
    if ((req_op(rq) != REQ_OP_DISCARD) && (req_op(rq) != REQ_OP_WRITE_ZEROES))
        sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl);
        sg_cnt = blk_rq_map_sg(rq, iu->sgt.sgl);

    if (sg_cnt == 0)
        sg_mark_end(&iu->sgt.sgl[0]);
|
||||
|
|
|
@@ -485,7 +485,7 @@ static int __send_request(struct request *req)
    }

    sg_init_table(sg, port->ring_cookies);
    nsg = blk_rq_map_sg(req->q, req, sg);
    nsg = blk_rq_map_sg(req, sg);

    len = 0;
    for (i = 0; i < nsg; i++)
|
||||
|
|
|
@ -73,11 +73,10 @@
|
|||
/* All UBLK_PARAM_TYPE_* should be included here */
|
||||
#define UBLK_PARAM_TYPE_ALL \
|
||||
(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
|
||||
UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED)
|
||||
UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \
|
||||
UBLK_PARAM_TYPE_DMA_ALIGN)
|
||||
|
||||
struct ublk_rq_data {
|
||||
struct llist_node node;
|
||||
|
||||
struct kref ref;
|
||||
};
|
||||
|
||||
|
@ -144,8 +143,6 @@ struct ublk_queue {
|
|||
struct task_struct *ubq_daemon;
|
||||
char *io_cmd_buf;
|
||||
|
||||
struct llist_head io_cmds;
|
||||
|
||||
unsigned long io_addr; /* mapped vm address */
|
||||
unsigned int max_io_sz;
|
||||
bool force_abort;
|
||||
|
@ -494,15 +491,17 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */
|
|||
|
||||
static DEFINE_MUTEX(ublk_ctl_mutex);
|
||||
|
||||
|
||||
#define UBLK_MAX_UBLKS UBLK_MINORS
|
||||
|
||||
/*
|
||||
* Max ublk devices allowed to add
|
||||
* Max unprivileged ublk devices allowed to add
|
||||
*
|
||||
* It can be extended to one per-user limit in future or even controlled
|
||||
* by cgroup.
|
||||
*/
|
||||
#define UBLK_MAX_UBLKS UBLK_MINORS
|
||||
static unsigned int ublks_max = 64;
|
||||
static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
|
||||
static unsigned int unprivileged_ublks_max = 64;
|
||||
static unsigned int unprivileged_ublks_added; /* protected by ublk_ctl_mutex */
|
||||
|
||||
static struct miscdevice ublk_misc;
|
||||
|
||||
|
@ -573,6 +572,16 @@ static int ublk_validate_params(const struct ublk_device *ub)
|
|||
else if (ublk_dev_is_zoned(ub))
|
||||
return -EINVAL;
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN) {
|
||||
const struct ublk_param_dma_align *p = &ub->params.dma;
|
||||
|
||||
if (p->alignment >= PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_power_of_2(p->alignment + 1))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1100,7 +1109,7 @@ static void ublk_complete_rq(struct kref *ref)
|
|||
}
|
||||
|
||||
/*
|
||||
* Since __ublk_rq_task_work always fails requests immediately during
|
||||
* Since ublk_rq_task_work_cb always fails requests immediately during
|
||||
* exiting, __ublk_fail_req() is only called from abort context during
|
||||
* exiting. So lock is unnecessary.
|
||||
*
|
||||
|
@ -1146,11 +1155,14 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
|
|||
blk_mq_end_request(rq, BLK_STS_IOERR);
|
||||
}
|
||||
|
||||
static inline void __ublk_rq_task_work(struct request *req,
|
||||
unsigned issue_flags)
|
||||
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct ublk_queue *ubq = req->mq_hctx->driver_data;
|
||||
int tag = req->tag;
|
||||
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
|
||||
struct ublk_queue *ubq = pdu->ubq;
|
||||
int tag = pdu->tag;
|
||||
struct request *req = blk_mq_tag_to_rq(
|
||||
ubq->dev->tag_set.tags[ubq->q_id], tag);
|
||||
struct ublk_io *io = &ubq->ios[tag];
|
||||
unsigned int mapped_bytes;
|
||||
|
||||
|
@ -1225,34 +1237,11 @@ static inline void __ublk_rq_task_work(struct request *req,
|
|||
ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
|
||||
}
|
||||
|
||||
static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
|
||||
unsigned issue_flags)
|
||||
{
|
||||
struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
|
||||
struct ublk_rq_data *data, *tmp;
|
||||
|
||||
io_cmds = llist_reverse_order(io_cmds);
|
||||
llist_for_each_entry_safe(data, tmp, io_cmds, node)
|
||||
__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
|
||||
}
|
||||
|
||||
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
|
||||
{
|
||||
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
|
||||
struct ublk_queue *ubq = pdu->ubq;
|
||||
|
||||
ublk_forward_io_cmds(ubq, issue_flags);
|
||||
}
|
||||
|
||||
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
|
||||
{
|
||||
struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
|
||||
|
||||
if (llist_add(&data->node, &ubq->io_cmds)) {
|
||||
struct ublk_io *io = &ubq->ios[rq->tag];
|
||||
|
||||
io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
|
||||
}
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return ublk_timeout(struct request *rq)
|
||||
|
@ -1445,7 +1434,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
|
|||
struct request *rq;
|
||||
|
||||
/*
|
||||
* Either we fail the request or ublk_rq_task_work_fn
|
||||
* Either we fail the request or ublk_rq_task_work_cb
|
||||
* will do it
|
||||
*/
|
||||
rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
|
||||
|
@ -1911,10 +1900,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
|||
return -EIOCBQUEUED;
|
||||
|
||||
out:
|
||||
io_uring_cmd_done(cmd, ret, 0, issue_flags);
|
||||
pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
|
||||
__func__, cmd_op, tag, ret, io->flags);
|
||||
return -EIOCBQUEUED;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
|
||||
|
@ -1970,7 +1958,10 @@ static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
|
|||
static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
ublk_ch_uring_cmd_local(cmd, issue_flags);
|
||||
int ret = ublk_ch_uring_cmd_local(cmd, issue_flags);
|
||||
|
||||
if (ret != -EIOCBQUEUED)
|
||||
io_uring_cmd_done(cmd, ret, 0, issue_flags);
|
||||
}
|
||||
|
||||
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
|
@ -2235,7 +2226,8 @@ static int ublk_add_chdev(struct ublk_device *ub)
|
|||
if (ret)
|
||||
goto fail;
|
||||
|
||||
ublks_added++;
|
||||
if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV)
|
||||
unprivileged_ublks_added++;
|
||||
return 0;
|
||||
fail:
|
||||
put_device(dev);
|
||||
|
@ -2264,11 +2256,16 @@ static int ublk_add_tag_set(struct ublk_device *ub)
|
|||
|
||||
static void ublk_remove(struct ublk_device *ub)
|
||||
{
|
||||
bool unprivileged;
|
||||
|
||||
ublk_stop_dev(ub);
|
||||
cancel_work_sync(&ub->nosrv_work);
|
||||
cdev_device_del(&ub->cdev, &ub->cdev_dev);
|
||||
unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
|
||||
ublk_put_device(ub);
|
||||
ublks_added--;
|
||||
|
||||
if (unprivileged)
|
||||
unprivileged_ublks_added--;
|
||||
}
|
||||
|
||||
static struct ublk_device *ublk_get_device_from_id(int idx)
|
||||
|
@ -2343,6 +2340,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
|||
if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL)
|
||||
lim.features |= BLK_FEAT_ROTATIONAL;
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN)
|
||||
lim.dma_alignment = ub->params.dma.alignment;
|
||||
|
||||
if (wait_for_completion_interruptible(&ub->completion) != 0)
|
||||
return -EINTR;
|
||||
|
||||
|
@ -2530,7 +2530,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
|||
return ret;
|
||||
|
||||
ret = -EACCES;
|
||||
if (ublks_added >= ublks_max)
|
||||
if ((info.flags & UBLK_F_UNPRIVILEGED_DEV) &&
|
||||
unprivileged_ublks_added >= unprivileged_ublks_max)
|
||||
goto out_unlock;
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
@ -3101,10 +3102,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
|||
if (ub)
|
||||
ublk_put_device(ub);
|
||||
out:
|
||||
io_uring_cmd_done(cmd, ret, 0, issue_flags);
|
||||
pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
|
||||
__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
|
||||
return -EIOCBQUEUED;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations ublk_ctl_fops = {
|
||||
|
@ -3168,23 +3168,26 @@ static void __exit ublk_exit(void)
|
|||
module_init(ublk_init);
|
||||
module_exit(ublk_exit);
|
||||
|
||||
static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp)
|
||||
static int ublk_set_max_unprivileged_ublks(const char *buf,
|
||||
const struct kernel_param *kp)
|
||||
{
|
||||
return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS);
|
||||
}
|
||||
|
||||
static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp)
|
||||
static int ublk_get_max_unprivileged_ublks(char *buf,
|
||||
const struct kernel_param *kp)
|
||||
{
|
||||
return sysfs_emit(buf, "%u\n", ublks_max);
|
||||
return sysfs_emit(buf, "%u\n", unprivileged_ublks_max);
|
||||
}
|
||||
|
||||
static const struct kernel_param_ops ublk_max_ublks_ops = {
|
||||
.set = ublk_set_max_ublks,
|
||||
.get = ublk_get_max_ublks,
|
||||
static const struct kernel_param_ops ublk_max_unprivileged_ublks_ops = {
|
||||
.set = ublk_set_max_unprivileged_ublks,
|
||||
.get = ublk_get_max_unprivileged_ublks,
|
||||
};
|
||||
|
||||
module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
|
||||
MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
|
||||
module_param_cb(ublks_max, &ublk_max_unprivileged_ublks_ops,
|
||||
&unprivileged_ublks_max, 0644);
|
||||
MODULE_PARM_DESC(ublks_max, "max number of unprivileged ublk devices allowed to add(default: 64)");
|
||||
|
||||
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
|
||||
MODULE_DESCRIPTION("Userspace block device");
|
||||
|
|
|
@@ -226,7 +226,7 @@ static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
    if (unlikely(err))
        return -ENOMEM;

    return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
    return blk_rq_map_sg(req, vbr->sg_table.sgl);
}

static void virtblk_cleanup_cmd(struct request *req)
|
||||
|
|
|
@@ -751,7 +751,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
    id = blkif_ring_get_request(rinfo, req, &final_ring_req);
    ring_req = &rinfo->shadow[id].req;

    num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
    num_sg = blk_rq_map_sg(req, rinfo->shadow[id].sg);
    num_grant = 0;
    /* Calculate the number of grant used */
    for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
|
||||
|
|
|
@@ -4811,23 +4811,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
            ti->error = "Cannot allocate bio set";
            goto bad;
        }
        r = bioset_integrity_create(&ic->recheck_bios, RECHECK_POOL_SIZE);
        if (r) {
            ti->error = "Cannot allocate bio integrity set";
            r = -ENOMEM;
            goto bad;
        }
        r = bioset_init(&ic->recalc_bios, 1, 0, BIOSET_NEED_BVECS);
        if (r) {
            ti->error = "Cannot allocate bio set";
            goto bad;
        }
        r = bioset_integrity_create(&ic->recalc_bios, 1);
        if (r) {
            ti->error = "Cannot allocate bio integrity set";
            r = -ENOMEM;
            goto bad;
        }
    }

    ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
|
||||
|
|
|
@@ -1081,15 +1081,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
        __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
    if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
        goto out_free_pools;
    if (mempool_needs_integrity &&
        bioset_integrity_create(&pools->io_bs, pool_size))
        goto out_free_pools;
init_bs:
    if (bioset_init(&pools->bs, pool_size, front_pad, 0))
        goto out_free_pools;
    if (mempool_needs_integrity &&
        bioset_integrity_create(&pools->bs, pool_size))
        goto out_free_pools;

    t->mempools = pools;
    return 0;

@@ -1250,6 +1244,7 @@ static int dm_table_construct_crypto_profile(struct dm_table *t)
    profile->max_dun_bytes_supported = UINT_MAX;
    memset(profile->modes_supported, 0xFF,
           sizeof(profile->modes_supported));
    profile->key_types_supported = ~0;

    for (i = 0; i < t->num_targets; i++) {
        struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
|
|
@ -29,8 +29,10 @@
|
|||
#include <linux/buffer_head.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "md.h"
|
||||
#include "md-bitmap.h"
|
||||
#include "md-cluster.h"
|
||||
|
||||
#define BITMAP_MAJOR_LO 3
|
||||
/* version 4 insists the bitmap is in little-endian order
|
||||
|
@ -426,8 +428,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
|||
struct block_device *bdev;
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
|
||||
PAGE_SHIFT;
|
||||
unsigned long num_pages = bitmap->storage.file_pages;
|
||||
unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT;
|
||||
loff_t sboff, offset = mddev->bitmap_info.offset;
|
||||
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
|
||||
unsigned int size = PAGE_SIZE;
|
||||
|
@ -436,7 +438,7 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
|||
|
||||
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
|
||||
/* we compare length (page numbers), not page offset. */
|
||||
if ((pg_index - store->sb_index) == store->file_pages - 1) {
|
||||
if ((pg_index - store->sb_index) == num_pages - 1) {
|
||||
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
|
||||
|
||||
if (last_page_size == 0)
|
||||
|
@ -942,7 +944,7 @@ out:
|
|||
bmname(bitmap), err);
|
||||
goto out_no_sb;
|
||||
}
|
||||
bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
|
||||
bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev);
|
||||
goto re_read;
|
||||
}
|
||||
|
||||
|
@ -2021,7 +2023,7 @@ static void md_bitmap_free(void *data)
|
|||
sysfs_put(bitmap->sysfs_can_clear);
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
|
||||
bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
|
||||
bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev))
|
||||
md_cluster_stop(bitmap->mddev);
|
||||
|
||||
/* Shouldn't be needed - but just in case.... */
|
||||
|
@ -2229,7 +2231,7 @@ static int bitmap_load(struct mddev *mddev)
|
|||
mddev_create_serial_pool(mddev, rdev);
|
||||
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
|
||||
mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
|
||||
|
||||
/* Clear out old bitmap info first: Either there is none, or we
|
||||
* are resuming after someone else has possibly changed things,
|
||||
|
|
|
@ -1166,7 +1166,7 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
|
|||
struct dlm_lock_resource *bm_lockres;
|
||||
char str[64];
|
||||
|
||||
if (i == md_cluster_ops->slot_number(mddev))
|
||||
if (i == slot_number(mddev))
|
||||
continue;
|
||||
|
||||
bitmap = mddev->bitmap_ops->get_from_slot(mddev, i);
|
||||
|
@ -1216,7 +1216,7 @@ out:
|
|||
*/
|
||||
static int cluster_check_sync_size(struct mddev *mddev)
|
||||
{
|
||||
int current_slot = md_cluster_ops->slot_number(mddev);
|
||||
int current_slot = slot_number(mddev);
|
||||
int node_num = mddev->bitmap_info.nodes;
|
||||
struct dlm_lock_resource *bm_lockres;
|
||||
struct md_bitmap_stats stats;
|
||||
|
@ -1612,7 +1612,14 @@ out:
|
|||
return err;
|
||||
}
|
||||
|
||||
static const struct md_cluster_operations cluster_ops = {
|
||||
static struct md_cluster_operations cluster_ops = {
|
||||
.head = {
|
||||
.type = MD_CLUSTER,
|
||||
.id = ID_CLUSTER,
|
||||
.name = "cluster",
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.join = join,
|
||||
.leave = leave,
|
||||
.slot_number = slot_number,
|
||||
|
@ -1642,13 +1649,12 @@ static int __init cluster_init(void)
|
|||
{
|
||||
pr_warn("md-cluster: support raid1 and raid10 (limited support)\n");
|
||||
pr_info("Registering Cluster MD functions\n");
|
||||
register_md_cluster_operations(&cluster_ops, THIS_MODULE);
|
||||
return 0;
|
||||
return register_md_submodule(&cluster_ops.head);
|
||||
}
|
||||
|
||||
static void cluster_exit(void)
|
||||
{
|
||||
unregister_md_cluster_operations();
|
||||
unregister_md_submodule(&cluster_ops.head);
|
||||
}
|
||||
|
||||
module_init(cluster_init);
|
||||
|
|
|
@ -10,6 +10,8 @@ struct mddev;
|
|||
struct md_rdev;
|
||||
|
||||
struct md_cluster_operations {
|
||||
struct md_submodule_head head;
|
||||
|
||||
int (*join)(struct mddev *mddev, int nodes);
|
||||
int (*leave)(struct mddev *mddev);
|
||||
int (*slot_number)(struct mddev *mddev);
|
||||
|
@ -35,4 +37,8 @@ struct md_cluster_operations {
|
|||
void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);
|
||||
};
|
||||
|
||||
extern int md_setup_cluster(struct mddev *mddev, int nodes);
|
||||
extern void md_cluster_stop(struct mddev *mddev);
|
||||
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
|
||||
|
||||
#endif /* _MD_CLUSTER_H */
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/raid/md_u.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -320,9 +319,13 @@ static void linear_quiesce(struct mddev *mddev, int state)
|
|||
}
|
||||
|
||||
static struct md_personality linear_personality = {
|
||||
.head = {
|
||||
.type = MD_PERSONALITY,
|
||||
.id = ID_LINEAR,
|
||||
.name = "linear",
|
||||
.level = LEVEL_LINEAR,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.make_request = linear_make_request,
|
||||
.run = linear_run,
|
||||
.free = linear_free,
|
||||
|
@ -335,12 +338,12 @@ static struct md_personality linear_personality = {
|
|||
|
||||
static int __init linear_init(void)
|
||||
{
|
||||
return register_md_personality(&linear_personality);
|
||||
return register_md_submodule(&linear_personality.head);
|
||||
}
|
||||
|
||||
static void linear_exit(void)
|
||||
{
|
||||
unregister_md_personality(&linear_personality);
|
||||
unregister_md_submodule(&linear_personality.head);
|
||||
}
|
||||
|
||||
module_init(linear_init);
|
||||
|
|
drivers/md/md.c (344 changed lines)
|
@ -79,16 +79,10 @@ static const char *action_name[NR_SYNC_ACTIONS] = {
|
|||
[ACTION_IDLE] = "idle",
|
||||
};
|
||||
|
||||
/* pers_list is a list of registered personalities protected by pers_lock. */
|
||||
static LIST_HEAD(pers_list);
|
||||
static DEFINE_SPINLOCK(pers_lock);
|
||||
static DEFINE_XARRAY(md_submodule);
|
||||
|
||||
static const struct kobj_type md_ktype;
|
||||
|
||||
const struct md_cluster_operations *md_cluster_ops;
|
||||
EXPORT_SYMBOL(md_cluster_ops);
|
||||
static struct module *md_cluster_mod;
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
|
||||
static struct workqueue_struct *md_wq;
|
||||
|
||||
|
@ -629,6 +623,12 @@ static void __mddev_put(struct mddev *mddev)
|
|||
queue_work(md_misc_wq, &mddev->del_work);
|
||||
}
|
||||
|
||||
static void mddev_put_locked(struct mddev *mddev)
|
||||
{
|
||||
if (atomic_dec_and_test(&mddev->active))
|
||||
__mddev_put(mddev);
|
||||
}
|
||||
|
||||
void mddev_put(struct mddev *mddev)
|
||||
{
|
||||
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
|
||||
|
@ -888,16 +888,40 @@ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
|
||||
|
||||
static struct md_personality *find_pers(int level, char *clevel)
|
||||
static struct md_personality *get_pers(int level, char *clevel)
|
||||
{
|
||||
struct md_personality *pers;
|
||||
list_for_each_entry(pers, &pers_list, list) {
|
||||
if (level != LEVEL_NONE && pers->level == level)
|
||||
return pers;
|
||||
if (strcmp(pers->name, clevel)==0)
|
||||
return pers;
|
||||
struct md_personality *ret = NULL;
|
||||
struct md_submodule_head *head;
|
||||
unsigned long i;
|
||||
|
||||
xa_lock(&md_submodule);
|
||||
xa_for_each(&md_submodule, i, head) {
|
||||
if (head->type != MD_PERSONALITY)
|
||||
continue;
|
||||
if ((level != LEVEL_NONE && head->id == level) ||
|
||||
!strcmp(head->name, clevel)) {
|
||||
if (try_module_get(head->owner))
|
||||
ret = (void *)head;
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
xa_unlock(&md_submodule);
|
||||
|
||||
if (!ret) {
|
||||
if (level != LEVEL_NONE)
|
||||
pr_warn("md: personality for level %d is not loaded!\n",
|
||||
level);
|
||||
else
|
||||
pr_warn("md: personality for level %s is not loaded!\n",
|
||||
clevel);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void put_pers(struct md_personality *pers)
|
||||
{
|
||||
module_put(pers->head.owner);
|
||||
}
|
||||
|
||||
/* return the offset of the super block in 512byte sectors */
|
||||
|
@ -1180,7 +1204,7 @@ int md_check_no_bitmap(struct mddev *mddev)
|
|||
if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
|
||||
return 0;
|
||||
pr_warn("%s: bitmaps are not supported for %s\n",
|
||||
mdname(mddev), mddev->pers->name);
|
||||
mdname(mddev), mddev->pers->head.name);
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL(md_check_no_bitmap);
|
||||
|
@ -1748,7 +1772,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
|
|||
count <<= sb->bblog_shift;
|
||||
if (bb + 1 == 0)
|
||||
break;
|
||||
if (badblocks_set(&rdev->badblocks, sector, count, 1))
|
||||
if (!badblocks_set(&rdev->badblocks, sector, count, 1))
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (sb->bblog_offset != 0)
|
||||
|
@ -2359,19 +2383,6 @@ int md_integrity_register(struct mddev *mddev)
|
|||
return 0; /* shouldn't register */
|
||||
|
||||
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
|
||||
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
|
||||
(mddev->level != 1 && mddev->level != 10 &&
|
||||
bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
|
||||
/*
|
||||
* No need to handle the failure of bioset_integrity_create,
|
||||
* because the function is called by md_run() -> pers->run(),
|
||||
* md_run calls bioset_exit -> bioset_integrity_free in case
|
||||
* of failure case.
|
||||
*/
|
||||
pr_err("md: failed to create integrity pool for %s\n",
|
||||
mdname(mddev));
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(md_integrity_register);
|
||||
|
@ -2639,11 +2650,11 @@ repeat:
|
|||
force_change = 1;
|
||||
if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
|
||||
nospares = 1;
|
||||
ret = md_cluster_ops->metadata_update_start(mddev);
|
||||
ret = mddev->cluster_ops->metadata_update_start(mddev);
|
||||
/* Has someone else has updated the sb */
|
||||
if (!does_sb_need_changing(mddev)) {
|
||||
if (ret == 0)
|
||||
md_cluster_ops->metadata_update_cancel(mddev);
|
||||
mddev->cluster_ops->metadata_update_cancel(mddev);
|
||||
bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
|
||||
BIT(MD_SB_CHANGE_DEVS) |
|
||||
BIT(MD_SB_CHANGE_CLEAN));
|
||||
|
@ -2783,7 +2794,7 @@ rewrite:
|
|||
/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
|
||||
|
||||
if (mddev_is_clustered(mddev) && ret == 0)
|
||||
md_cluster_ops->metadata_update_finish(mddev);
|
||||
mddev->cluster_ops->metadata_update_finish(mddev);
|
||||
|
||||
if (mddev->in_sync != sync_req ||
|
||||
!bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
|
||||
|
@ -2942,7 +2953,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
|||
else {
|
||||
err = 0;
|
||||
if (mddev_is_clustered(mddev))
|
||||
err = md_cluster_ops->remove_disk(mddev, rdev);
|
||||
err = mddev->cluster_ops->remove_disk(mddev, rdev);
|
||||
|
||||
if (err == 0) {
|
||||
md_kick_rdev_from_array(rdev);
|
||||
|
@ -3052,7 +3063,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
|||
* by this node eventually
|
||||
*/
|
||||
if (!mddev_is_clustered(rdev->mddev) ||
|
||||
(err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
|
||||
(err = mddev->cluster_ops->gather_bitmaps(rdev)) == 0) {
|
||||
clear_bit(Faulty, &rdev->flags);
|
||||
err = add_bound_rdev(rdev);
|
||||
}
|
||||
|
@ -3860,7 +3871,7 @@ level_show(struct mddev *mddev, char *page)
|
|||
spin_lock(&mddev->lock);
|
||||
p = mddev->pers;
|
||||
if (p)
|
||||
ret = sprintf(page, "%s\n", p->name);
|
||||
ret = sprintf(page, "%s\n", p->head.name);
|
||||
else if (mddev->clevel[0])
|
||||
ret = sprintf(page, "%s\n", mddev->clevel);
|
||||
else if (mddev->level != LEVEL_NONE)
|
||||
|
@ -3917,7 +3928,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
|||
rv = -EINVAL;
|
||||
if (!mddev->pers->quiesce) {
|
||||
pr_warn("md: %s: %s does not support online personality change\n",
|
||||
mdname(mddev), mddev->pers->name);
|
||||
mdname(mddev), mddev->pers->head.name);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
|
@ -3931,24 +3942,20 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
|||
|
||||
if (request_module("md-%s", clevel) != 0)
|
||||
request_module("md-level-%s", clevel);
|
||||
spin_lock(&pers_lock);
|
||||
pers = find_pers(level, clevel);
|
||||
if (!pers || !try_module_get(pers->owner)) {
|
||||
spin_unlock(&pers_lock);
|
||||
pr_warn("md: personality %s not loaded\n", clevel);
|
||||
pers = get_pers(level, clevel);
|
||||
if (!pers) {
|
||||
rv = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
spin_unlock(&pers_lock);
|
||||
|
||||
if (pers == mddev->pers) {
|
||||
/* Nothing to do! */
|
||||
module_put(pers->owner);
|
||||
put_pers(pers);
|
||||
rv = len;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (!pers->takeover) {
|
||||
module_put(pers->owner);
|
||||
put_pers(pers);
|
||||
pr_warn("md: %s: %s does not support personality takeover\n",
|
||||
mdname(mddev), clevel);
|
||||
rv = -EINVAL;
|
||||
|
@ -3969,7 +3976,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
|||
mddev->raid_disks -= mddev->delta_disks;
|
||||
mddev->delta_disks = 0;
|
||||
mddev->reshape_backwards = 0;
|
||||
module_put(pers->owner);
|
||||
put_pers(pers);
|
||||
pr_warn("md: %s: %s would not accept array\n",
|
||||
mdname(mddev), clevel);
|
||||
rv = PTR_ERR(priv);
|
||||
|
@ -3984,7 +3991,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
|||
oldpriv = mddev->private;
|
||||
mddev->pers = pers;
|
||||
mddev->private = priv;
|
||||
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel));
|
||||
mddev->level = mddev->new_level;
|
||||
mddev->layout = mddev->new_layout;
|
||||
mddev->chunk_sectors = mddev->new_chunk_sectors;
|
||||
|
@ -4026,7 +4033,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
|||
mddev->to_remove = &md_redundancy_group;
|
||||
}
|
||||
|
||||
module_put(oldpers->owner);
|
||||
put_pers(oldpers);
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (rdev->raid_disk < 0)
|
||||
|
@ -5584,7 +5591,7 @@ __ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
|
|||
|
||||
static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
|
||||
{
|
||||
if (mddev->pers == NULL || (mddev->pers->level != 1))
|
||||
if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1))
|
||||
return sprintf(page, "n/a\n");
|
||||
else
|
||||
return sprintf(page, "%d\n", mddev->serialize_policy);
|
||||
|
@ -5610,7 +5617,7 @@ serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
|
|||
err = mddev_suspend_and_lock(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
if (mddev->pers == NULL || (mddev->pers->level != 1)) {
|
||||
if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1)) {
|
||||
pr_err("md: serialize_policy is only effective for raid1\n");
|
||||
err = -EINVAL;
|
||||
goto unlock;
|
||||
|
@ -6096,30 +6103,21 @@ int md_run(struct mddev *mddev)
|
|||
goto exit_sync_set;
|
||||
}
|
||||
|
||||
spin_lock(&pers_lock);
|
||||
pers = find_pers(mddev->level, mddev->clevel);
|
||||
if (!pers || !try_module_get(pers->owner)) {
|
||||
spin_unlock(&pers_lock);
|
||||
if (mddev->level != LEVEL_NONE)
|
||||
pr_warn("md: personality for level %d is not loaded!\n",
|
||||
mddev->level);
|
||||
else
|
||||
pr_warn("md: personality for level %s is not loaded!\n",
|
||||
mddev->clevel);
|
||||
pers = get_pers(mddev->level, mddev->clevel);
|
||||
if (!pers) {
|
||||
err = -EINVAL;
|
||||
goto abort;
|
||||
}
|
||||
spin_unlock(&pers_lock);
|
||||
if (mddev->level != pers->level) {
|
||||
mddev->level = pers->level;
|
||||
mddev->new_level = pers->level;
|
||||
if (mddev->level != pers->head.id) {
|
||||
mddev->level = pers->head.id;
|
||||
mddev->new_level = pers->head.id;
|
||||
}
|
||||
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel));
|
||||
|
||||
if (mddev->reshape_position != MaxSector &&
|
||||
pers->start_reshape == NULL) {
|
||||
/* This personality cannot handle reshaping... */
|
||||
module_put(pers->owner);
|
||||
put_pers(pers);
|
||||
err = -EINVAL;
|
||||
goto abort;
|
||||
}
|
||||
|
@ -6246,7 +6244,7 @@ bitmap_abort:
|
|||
if (mddev->private)
|
||||
pers->free(mddev, mddev->private);
|
||||
mddev->private = NULL;
|
||||
module_put(pers->owner);
|
||||
put_pers(pers);
|
||||
mddev->bitmap_ops->destroy(mddev);
|
||||
abort:
|
||||
bioset_exit(&mddev->io_clone_set);
|
||||
|
@ -6467,7 +6465,7 @@ static void __md_stop(struct mddev *mddev)
|
|||
mddev->private = NULL;
|
||||
if (pers->sync_request && mddev->to_remove == NULL)
|
||||
mddev->to_remove = &md_redundancy_group;
|
||||
module_put(pers->owner);
|
||||
put_pers(pers);
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
|
||||
bioset_exit(&mddev->bio_set);
|
||||
|
@ -6983,7 +6981,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
|||
set_bit(Candidate, &rdev->flags);
|
||||
else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
|
||||
/* --add initiated by this node */
|
||||
err = md_cluster_ops->add_new_disk(mddev, rdev);
|
||||
err = mddev->cluster_ops->add_new_disk(mddev, rdev);
|
||||
if (err) {
|
||||
export_rdev(rdev, mddev);
|
||||
return err;
|
||||
|
@ -7000,14 +6998,14 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
|||
if (mddev_is_clustered(mddev)) {
|
||||
if (info->state & (1 << MD_DISK_CANDIDATE)) {
|
||||
if (!err) {
|
||||
err = md_cluster_ops->new_disk_ack(mddev,
|
||||
err == 0);
|
||||
err = mddev->cluster_ops->new_disk_ack(
|
||||
mddev, err == 0);
|
||||
if (err)
|
||||
md_kick_rdev_from_array(rdev);
|
||||
}
|
||||
} else {
|
||||
if (err)
|
||||
md_cluster_ops->add_new_disk_cancel(mddev);
|
||||
mddev->cluster_ops->add_new_disk_cancel(mddev);
|
||||
else
|
||||
err = add_bound_rdev(rdev);
|
||||
}
|
||||
|
@ -7087,10 +7085,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
|
|||
goto busy;
|
||||
|
||||
kick_rdev:
|
||||
if (mddev_is_clustered(mddev)) {
|
||||
if (md_cluster_ops->remove_disk(mddev, rdev))
|
||||
if (mddev_is_clustered(mddev) &&
|
||||
mddev->cluster_ops->remove_disk(mddev, rdev))
|
||||
goto busy;
|
||||
}
|
||||
|
||||
md_kick_rdev_from_array(rdev);
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
|
@ -7393,7 +7390,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
|
|||
rv = mddev->pers->resize(mddev, num_sectors);
|
||||
if (!rv) {
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->update_size(mddev, old_dev_sectors);
|
||||
mddev->cluster_ops->update_size(mddev, old_dev_sectors);
|
||||
else if (!mddev_is_dm(mddev))
|
||||
set_capacity_and_notify(mddev->gendisk,
|
||||
mddev->array_sectors);
|
||||
|
@ -7441,6 +7438,28 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
|
|||
return rv;
|
||||
}
|
||||
|
||||
static int get_cluster_ops(struct mddev *mddev)
|
||||
{
|
||||
xa_lock(&md_submodule);
|
||||
mddev->cluster_ops = xa_load(&md_submodule, ID_CLUSTER);
|
||||
if (mddev->cluster_ops &&
|
||||
!try_module_get(mddev->cluster_ops->head.owner))
|
||||
mddev->cluster_ops = NULL;
|
||||
xa_unlock(&md_submodule);
|
||||
|
||||
return mddev->cluster_ops == NULL ? -ENOENT : 0;
|
||||
}
|
||||
|
||||
static void put_cluster_ops(struct mddev *mddev)
|
||||
{
|
||||
if (!mddev->cluster_ops)
|
||||
return;
|
||||
|
||||
mddev->cluster_ops->leave(mddev);
|
||||
module_put(mddev->cluster_ops->head.owner);
|
||||
mddev->cluster_ops = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* update_array_info is used to change the configuration of an
|
||||
* on-line array.
|
||||
|
@ -7549,16 +7568,15 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
|
|||
|
||||
if (mddev->bitmap_info.nodes) {
|
||||
/* hold PW on all the bitmap lock */
|
||||
if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
|
||||
if (mddev->cluster_ops->lock_all_bitmaps(mddev) <= 0) {
|
||||
pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
|
||||
rv = -EPERM;
|
||||
md_cluster_ops->unlock_all_bitmaps(mddev);
|
||||
mddev->cluster_ops->unlock_all_bitmaps(mddev);
|
||||
goto err;
|
||||
}
|
||||
|
||||
mddev->bitmap_info.nodes = 0;
|
||||
md_cluster_ops->leave(mddev);
|
||||
module_put(md_cluster_mod);
|
||||
put_cluster_ops(mddev);
|
||||
mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
|
||||
}
|
||||
mddev->bitmap_ops->destroy(mddev);
|
||||
|
@ -7842,7 +7860,7 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
|||
|
||||
case CLUSTERED_DISK_NACK:
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->new_disk_ack(mddev, false);
|
||||
mddev->cluster_ops->new_disk_ack(mddev, false);
|
||||
else
|
||||
err = -EINVAL;
|
||||
goto unlock;
|
||||
|
@ -8124,7 +8142,8 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
|
|||
return;
|
||||
mddev->pers->error_handler(mddev, rdev);
|
||||
|
||||
if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
|
||||
if (mddev->pers->head.id == ID_RAID0 ||
|
||||
mddev->pers->head.id == ID_LINEAR)
|
||||
return;
|
||||
|
||||
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
|
||||
|
@ -8162,14 +8181,17 @@ static void status_unused(struct seq_file *seq)
|
|||
|
||||
static void status_personalities(struct seq_file *seq)
|
||||
{
|
||||
struct md_personality *pers;
|
||||
struct md_submodule_head *head;
|
||||
unsigned long i;
|
||||
|
||||
seq_puts(seq, "Personalities : ");
|
||||
spin_lock(&pers_lock);
|
||||
list_for_each_entry(pers, &pers_list, list)
|
||||
seq_printf(seq, "[%s] ", pers->name);
|
||||
|
||||
spin_unlock(&pers_lock);
|
||||
xa_lock(&md_submodule);
|
||||
xa_for_each(&md_submodule, i, head)
|
||||
if (head->type == MD_PERSONALITY)
|
||||
seq_printf(seq, "[%s] ", head->name);
|
||||
xa_unlock(&md_submodule);
|
||||
|
||||
seq_puts(seq, "\n");
|
||||
}
|
||||
|
||||
|
@ -8392,7 +8414,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
|
|||
seq_printf(seq, " (read-only)");
|
||||
if (mddev->ro == MD_AUTO_READ)
|
||||
seq_printf(seq, " (auto-read-only)");
|
||||
seq_printf(seq, " %s", mddev->pers->name);
|
||||
seq_printf(seq, " %s", mddev->pers->head.name);
|
||||
} else {
|
||||
seq_printf(seq, "inactive");
|
||||
}
|
||||
|
@ -8461,9 +8483,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
|
|||
if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
|
||||
status_unused(seq);
|
||||
|
||||
if (atomic_dec_and_test(&mddev->active))
|
||||
__mddev_put(mddev);
|
||||
|
||||
mddev_put_locked(mddev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -8514,67 +8534,34 @@ static const struct proc_ops mdstat_proc_ops = {
|
|||
.proc_poll = mdstat_poll,
|
||||
};
|
||||
|
||||
int register_md_personality(struct md_personality *p)
|
||||
int register_md_submodule(struct md_submodule_head *msh)
|
||||
{
|
||||
pr_debug("md: %s personality registered for level %d\n",
|
||||
p->name, p->level);
|
||||
spin_lock(&pers_lock);
|
||||
list_add_tail(&p->list, &pers_list);
|
||||
spin_unlock(&pers_lock);
|
||||
return 0;
|
||||
return xa_insert(&md_submodule, msh->id, msh, GFP_KERNEL);
|
||||
}
|
||||
EXPORT_SYMBOL(register_md_personality);
|
||||
EXPORT_SYMBOL_GPL(register_md_submodule);
|
||||
|
||||
int unregister_md_personality(struct md_personality *p)
|
||||
void unregister_md_submodule(struct md_submodule_head *msh)
|
||||
{
|
||||
pr_debug("md: %s personality unregistered\n", p->name);
|
||||
spin_lock(&pers_lock);
|
||||
list_del_init(&p->list);
|
||||
spin_unlock(&pers_lock);
|
||||
return 0;
|
||||
xa_erase(&md_submodule, msh->id);
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_md_personality);
|
||||
|
||||
int register_md_cluster_operations(const struct md_cluster_operations *ops,
|
||||
struct module *module)
|
||||
{
|
||||
int ret = 0;
|
||||
spin_lock(&pers_lock);
|
||||
if (md_cluster_ops != NULL)
|
||||
ret = -EALREADY;
|
||||
else {
|
||||
md_cluster_ops = ops;
|
||||
md_cluster_mod = module;
|
||||
}
|
||||
spin_unlock(&pers_lock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(register_md_cluster_operations);
|
||||
|
||||
int unregister_md_cluster_operations(void)
|
||||
{
|
||||
spin_lock(&pers_lock);
|
||||
md_cluster_ops = NULL;
|
||||
spin_unlock(&pers_lock);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_md_cluster_operations);
|
||||
EXPORT_SYMBOL_GPL(unregister_md_submodule);
|
||||
|
||||
int md_setup_cluster(struct mddev *mddev, int nodes)
{
int ret;
if (!md_cluster_ops)
request_module("md-cluster");
spin_lock(&pers_lock);
/* ensure module won't be unloaded */
if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
pr_warn("can't find md-cluster module or get its reference.\n");
spin_unlock(&pers_lock);
return -ENOENT;
}
spin_unlock(&pers_lock);
int ret = get_cluster_ops(mddev);

ret = md_cluster_ops->join(mddev, nodes);
if (ret) {
request_module("md-cluster");
ret = get_cluster_ops(mddev);
}

/* ensure module won't be unloaded */
if (ret) {
pr_warn("can't find md-cluster module or get its reference.\n");
return ret;
}

ret = mddev->cluster_ops->join(mddev, nodes);
if (!ret)
mddev->safemode_delay = 0;
return ret;
@@ -8582,10 +8569,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes)

void md_cluster_stop(struct mddev *mddev)
{
if (!md_cluster_ops)
return;
md_cluster_ops->leave(mddev);
module_put(md_cluster_mod);
put_cluster_ops(mddev);
}
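The reworked md_setup_cluster()/md_cluster_stop() above lean on get_cluster_ops() and put_cluster_ops(), which are not part of this excerpt. As a rough sketch only, and assuming the cluster operations structure embeds a struct md_submodule_head registered under ID_CLUSTER, those helpers would plausibly look something like this (the actual implementation in the series may differ in detail):

/* Sketch: resolve the registered MD_CLUSTER submodule and pin its module. */
static int get_cluster_ops(struct mddev *mddev)
{
        struct md_submodule_head *head;

        mddev->cluster_ops = NULL;
        xa_lock(&md_submodule);
        head = xa_load(&md_submodule, ID_CLUSTER);
        if (head && head->type == MD_CLUSTER && try_module_get(head->owner))
                /* assumes md_cluster_operations carries the submodule head as a member */
                mddev->cluster_ops = container_of(head,
                                struct md_cluster_operations, head);
        xa_unlock(&md_submodule);

        return mddev->cluster_ops ? 0 : -ENOENT;
}

/* Sketch: mirror of md_cluster_stop()'s old leave + module_put sequence. */
static void put_cluster_ops(struct mddev *mddev)
{
        if (!mddev->cluster_ops)
                return;
        mddev->cluster_ops->leave(mddev);
        module_put(mddev->cluster_ops->head.owner);
        mddev->cluster_ops = NULL;
}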
|
||||
|
||||
static int is_mddev_idle(struct mddev *mddev, int init)
|
||||
|
@ -8978,7 +8962,7 @@ void md_do_sync(struct md_thread *thread)
|
|||
}
|
||||
|
||||
if (mddev_is_clustered(mddev)) {
|
||||
ret = md_cluster_ops->resync_start(mddev);
|
||||
ret = mddev->cluster_ops->resync_start(mddev);
|
||||
if (ret)
|
||||
goto skip;
|
||||
|
||||
|
@ -9005,7 +8989,7 @@ void md_do_sync(struct md_thread *thread)
|
|||
*
|
||||
*/
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->resync_start_notify(mddev);
|
||||
mddev->cluster_ops->resync_start_notify(mddev);
|
||||
do {
|
||||
int mddev2_minor = -1;
|
||||
mddev->curr_resync = MD_RESYNC_DELAYED;
|
||||
|
@ -9460,6 +9444,13 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Check if resync is in progress. */
|
||||
if (mddev->recovery_cp < MaxSector) {
|
||||
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove any failed drives, then add spares if possible. Spares are
|
||||
* also removed and re-added, to allow the personality to fail the
|
||||
|
@ -9476,13 +9467,6 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Check if recovery is in progress. */
|
||||
if (mddev->recovery_cp < MaxSector) {
|
||||
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Delay to choose resync/check/repair in md_do_sync(). */
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
|
||||
return true;
|
||||
|
@ -9789,7 +9773,7 @@ void md_reap_sync_thread(struct mddev *mddev)
|
|||
* call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
|
||||
* clustered raid */
|
||||
if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
|
||||
md_cluster_ops->resync_finish(mddev);
|
||||
mddev->cluster_ops->resync_finish(mddev);
|
||||
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
|
@ -9797,13 +9781,13 @@ void md_reap_sync_thread(struct mddev *mddev)
|
|||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
/*
|
||||
* We call md_cluster_ops->update_size here because sync_size could
|
||||
* We call mddev->cluster_ops->update_size here because sync_size could
|
||||
* be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
|
||||
* so it is time to update size across cluster.
|
||||
*/
|
||||
if (mddev_is_clustered(mddev) && is_reshaped
|
||||
&& !test_bit(MD_CLOSING, &mddev->flags))
|
||||
md_cluster_ops->update_size(mddev, old_dev_sectors);
|
||||
mddev->cluster_ops->update_size(mddev, old_dev_sectors);
|
||||
/* flag recovery needed just to double check */
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_completed);
|
||||
|
@ -9841,12 +9825,11 @@ EXPORT_SYMBOL(md_finish_reshape);
|
|||
|
||||
/* Bad block management */
|
||||
|
||||
/* Returns 1 on success, 0 on failure */
|
||||
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
/* Returns true on success, false on failure */
|
||||
bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
int is_new)
|
||||
{
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
int rv;
|
||||
|
||||
/*
|
||||
* Recording new badblocks for faulty rdev will force unnecessary
|
||||
|
@ -9856,14 +9839,16 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
|||
* avoid it.
|
||||
*/
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
return 1;
|
||||
return true;
|
||||
|
||||
if (is_new)
|
||||
s += rdev->new_data_offset;
|
||||
else
|
||||
s += rdev->data_offset;
|
||||
rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
|
||||
if (rv == 0) {
|
||||
|
||||
if (!badblocks_set(&rdev->badblocks, s, sectors, 0))
|
||||
return false;
|
||||
|
||||
/* Make sure they get written out promptly */
|
||||
if (test_bit(ExternalBbl, &rdev->flags))
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
|
||||
|
@ -9871,35 +9856,34 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
|||
set_mask_bits(&mddev->sb_flags, 0,
|
||||
BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
|
||||
md_wakeup_thread(rdev->mddev->thread);
|
||||
return 1;
|
||||
} else
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rdev_set_badblocks);
|
||||
|
||||
int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
void rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
int is_new)
|
||||
{
|
||||
int rv;
|
||||
if (is_new)
|
||||
s += rdev->new_data_offset;
|
||||
else
|
||||
s += rdev->data_offset;
|
||||
rv = badblocks_clear(&rdev->badblocks, s, sectors);
|
||||
if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
|
||||
|
||||
if (!badblocks_clear(&rdev->badblocks, s, sectors))
|
||||
return;
|
||||
|
||||
if (test_bit(ExternalBbl, &rdev->flags))
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
|
||||
return rv;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
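With rdev_set_badblocks() now returning bool and rdev_clear_badblocks() returning void, callers drop the old 1-on-success/0-on-failure convention. A minimal, hypothetical caller (not taken from this series) illustrating the new contract:

/* Hypothetical helper: record a failed write range or give up on the device. */
static void demo_record_write_error(struct md_rdev *rdev, sector_t sector,
                                    int sectors)
{
        /* false means the bad range could not be recorded */
        if (!rdev_set_badblocks(rdev, sector, sectors, 0))
                md_error(rdev->mddev, rdev);
}

Callers of rdev_clear_badblocks(), by contrast, no longer get a return value to check.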
|
||||
|
||||
static int md_notify_reboot(struct notifier_block *this,
|
||||
unsigned long code, void *x)
|
||||
{
|
||||
struct mddev *mddev, *n;
|
||||
struct mddev *mddev;
|
||||
int need_delay = 0;
|
||||
|
||||
spin_lock(&all_mddevs_lock);
|
||||
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
|
||||
list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
|
||||
if (!mddev_get(mddev))
|
||||
continue;
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
@ -9911,8 +9895,8 @@ static int md_notify_reboot(struct notifier_block *this,
|
|||
mddev_unlock(mddev);
|
||||
}
|
||||
need_delay = 1;
|
||||
mddev_put(mddev);
|
||||
spin_lock(&all_mddevs_lock);
|
||||
mddev_put_locked(mddev);
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
|
@ -10029,7 +10013,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
|
|||
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
|
||||
!(le32_to_cpu(sb->feature_map) &
|
||||
MD_FEATURE_RESHAPE_ACTIVE) &&
|
||||
!md_cluster_ops->resync_status_get(mddev)) {
|
||||
!mddev->cluster_ops->resync_status_get(mddev)) {
|
||||
/*
|
||||
* -1 to make raid1_add_disk() set conf->fullsync
|
||||
* to 1. This could avoid skipping sync when the
|
||||
|
@ -10245,7 +10229,7 @@ void md_autostart_arrays(int part)
|
|||
|
||||
static __exit void md_exit(void)
|
||||
{
|
||||
struct mddev *mddev, *n;
|
||||
struct mddev *mddev;
|
||||
int delay = 1;
|
||||
|
||||
unregister_blkdev(MD_MAJOR,"md");
|
||||
|
@ -10266,7 +10250,7 @@ static __exit void md_exit(void)
|
|||
remove_proc_entry("mdstat", NULL);
|
||||
|
||||
spin_lock(&all_mddevs_lock);
|
||||
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
|
||||
list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
|
||||
if (!mddev_get(mddev))
|
||||
continue;
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
@ -10278,8 +10262,8 @@ static __exit void md_exit(void)
|
|||
* the mddev for destruction by a workqueue, and the
|
||||
* destroy_workqueue() below will wait for that to complete.
|
||||
*/
|
||||
mddev_put(mddev);
|
||||
spin_lock(&all_mddevs_lock);
|
||||
mddev_put_locked(mddev);
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
|
|
|
@ -18,11 +18,37 @@
|
|||
#include <linux/timer.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/raid/md_u.h>
|
||||
#include <trace/events/block.h>
|
||||
#include "md-cluster.h"
|
||||
|
||||
#define MaxSector (~(sector_t)0)
|
||||
|
||||
enum md_submodule_type {
MD_PERSONALITY = 0,
MD_CLUSTER,
MD_BITMAP, /* TODO */
};

enum md_submodule_id {
ID_LINEAR = LEVEL_LINEAR,
ID_RAID0 = 0,
ID_RAID1 = 1,
ID_RAID4 = 4,
ID_RAID5 = 5,
ID_RAID6 = 6,
ID_RAID10 = 10,
ID_CLUSTER,
ID_BITMAP, /* TODO */
ID_LLBITMAP, /* TODO */
};

struct md_submodule_head {
enum md_submodule_type type;
enum md_submodule_id id;
const char *name;
struct module *owner;
};
|
||||
|
||||
/*
|
||||
* These flags should really be called "NO_RETRY" rather than
|
||||
* "FAILFAST" because they don't make any promise about time lapse,
|
||||
|
@ -266,8 +292,8 @@ enum flag_bits {
|
|||
Nonrot, /* non-rotational device (SSD) */
|
||||
};
|
||||
|
||||
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
sector_t *first_bad, int *bad_sectors)
|
||||
static inline int is_badblock(struct md_rdev *rdev, sector_t s, sector_t sectors,
|
||||
sector_t *first_bad, sector_t *bad_sectors)
|
||||
{
|
||||
if (unlikely(rdev->badblocks.count)) {
|
||||
int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
|
||||
|
@ -284,16 +310,17 @@ static inline int rdev_has_badblock(struct md_rdev *rdev, sector_t s,
|
|||
int sectors)
|
||||
{
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
|
||||
return is_badblock(rdev, s, sectors, &first_bad, &bad_sectors);
|
||||
}
|
||||
|
||||
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
extern bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
int is_new);
|
||||
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
extern void rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
int is_new);
|
||||
struct md_cluster_info;
|
||||
struct md_cluster_operations;
|
||||
|
||||
/**
|
||||
* enum mddev_flags - md device flags.
|
||||
|
@ -576,6 +603,7 @@ struct mddev {
|
|||
mempool_t *serial_info_pool;
|
||||
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
|
||||
struct md_cluster_info *cluster_info;
|
||||
struct md_cluster_operations *cluster_ops;
|
||||
unsigned int good_device_nr; /* good device num within cluster raid */
|
||||
unsigned int noio_flag; /* for memalloc scope API */
|
||||
|
||||
|
@ -699,10 +727,8 @@ static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
|
|||
|
||||
struct md_personality
|
||||
{
|
||||
char *name;
|
||||
int level;
|
||||
struct list_head list;
|
||||
struct module *owner;
|
||||
struct md_submodule_head head;
|
||||
|
||||
bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
|
||||
/*
|
||||
* start up works that do NOT require md_thread. tasks that
|
||||
|
@ -843,13 +869,9 @@ static inline void safe_put_page(struct page *p)
|
|||
if (p) put_page(p);
|
||||
}
|
||||
|
||||
extern int register_md_personality(struct md_personality *p);
|
||||
extern int unregister_md_personality(struct md_personality *p);
|
||||
extern int register_md_cluster_operations(const struct md_cluster_operations *ops,
|
||||
struct module *module);
|
||||
extern int unregister_md_cluster_operations(void);
|
||||
extern int md_setup_cluster(struct mddev *mddev, int nodes);
|
||||
extern void md_cluster_stop(struct mddev *mddev);
|
||||
int register_md_submodule(struct md_submodule_head *msh);
|
||||
void unregister_md_submodule(struct md_submodule_head *msh);
|
||||
|
||||
extern struct md_thread *md_register_thread(
|
||||
void (*run)(struct md_thread *thread),
|
||||
struct mddev *mddev,
|
||||
|
@ -906,7 +928,6 @@ extern void md_idle_sync_thread(struct mddev *mddev);
|
|||
extern void md_frozen_sync_thread(struct mddev *mddev);
|
||||
extern void md_unfrozen_sync_thread(struct mddev *mddev);
|
||||
|
||||
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
|
||||
extern void md_update_sb(struct mddev *mddev, int force);
|
||||
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev);
|
||||
extern void mddev_destroy_serial_pool(struct mddev *mddev,
|
||||
|
@ -928,7 +949,6 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
|
|||
}
|
||||
}
|
||||
|
||||
extern const struct md_cluster_operations *md_cluster_ops;
|
||||
static inline int mddev_is_clustered(struct mddev *mddev)
|
||||
{
|
||||
return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
|
||||
|
|
|
@ -809,9 +809,13 @@ static void raid0_quiesce(struct mddev *mddev, int quiesce)
|
|||
|
||||
static struct md_personality raid0_personality=
|
||||
{
|
||||
.head = {
|
||||
.type = MD_PERSONALITY,
|
||||
.id = ID_RAID0,
|
||||
.name = "raid0",
|
||||
.level = 0,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.make_request = raid0_make_request,
|
||||
.run = raid0_run,
|
||||
.free = raid0_free,
|
||||
|
@ -822,14 +826,14 @@ static struct md_personality raid0_personality=
|
|||
.error_handler = raid0_error,
|
||||
};
|
||||
|
||||
static int __init raid0_init (void)
|
||||
static int __init raid0_init(void)
|
||||
{
|
||||
return register_md_personality (&raid0_personality);
|
||||
return register_md_submodule(&raid0_personality.head);
|
||||
}
|
||||
|
||||
static void raid0_exit (void)
|
||||
static void __exit raid0_exit(void)
|
||||
{
|
||||
unregister_md_personality (&raid0_personality);
|
||||
unregister_md_submodule(&raid0_personality.head);
|
||||
}
|
||||
|
||||
module_init(raid0_init);
|
||||
|
|
|
@ -247,7 +247,7 @@ static inline int raid1_check_read_range(struct md_rdev *rdev,
|
|||
sector_t this_sector, int *len)
|
||||
{
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
|
||||
/* no bad block overlap */
|
||||
if (!is_badblock(rdev, this_sector, *len, &first_bad, &bad_sectors))
|
||||
|
@ -287,7 +287,7 @@ static inline bool raid1_should_read_first(struct mddev *mddev,
|
|||
return true;
|
||||
|
||||
if (mddev_is_clustered(mddev) &&
|
||||
md_cluster_ops->area_resyncing(mddev, READ, this_sector,
|
||||
mddev->cluster_ops->area_resyncing(mddev, READ, this_sector,
|
||||
this_sector + len))
|
||||
return true;
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "md.h"
|
||||
#include "raid1.h"
|
||||
#include "md-bitmap.h"
|
||||
#include "md-cluster.h"
|
||||
|
||||
#define UNSUPPORTED_MDDEV_FLAGS \
|
||||
((1L << MD_HAS_JOURNAL) | \
|
||||
|
@ -45,6 +46,7 @@
|
|||
|
||||
static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
|
||||
static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
|
||||
static void raid1_free(struct mddev *mddev, void *priv);
|
||||
|
||||
#define RAID_1_10_NAME "raid1"
|
||||
#include "raid1-10.c"
|
||||
|
@ -1315,8 +1317,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
|||
struct r1conf *conf = mddev->private;
|
||||
struct raid1_info *mirror;
|
||||
struct bio *read_bio;
|
||||
const enum req_op op = bio_op(bio);
|
||||
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
|
||||
int max_sectors;
|
||||
int rdisk, error;
|
||||
bool r1bio_existed = !!r1_bio;
|
||||
|
@ -1404,7 +1404,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
|||
read_bio->bi_iter.bi_sector = r1_bio->sector +
|
||||
mirror->rdev->data_offset;
|
||||
read_bio->bi_end_io = raid1_end_read_request;
|
||||
read_bio->bi_opf = op | do_sync;
|
||||
if (test_bit(FailFast, &mirror->rdev->flags) &&
|
||||
test_bit(R1BIO_FailFast, &r1_bio->state))
|
||||
read_bio->bi_opf |= MD_FAILFAST;
|
||||
|
@ -1467,7 +1466,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
|||
bool is_discard = (bio_op(bio) == REQ_OP_DISCARD);
|
||||
|
||||
if (mddev_is_clustered(mddev) &&
|
||||
md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
mddev->cluster_ops->area_resyncing(mddev, WRITE,
|
||||
bio->bi_iter.bi_sector, bio_end_sector(bio))) {
|
||||
|
||||
DEFINE_WAIT(w);
|
||||
|
@ -1478,7 +1477,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
|||
for (;;) {
|
||||
prepare_to_wait(&conf->wait_barrier,
|
||||
&w, TASK_IDLE);
|
||||
if (!md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
if (!mddev->cluster_ops->area_resyncing(mddev, WRITE,
|
||||
bio->bi_iter.bi_sector,
|
||||
bio_end_sector(bio)))
|
||||
break;
|
||||
|
@ -1537,7 +1536,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
|||
atomic_inc(&rdev->nr_pending);
|
||||
if (test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
int is_bad;
|
||||
|
||||
is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
|
||||
|
@ -1653,8 +1652,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
|||
|
||||
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
|
||||
mbio->bi_end_io = raid1_end_write_request;
|
||||
mbio->bi_opf = bio_op(bio) |
|
||||
(bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC));
|
||||
if (test_bit(FailFast, &rdev->flags) &&
|
||||
!test_bit(WriteMostly, &rdev->flags) &&
|
||||
conf->raid_disks - mddev->degraded > 1)
|
||||
|
@ -2486,7 +2483,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
|
|||
}
|
||||
}
|
||||
|
||||
static int narrow_write_error(struct r1bio *r1_bio, int i)
|
||||
static bool narrow_write_error(struct r1bio *r1_bio, int i)
|
||||
{
|
||||
struct mddev *mddev = r1_bio->mddev;
|
||||
struct r1conf *conf = mddev->private;
|
||||
|
@ -2507,10 +2504,10 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
|
|||
sector_t sector;
|
||||
int sectors;
|
||||
int sect_to_write = r1_bio->sectors;
|
||||
int ok = 1;
|
||||
bool ok = true;
|
||||
|
||||
if (rdev->badblocks.shift < 0)
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
block_sectors = roundup(1 << rdev->badblocks.shift,
|
||||
bdev_logical_block_size(rdev->bdev) >> 9);
|
||||
|
@ -2886,7 +2883,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
} else {
|
||||
/* may need to read from here */
|
||||
sector_t first_bad = MaxSector;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
|
||||
if (is_badblock(rdev, sector_nr, good_sectors,
|
||||
&first_bad, &bad_sectors)) {
|
||||
|
@ -3038,7 +3035,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
conf->cluster_sync_low = mddev->curr_resync_completed;
|
||||
conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
|
||||
/* Send resync message */
|
||||
md_cluster_ops->resync_info_update(mddev,
|
||||
mddev->cluster_ops->resync_info_update(mddev,
|
||||
conf->cluster_sync_low,
|
||||
conf->cluster_sync_high);
|
||||
}
|
||||
|
@ -3256,9 +3253,12 @@ static int raid1_run(struct mddev *mddev)
|
|||
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
ret = raid1_set_limits(mddev);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
if (!mddev->private)
|
||||
raid1_free(mddev, conf);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
mddev->degraded = 0;
|
||||
for (i = 0; i < conf->raid_disks; i++)
|
||||
|
@ -3271,6 +3271,8 @@ static int raid1_run(struct mddev *mddev)
|
|||
*/
|
||||
if (conf->raid_disks - mddev->degraded < 1) {
|
||||
md_unregister_thread(mddev, &conf->thread);
|
||||
if (!mddev->private)
|
||||
raid1_free(mddev, conf);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -3491,9 +3493,13 @@ static void *raid1_takeover(struct mddev *mddev)
|
|||
|
||||
static struct md_personality raid1_personality =
|
||||
{
|
||||
.head = {
|
||||
.type = MD_PERSONALITY,
|
||||
.id = ID_RAID1,
|
||||
.name = "raid1",
|
||||
.level = 1,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.make_request = raid1_make_request,
|
||||
.run = raid1_run,
|
||||
.free = raid1_free,
|
||||
|
@ -3510,18 +3516,18 @@ static struct md_personality raid1_personality =
|
|||
.takeover = raid1_takeover,
|
||||
};
|
||||
|
||||
static int __init raid_init(void)
|
||||
static int __init raid1_init(void)
|
||||
{
|
||||
return register_md_personality(&raid1_personality);
|
||||
return register_md_submodule(&raid1_personality.head);
|
||||
}
|
||||
|
||||
static void raid_exit(void)
|
||||
static void __exit raid1_exit(void)
|
||||
{
|
||||
unregister_md_personality(&raid1_personality);
|
||||
unregister_md_submodule(&raid1_personality.head);
|
||||
}
|
||||
|
||||
module_init(raid_init);
|
||||
module_exit(raid_exit);
|
||||
module_init(raid1_init);
|
||||
module_exit(raid1_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
|
||||
MODULE_ALIAS("md-personality-3"); /* RAID1 */
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "raid10.h"
|
||||
#include "raid0.h"
|
||||
#include "md-bitmap.h"
|
||||
#include "md-cluster.h"
|
||||
|
||||
/*
|
||||
* RAID10 provides a combination of RAID0 and RAID1 functionality.
|
||||
|
@ -747,7 +748,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
|
|||
|
||||
for (slot = 0; slot < conf->copies ; slot++) {
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
sector_t dev_sector;
|
||||
unsigned int pending;
|
||||
bool nonrot;
|
||||
|
@ -1146,8 +1147,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
|||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct bio *read_bio;
|
||||
const enum req_op op = bio_op(bio);
|
||||
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
|
||||
int max_sectors;
|
||||
struct md_rdev *rdev;
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
@ -1228,7 +1227,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
|||
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
|
||||
choose_data_offset(r10_bio, rdev);
|
||||
read_bio->bi_end_io = raid10_end_read_request;
|
||||
read_bio->bi_opf = op | do_sync;
|
||||
if (test_bit(FailFast, &rdev->flags) &&
|
||||
test_bit(R10BIO_FailFast, &r10_bio->state))
|
||||
read_bio->bi_opf |= MD_FAILFAST;
|
||||
|
@ -1247,10 +1245,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
|||
struct bio *bio, bool replacement,
|
||||
int n_copy)
|
||||
{
|
||||
const enum req_op op = bio_op(bio);
|
||||
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
|
||||
const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
|
||||
const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC;
|
||||
unsigned long flags;
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct md_rdev *rdev;
|
||||
|
@ -1269,7 +1263,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
|||
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
|
||||
choose_data_offset(r10_bio, rdev));
|
||||
mbio->bi_end_io = raid10_end_write_request;
|
||||
mbio->bi_opf = op | do_sync | do_fua | do_atomic;
|
||||
if (!replacement && test_bit(FailFast,
|
||||
&conf->mirrors[devnum].rdev->flags)
|
||||
&& enough(conf, devnum))
|
||||
|
@ -1355,7 +1348,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
|||
int error;
|
||||
|
||||
if ((mddev_is_clustered(mddev) &&
|
||||
md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
mddev->cluster_ops->area_resyncing(mddev, WRITE,
|
||||
bio->bi_iter.bi_sector,
|
||||
bio_end_sector(bio)))) {
|
||||
DEFINE_WAIT(w);
|
||||
|
@ -1367,7 +1360,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
|||
for (;;) {
|
||||
prepare_to_wait(&conf->wait_barrier,
|
||||
&w, TASK_IDLE);
|
||||
if (!md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
if (!mddev->cluster_ops->area_resyncing(mddev, WRITE,
|
||||
bio->bi_iter.bi_sector, bio_end_sector(bio)))
|
||||
break;
|
||||
schedule();
|
||||
|
@ -1438,7 +1431,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
|||
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
sector_t first_bad;
|
||||
sector_t dev_sector = r10_bio->devs[i].addr;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
int is_bad;
|
||||
|
||||
is_bad = is_badblock(rdev, dev_sector, max_sectors,
|
||||
|
@ -1631,11 +1624,10 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
|
|||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
return -EAGAIN;
|
||||
|
||||
if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
|
||||
if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
|
||||
bio_wouldblock_error(bio);
|
||||
return 0;
|
||||
}
|
||||
wait_barrier(conf, false);
|
||||
|
||||
/*
|
||||
* Check reshape again to avoid reshape happens after checking
|
||||
|
@ -2786,7 +2778,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
|||
}
|
||||
}
|
||||
|
||||
static int narrow_write_error(struct r10bio *r10_bio, int i)
|
||||
static bool narrow_write_error(struct r10bio *r10_bio, int i)
|
||||
{
|
||||
struct bio *bio = r10_bio->master_bio;
|
||||
struct mddev *mddev = r10_bio->mddev;
|
||||
|
@ -2807,10 +2799,10 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
|
|||
sector_t sector;
|
||||
int sectors;
|
||||
int sect_to_write = r10_bio->sectors;
|
||||
int ok = 1;
|
||||
bool ok = true;
|
||||
|
||||
if (rdev->badblocks.shift < 0)
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
block_sectors = roundup(1 << rdev->badblocks.shift,
|
||||
bdev_logical_block_size(rdev->bdev) >> 9);
|
||||
|
@ -3413,7 +3405,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
sector_t from_addr, to_addr;
|
||||
struct md_rdev *rdev = conf->mirrors[d].rdev;
|
||||
sector_t sector, first_bad;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
if (!rdev ||
|
||||
!test_bit(In_sync, &rdev->flags))
|
||||
continue;
|
||||
|
@ -3609,7 +3601,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
for (i = 0; i < conf->copies; i++) {
|
||||
int d = r10_bio->devs[i].devnum;
|
||||
sector_t first_bad, sector;
|
||||
int bad_sectors;
|
||||
sector_t bad_sectors;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
if (r10_bio->devs[i].repl_bio)
|
||||
|
@ -3716,7 +3708,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
conf->cluster_sync_low = mddev->curr_resync_completed;
|
||||
raid10_set_cluster_sync_high(conf);
|
||||
/* Send resync message */
|
||||
md_cluster_ops->resync_info_update(mddev,
|
||||
mddev->cluster_ops->resync_info_update(mddev,
|
||||
conf->cluster_sync_low,
|
||||
conf->cluster_sync_high);
|
||||
}
|
||||
|
@ -3749,7 +3741,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
}
|
||||
if (broadcast_msg) {
|
||||
raid10_set_cluster_sync_high(conf);
|
||||
md_cluster_ops->resync_info_update(mddev,
|
||||
mddev->cluster_ops->resync_info_update(mddev,
|
||||
conf->cluster_sync_low,
|
||||
conf->cluster_sync_high);
|
||||
}
|
||||
|
@ -4541,7 +4533,7 @@ static int raid10_start_reshape(struct mddev *mddev)
|
|||
if (ret)
|
||||
goto abort;
|
||||
|
||||
ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
|
||||
ret = mddev->cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
|
||||
if (ret) {
|
||||
mddev->bitmap_ops->resize(mddev, oldsize, 0, false);
|
||||
goto abort;
|
||||
|
@ -4832,7 +4824,7 @@ read_more:
|
|||
conf->cluster_sync_low = sb_reshape_pos;
|
||||
}
|
||||
|
||||
md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
|
||||
mddev->cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
|
||||
conf->cluster_sync_high);
|
||||
}
|
||||
|
||||
|
@ -4977,7 +4969,7 @@ static void raid10_update_reshape_pos(struct mddev *mddev)
|
|||
struct r10conf *conf = mddev->private;
|
||||
sector_t lo, hi;
|
||||
|
||||
md_cluster_ops->resync_info_get(mddev, &lo, &hi);
|
||||
mddev->cluster_ops->resync_info_get(mddev, &lo, &hi);
|
||||
if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo))
|
||||
|| mddev->reshape_position == MaxSector)
|
||||
conf->reshape_progress = mddev->reshape_position;
|
||||
|
@ -5123,9 +5115,13 @@ static void raid10_finish_reshape(struct mddev *mddev)
|
|||
|
||||
static struct md_personality raid10_personality =
|
||||
{
|
||||
.head = {
|
||||
.type = MD_PERSONALITY,
|
||||
.id = ID_RAID10,
|
||||
.name = "raid10",
|
||||
.level = 10,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.make_request = raid10_make_request,
|
||||
.run = raid10_run,
|
||||
.free = raid10_free,
|
||||
|
@ -5145,18 +5141,18 @@ static struct md_personality raid10_personality =
|
|||
.update_reshape_pos = raid10_update_reshape_pos,
|
||||
};
|
||||
|
||||
static int __init raid_init(void)
|
||||
static int __init raid10_init(void)
|
||||
{
|
||||
return register_md_personality(&raid10_personality);
|
||||
return register_md_submodule(&raid10_personality.head);
|
||||
}
|
||||
|
||||
static void raid_exit(void)
|
||||
static void __exit raid10_exit(void)
|
||||
{
|
||||
unregister_md_personality(&raid10_personality);
|
||||
unregister_md_submodule(&raid10_personality.head);
|
||||
}
|
||||
|
||||
module_init(raid_init);
|
||||
module_exit(raid_exit);
|
||||
module_init(raid10_init);
|
||||
module_exit(raid10_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
|
||||
MODULE_ALIAS("md-personality-9"); /* RAID10 */
|
||||
|
|
|
@ -5858,6 +5858,9 @@ static enum reshape_loc get_reshape_loc(struct mddev *mddev,
|
|||
struct r5conf *conf, sector_t logical_sector)
|
||||
{
|
||||
sector_t reshape_progress, reshape_safe;
|
||||
|
||||
if (likely(conf->reshape_progress == MaxSector))
|
||||
return LOC_NO_RESHAPE;
|
||||
/*
|
||||
* Spinlock is needed as reshape_progress may be
|
||||
* 64bit on a 32bit platform, and so it might be
|
||||
|
@ -5935,22 +5938,19 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
|
|||
const int rw = bio_data_dir(bi);
|
||||
enum stripe_result ret;
|
||||
struct stripe_head *sh;
|
||||
enum reshape_loc loc;
|
||||
sector_t new_sector;
|
||||
int previous = 0, flags = 0;
|
||||
int seq, dd_idx;
|
||||
|
||||
seq = read_seqcount_begin(&conf->gen_lock);
|
||||
|
||||
if (unlikely(conf->reshape_progress != MaxSector)) {
|
||||
enum reshape_loc loc = get_reshape_loc(mddev, conf,
|
||||
logical_sector);
|
||||
loc = get_reshape_loc(mddev, conf, logical_sector);
|
||||
if (loc == LOC_INSIDE_RESHAPE) {
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out;
|
||||
}
|
||||
if (loc == LOC_AHEAD_OF_RESHAPE)
|
||||
previous = 1;
|
||||
}
|
||||
|
||||
new_sector = raid5_compute_sector(conf, logical_sector, previous,
|
||||
&dd_idx, NULL);
|
||||
|
@ -6127,7 +6127,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
|||
|
||||
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
|
||||
if ((bi->bi_opf & REQ_NOWAIT) &&
|
||||
(conf->reshape_progress != MaxSector) &&
|
||||
get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
|
||||
bio_wouldblock_error(bi);
|
||||
if (rw == WRITE)
|
||||
|
@ -8954,9 +8953,13 @@ static void raid5_prepare_suspend(struct mddev *mddev)
|
|||
|
||||
static struct md_personality raid6_personality =
|
||||
{
|
||||
.head = {
|
||||
.type = MD_PERSONALITY,
|
||||
.id = ID_RAID6,
|
||||
.name = "raid6",
|
||||
.level = 6,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.make_request = raid5_make_request,
|
||||
.run = raid5_run,
|
||||
.start = raid5_start,
|
||||
|
@ -8980,9 +8983,13 @@ static struct md_personality raid6_personality =
|
|||
};
|
||||
static struct md_personality raid5_personality =
|
||||
{
|
||||
.head = {
|
||||
.type = MD_PERSONALITY,
|
||||
.id = ID_RAID5,
|
||||
.name = "raid5",
|
||||
.level = 5,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.make_request = raid5_make_request,
|
||||
.run = raid5_run,
|
||||
.start = raid5_start,
|
||||
|
@ -9007,9 +9014,13 @@ static struct md_personality raid5_personality =
|
|||
|
||||
static struct md_personality raid4_personality =
|
||||
{
|
||||
.head = {
|
||||
.type = MD_PERSONALITY,
|
||||
.id = ID_RAID4,
|
||||
.name = "raid4",
|
||||
.level = 4,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.make_request = raid5_make_request,
|
||||
.run = raid5_run,
|
||||
.start = raid5_start,
|
||||
|
@ -9045,21 +9056,39 @@ static int __init raid5_init(void)
|
|||
"md/raid5:prepare",
|
||||
raid456_cpu_up_prepare,
|
||||
raid456_cpu_dead);
|
||||
if (ret) {
|
||||
if (ret)
|
||||
goto err_destroy_wq;
|
||||
|
||||
ret = register_md_submodule(&raid6_personality.head);
|
||||
if (ret)
|
||||
goto err_cpuhp_remove;
|
||||
|
||||
ret = register_md_submodule(&raid5_personality.head);
|
||||
if (ret)
|
||||
goto err_unregister_raid6;
|
||||
|
||||
ret = register_md_submodule(&raid4_personality.head);
|
||||
if (ret)
|
||||
goto err_unregister_raid5;
|
||||
|
||||
return 0;
|
||||
|
||||
err_unregister_raid5:
|
||||
unregister_md_submodule(&raid5_personality.head);
|
||||
err_unregister_raid6:
|
||||
unregister_md_submodule(&raid6_personality.head);
|
||||
err_cpuhp_remove:
|
||||
cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
|
||||
err_destroy_wq:
|
||||
destroy_workqueue(raid5_wq);
|
||||
return ret;
|
||||
}
|
||||
register_md_personality(&raid6_personality);
|
||||
register_md_personality(&raid5_personality);
|
||||
register_md_personality(&raid4_personality);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void raid5_exit(void)
|
||||
static void __exit raid5_exit(void)
|
||||
{
|
||||
unregister_md_personality(&raid6_personality);
|
||||
unregister_md_personality(&raid5_personality);
|
||||
unregister_md_personality(&raid4_personality);
|
||||
unregister_md_submodule(&raid6_personality.head);
|
||||
unregister_md_submodule(&raid5_personality.head);
|
||||
unregister_md_submodule(&raid4_personality.head);
|
||||
cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
|
||||
destroy_workqueue(raid5_wq);
|
||||
}
|
||||
|
|
|
@ -1904,7 +1904,7 @@ static void msb_io_work(struct work_struct *work)
|
|||
|
||||
/* process the request */
|
||||
dbg_verbose("IO: processing new request");
|
||||
blk_rq_map_sg(msb->queue, req, sg);
|
||||
blk_rq_map_sg(req, sg);
|
||||
|
||||
lba = blk_rq_pos(req);
|
||||
|
||||
|
|
|
@ -627,9 +627,7 @@ static int mspro_block_issue_req(struct memstick_dev *card)
|
|||
while (true) {
|
||||
msb->current_page = 0;
|
||||
msb->current_seg = 0;
|
||||
msb->seg_count = blk_rq_map_sg(msb->block_req->q,
|
||||
msb->block_req,
|
||||
msb->req_sg);
|
||||
msb->seg_count = blk_rq_map_sg(msb->block_req, msb->req_sg);
|
||||
|
||||
if (!msb->seg_count) {
|
||||
unsigned int bytes = blk_rq_cur_bytes(msb->block_req);
|
||||
|
|
|
@@ -523,5 +523,5 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq)
{
struct request *req = mmc_queue_req_to_req(mqrq);

return blk_rq_map_sg(mq->queue, req, mqrq->sg);
return blk_rq_map_sg(req, mqrq->sg);
}
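The memstick, MMC, and (below) UBI and NVMe hunks all track the same block-layer change: blk_rq_map_sg() no longer takes the request_queue, only the request and the scatterlist. A hedged sketch of what an updated caller looks like in a hypothetical driver:

/* Hypothetical request-prep path showing the new blk_rq_map_sg() signature. */
static int demo_map_request(struct request *req, struct scatterlist *sgl)
{
        int nents;

        sg_init_table(sgl, blk_rq_nr_phys_segments(req));

        /* previously: nents = blk_rq_map_sg(req->q, req, sgl); */
        nents = blk_rq_map_sg(req, sgl);
        if (!nents)
                return -EIO;

        return nents;
}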
|
||||
|
|
|
@ -84,11 +84,11 @@ static int cqhci_crypto_keyslot_program(struct blk_crypto_profile *profile,
|
|||
|
||||
if (ccap_array[cap_idx].algorithm_id == CQHCI_CRYPTO_ALG_AES_XTS) {
|
||||
/* In XTS mode, the blk_crypto_key's size is already doubled */
|
||||
memcpy(cfg.crypto_key, key->raw, key->size/2);
|
||||
memcpy(cfg.crypto_key, key->bytes, key->size/2);
|
||||
memcpy(cfg.crypto_key + CQHCI_CRYPTO_KEY_MAX_SIZE/2,
|
||||
key->raw + key->size/2, key->size/2);
|
||||
key->bytes + key->size/2, key->size/2);
|
||||
} else {
|
||||
memcpy(cfg.crypto_key, key->raw, key->size);
|
||||
memcpy(cfg.crypto_key, key->bytes, key->size);
|
||||
}
|
||||
|
||||
cqhci_crypto_program_key(cq_host, &cfg, slot);
|
||||
|
@ -204,6 +204,8 @@ int cqhci_crypto_init(struct cqhci_host *cq_host)
|
|||
/* Unfortunately, CQHCI crypto only supports 32 DUN bits. */
|
||||
profile->max_dun_bytes_supported = 4;
|
||||
|
||||
profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW;
|
||||
|
||||
/*
|
||||
* Cache all the crypto capabilities and advertise the supported crypto
|
||||
* modes and data unit sizes to the block layer.
|
||||
|
|
|
@ -1895,6 +1895,7 @@ static int sdhci_msm_ice_init(struct sdhci_msm_host *msm_host,
|
|||
|
||||
profile->ll_ops = sdhci_msm_crypto_ops;
|
||||
profile->max_dun_bytes_supported = 4;
|
||||
profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW;
|
||||
profile->dev = dev;
|
||||
|
||||
/*
|
||||
|
@@ -1968,7 +1969,7 @@ static int sdhci_msm_ice_keyslot_program(struct blk_crypto_profile *profile,
return qcom_ice_program_key(msm_host->ice,
QCOM_ICE_CRYPTO_ALG_AES_XTS,
QCOM_ICE_CRYPTO_KEY_SIZE_256,
key->raw,
key->bytes,
key->crypto_cfg.data_unit_size / 512,
slot);
}
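Both inline-crypto hunks follow a struct blk_crypto_key change: the raw key material is now reached through key->bytes instead of key->raw, and drivers advertise BLK_CRYPTO_KEY_TYPE_RAW in their crypto profile. A hedged sketch of a keyslot-program callback using the renamed field (demo_hw_program_key() and the surrounding driver are hypothetical):

/* Hypothetical keyslot-program callback after the raw -> bytes rename. */
static int demo_keyslot_program(struct blk_crypto_profile *profile,
                                const struct blk_crypto_key *key,
                                unsigned int slot)
{
        /* key->bytes replaces the old key->raw; key->size is unchanged */
        return demo_hw_program_key(key->bytes, key->size,
                                   key->crypto_cfg.data_unit_size / 512, slot);
}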
|
||||
|
|
|
@ -199,7 +199,7 @@ static blk_status_t ubiblock_read(struct request *req)
|
|||
* and ubi_read_sg() will check that limit.
|
||||
*/
|
||||
ubi_sgl_init(&pdu->usgl);
|
||||
blk_rq_map_sg(req->q, req, pdu->usgl.sg);
|
||||
blk_rq_map_sg(req, pdu->usgl.sg);
|
||||
|
||||
while (bytes_left) {
|
||||
/*
|
||||
|
|
|
@ -167,7 +167,7 @@ static void set_badblock(struct badblocks *bb, sector_t s, int num)
|
|||
dev_dbg(bb->dev, "Found a bad range (0x%llx, 0x%llx)\n",
|
||||
(u64) s * 512, (u64) num * 512);
|
||||
/* this isn't an error as the hardware will still throw an exception */
|
||||
if (badblocks_set(bb, s, num, 1))
|
||||
if (!badblocks_set(bb, s, num, 1))
|
||||
dev_info_once(bb->dev, "%s: failed for sector %llx\n",
|
||||
__func__, (u64) s);
|
||||
}
|
||||
|
|
|
@ -673,7 +673,7 @@ static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
|
|||
{
|
||||
if (bb->count) {
|
||||
sector_t first_bad;
|
||||
int num_bad;
|
||||
sector_t num_bad;
|
||||
|
||||
return !!badblocks_check(bb, sector, len / 512, &first_bad,
|
||||
&num_bad);
|
||||
|
|
|
@ -367,9 +367,10 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
|
|||
struct nd_namespace_common *ndns = nd_pfn->ndns;
|
||||
void *zero_page = page_address(ZERO_PAGE(0));
|
||||
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
|
||||
int num_bad, meta_num, rc, bb_present;
|
||||
int meta_num, rc, bb_present;
|
||||
sector_t first_bad, meta_start;
|
||||
struct nd_namespace_io *nsio;
|
||||
sector_t num_bad;
|
||||
|
||||
if (nd_pfn->mode != PFN_MODE_PMEM)
|
||||
return 0;
|
||||
|
@ -394,7 +395,7 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
|
|||
bb_present = badblocks_check(&nd_region->bb, meta_start,
|
||||
meta_num, &first_bad, &num_bad);
|
||||
if (bb_present) {
|
||||
dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n",
|
||||
dev_dbg(&nd_pfn->dev, "meta: %llx badblocks at %llx\n",
|
||||
num_bad, first_bad);
|
||||
nsoff = ALIGN_DOWN((nd_region->ndr_start
|
||||
+ (first_bad << 9)) - nsio->res.start,
|
||||
|
@ -413,7 +414,7 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
|
|||
}
|
||||
if (rc) {
|
||||
dev_err(&nd_pfn->dev,
|
||||
"error clearing %x badblocks at %llx\n",
|
||||
"error clearing %llx badblocks at %llx\n",
|
||||
num_bad, first_bad);
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -249,7 +249,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
|
|||
unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
|
||||
struct badblocks *bb = &pmem->bb;
|
||||
sector_t first_bad;
|
||||
int num_bad;
|
||||
sector_t num_bad;
|
||||
|
||||
if (kaddr)
|
||||
*kaddr = pmem->virt_addr + offset;
|
||||
|
|
|
@ -12,3 +12,4 @@ config NVME_AUTH
|
|||
select CRYPTO_SHA512
|
||||
select CRYPTO_DH
|
||||
select CRYPTO_DH_RFC7919_GROUPS
|
||||
select CRYPTO_HKDF
|
||||
|
|
|
@ -11,9 +11,12 @@
|
|||
#include <linux/unaligned.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/dh.h>
|
||||
#include <crypto/hkdf.h>
|
||||
#include <linux/nvme.h>
|
||||
#include <linux/nvme-auth.h>
|
||||
|
||||
#define HKDF_MAX_HASHLEN 64
|
||||
|
||||
static u32 nvme_dhchap_seqnum;
|
||||
static DEFINE_MUTEX(nvme_dhchap_mutex);
|
||||
|
||||
|
@ -471,5 +474,339 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
|
||||
|
||||
/**
|
||||
* nvme_auth_generate_psk - Generate a PSK for TLS
|
||||
* @hmac_id: Hash function identifier
|
||||
* @skey: Session key
|
||||
* @skey_len: Length of @skey
|
||||
* @c1: Value of challenge C1
|
||||
* @c2: Value of challenge C2
|
||||
* @hash_len: Hash length of the hash algorithm
|
||||
* @ret_psk: Pointer to the resulting generated PSK
|
||||
* @ret_len: length of @ret_psk
|
||||
*
|
||||
* Generate a PSK for TLS as specified in NVMe base specification, section
|
||||
* 8.13.5.9: Generated PSK for TLS
|
||||
*
|
||||
* The generated PSK for TLS shall be computed by applying the HMAC function
|
||||
* using the hash function H( ) selected by the HashID parameter in the
|
||||
* DH-HMAC-CHAP_Challenge message with the session key KS as key to the
|
||||
* concatenation of the two challenges C1 and C2 (i.e., generated
|
||||
* PSK = HMAC(KS, C1 || C2)).
|
||||
*
|
||||
* Returns 0 on success with a valid generated PSK pointer in @ret_psk and
|
||||
* the length of @ret_psk in @ret_len, or a negative error number otherwise.
|
||||
*/
|
||||
int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len,
|
||||
u8 *c1, u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len)
|
||||
{
|
||||
struct crypto_shash *tfm;
|
||||
SHASH_DESC_ON_STACK(shash, tfm);
|
||||
u8 *psk;
|
||||
const char *hmac_name;
|
||||
int ret, psk_len;
|
||||
|
||||
if (!c1 || !c2)
|
||||
return -EINVAL;
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(hmac_id);
|
||||
if (!hmac_name) {
|
||||
pr_warn("%s: invalid hash algorithm %d\n",
|
||||
__func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(tfm))
|
||||
return PTR_ERR(tfm);
|
||||
|
||||
psk_len = crypto_shash_digestsize(tfm);
|
||||
psk = kzalloc(psk_len, GFP_KERNEL);
|
||||
if (!psk) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_tfm;
|
||||
}
|
||||
|
||||
shash->tfm = tfm;
|
||||
ret = crypto_shash_setkey(tfm, skey, skey_len);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_update(shash, c1, hash_len);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_update(shash, c2, hash_len);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_final(shash, psk);
|
||||
if (!ret) {
|
||||
*ret_psk = psk;
|
||||
*ret_len = psk_len;
|
||||
}
|
||||
|
||||
out_free_psk:
|
||||
if (ret)
|
||||
kfree_sensitive(psk);
|
||||
out_free_tfm:
|
||||
crypto_free_shash(tfm);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_generate_psk);
|
||||
|
||||
/**
|
||||
* nvme_auth_generate_digest - Generate TLS PSK digest
|
||||
* @hmac_id: Hash function identifier
|
||||
* @psk: Generated input PSK
|
||||
* @psk_len: Length of @psk
|
||||
* @subsysnqn: NQN of the subsystem
|
||||
* @hostnqn: NQN of the host
|
||||
* @ret_digest: Pointer to the returned digest
|
||||
*
|
||||
* Generate a TLS PSK digest as specified in TP8018 Section 3.6.1.3:
|
||||
* TLS PSK and PSK identity Derivation
|
||||
*
|
||||
* The PSK digest shall be computed by encoding in Base64 (refer to RFC
|
||||
* 4648) the result of the application of the HMAC function using the hash
|
||||
* function specified in item 4 above (i.e., the hash function of the cipher
|
||||
* suite associated with the PSK identity) with the PSK as HMAC key to the
|
||||
* concatenation of:
|
||||
* - the NQN of the host (i.e., NQNh) not including the null terminator;
|
||||
* - a space character;
|
||||
* - the NQN of the NVM subsystem (i.e., NQNc) not including the null
|
||||
* terminator;
|
||||
* - a space character; and
|
||||
* - the seventeen ASCII characters "NVMe-over-Fabrics"
|
||||
* (i.e., <PSK digest> = Base64(HMAC(PSK, NQNh || " " || NQNc || " " ||
|
||||
* "NVMe-over-Fabrics"))).
|
||||
* The length of the PSK digest depends on the hash function used to compute
|
||||
* it as follows:
|
||||
* - If the SHA-256 hash function is used, the resulting PSK digest is 44
|
||||
* characters long; or
|
||||
* - If the SHA-384 hash function is used, the resulting PSK digest is 64
|
||||
* characters long.
|
||||
*
|
||||
* Returns 0 on success with a valid digest pointer in @ret_digest, or a
|
||||
* negative error number on failure.
|
||||
*/
|
||||
int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len,
|
||||
char *subsysnqn, char *hostnqn, u8 **ret_digest)
|
||||
{
|
||||
struct crypto_shash *tfm;
|
||||
SHASH_DESC_ON_STACK(shash, tfm);
|
||||
u8 *digest, *enc;
|
||||
const char *hmac_name;
|
||||
size_t digest_len, hmac_len;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON(!subsysnqn || !hostnqn))
|
||||
return -EINVAL;
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(hmac_id);
|
||||
if (!hmac_name) {
|
||||
pr_warn("%s: invalid hash algorithm %d\n",
|
||||
__func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (nvme_auth_hmac_hash_len(hmac_id)) {
|
||||
case 32:
|
||||
hmac_len = 44;
|
||||
break;
|
||||
case 48:
|
||||
hmac_len = 64;
|
||||
break;
|
||||
default:
|
||||
pr_warn("%s: invalid hash algorithm '%s'\n",
|
||||
__func__, hmac_name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
enc = kzalloc(hmac_len + 1, GFP_KERNEL);
|
||||
if (!enc)
|
||||
return -ENOMEM;
|
||||
|
||||
tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(tfm)) {
|
||||
ret = PTR_ERR(tfm);
|
||||
goto out_free_enc;
|
||||
}
|
||||
|
||||
digest_len = crypto_shash_digestsize(tfm);
|
||||
digest = kzalloc(digest_len, GFP_KERNEL);
|
||||
if (!digest) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_tfm;
|
||||
}
|
||||
|
||||
shash->tfm = tfm;
|
||||
ret = crypto_shash_setkey(tfm, psk, psk_len);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, hostnqn, strlen(hostnqn));
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, " ", 1);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, subsysnqn, strlen(subsysnqn));
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, " NVMe-over-Fabrics", 18);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_final(shash, digest);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = base64_encode(digest, digest_len, enc);
|
||||
if (ret < hmac_len) {
|
||||
ret = -ENOKEY;
|
||||
goto out_free_digest;
|
||||
}
|
||||
*ret_digest = enc;
|
||||
ret = 0;
|
||||
|
||||
out_free_digest:
|
||||
kfree_sensitive(digest);
|
||||
out_free_tfm:
|
||||
crypto_free_shash(tfm);
|
||||
out_free_enc:
|
||||
if (ret)
|
||||
kfree_sensitive(enc);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_generate_digest);
|
||||
|
||||
/**
|
||||
* nvme_auth_derive_tls_psk - Derive TLS PSK
|
||||
* @hmac_id: Hash function identifier
|
||||
* @psk: generated input PSK
|
||||
* @psk_len: size of @psk
|
||||
* @psk_digest: TLS PSK digest
|
||||
* @ret_psk: Pointer to the resulting TLS PSK
|
||||
*
|
||||
* Derive a TLS PSK as specified in TP8018 Section 3.6.1.3:
|
||||
* TLS PSK and PSK identity Derivation
|
||||
*
|
||||
* The TLS PSK shall be derived as follows from an input PSK
|
||||
* (i.e., either a retained PSK or a generated PSK) and a PSK
|
||||
* identity using the HKDF-Extract and HKDF-Expand-Label operations
|
||||
* (refer to RFC 5869 and RFC 8446) where the hash function is the
|
||||
* one specified by the hash specifier of the PSK identity:
|
||||
* 1. PRK = HKDF-Extract(0, Input PSK); and
|
||||
* 2. TLS PSK = HKDF-Expand-Label(PRK, "nvme-tls-psk", PskIdentityContext, L),
|
||||
* where PskIdentityContext is the hash identifier indicated in
|
||||
* the PSK identity concatenated to a space character and to the
|
||||
* Base64 PSK digest (i.e., "<hash> <PSK digest>") and L is the
|
||||
* output size in bytes of the hash function (i.e., 32 for SHA-256
|
||||
* and 48 for SHA-384).
|
||||
*
|
||||
* Returns 0 on success with a valid psk pointer in @ret_psk or a negative
|
||||
* error number otherwise.
|
||||
*/
|
||||
int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len,
|
||||
u8 *psk_digest, u8 **ret_psk)
|
||||
{
|
||||
struct crypto_shash *hmac_tfm;
|
||||
const char *hmac_name;
|
||||
const char *psk_prefix = "tls13 nvme-tls-psk";
|
||||
static const char default_salt[HKDF_MAX_HASHLEN];
|
||||
size_t info_len, prk_len;
|
||||
char *info;
|
||||
unsigned char *prk, *tls_key;
|
||||
int ret;
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(hmac_id);
|
||||
if (!hmac_name) {
|
||||
pr_warn("%s: invalid hash algorithm %d\n",
|
||||
__func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (hmac_id == NVME_AUTH_HASH_SHA512) {
|
||||
pr_warn("%s: unsupported hash algorithm %s\n",
|
||||
__func__, hmac_name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hmac_tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(hmac_tfm))
|
||||
return PTR_ERR(hmac_tfm);
|
||||
|
||||
prk_len = crypto_shash_digestsize(hmac_tfm);
|
||||
prk = kzalloc(prk_len, GFP_KERNEL);
|
||||
if (!prk) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_shash;
|
||||
}
|
||||
|
||||
if (WARN_ON(prk_len > HKDF_MAX_HASHLEN)) {
|
||||
ret = -EINVAL;
|
||||
goto out_free_prk;
|
||||
}
|
||||
ret = hkdf_extract(hmac_tfm, psk, psk_len,
|
||||
default_salt, prk_len, prk);
|
||||
if (ret)
|
||||
goto out_free_prk;
|
||||
|
||||
ret = crypto_shash_setkey(hmac_tfm, prk, prk_len);
|
||||
if (ret)
|
||||
goto out_free_prk;
|
||||
|
||||
/*
|
||||
* 2 additional bytes for the length field from HKDF-Expand-Label,
* 2 additional bytes for the HMAC ID, and one byte for the space
|
||||
* separator.
|
||||
*/
|
||||
info_len = strlen(psk_digest) + strlen(psk_prefix) + 5;
|
||||
info = kzalloc(info_len + 1, GFP_KERNEL);
|
||||
if (!info) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_prk;
|
||||
}
|
||||
|
||||
put_unaligned_be16(psk_len, info);
|
||||
memcpy(info + 2, psk_prefix, strlen(psk_prefix));
|
||||
sprintf(info + 2 + strlen(psk_prefix), "%02d %s", hmac_id, psk_digest);
|
||||
|
||||
tls_key = kzalloc(psk_len, GFP_KERNEL);
|
||||
if (!tls_key) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_info;
|
||||
}
|
||||
ret = hkdf_expand(hmac_tfm, info, info_len, tls_key, psk_len);
|
||||
if (ret) {
|
||||
kfree(tls_key);
|
||||
goto out_free_info;
|
||||
}
|
||||
*ret_psk = tls_key;
|
||||
|
||||
out_free_info:
|
||||
kfree(info);
|
||||
out_free_prk:
|
||||
kfree(prk);
|
||||
out_free_shash:
|
||||
crypto_free_shash(hmac_tfm);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_derive_tls_psk);
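Taken together, the three helpers exported above are enough to turn a DH-HMAC-CHAP session key into a TLS PSK for secure channel concatenation. A condensed, hedged sketch of the call chain (error handling trimmed; the real consumer appears further down in this merge as nvme_auth_secure_concat()):

/* Sketch: derive a TLS PSK from a negotiated session key KS and challenges C1/C2. */
static int demo_session_key_to_tls_psk(u8 hmac_id, u8 *ks, size_t ks_len,
                u8 *c1, u8 *c2, size_t hash_len,
                char *subsysnqn, char *hostnqn,
                u8 **tls_psk, size_t *tls_psk_len)
{
        u8 *psk, *digest;
        size_t psk_len;
        int ret;

        /* generated PSK = HMAC(KS, C1 || C2) */
        ret = nvme_auth_generate_psk(hmac_id, ks, ks_len, c1, c2,
                                     hash_len, &psk, &psk_len);
        if (ret)
                return ret;

        /* PSK digest = Base64(HMAC(PSK, NQNh || " " || NQNc || " NVMe-over-Fabrics")) */
        ret = nvme_auth_generate_digest(hmac_id, psk, psk_len,
                                        subsysnqn, hostnqn, &digest);
        if (ret)
                goto out_free_psk;

        /* TLS PSK = HKDF-Expand-Label(HKDF-Extract(0, PSK), "nvme-tls-psk", ...) */
        ret = nvme_auth_derive_tls_psk(hmac_id, psk, psk_len, digest, tls_psk);
        if (!ret)
                *tls_psk_len = psk_len;

        kfree_sensitive(digest);
out_free_psk:
        kfree_sensitive(psk);
        return ret;
}

The resulting key material is then installed into the keyring with nvme_tls_psk_refresh(), which is what the host-side code later in this merge does.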
|
||||
|
||||
MODULE_DESCRIPTION("NVMe Authentication framework");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
#include <linux/module.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/key.h>
|
||||
#include <linux/key-type.h>
|
||||
#include <keys/user-type.h>
|
||||
#include <linux/nvme.h>
|
||||
|
@ -124,6 +123,70 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
|
|||
return key_ref_to_ptr(keyref);
|
||||
}
|
||||
|
||||
/**
|
||||
* nvme_tls_psk_refresh - Refresh TLS PSK
|
||||
* @keyring: Keyring holding the TLS PSK
|
||||
* @hostnqn: Host NQN to use
|
||||
* @subnqn: Subsystem NQN to use
|
||||
* @hmac_id: Hash function identifier
|
||||
* @data: TLS PSK key material
|
||||
* @data_len: Length of @data
|
||||
* @digest: TLS PSK digest
|
||||
*
|
||||
* Refresh a generated version 1 TLS PSK with the identity generated
|
||||
* from @hmac_id, @hostnqn, @subnqn, and @digest in the keyring given
|
||||
* by @keyring.
|
||||
*
|
||||
* Returns the updated key on success or an error pointer otherwise.
|
||||
*/
|
||||
struct key *nvme_tls_psk_refresh(struct key *keyring,
|
||||
const char *hostnqn, const char *subnqn, u8 hmac_id,
|
||||
u8 *data, size_t data_len, const char *digest)
|
||||
{
|
||||
key_perm_t keyperm =
|
||||
KEY_POS_SEARCH | KEY_POS_VIEW | KEY_POS_READ |
|
||||
KEY_POS_WRITE | KEY_POS_LINK | KEY_POS_SETATTR |
|
||||
KEY_USR_SEARCH | KEY_USR_VIEW | KEY_USR_READ;
|
||||
char *identity;
|
||||
key_ref_t keyref;
|
||||
key_serial_t keyring_id;
|
||||
struct key *key;
|
||||
|
||||
if (!hostnqn || !subnqn || !data || !data_len)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
identity = kasprintf(GFP_KERNEL, "NVMe1G%02d %s %s %s",
|
||||
hmac_id, hostnqn, subnqn, digest);
|
||||
if (!identity)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (!keyring)
|
||||
keyring = nvme_keyring;
|
||||
keyring_id = key_serial(keyring);
|
||||
pr_debug("keyring %x refresh tls psk '%s'\n",
|
||||
keyring_id, identity);
|
||||
keyref = key_create_or_update(make_key_ref(keyring, true),
|
||||
"psk", identity, data, data_len,
|
||||
keyperm, KEY_ALLOC_NOT_IN_QUOTA |
|
||||
KEY_ALLOC_BUILT_IN |
|
||||
KEY_ALLOC_BYPASS_RESTRICTION);
|
||||
if (IS_ERR(keyref)) {
|
||||
pr_debug("refresh tls psk '%s' failed, error %ld\n",
|
||||
identity, PTR_ERR(keyref));
|
||||
kfree(identity);
|
||||
return ERR_PTR(-ENOKEY);
|
||||
}
|
||||
kfree(identity);
|
||||
/*
|
||||
* Set the default timeout to 1 hour
|
||||
* as suggested in TP8018.
|
||||
*/
|
||||
key = key_ref_to_ptr(keyref);
|
||||
key_set_timeout(key, 3600);
|
||||
return key;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_tls_psk_refresh);
|
||||
|
||||
/*
|
||||
* NVMe PSK priority list
|
||||
*
|
||||
|
|
|
@ -109,7 +109,7 @@ config NVME_HOST_AUTH
|
|||
bool "NVMe over Fabrics In-Band Authentication in host side"
|
||||
depends on NVME_CORE
|
||||
select NVME_AUTH
|
||||
select NVME_KEYRING if NVME_TCP_TLS
|
||||
select NVME_KEYRING
|
||||
help
|
||||
This provides support for NVMe over Fabrics In-Band Authentication in
|
||||
host side.
|
||||
|
|
|
@ -525,7 +525,7 @@ static blk_status_t apple_nvme_map_data(struct apple_nvme *anv,
|
|||
if (!iod->sg)
|
||||
return BLK_STS_RESOURCE;
|
||||
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
|
||||
iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
|
||||
iod->nents = blk_rq_map_sg(req, iod->sg);
|
||||
if (!iod->nents)
|
||||
goto out_free_sg;
|
||||
|
||||
|
|
|
@@ -12,6 +12,7 @@
#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-auth.h>
#include <linux/nvme-keyring.h>

#define CHAP_BUF_SIZE 4096
static struct kmem_cache *nvme_chap_buf_cache;
@@ -131,7 +132,13 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl,
data->auth_type = NVME_AUTH_COMMON_MESSAGES;
data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
data->t_id = cpu_to_le16(chap->transaction);
data->sc_c = 0; /* No secure channel concatenation */
if (ctrl->opts->concat && chap->qid == 0) {
if (ctrl->opts->tls_key)
data->sc_c = NVME_AUTH_SECP_REPLACETLSPSK;
else
data->sc_c = NVME_AUTH_SECP_NEWTLSPSK;
} else
data->sc_c = NVME_AUTH_SECP_NOSC;
data->napd = 1;
data->auth_protocol[0].dhchap.authid = NVME_AUTH_DHCHAP_AUTH_ID;
data->auth_protocol[0].dhchap.halen = 3;
@@ -311,8 +318,9 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl,
data->hl = chap->hash_len;
data->dhvlen = cpu_to_le16(chap->host_key_len);
memcpy(data->rval, chap->response, chap->hash_len);
if (ctrl->ctrl_key) {
if (ctrl->ctrl_key)
chap->bi_directional = true;
if (ctrl->ctrl_key || ctrl->opts->concat) {
get_random_bytes(chap->c2, chap->hash_len);
data->cvalid = 1;
memcpy(data->rval + chap->hash_len, chap->c2,
@@ -322,6 +330,9 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl,
} else {
memset(chap->c2, 0, chap->hash_len);
}
if (ctrl->opts->concat)
chap->s2 = 0;
else
chap->s2 = nvme_auth_get_seqnum();
data->seqnum = cpu_to_le32(chap->s2);
if (chap->host_key_len) {
@@ -677,6 +688,92 @@ static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap)
crypto_free_kpp(chap->dh_tfm);
}

void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl)
{
dev_dbg(ctrl->device, "Wipe generated TLS PSK %08x\n",
key_serial(ctrl->opts->tls_key));
key_revoke(ctrl->opts->tls_key);
key_put(ctrl->opts->tls_key);
ctrl->opts->tls_key = NULL;
}
EXPORT_SYMBOL_GPL(nvme_auth_revoke_tls_key);

static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl,
struct nvme_dhchap_queue_context *chap)
{
u8 *psk, *digest, *tls_psk;
struct key *tls_key;
size_t psk_len;
int ret = 0;

if (!chap->sess_key) {
dev_warn(ctrl->device,
"%s: qid %d no session key negotiated\n",
__func__, chap->qid);
return -ENOKEY;
}

if (chap->qid) {
dev_warn(ctrl->device,
"qid %d: secure concatenation not supported on I/O queues\n",
chap->qid);
return -EINVAL;
}
ret = nvme_auth_generate_psk(chap->hash_id, chap->sess_key,
chap->sess_key_len,
chap->c1, chap->c2,
chap->hash_len, &psk, &psk_len);
if (ret) {
dev_warn(ctrl->device,
"%s: qid %d failed to generate PSK, error %d\n",
__func__, chap->qid, ret);
return ret;
}
dev_dbg(ctrl->device,
"%s: generated psk %*ph\n", __func__, (int)psk_len, psk);

ret = nvme_auth_generate_digest(chap->hash_id, psk, psk_len,
ctrl->opts->subsysnqn,
ctrl->opts->host->nqn, &digest);
if (ret) {
dev_warn(ctrl->device,
"%s: qid %d failed to generate digest, error %d\n",
__func__, chap->qid, ret);
goto out_free_psk;
};
dev_dbg(ctrl->device, "%s: generated digest %s\n",
__func__, digest);
ret = nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len,
digest, &tls_psk);
if (ret) {
dev_warn(ctrl->device,
"%s: qid %d failed to derive TLS psk, error %d\n",
__func__, chap->qid, ret);
goto out_free_digest;
};

tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring,
ctrl->opts->host->nqn,
ctrl->opts->subsysnqn, chap->hash_id,
tls_psk, psk_len, digest);
if (IS_ERR(tls_key)) {
ret = PTR_ERR(tls_key);
dev_warn(ctrl->device,
"%s: qid %d failed to insert generated key, error %d\n",
__func__, chap->qid, ret);
tls_key = NULL;
}
kfree_sensitive(tls_psk);
if (ctrl->opts->tls_key)
nvme_auth_revoke_tls_key(ctrl);
ctrl->opts->tls_key = tls_key;
out_free_digest:
kfree_sensitive(digest);
out_free_psk:
kfree_sensitive(psk);
return ret;
}

static void nvme_queue_auth_work(struct work_struct *work)
{
struct nvme_dhchap_queue_context *chap =
@@ -833,6 +930,13 @@ static void nvme_queue_auth_work(struct work_struct *work)
}
if (!ret) {
chap->error = 0;
if (ctrl->opts->concat &&
(ret = nvme_auth_secure_concat(ctrl, chap))) {
dev_warn(ctrl->device,
"%s: qid %d failed to enable secure concatenation\n",
__func__, chap->qid);
chap->error = ret;
}
return;
}

@@ -912,6 +1016,11 @@ static void nvme_ctrl_auth_work(struct work_struct *work)
"qid 0: authentication failed\n");
return;
}
/*
* Only run authentication on the admin queue for secure concatenation.
*/
if (ctrl->opts->concat)
return;

for (q = 1; q < ctrl->queue_count; q++) {
ret = nvme_auth_negotiate(ctrl, q);

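For readability, the key-derivation chain that nvme_auth_secure_concat() above walks through can be condensed as follows. This is only a sketch based on the calls visible in the hunk (the helper name is invented, and error handling, debug output and cleanup are elided); it is not a separate implementation:

/* Sketch: how the admin queue turns the DH-HMAC-CHAP session key into a TLS PSK */
static int secure_concat_sketch(struct nvme_ctrl *ctrl,
        struct nvme_dhchap_queue_context *chap)
{
        u8 *psk, *digest, *tls_psk;
        size_t psk_len;
        struct key *tls_key;

        /* 1. session key + challenges C1/C2 -> generated PSK */
        nvme_auth_generate_psk(chap->hash_id, chap->sess_key, chap->sess_key_len,
                        chap->c1, chap->c2, chap->hash_len, &psk, &psk_len);
        /* 2. PSK + subsystem and host NQNs -> PSK digest */
        nvme_auth_generate_digest(chap->hash_id, psk, psk_len,
                        ctrl->opts->subsysnqn, ctrl->opts->host->nqn, &digest);
        /* 3. PSK + digest -> retained TLS PSK */
        nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len, digest, &tls_psk);
        /* 4. insert the TLS PSK into the keyring (1 hour timeout, see the
         *    nvme_tls_psk_refresh() hunk earlier in this diff) */
        tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring, ctrl->opts->host->nqn,
                        ctrl->opts->subsysnqn, chap->hash_id, tls_psk, psk_len, digest);
        ctrl->opts->tls_key = tls_key;
        return 0;
}
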
@@ -4018,6 +4018,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)

if (!nvme_ns_head_multipath(ns->head))
nvme_cdev_del(&ns->cdev, &ns->cdev_device);

nvme_mpath_remove_sysfs_link(ns);

del_gendisk(ns->disk);

mutex_lock(&ns->ctrl->namespaces_lock);

@@ -472,8 +472,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
result = le32_to_cpu(res.u32);
ctrl->cntlid = result & 0xFFFF;
if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
/* Secure concatenation is not implemented */
if (result & NVME_CONNECT_AUTHREQ_ASCR) {
/* Check for secure concatenation */
if ((result & NVME_CONNECT_AUTHREQ_ASCR) &&
!ctrl->opts->concat) {
dev_warn(ctrl->device,
"qid 0: secure concatenation is not supported\n");
ret = -EOPNOTSUPP;
@@ -550,7 +551,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
/* Secure concatenation is not implemented */
if (result & NVME_CONNECT_AUTHREQ_ASCR) {
dev_warn(ctrl->device,
"qid 0: secure concatenation is not supported\n");
"qid %d: secure concatenation is not supported\n", qid);
ret = -EOPNOTSUPP;
goto out_free_data;
}
@@ -706,6 +707,7 @@ static const match_table_t opt_tokens = {
#endif
#ifdef CONFIG_NVME_TCP_TLS
{ NVMF_OPT_TLS, "tls" },
{ NVMF_OPT_CONCAT, "concat" },
#endif
{ NVMF_OPT_ERR, NULL }
};
@@ -735,6 +737,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->tls = false;
opts->tls_key = NULL;
opts->keyring = NULL;
opts->concat = false;

options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -1053,6 +1056,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->tls = true;
break;
case NVMF_OPT_CONCAT:
if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) {
pr_err("TLS is not supported\n");
ret = -EINVAL;
goto out;
}
opts->concat = true;
break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -1079,6 +1090,23 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
opts->fast_io_fail_tmo, ctrl_loss_tmo);
}
if (opts->concat) {
if (opts->tls) {
pr_err("Secure concatenation over TLS is not supported\n");
ret = -EINVAL;
goto out;
}
if (opts->tls_key) {
pr_err("Cannot specify a TLS key for secure concatenation\n");
ret = -EINVAL;
goto out;
}
if (!opts->dhchap_secret) {
pr_err("Need to enable DH-CHAP for secure concatenation\n");
ret = -EINVAL;
goto out;
}
}

opts->host = nvmf_host_add(hostnqn, &hostid);
if (IS_ERR(opts->host)) {

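Taken together, the checks above pin down the only option combination that secure channel concatenation accepts: the 'concat' option requires CONFIG_NVME_TCP_TLS, must not be combined with an explicit 'tls' option or a user-supplied TLS key, and needs a DH-CHAP secret. A sketch of the corresponding nvmf_ctrl_options fields, with placeholder values only:

/* Sketch: option combination accepted for 'concat' (placeholder values) */
struct nvmf_ctrl_options concat_opts = {
        .concat        = true,                    /* "concat" connect option */
        .tls           = false,                   /* must not be combined with "tls" */
        .tls_key       = NULL,                    /* the TLS PSK is generated, not user supplied */
        .keyring       = NULL,                    /* a keyring may optionally be specified */
        .dhchap_secret = "DHHC-1:00:<base64>:",   /* DH-CHAP must be configured; placeholder */
};
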
@@ -66,6 +66,7 @@ enum {
NVMF_OPT_TLS = 1 << 25,
NVMF_OPT_KEYRING = 1 << 26,
NVMF_OPT_TLS_KEY = 1 << 27,
NVMF_OPT_CONCAT = 1 << 28,
};

/**
@@ -101,6 +102,7 @@ enum {
* @keyring: Keyring to use for key lookups
* @tls_key: TLS key for encrypted connections (TCP)
* @tls: Start TLS encrypted connections (TCP)
* @concat: Enabled Secure channel concatenation (TCP)
* @disable_sqflow: disable controller sq flow control
* @hdr_digest: generate/verify header digest (TCP)
* @data_digest: generate/verify data digest (TCP)
@@ -130,6 +132,7 @@ struct nvmf_ctrl_options {
struct key *keyring;
struct key *tls_key;
bool tls;
bool concat;
bool disable_sqflow;
bool hdr_digest;
bool data_digest;

@@ -2571,7 +2571,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
if (ret)
return -ENOMEM;

op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
op->nents = blk_rq_map_sg(rq, freq->sg_table.sgl);
WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
op->nents, rq_dma_dir(rq));
@@ -2858,7 +2858,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
unsigned int nr_io_queues;
int ret;

nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(),
ctrl->lport->ops->max_hw_queues);
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret) {
@@ -2912,7 +2912,7 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
unsigned int nr_io_queues;
int ret;

nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(),
ctrl->lport->ops->max_hw_queues);
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret) {

@@ -686,6 +686,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
kblockd_schedule_work(&head->partition_scan_work);
}

nvme_mpath_add_sysfs_link(ns->head);

mutex_lock(&head->lock);
if (nvme_path_is_optimized(ns)) {
int node, srcu_idx;
@@ -768,6 +770,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
if (nvme_state_is_live(ns->ana_state) &&
nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
nvme_mpath_set_live(ns);
else {
/*
* Add sysfs link from multipath head gendisk node to path
* device gendisk node.
* If path's ana state is live (i.e. state is either optimized
* or non-optimized) while we alloc the ns then sysfs link would
* be created from nvme_mpath_set_live(). In that case we would
* not fallthrough this code path. However for the path's ana
* state other than live, we call nvme_mpath_set_live() only
* after ana state transitioned to the live state. But we still
* want to create the sysfs link from head node to a path device
* irrespctive of the path's ana state.
* If we reach through here then it means that path's ana state
* is not live but still create the sysfs link to this path from
* head node if head node of the path has already come alive.
*/
if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags))
nvme_mpath_add_sysfs_link(ns->head);
}
}

static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -955,6 +976,45 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
}
DEVICE_ATTR_RO(ana_state);

static ssize_t queue_depth_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);

if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD)
return 0;

return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active));
}
DEVICE_ATTR_RO(queue_depth);

static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
int node, srcu_idx;
nodemask_t numa_nodes;
struct nvme_ns *current_ns;
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
struct nvme_ns_head *head = ns->head;

if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA)
return 0;

nodes_clear(numa_nodes);

srcu_idx = srcu_read_lock(&head->srcu);
for_each_node(node) {
current_ns = srcu_dereference(head->current_path[node],
&head->srcu);
if (ns == current_ns)
node_set(node, numa_nodes);
}
srcu_read_unlock(&head->srcu, srcu_idx);

return sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&numa_nodes));
}
DEVICE_ATTR_RO(numa_nodes);

static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data)
{
@@ -967,6 +1027,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
return -ENXIO; /* just break out of the loop */
}

void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
{
struct device *target;
int rc, srcu_idx;
struct nvme_ns *ns;
struct kobject *kobj;

/*
* Ensure head disk node is already added otherwise we may get invalid
* kobj for head disk node
*/
if (!test_bit(GD_ADDED, &head->disk->state))
return;

kobj = &disk_to_dev(head->disk)->kobj;

/*
* loop through each ns chained through the head->list and create the
* sysfs link from head node to the ns path node
*/
srcu_idx = srcu_read_lock(&head->srcu);

list_for_each_entry_rcu(ns, &head->list, siblings) {
/*
* Avoid creating link if it already exists for the given path.
* When path ana state transitions from optimized to non-
* optimized or vice-versa, the nvme_mpath_set_live() is
* invoked which in truns call this function. Now if the sysfs
* link already exists for the given path and we attempt to re-
* create the link then sysfs code would warn about it loudly.
* So we evaluate NVME_NS_SYSFS_ATTR_LINK flag here to ensure
* that we're not creating duplicate link.
* The test_and_set_bit() is used because it is protecting
* against multiple nvme paths being simultaneously added.
*/
if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
continue;

/*
* Ensure that ns path disk node is already added otherwise we
* may get invalid kobj name for target
*/
if (!test_bit(GD_ADDED, &ns->disk->state))
continue;

target = disk_to_dev(ns->disk);
/*
* Create sysfs link from head gendisk kobject @kobj to the
* ns path gendisk kobject @target->kobj.
*/
rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name,
&target->kobj, dev_name(target));
if (unlikely(rc)) {
dev_err(disk_to_dev(ns->head->disk),
"failed to create link to %s\n",
dev_name(target));
clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
}
}

srcu_read_unlock(&head->srcu, srcu_idx);
}

void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
{
struct device *target;
struct kobject *kobj;

if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
return;

target = disk_to_dev(ns->disk);
kobj = &disk_to_dev(ns->head->disk)->kobj;
sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name,
dev_name(target));
clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
}

void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
{
if (nvme_ctrl_use_ana(ns->ctrl)) {

@@ -538,6 +538,7 @@ struct nvme_ns {
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
#define NVME_NS_READY 4
#define NVME_NS_SYSFS_ATTR_LINK 5

struct cdev cdev;
struct device cdev_device;
@@ -933,6 +934,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);

extern const struct attribute_group *nvme_ns_attr_groups[];
extern const struct attribute_group nvme_ns_mpath_attr_group;
extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops;
extern const struct attribute_group nvme_dev_attrs_group;
@@ -955,6 +957,8 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns);
void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns);
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
@@ -980,6 +984,8 @@ static inline void nvme_trace_bio_complete(struct request *req)
extern bool multipath;
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute dev_attr_queue_depth;
extern struct device_attribute dev_attr_numa_nodes;
extern struct device_attribute subsys_attr_iopolicy;

static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
@@ -1009,6 +1015,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns)
{
}
static inline void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
{
}
static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
return false;
@@ -1147,6 +1159,7 @@ void nvme_auth_stop(struct nvme_ctrl *ctrl);
int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
void nvme_auth_free(struct nvme_ctrl *ctrl);
void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl);
#else
static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
{
@@ -1169,6 +1182,7 @@ static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
return -EPROTONOSUPPORT;
}
static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};
static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {};
#endif

u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,

@@ -812,7 +812,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
if (!iod->sgt.sgl)
return BLK_STS_RESOURCE;
sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req));
iod->sgt.orig_nents = blk_rq_map_sg(req->q, req, iod->sgt.sgl);
iod->sgt.orig_nents = blk_rq_map_sg(req, iod->sgt.sgl);
if (!iod->sgt.orig_nents)
goto out_free_sg;

@@ -953,9 +953,6 @@ out_free_cmd:
return ret;
}

/*
* NOTE: ns is NULL when called on the admin queue.
*/
static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{

@@ -1476,8 +1476,7 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
if (ret)
return -ENOMEM;

req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
req->data_sgl.sg_table.sgl);
req->data_sgl.nents = blk_rq_map_sg(rq, req->data_sgl.sg_table.sgl);

*count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
req->data_sgl.nents, rq_dma_dir(rq));

@@ -258,6 +258,8 @@ static struct attribute *nvme_ns_attrs[] = {
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
&dev_attr_queue_depth.attr,
&dev_attr_numa_nodes.attr,
#endif
&dev_attr_io_passthru_err_log_enabled.attr,
NULL,
@@ -290,6 +292,10 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
}
if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) {
if (nvme_disk_is_ns_head(dev_to_disk(dev)))
return 0;
}
#endif
return a->mode;
}
@@ -299,8 +305,22 @@ static const struct attribute_group nvme_ns_attr_group = {
.is_visible = nvme_ns_attrs_are_visible,
};

#ifdef CONFIG_NVME_MULTIPATH
static struct attribute *nvme_ns_mpath_attrs[] = {
NULL,
};

const struct attribute_group nvme_ns_mpath_attr_group = {
.name = "multipath",
.attrs = nvme_ns_mpath_attrs,
};
#endif

const struct attribute_group *nvme_ns_attr_groups[] = {
&nvme_ns_attr_group,
#ifdef CONFIG_NVME_MULTIPATH
&nvme_ns_mpath_attr_group,
#endif
NULL,
};

@@ -780,10 +800,10 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
return 0;

if (a == &dev_attr_tls_key.attr &&
!ctrl->opts->tls)
!ctrl->opts->tls && !ctrl->opts->concat)
return 0;
if (a == &dev_attr_tls_configured_key.attr &&
!ctrl->opts->tls_key)
(!ctrl->opts->tls_key || ctrl->opts->concat))
return 0;
if (a == &dev_attr_tls_keyring.attr &&
!ctrl->opts->keyring)

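The empty "multipath" attribute group added above, together with the head-to-path links created in nvme_mpath_add_sysfs_link() earlier, is easiest to picture from the resulting sysfs layout. A rough sketch with example device names only (the exact numbering depends on the controller and namespace):

/*
 * Sketch of the resulting sysfs layout (example device names, not from the patch):
 *
 *   /sys/block/nvme1n1/multipath/nvme1c1n1   link from the head node to a path device
 *   /sys/block/nvme1c1n1/queue_depth         reports a value only for the queue-depth io-policy
 *   /sys/block/nvme1c1n1/numa_nodes          reports a value only for the numa io-policy
 */
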
@@ -8,7 +8,6 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/key.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
@@ -249,7 +248,7 @@ static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl)
if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
return 0;

return ctrl->opts->tls;
return ctrl->opts->tls || ctrl->opts->concat;
}

static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
@@ -1790,7 +1789,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
queue->cmnd_capsule_len = sizeof(struct nvme_command) +
NVME_TCP_ADMIN_CCSZ;

ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
ret = sock_create_kern(current->nsproxy->net_ns,
ctrl->addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &queue->sock);
if (ret) {
dev_err(nctrl->device,
@@ -2060,7 +2060,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
if (nvme_tcp_tls_configured(ctrl)) {
if (ctrl->opts->tls_key)
pskid = key_serial(ctrl->opts->tls_key);
else {
else if (ctrl->opts->tls) {
pskid = nvme_tls_psk_default(ctrl->opts->keyring,
ctrl->opts->host->nqn,
ctrl->opts->subsysnqn);
@@ -2090,10 +2090,26 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{
int i, ret;

if (nvme_tcp_tls_configured(ctrl) && !ctrl->tls_pskid) {
if (nvme_tcp_tls_configured(ctrl)) {
if (ctrl->opts->concat) {
/*
* The generated PSK is stored in the
* fabric options
*/
if (!ctrl->opts->tls_key) {
dev_err(ctrl->device, "no PSK generated\n");
return -ENOKEY;
}
if (ctrl->tls_pskid &&
ctrl->tls_pskid != key_serial(ctrl->opts->tls_key)) {
dev_err(ctrl->device, "Stale PSK id %08x\n", ctrl->tls_pskid);
ctrl->tls_pskid = 0;
}
} else if (!ctrl->tls_pskid) {
dev_err(ctrl->device, "no PSK negotiated\n");
return -ENOKEY;
}
}

for (i = 1; i < ctrl->queue_count; i++) {
ret = nvme_tcp_alloc_queue(ctrl, i,
@@ -2310,6 +2326,27 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl,
}
}

/*
* The TLS key is set by secure concatenation after negotiation has been
* completed on the admin queue. We need to revoke the key when:
* - concatenation is enabled (otherwise it's a static key set by the user)
* and
* - the generated key is present in ctrl->tls_key (otherwise there's nothing
* to revoke)
* and
* - a valid PSK key ID has been set in ctrl->tls_pskid (otherwise TLS
* negotiation has not run).
*
* We cannot always revoke the key as nvme_tcp_alloc_admin_queue() is called
* twice during secure concatenation, once on a 'normal' connection to run the
* DH-HMAC-CHAP negotiation (which generates the key, so it _must not_ be set),
* and once after the negotiation (which uses the key, so it _must_ be set).
*/
static bool nvme_tcp_key_revoke_needed(struct nvme_ctrl *ctrl)
{
return ctrl->opts->concat && ctrl->opts->tls_key && ctrl->tls_pskid;
}

static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
{
struct nvmf_ctrl_options *opts = ctrl->opts;
@@ -2319,6 +2356,16 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
if (ret)
return ret;

if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) {
/* See comments for nvme_tcp_key_revoke_needed() */
dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n");
nvme_stop_keep_alive(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, false);
ret = nvme_tcp_configure_admin_queue(ctrl, false);
if (ret)
return ret;
}

if (ctrl->icdoff) {
ret = -EOPNOTSUPP;
dev_err(ctrl->device, "icdoff is not supported!\n");
@@ -2415,6 +2462,8 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
struct nvme_tcp_ctrl, err_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;

if (nvme_tcp_key_revoke_needed(ctrl))
nvme_auth_revoke_tls_key(ctrl);
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
nvme_tcp_teardown_io_queues(ctrl, false);
@@ -2455,6 +2504,8 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
container_of(work, struct nvme_ctrl, reset_work);
int ret;

if (nvme_tcp_key_revoke_needed(ctrl))
nvme_auth_revoke_tls_key(ctrl);
nvme_stop_ctrl(ctrl);
nvme_tcp_teardown_ctrl(ctrl, false);

@@ -2951,7 +3002,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS |
NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY,
NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT,
.create_ctrl = nvme_tcp_create_ctrl,
};

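Over TCP, the hunks above turn secure concatenation into a two-pass admin connect. A condensed sketch of the control flow, simplified from nvme_tcp_setup_ctrl() and the revoke rules in the comment above (the wrapper name is invented and most error handling is dropped):

/* Sketch: the two-pass admin connect that 'concat' performs over TCP */
static int tcp_concat_flow_sketch(struct nvme_ctrl *ctrl)
{
        int ret;

        /* Pass 1: plain admin connection; DH-HMAC-CHAP runs and stores the
         * generated TLS PSK in ctrl->opts->tls_key, but TLS is not yet in use. */
        ret = nvme_tcp_configure_admin_queue(ctrl, true);
        if (ret)
                return ret;

        if (ctrl->opts->concat && !ctrl->tls_pskid) {
                /* Pass 2: reconnect the admin queue, this time over TLS with
                 * the PSK generated in pass 1 (see nvme_tcp_setup_ctrl() above). */
                nvme_stop_keep_alive(ctrl);
                nvme_tcp_teardown_admin_queue(ctrl, false);
                ret = nvme_tcp_configure_admin_queue(ctrl, false);
        }
        /* On reset or error recovery the generated PSK is revoked again,
         * as decided by nvme_tcp_key_revoke_needed() above. */
        return ret;
}
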
@@ -146,17 +146,16 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
return NULL;
}

static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl,
struct nvme_ns_head *head,
static int nvme_zone_parse_entry(struct nvme_ns *ns,
struct nvme_zone_descriptor *entry,
unsigned int idx, report_zones_cb cb,
void *data)
{
struct nvme_ns_head *head = ns->head;
struct blk_zone zone = { };

if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
dev_err(ctrl->device, "invalid zone type %#x\n",
entry->zt);
dev_err(ns->ctrl->device, "invalid zone type %#x\n", entry->zt);
return -EINVAL;
}

@@ -213,8 +212,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
break;

for (i = 0; i < nz && zone_idx < nr_zones; i++) {
ret = nvme_zone_parse_entry(ns->ctrl, ns->head,
&report->entries[i],
ret = nvme_zone_parse_entry(ns, &report->entries[i],
zone_idx, cb, data);
if (ret)
goto out_free;

@@ -15,6 +15,7 @@
#include <linux/ctype.h>
#include <linux/random.h>
#include <linux/nvme-auth.h>
#include <linux/nvme-keyring.h>
#include <linux/unaligned.h>

#include "nvmet.h"
@@ -139,7 +140,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
return ret;
}

u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl)
u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq)
{
int ret = 0;
struct nvmet_host_link *p;
@@ -165,6 +166,11 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl)
goto out_unlock;
}

if (nvmet_queue_tls_keyid(sq)) {
pr_debug("host %s tls enabled\n", ctrl->hostnqn);
goto out_unlock;
}

ret = nvmet_setup_dhgroup(ctrl, host->dhchap_dhgroup_id);
if (ret < 0) {
pr_warn("Failed to setup DH group");
@@ -233,6 +239,9 @@ out_unlock:
void nvmet_auth_sq_free(struct nvmet_sq *sq)
{
cancel_delayed_work(&sq->auth_expired_work);
#ifdef CONFIG_NVME_TARGET_TCP_TLS
sq->tls_key = 0;
#endif
kfree(sq->dhchap_c1);
sq->dhchap_c1 = NULL;
kfree(sq->dhchap_c2);
@@ -261,6 +270,12 @@ void nvmet_destroy_auth(struct nvmet_ctrl *ctrl)
nvme_auth_free_key(ctrl->ctrl_key);
ctrl->ctrl_key = NULL;
}
#ifdef CONFIG_NVME_TARGET_TCP_TLS
if (ctrl->tls_key) {
key_put(ctrl->tls_key);
ctrl->tls_key = NULL;
}
#endif
}

bool nvmet_check_auth_status(struct nvmet_req *req)
@@ -542,3 +557,58 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,

return ret;
}

void nvmet_auth_insert_psk(struct nvmet_sq *sq)
{
int hash_len = nvme_auth_hmac_hash_len(sq->ctrl->shash_id);
u8 *psk, *digest, *tls_psk;
size_t psk_len;
int ret;
#ifdef CONFIG_NVME_TARGET_TCP_TLS
struct key *tls_key = NULL;
#endif

ret = nvme_auth_generate_psk(sq->ctrl->shash_id,
sq->dhchap_skey,
sq->dhchap_skey_len,
sq->dhchap_c1, sq->dhchap_c2,
hash_len, &psk, &psk_len);
if (ret) {
pr_warn("%s: ctrl %d qid %d failed to generate PSK, error %d\n",
__func__, sq->ctrl->cntlid, sq->qid, ret);
return;
}
ret = nvme_auth_generate_digest(sq->ctrl->shash_id, psk, psk_len,
sq->ctrl->subsysnqn,
sq->ctrl->hostnqn, &digest);
if (ret) {
pr_warn("%s: ctrl %d qid %d failed to generate digest, error %d\n",
__func__, sq->ctrl->cntlid, sq->qid, ret);
goto out_free_psk;
}
ret = nvme_auth_derive_tls_psk(sq->ctrl->shash_id, psk, psk_len,
digest, &tls_psk);
if (ret) {
pr_warn("%s: ctrl %d qid %d failed to derive TLS PSK, error %d\n",
__func__, sq->ctrl->cntlid, sq->qid, ret);
goto out_free_digest;
}
#ifdef CONFIG_NVME_TARGET_TCP_TLS
tls_key = nvme_tls_psk_refresh(NULL, sq->ctrl->hostnqn, sq->ctrl->subsysnqn,
sq->ctrl->shash_id, tls_psk, psk_len, digest);
if (IS_ERR(tls_key)) {
pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n",
__func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key));
tls_key = NULL;
kfree_sensitive(tls_psk);
}
if (sq->ctrl->tls_key)
key_put(sq->ctrl->tls_key);
sq->ctrl->tls_key = tls_key;
#endif

out_free_digest:
kfree_sensitive(digest);
out_free_psk:
kfree_sensitive(psk);
}

@@ -1618,8 +1618,6 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
}
ctrl->cntlid = ret;

uuid_copy(&ctrl->hostid, args->hostid);

/*
* Discovery controllers may use some arbitrary high value
* in order to cleanup stale discovery sessions
@@ -1647,7 +1645,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
if (args->hostid)
uuid_copy(&ctrl->hostid, args->hostid);

dhchap_status = nvmet_setup_auth(ctrl);
dhchap_status = nvmet_setup_auth(ctrl, args->sq);
if (dhchap_status) {
pr_err("Failed to setup authentication, dhchap status %u\n",
dhchap_status);
@@ -1662,11 +1660,12 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)

args->status = NVME_SC_SUCCESS;

pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s.\n",
pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n",
nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm",
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
ctrl->pi_support ? " T10-PI is enabled" : "",
nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : "");
nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "",
nvmet_queue_tls_keyid(args->sq) ? ", TLS" : "");

return ctrl;

@@ -132,6 +132,27 @@ static int nvmet_ctrl_host_traddr_show(struct seq_file *m, void *p)
}
NVMET_DEBUGFS_ATTR(nvmet_ctrl_host_traddr);

#ifdef CONFIG_NVME_TARGET_TCP_TLS
static int nvmet_ctrl_tls_key_show(struct seq_file *m, void *p)
{
struct nvmet_ctrl *ctrl = m->private;
key_serial_t keyid = nvmet_queue_tls_keyid(ctrl->sqs[0]);

seq_printf(m, "%08x\n", keyid);
return 0;
}
NVMET_DEBUGFS_ATTR(nvmet_ctrl_tls_key);

static int nvmet_ctrl_tls_concat_show(struct seq_file *m, void *p)
{
struct nvmet_ctrl *ctrl = m->private;

seq_printf(m, "%d\n", ctrl->concat);
return 0;
}
NVMET_DEBUGFS_ATTR(nvmet_ctrl_tls_concat);
#endif

int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl)
{
char name[32];
@@ -157,6 +178,12 @@ int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl)
&nvmet_ctrl_state_fops);
debugfs_create_file("host_traddr", S_IRUSR, ctrl->debugfs_dir, ctrl,
&nvmet_ctrl_host_traddr_fops);
#ifdef CONFIG_NVME_TARGET_TCP_TLS
debugfs_create_file("tls_concat", S_IRUSR, ctrl->debugfs_dir, ctrl,
&nvmet_ctrl_tls_concat_fops);
debugfs_create_file("tls_key", S_IRUSR, ctrl->debugfs_dir, ctrl,
&nvmet_ctrl_tls_key_fops);
#endif
return 0;
}

@@ -43,8 +43,26 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
data->auth_protocol[0].dhchap.halen,
data->auth_protocol[0].dhchap.dhlen);
req->sq->dhchap_tid = le16_to_cpu(data->t_id);
if (data->sc_c)
if (data->sc_c != NVME_AUTH_SECP_NOSC) {
if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS))
return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
/* Secure concatenation can only be enabled on the admin queue */
if (req->sq->qid)
return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
switch (data->sc_c) {
case NVME_AUTH_SECP_NEWTLSPSK:
if (nvmet_queue_tls_keyid(req->sq))
return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
break;
case NVME_AUTH_SECP_REPLACETLSPSK:
if (!nvmet_queue_tls_keyid(req->sq))
return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
break;
default:
return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
}
ctrl->concat = true;
}

if (data->napd != 1)
return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
@@ -103,6 +121,12 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
nvme_auth_dhgroup_name(fallback_dhgid));
ctrl->dh_gid = fallback_dhgid;
}
if (ctrl->dh_gid == NVME_AUTH_DHGROUP_NULL && ctrl->concat) {
pr_debug("%s: ctrl %d qid %d: NULL DH group invalid "
"for secure channel concatenation\n", __func__,
ctrl->cntlid, req->sq->qid);
return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH;
}
pr_debug("%s: ctrl %d qid %d: selected DH group %s (%d)\n",
__func__, ctrl->cntlid, req->sq->qid,
nvme_auth_dhgroup_name(ctrl->dh_gid), ctrl->dh_gid);
@@ -148,12 +172,22 @@ static u8 nvmet_auth_reply(struct nvmet_req *req, void *d)
if (memcmp(data->rval, response, data->hl)) {
pr_info("ctrl %d qid %d host response mismatch\n",
ctrl->cntlid, req->sq->qid);
pr_debug("ctrl %d qid %d rval %*ph\n",
ctrl->cntlid, req->sq->qid, data->hl, data->rval);
pr_debug("ctrl %d qid %d response %*ph\n",
ctrl->cntlid, req->sq->qid, data->hl, response);
kfree(response);
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
}
kfree(response);
pr_debug("%s: ctrl %d qid %d host authenticated\n",
__func__, ctrl->cntlid, req->sq->qid);
if (!data->cvalid && ctrl->concat) {
pr_debug("%s: ctrl %d qid %d invalid challenge\n",
__func__, ctrl->cntlid, req->sq->qid);
return NVME_AUTH_DHCHAP_FAILURE_FAILED;
}
req->sq->dhchap_s2 = le32_to_cpu(data->seqnum);
if (data->cvalid) {
req->sq->dhchap_c2 = kmemdup(data->rval + data->hl, data->hl,
GFP_KERNEL);
@@ -163,11 +197,23 @@ static u8 nvmet_auth_reply(struct nvmet_req *req, void *d)
pr_debug("%s: ctrl %d qid %d challenge %*ph\n",
__func__, ctrl->cntlid, req->sq->qid, data->hl,
req->sq->dhchap_c2);
} else {
req->sq->authenticated = true;
req->sq->dhchap_c2 = NULL;
}
req->sq->dhchap_s2 = le32_to_cpu(data->seqnum);
/*
* NVMe Base Spec 2.2 section 8.3.4.5.4: DH-HMAC-CHAP_Reply message
* Sequence Number (SEQNUM): [ .. ]
* The value 0h is used to indicate that bidirectional authentication
* is not performed, but a challenge value C2 is carried in order to
* generate a pre-shared key (PSK) for subsequent establishment of a
* secure channel.
*/
if (req->sq->dhchap_s2 == 0) {
if (ctrl->concat)
nvmet_auth_insert_psk(req->sq);
req->sq->authenticated = true;
kfree(req->sq->dhchap_c2);
req->sq->dhchap_c2 = NULL;
} else if (!data->cvalid)
req->sq->authenticated = true;

return 0;
}
@@ -246,7 +292,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
pr_debug("%s: ctrl %d qid %d reset negotiation\n",
__func__, ctrl->cntlid, req->sq->qid);
if (!req->sq->qid) {
dhchap_status = nvmet_setup_auth(ctrl);
dhchap_status = nvmet_setup_auth(ctrl, req->sq);
if (dhchap_status) {
pr_err("ctrl %d qid 0 failed to setup re-authentication\n",
ctrl->cntlid);
@@ -303,6 +349,8 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
}
goto done_kfree;
case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2:
if (ctrl->concat)
nvmet_auth_insert_psk(req->sq);
req->sq->authenticated = true;
pr_debug("%s: ctrl %d qid %d ctrl authenticated\n",
__func__, ctrl->cntlid, req->sq->qid);

@@ -234,10 +234,26 @@ err:
return ret;
}

static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl)
static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq)
{
bool needs_auth = nvmet_has_auth(ctrl, sq);
key_serial_t keyid = nvmet_queue_tls_keyid(sq);

/* Do not authenticate I/O queues for secure concatenation */
if (ctrl->concat && sq->qid)
needs_auth = false;

if (keyid)
pr_debug("%s: ctrl %d qid %d should %sauthenticate, tls psk %08x\n",
__func__, ctrl->cntlid, sq->qid,
needs_auth ? "" : "not ", keyid);
else
pr_debug("%s: ctrl %d qid %d should %sauthenticate%s\n",
__func__, ctrl->cntlid, sq->qid,
needs_auth ? "" : "not ",
ctrl->concat ? ", secure concatenation" : "");
return (u32)ctrl->cntlid |
(nvmet_has_auth(ctrl) ? NVME_CONNECT_AUTHREQ_ATR : 0);
(needs_auth ? NVME_CONNECT_AUTHREQ_ATR : 0);
}

static void nvmet_execute_admin_connect(struct nvmet_req *req)
@@ -247,6 +263,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
struct nvmet_alloc_ctrl_args args = {
.port = req->port,
.sq = req->sq,
.ops = req->ops,
.p2p_client = req->p2p_client,
.kato = le32_to_cpu(c->kato),
@@ -299,7 +316,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}

args.result = cpu_to_le32(nvmet_connect_result(ctrl));
args.result = cpu_to_le32(nvmet_connect_result(ctrl, req->sq));
out:
kfree(d);
complete:
@@ -357,7 +374,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
goto out_ctrl_put;

pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl, req->sq));
out:
kfree(d);
complete:
Some files were not shown because too many files have changed in this diff.