Mirror of https://github.com/torvalds/linux.git (synced 2025-08-15 14:11:42 +02:00)
Significant patch series in this pull request:

- The 2 patch series "squashfs: Remove page->mapping references" from
  Matthew Wilcox gets us closer to being able to remove page->mapping.

- The 5 patch series "relayfs: misc changes" from Jason Xing does some
  maintenance and minor feature addition work in relayfs.

- The 5 patch series "kdump: crashkernel reservation from CMA" from Jiri
  Bohac switches us from static preallocation of the kdump crashkernel's
  working memory over to dynamic allocation. So the difficulty of a-priori
  estimation of the second kernel's needs is removed and the first kernel
  obtains extra memory.

- The 5 patch series "generalize panic_print's dump function to be used by
  other kernel parts" from Feng Tang implements some consolidation and
  rationalization of the various ways in which a failing kernel splats
  information at the operator.

-----BEGIN PGP SIGNATURE-----

iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaI+82gAKCRDdBJ7gKXxA
jj4JAP9xb+w9DrBY6sa+7KTPIb+aTqQ7Zw3o9O2m+riKQJv6jAEA6aEwRnDA0451
fDT5IqVlCWGvnVikdZHSnvhdD7TGsQ0=
=rT71
-----END PGP SIGNATURE-----

Merge tag 'mm-nonmm-stable-2025-08-03-12-47' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull non-MM updates from Andrew Morton:
 "Significant patch series in this pull request:

  - "squashfs: Remove page->mapping references" (Matthew Wilcox) gets us
    closer to being able to remove page->mapping

  - "relayfs: misc changes" (Jason Xing) does some maintenance and minor
    feature addition work in relayfs

  - "kdump: crashkernel reservation from CMA" (Jiri Bohac) switches us
    from static preallocation of the kdump crashkernel's working memory
    over to dynamic allocation. So the difficulty of a-priori estimation
    of the second kernel's needs is removed and the first kernel obtains
    extra memory

  - "generalize panic_print's dump function to be used by other kernel
    parts" (Feng Tang) implements some consolidation and rationalization
    of the various ways in which a failing kernel splats information at
    the operator"

* tag 'mm-nonmm-stable-2025-08-03-12-47' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (80 commits)
  tools/getdelays: add backward compatibility for taskstats version
  kho: add test for kexec handover
  delaytop: enhance error logging and add PSI feature description
  samples: Kconfig: fix spelling mistake "instancess" -> "instances"
  fat: fix too many log in fat_chain_add()
  scripts/spelling.txt: add notifer||notifier to spelling.txt
  xen/xenbus: fix typo "notifer"
  net: mvneta: fix typo "notifer"
  drm/xe: fix typo "notifer"
  cxl: mce: fix typo "notifer"
  KVM: x86: fix typo "notifer"
  MAINTAINERS: add maintainers for delaytop
  ucount: use atomic_long_try_cmpxchg() in atomic_long_inc_below()
  ucount: fix atomic_long_inc_below() argument type
  kexec: enable CMA based contiguous allocation
  stackdepot: make max number of pools boot-time configurable
  lib/xxhash: remove unused functions
  init/Kconfig: restore CONFIG_BROKEN help text
  lib/raid6: update recov_rvv.c zero page usage
  docs: update docs after introducing delaytop
  ...
This commit is contained in: commit e991acf1bc

106 changed files with 2885 additions and 600 deletions

.mailmap | 1
@@ -673,6 +673,7 @@ Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com>
 Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
 Rudolf Marek <R.Marek@sh.cvut.cz>
 Rui Saraiva <rmps@joel.ist.utl.pt>
+Sachin Mokashi <sachin.mokashi@intel.com> <sachinx.mokashi@intel.com>
 Sachin P Sant <ssant@in.ibm.com>
 Sai Prakash Ranjan <quic_saipraka@quicinc.com> <saiprakash.ranjan@codeaurora.org>
 Sakari Ailus <sakari.ailus@linux.intel.com> <sakari.ailus@iki.fi>
@@ -131,3 +131,59 @@ Get IO accounting for pid 1, it works only with -p::

     linuxrc: read=65536, write=0, cancelled_write=0

 The above command can be used with -v to get more debug information.
+
+After the system starts, use `delaytop` to get the system-wide delay
+information, which includes system-wide PSI information and Top-N
+high-latency tasks.
+
+By default, `delaytop` sorts by CPU latency in descending order, displays
+the top 20 high-latency tasks, and refreshes the latency data every
+2 seconds.
+
+Get PSI information and Top-N tasks delay, since system boot::
+
+    bash# ./delaytop
+    System Pressure Information: (avg10/avg60/avg300/total)
+    CPU some:     0.0%/   0.0%/   0.0%/   345(ms)
+    CPU full:     0.0%/   0.0%/   0.0%/     0(ms)
+    Memory full:  0.0%/   0.0%/   0.0%/     0(ms)
+    Memory some:  0.0%/   0.0%/   0.0%/     0(ms)
+    IO full:      0.0%/   0.0%/   0.0%/    65(ms)
+    IO some:      0.0%/   0.0%/   0.0%/    79(ms)
+    IRQ full:     0.0%/   0.0%/   0.0%/     0(ms)
+
+    Top 20 processes (sorted by CPU delay):
+     PID   TGID  COMMAND          CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)
+    ----------------------------------------------------------------------------------------------
+     161    161  zombie_memcg_re    1.40   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+     130    130  blkcg_punt_bio     1.37   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+     444    444  scsi_tmf_0         0.73   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+    1280   1280  rsyslogd           0.53   0.04    0.00    0.00    0.00    0.00   0.00    0.00
+      12     12  ksoftirqd/0        0.47   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+    1277   1277  nbd-server         0.44   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+     308    308  kworker/2:2-sys    0.41   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+      55     55  netns              0.36   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+    1187   1187  acpid              0.31   0.03    0.00    0.00    0.00    0.00   0.00    0.00
+    6184   6184  kworker/1:2-sys    0.24   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+     186    186  kaluad             0.24   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+      18     18  ksoftirqd/1        0.24   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+     185    185  kmpath_rdacd       0.23   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+     190    190  kstrp              0.23   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+    2759   2759  agetty             0.20   0.03    0.00    0.00    0.00    0.00   0.00    0.00
+    1190   1190  kworker/0:3-sys    0.19   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+    1272   1272  sshd               0.15   0.04    0.00    0.00    0.00    0.00   0.00    0.00
+    1156   1156  license            0.15   0.11    0.00    0.00    0.00    0.00   0.00    0.00
+     134    134  md                 0.13   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+    6142   6142  kworker/3:2-xfs    0.13   0.00    0.00    0.00    0.00    0.00   0.00    0.00
+
+Dynamic interactive interface of delaytop::
+
+    # ./delaytop -p pid
+        Print delayacct stats
+    # ./delaytop -P num
+        Display the top N tasks
+    # ./delaytop -n num
+        Set the number of refresh iterations (num times)
+    # ./delaytop -d secs
+        Specify refresh interval as secs
@@ -311,6 +311,27 @@ crashkernel syntax

     crashkernel=0,low

+4) crashkernel=size,cma
+
+   Reserve additional crash kernel memory from CMA. This reservation is
+   usable by the first system's userspace memory and kernel movable
+   allocations (memory balloon, zswap). Pages allocated from this memory
+   range will not be included in the vmcore, so this should not be used
+   if dumping of userspace memory is intended, and it has to be expected
+   that some movable kernel pages may be missing from the dump.
+
+   A standard crashkernel reservation, as described above, is still
+   needed to hold the crash kernel and initrd.
+
+   This option increases the risk of a kdump failure: DMA transfers
+   configured by the first kernel may end up corrupting the second
+   kernel's memory.
+
+   This reservation method is intended for systems that can't afford to
+   sacrifice enough memory for a standard crashkernel reservation and
+   where a less reliable and possibly incomplete kdump is preferable to
+   no kdump at all.
+
 Boot into System Kernel
 -----------------------
 1) Update the boot loader (such as grub, yaboot, or lilo) configuration
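For illustration only (the sizes are invented, not recommendations): a boot
command line combining a standard reservation with an additional CMA-backed
reservation could look like::

    crashkernel=256M crashkernel=1G,cma

Here the 256M standard region holds the crash kernel and initrd, while the
1G CMA region gives the dump kernel extra working memory that the first
kernel can still use for movable allocations.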
@@ -994,6 +994,28 @@
 			0: to disable low allocation.
 			It will be ignored when crashkernel=X,high is not used
 			or memory reserved is below 4G.
+	crashkernel=size[KMG],cma
+			[KNL, X86] Reserve additional crash kernel memory from
+			CMA. This reservation is usable by the first system's
+			userspace memory and kernel movable allocations (memory
+			balloon, zswap). Pages allocated from this memory range
+			will not be included in the vmcore, so this should not
+			be used if dumping of userspace memory is intended, and
+			it has to be expected that some movable kernel pages
+			may be missing from the dump.
+
+			A standard crashkernel reservation, as described above,
+			is still needed to hold the crash kernel and initrd.
+
+			This option increases the risk of a kdump failure: DMA
+			transfers configured by the first kernel may end up
+			corrupting the second kernel's memory.
+
+			This reservation method is intended for systems that
+			can't afford to sacrifice enough memory for standard
+			crashkernel reservation and where less reliable and
+			possibly incomplete kdump is preferable to no kdump at
+			all.

 	cryptomgr.notests
 			[KNL] Disable crypto self-tests

@@ -4557,7 +4579,7 @@
 			bit 2: print timer info
 			bit 3: print locks info if CONFIG_LOCKDEP is on
 			bit 4: print ftrace buffer
-			bit 5: print all printk messages in buffer
+			bit 5: replay all messages on consoles at the end of panic
 			bit 6: print all CPUs backtrace (if available in the arch)
 			bit 7: print only tasks in uninterruptible (blocked) state
 			*Be aware* that this option may print a _lot_ of lines,

@@ -4565,6 +4587,25 @@
 			Use this option carefully, maybe worth to setup a
 			bigger log buffer with "log_buf_len" along with this.

+	panic_sys_info=	A comma-separated list of extra information to be
+			dumped on panic.
+			Format: val[,val...]
+			Where @val can be any of the following:
+
+			tasks: print all tasks info
+			mem: print system memory info
+			timers: print timers info
+			locks: print locks info if CONFIG_LOCKDEP is on
+			ftrace: print ftrace buffer
+			all_bt: print all CPUs backtrace (if available in the arch)
+			blocked_tasks: print only tasks in uninterruptible (blocked) state
+
+			This is a human-readable alternative to the
+			'panic_print' option.
+
+	panic_console_replay
+			When panic happens, replay all kernel messages on
+			consoles at the end of panic.
+
 	parkbd.port=	[HW] Parallel port number the keyboard adapter is
 			connected to, default is 0.
 			Format: <parport#>
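A usage illustration (the combination of values here is arbitrary):
requesting task, memory, and ftrace dumps on panic from the boot command
line would be::

    panic_sys_info=tasks,mem,ftrace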
@@ -7032,6 +7073,11 @@
 			consumed by the stack hash table. By default this is set
 			to false.

+	stack_depot_max_pools= [KNL,EARLY]
+			Specify the maximum number of pools to use for storing
+			stack traces. Pools are allocated on-demand up to this
+			limit. Default value is 8191 pools.
+
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
@@ -890,7 +890,7 @@ bit 1 print system memory info
 bit 2  print timer info
 bit 3  print locks info if ``CONFIG_LOCKDEP`` is on
 bit 4  print ftrace buffer
-bit 5  print all printk messages in buffer
+bit 5  replay all messages on consoles at the end of panic
 bit 6  print all CPUs backtrace (if available in the arch)
 bit 7  print only tasks in uninterruptible (blocked) state
 =====  ============================================

@@ -900,6 +900,24 @@ So for example to print tasks and memory info on panic, user can::

     echo 3 > /proc/sys/kernel/panic_print

+
+panic_sys_info
+==============
+
+A comma-separated list of extra information to be dumped on panic,
+for example, "tasks,mem,timers,...". It is a human-readable alternative
+to 'panic_print'. Possible values are:
+
+=============  ===================================================
+tasks          print all tasks info
+mem            print system memory info
+timer          print timers info
+lock           print locks info if CONFIG_LOCKDEP is on
+ftrace         print ftrace buffer
+all_bt         print all CPUs backtrace (if available in the arch)
+blocked_tasks  print only tasks in uninterruptible (blocked) state
+=============  ===================================================
+
 
 panic_on_rcu_stall
 ==================
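Assuming the sysctl file accepts the same comma-separated names as the boot
parameter (a sketch, not verified against the handler), the runtime
equivalent would be::

    echo "tasks,mem" > /proc/sys/kernel/panic_sys_info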
MAINTAINERS | 12

@@ -13544,6 +13544,7 @@ F: Documentation/admin-guide/mm/kho.rst
 F:	Documentation/core-api/kho/*
 F:	include/linux/kexec_handover.h
 F:	kernel/kexec_handover.c
+F:	tools/testing/selftests/kho/

 KEYS-ENCRYPTED
 M:	Mimi Zohar <zohar@linux.ibm.com>

@@ -19741,6 +19742,16 @@ S: Maintained
 F:	include/linux/delayacct.h
 F:	kernel/delayacct.c

+TASK DELAY MONITORING TOOLS
+M:	Andrew Morton <akpm@linux-foundation.org>
+M:	Wang Yaxin <wang.yaxin@zte.com.cn>
+M:	Fan Yu <fan.yu9@zte.com.cn>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	Documentation/accounting/delay-accounting.rst
+F:	tools/accounting/delaytop.c
+F:	tools/accounting/getdelays.c
+
 PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>

@@ -23382,6 +23393,7 @@ F: drivers/md/md*
 F:	drivers/md/raid*
 F:	include/linux/raid/
 F:	include/uapi/linux/raid/
+F:	lib/raid6/

 SOLIDRUN CLEARFOG SUPPORT
 M:	Russell King <linux@armlinux.org.uk>
@@ -17,6 +17,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mc146818rtc.h>
 #include <linux/rtc.h>
+#include <linux/string.h>
 #include <linux/module.h>
 #include <linux/memblock.h>

@@ -79,10 +80,12 @@ mk_resource_name(int pe, int port, char *str)
 {
 	char tmp[80];
 	char *name;
+	size_t sz;

-	sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port);
-	name = memblock_alloc_or_panic(strlen(tmp) + 1, SMP_CACHE_BYTES);
-	strcpy(name, tmp);
+	sz = scnprintf(tmp, sizeof(tmp), "PCI %s PE %d PORT %d", str, pe, port);
+	sz += 1; /* NUL terminator */
+	name = memblock_alloc_or_panic(sz, SMP_CACHE_BYTES);
+	strscpy(name, tmp, sz);

 	return name;
 }
@@ -1004,7 +1004,7 @@ static void __init reserve_crashkernel(void)
 	total_mem = get_total_mem();
 	ret = parse_crashkernel(boot_command_line, total_mem,
 				&crash_size, &crash_base,
-				NULL, NULL);
+				NULL, NULL, NULL);
 	/* invalid value specified or crashkernel=0 */
 	if (ret || !crash_size)
 		return;
@@ -106,7 +106,7 @@ static void __init arch_reserve_crashkernel(void)

 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 				&crash_size, &crash_base,
-				&low_size, &high);
+				&low_size, NULL, &high);
 	if (ret)
 		return;
@@ -265,7 +265,7 @@ static void __init arch_reserve_crashkernel(void)
 		return;

 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
-				&crash_size, &crash_base, &low_size, &high);
+				&crash_size, &crash_base, &low_size, NULL, &high);
 	if (ret)
 		return;
@@ -458,7 +458,7 @@ static void __init mips_parse_crashkernel(void)
 	total_mem = memblock_phys_mem_size();
 	ret = parse_crashkernel(boot_command_line, total_mem,
 				&crash_size, &crash_base,
-				NULL, NULL);
+				NULL, NULL, NULL);
 	if (ret != 0 || crash_size <= 0)
 		return;
@@ -333,7 +333,7 @@ static __init u64 fadump_calculate_reserve_size(void)
 	 * memory at a predefined offset.
 	 */
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
-				&size, &base, NULL, NULL);
+				&size, &base, NULL, NULL, NULL);
 	if (ret == 0 && size > 0) {
 		unsigned long max_size;
@@ -110,7 +110,7 @@ void __init arch_reserve_crashkernel(void)

 	/* use common parsing */
 	ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size,
-				&crash_base, NULL, NULL);
+				&crash_base, NULL, NULL, NULL);

 	if (ret)
 		return;
@@ -178,7 +178,7 @@ static void __init get_crash_kernel(void *fdt, unsigned long size)
 	int ret;

 	ret = parse_crashkernel(boot_command_line, size, &crash_size,
-				&crash_base, NULL, NULL);
+				&crash_base, NULL, NULL, NULL);
 	if (ret != 0 || crash_size == 0)
 		return;
 	if (crash_base == 0)
@@ -93,6 +93,7 @@ config RISCV
 	select CLINT_TIMER if RISCV_M_MODE
 	select CLONE_BACKWARDS
 	select COMMON_CLK
+	select CPU_NO_EFFICIENT_FFS if !RISCV_ISA_ZBB
 	select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select EDAC_SUPPORT
@@ -95,6 +95,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
 	kbuf.buf_align = PMD_SIZE;
 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
 	kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
+	kbuf.cma = NULL;
 	kbuf.top_down = false;
 	ret = arch_kexec_locate_mem_hole(&kbuf);
 	if (!ret) {
@@ -21,6 +21,8 @@
 #include <linux/efi.h>
 #include <linux/crash_dump.h>
 #include <linux/panic_notifier.h>
+#include <linux/jump_label.h>
+#include <linux/gcd.h>

 #include <asm/acpi.h>
 #include <asm/alternative.h>

@@ -362,6 +364,9 @@ void __init setup_arch(char **cmdline_p)

 	riscv_user_isa_enable();
 	riscv_spinlock_init();
+
+	if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB) || !riscv_isa_extension_available(NULL, ZBB))
+		static_branch_disable(&efficient_ffs_key);
 }

 bool arch_cpu_is_hotpluggable(int cpu)
@@ -1408,7 +1408,7 @@ static void __init arch_reserve_crashkernel(void)

 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 				&crash_size, &crash_base,
-				&low_size, &high);
+				&low_size, NULL, &high);
 	if (ret)
 		return;
@@ -605,7 +605,7 @@ static void __init reserve_crashkernel(void)
 	int rc;

 	rc = parse_crashkernel(boot_command_line, ident_map_size,
-			       &crash_size, &crash_base, NULL, NULL);
+			       &crash_size, &crash_base, NULL, NULL, NULL);

 	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
 	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
@@ -146,7 +146,7 @@ void __init reserve_crashkernel(void)
 		return;

 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
-				&crash_size, &crash_base, NULL, NULL);
+				&crash_size, &crash_base, NULL, NULL, NULL);
 	if (ret == 0 && crash_size > 0) {
 		crashk_res.start = crash_base;
 		crashk_res.end = crash_base + crash_size - 1;
@@ -163,10 +163,10 @@ static struct crash_mem *fill_up_crash_elf_data(void)
 		return NULL;

 	/*
-	 * Exclusion of crash region and/or crashk_low_res may cause
-	 * another range split. So add extra two slots here.
+	 * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges
+	 * may cause range splits. So add extra slots here.
 	 */
-	nr_ranges += 2;
+	nr_ranges += 2 + crashk_cma_cnt;
 	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
 	if (!cmem)
 		return NULL;

@@ -184,6 +184,7 @@ static struct crash_mem *fill_up_crash_elf_data(void)
 static int elf_header_exclude_ranges(struct crash_mem *cmem)
 {
 	int ret = 0;
+	int i;

 	/* Exclude the low 1M because it is always reserved */
 	ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1);

@@ -198,8 +199,17 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
 	if (crashk_low_res.end)
 		ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
 					      crashk_low_res.end);
+	if (ret)
+		return ret;

-	return ret;
+	for (i = 0; i < crashk_cma_cnt; ++i) {
+		ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start,
+					      crashk_cma_ranges[i].end);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }

 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)

@@ -374,6 +384,14 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
 		add_e820_entry(params, &ei);
 	}

+	for (i = 0; i < crashk_cma_cnt; ++i) {
+		ei.addr = crashk_cma_ranges[i].start;
+		ei.size = crashk_cma_ranges[i].end -
+			  crashk_cma_ranges[i].start + 1;
+		ei.type = E820_TYPE_RAM;
+		add_e820_entry(params, &ei);
+	}
+
 out:
 	vfree(cmem);
 	return ret;
@@ -603,7 +603,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)

 static void __init arch_reserve_crashkernel(void)
 {
-	unsigned long long crash_base, crash_size, low_size = 0;
+	unsigned long long crash_base, crash_size, low_size = 0, cma_size = 0;
 	bool high = false;
 	int ret;

@@ -612,7 +612,7 @@ static void __init arch_reserve_crashkernel(void)

 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 				&crash_size, &crash_base,
-				&low_size, &high);
+				&low_size, &cma_size, &high);
 	if (ret)
 		return;

@@ -622,6 +622,7 @@ static void __init arch_reserve_crashkernel(void)
 	}

 	reserve_crashkernel_generic(crash_size, crash_base, low_size, high);
+	reserve_crashkernel_cma(cma_size);
 }

 static struct resource standard_io_resources[] = {
@@ -641,7 +641,7 @@ static void kvm_pit_reset(struct kvm_pit *pit)
 	kvm_pit_reset_reinject(pit);
 }

-static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
+static void pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask)
 {
 	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);

@@ -763,7 +763,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)

 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
-	pit->mask_notifier.func = pit_mask_notifer;
+	pit->mask_notifier.func = pit_mask_notifier;

 	kvm_pit_reset(pit);
@@ -119,7 +119,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks,
 	for (i = 0; i < disks; i++) {
 		if (blocks[i] == NULL) {
 			BUG_ON(i > disks - 3); /* P or Q can't be zero */
-			srcs[i] = (void*)raid6_empty_zero_page;
+			srcs[i] = raid6_get_zero_page();
 		} else {
 			srcs[i] = page_address(blocks[i]) + offsets[i];

@@ -414,7 +414,7 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 	async_tx_quiesce(&submit->depend_tx);
 	for (i = 0; i < disks; i++)
 		if (blocks[i] == NULL)
-			ptrs[i] = (void *) raid6_empty_zero_page;
+			ptrs[i] = raid6_get_zero_page();
 		else
 			ptrs[i] = page_address(blocks[i]) + offs[i];

@@ -497,7 +497,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 	async_tx_quiesce(&submit->depend_tx);
 	for (i = 0; i < disks; i++)
 		if (blocks[i] == NULL)
-			ptrs[i] = (void*)raid6_empty_zero_page;
+			ptrs[i] = raid6_get_zero_page();
 		else
 			ptrs[i] = page_address(blocks[i]) + offs[i];
@@ -7,7 +7,7 @@

 #ifdef CONFIG_CXL_MCE
 int devm_cxl_register_mce_notifier(struct device *dev,
-				   struct notifier_block *mce_notifer);
+				   struct notifier_block *mce_notifier);
 #else
 static inline int
 devm_cxl_register_mce_notifier(struct device *dev,
@@ -220,8 +220,7 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable,
  */
 static int subbuf_start_callback(struct rchan_buf *buf,
 				 void *subbuf,
-				 void *prev_subbuf,
-				 size_t prev_padding)
+				 void *prev_subbuf)
 {
 	/*
 	 * Use no-overwrite mode by default, where relay will stop accepting
@@ -266,7 +266,7 @@ struct xe_vm {
 	 * up for revalidation. Protected from access with the
 	 * @invalidated_lock. Removing items from the list
 	 * additionally requires @lock in write mode, and adding
-	 * items to the list requires either the @userptr.notifer_lock in
+	 * items to the list requires either the @userptr.notifier_lock in
 	 * write mode, OR @lock in write mode.
 	 */
 	struct list_head invalidated;
@@ -4610,7 +4610,7 @@ static int mvneta_stop(struct net_device *dev)
 	/* Inform that we are stopping so we don't want to setup the
 	 * driver for new CPUs in the notifiers. The code of the
 	 * notifier for CPU online is protected by the same spinlock,
-	 * so when we get the lock, the notifer work is done.
+	 * so when we get the lock, the notifier work is done.
 	 */
 	spin_lock(&pp->lock);
 	pp->is_stopped = true;
@@ -51,8 +51,7 @@ static int ipc_trace_remove_buf_file_handler(struct dentry *dentry)
 }

 static int ipc_trace_subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
-					  void *prev_subbuf,
-					  size_t prev_padding)
+					  void *prev_subbuf)
 {
 	if (relay_buf_full(buf)) {
 		pr_err_ratelimited("Relay_buf full dropping traces");
@@ -33,7 +33,7 @@ static int t7xx_trace_remove_buf_file_handler(struct dentry *dentry)
 }

 static int t7xx_trace_subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
-					   void *prev_subbuf, size_t prev_padding)
+					   void *prev_subbuf)
 {
 	if (relay_buf_full(buf)) {
 		pr_err_ratelimited("Relay_buf full dropping traces");
@@ -356,7 +356,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)

 	if (!fat_valid_entry(sbi, entry)) {
 		fatent_brelse(fatent);
-		fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
+		fat_fs_error_ratelimit(sb, "invalid access to FAT (entry 0x%08x)", entry);
 		return -EIO;
 	}
@@ -158,8 +158,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
 		mark_inode_dirty(inode);
 	}
 	if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) {
-		fat_fs_error(sb, "clusters badly computed (%d != %llu)",
-			     new_fclus,
+		fat_fs_error_ratelimit(
+			sb, "clusters badly computed (%d != %llu)", new_fclus,
 			     (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
 		fat_cache_inval_inode(inode);
 	}
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
 		if (IS_ERR(wc->w_folios[i])) {
 			ret = PTR_ERR(wc->w_folios[i]);
 			mlog_errno(ret);
+			wc->w_folios[i] = NULL;
 			goto out;
 		}
 	}
@@ -798,6 +798,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 		}
 	}

+	if (le16_to_cpu(el->l_next_free_rec) == 0) {
+		ret = ocfs2_error(inode->i_sb,
+				  "Inode %lu has empty extent list at depth %u\n",
+				  inode->i_ino,
+				  le16_to_cpu(el->l_tree_depth));
+		goto out;
+	}
+
 	found = 0;
 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
 		rec = &el->l_recs[i];
@@ -2632,7 +2632,7 @@ again:
 					dlm_reco_master_ready(dlm),
 					msecs_to_jiffies(1000));
 		if (!dlm_reco_master_ready(dlm)) {
-			mlog(0, "%s: reco master taking awhile\n",
+			mlog(0, "%s: reco master taking a while\n",
 			     dlm->name);
 			goto again;
 		}
@@ -50,8 +50,6 @@ struct ocfs2_find_inode_args
 	unsigned int		fi_sysfile_type;
 };

-static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
-
 static int ocfs2_read_locked_inode(struct inode *inode,
 				   struct ocfs2_find_inode_args *args);
 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque);

@@ -250,14 +248,77 @@ bail:
 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 {
 	struct ocfs2_find_inode_args *args = opaque;
+#ifdef CONFIG_LOCKDEP
+	static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
 	static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
 				     ocfs2_file_ip_alloc_sem_key;
+#endif

 	inode->i_ino = args->fi_ino;
 	OCFS2_I(inode)->ip_blkno = args->fi_blkno;
-	if (args->fi_sysfile_type != 0)
-		lockdep_set_class(&inode->i_rwsem,
-			&ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+#ifdef CONFIG_LOCKDEP
+	switch (args->fi_sysfile_type) {
+	case BAD_BLOCK_SYSTEM_INODE:
+		break;
+	case GLOBAL_INODE_ALLOC_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[GLOBAL_INODE_ALLOC_SYSTEM_INODE]);
+		break;
+	case SLOT_MAP_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[SLOT_MAP_SYSTEM_INODE]);
+		break;
+	case HEARTBEAT_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[HEARTBEAT_SYSTEM_INODE]);
+		break;
+	case GLOBAL_BITMAP_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[GLOBAL_BITMAP_SYSTEM_INODE]);
+		break;
+	case USER_QUOTA_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[USER_QUOTA_SYSTEM_INODE]);
+		break;
+	case GROUP_QUOTA_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[GROUP_QUOTA_SYSTEM_INODE]);
+		break;
+	case ORPHAN_DIR_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE]);
+		break;
+	case EXTENT_ALLOC_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE]);
+		break;
+	case INODE_ALLOC_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[INODE_ALLOC_SYSTEM_INODE]);
+		break;
+	case JOURNAL_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[JOURNAL_SYSTEM_INODE]);
+		break;
+	case LOCAL_ALLOC_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[LOCAL_ALLOC_SYSTEM_INODE]);
+		break;
+	case TRUNCATE_LOG_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[TRUNCATE_LOG_SYSTEM_INODE]);
+		break;
+	case LOCAL_USER_QUOTA_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[LOCAL_USER_QUOTA_SYSTEM_INODE]);
+		break;
+	case LOCAL_GROUP_QUOTA_SYSTEM_INODE:
+		lockdep_set_class(&inode->i_rwsem,
+			&ocfs2_sysfile_lock_key[LOCAL_GROUP_QUOTA_SYSTEM_INODE]);
+		break;
+	default:
+		WARN_ONCE(1, "Unknown sysfile type %d\n", args->fi_sysfile_type);
+	}
 	if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
 	    args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
 	    args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||

@@ -267,6 +328,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 	else
 		lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
 				  &ocfs2_file_ip_alloc_sem_key);
+#endif

 	return 0;
 }
@@ -617,6 +617,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 	 */
 	credits += OCFS2_INODE_UPDATE_CREDITS + 1;

+	inode_lock(tl_inode);
+
 	/*
 	 * ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
 	 * logic, while we still need to lock the global_bitmap.

@@ -626,7 +628,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 	if (!gb_inode) {
 		mlog(ML_ERROR, "unable to get global_bitmap inode\n");
 		ret = -EIO;
-		goto out;
+		goto out_unlock_tl_inode;
 	}

 	inode_lock(gb_inode);

@@ -634,16 +636,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 	ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_unlock_gb_mutex;
+		goto out_unlock_gb_inode;
 	}

-	inode_lock(tl_inode);
-
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		mlog_errno(ret);
-		goto out_unlock_tl_inode;
+		goto out_unlock;
 	}

 	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);

@@ -703,15 +703,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 out_commit:
 	ocfs2_commit_trans(osb, handle);
 	brelse(gd_bh);

-out_unlock_tl_inode:
-	inode_unlock(tl_inode);
-
 out_unlock:
 	ocfs2_inode_unlock(gb_inode, 1);
-out_unlock_gb_mutex:
+out_unlock_gb_inode:
 	inode_unlock(gb_inode);
 	brelse(gb_bh);
 	iput(gb_inode);
+out_unlock_tl_inode:
+	inode_unlock(tl_inode);

 out:
 	if (context->meta_ac) {
@@ -142,6 +142,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,

 bail_add:
 	ret = d_splice_alias(inode, dentry);
+	if (IS_ERR(ret))
+		goto bail_unlock;

 	if (inode) {
 		/*

@@ -154,15 +156,16 @@ bail_add:
 		 * NOTE: This dentry already has ->d_op set from
 		 * ocfs2_get_parent() and ocfs2_get_dentry()
 		 */
-		if (!IS_ERR_OR_NULL(ret))
+		if (ret)
 			dentry = ret;

 		status = ocfs2_dentry_attach_lock(dentry, inode,
 						  OCFS2_I(dir)->ip_blkno);
 		if (status) {
 			mlog_errno(status);
+			if (ret)
+				dput(ret);
 			ret = ERR_PTR(status);
 			goto bail_unlock;
 		}
 	} else
 		ocfs2_dentry_attach_gen(dentry);

@@ -1452,8 +1455,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;

 		trace_ocfs2_rename_over_existing(
-				(unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
-				(unsigned long long)newfe_bh->b_blocknr : 0ULL);
+				(unsigned long long)newfe_blkno, newfe_bh,
+				(unsigned long long)newfe_bh->b_blocknr);

 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
@@ -360,7 +360,6 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
 					struct ocfs2_control_message_setn *msg)
 {
 	long nodenum;
-	char *ptr = NULL;
 	struct ocfs2_control_private *p = file->private_data;

 	if (ocfs2_control_get_handshake_state(file) !=

@@ -375,8 +374,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
 		return -EINVAL;
 	msg->space = msg->newline = '\0';

-	nodenum = simple_strtol(msg->nodestr, &ptr, 16);
-	if (!ptr || *ptr)
+	if (kstrtol(msg->nodestr, 16, &nodenum))
 		return -EINVAL;

 	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||

@@ -391,7 +389,6 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
 					   struct ocfs2_control_message_setv *msg)
 {
 	long major, minor;
-	char *ptr = NULL;
 	struct ocfs2_control_private *p = file->private_data;
 	struct ocfs2_protocol_version *max =
 		&ocfs2_user_plugin.sp_max_proto;

@@ -409,11 +406,9 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
 		return -EINVAL;
 	msg->space1 = msg->space2 = msg->newline = '\0';

-	major = simple_strtol(msg->major, &ptr, 16);
-	if (!ptr || *ptr)
+	if (kstrtol(msg->major, 16, &major))
 		return -EINVAL;
-	minor = simple_strtol(msg->minor, &ptr, 16);
-	if (!ptr || *ptr)
+	if (kstrtol(msg->minor, 16, &minor))
 		return -EINVAL;

 	/*

@@ -441,7 +436,6 @@ static int ocfs2_control_do_down_msg(struct file *file,
 				     struct ocfs2_control_message_down *msg)
 {
 	long nodenum;
-	char *p = NULL;

 	if (ocfs2_control_get_handshake_state(file) !=
 			OCFS2_CONTROL_HANDSHAKE_VALID)

@@ -456,8 +450,7 @@ static int ocfs2_control_do_down_msg(struct file *file,
 		return -EINVAL;
 	msg->space1 = msg->space2 = msg->newline = '\0';

-	nodenum = simple_strtol(msg->nodestr, &p, 16);
-	if (!p || *p)
+	if (kstrtol(msg->nodestr, 16, &nodenum))
 		return -EINVAL;

 	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
@@ -1490,10 +1490,8 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
 		return -EINVAL;

 	dump = vzalloc(sizeof(*dump));
-	if (!dump) {
-		ret = -ENOMEM;
-		goto out_err;
-	}
+	if (!dump)
+		return -ENOMEM;

 	/* Keep size of the buffer page aligned so that it can be mmaped */
 	data_size = roundup(sizeof(struct vmcoredd_header) + data->size,

@@ -1519,22 +1517,19 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
 	dump->size = data_size;

 	/* Add the dump to driver sysfs list and update the elfcore hdr */
-	mutex_lock(&vmcore_mutex);
-	if (vmcore_opened)
-		pr_warn_once("Unexpected adding of device dump\n");
-	if (vmcore_open) {
-		ret = -EBUSY;
-		goto unlock;
-	}
+	scoped_guard(mutex, &vmcore_mutex) {
+		if (vmcore_opened)
+			pr_warn_once("Unexpected adding of device dump\n");
+		if (vmcore_open) {
+			ret = -EBUSY;
+			goto out_err;
+		}

-	list_add_tail(&dump->list, &vmcoredd_list);
-	vmcoredd_update_size(data_size);
-	mutex_unlock(&vmcore_mutex);
+		list_add_tail(&dump->list, &vmcoredd_list);
+		vmcoredd_update_size(data_size);
+	}
 	return 0;

-unlock:
-	mutex_unlock(&vmcore_mutex);
-
 out_err:
 	vfree(buf);
 	vfree(dump);
@@ -80,23 +80,22 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
 		struct address_space *cache_mapping, u64 index, int length,
 		u64 read_start, u64 read_end, int page_count)
 {
-	struct page *head_to_cache = NULL, *tail_to_cache = NULL;
+	struct folio *head_to_cache = NULL, *tail_to_cache = NULL;
 	struct block_device *bdev = fullbio->bi_bdev;
 	int start_idx = 0, end_idx = 0;
-	struct bvec_iter_all iter_all;
+	struct folio_iter fi;
 	struct bio *bio = NULL;
-	struct bio_vec *bv;
 	int idx = 0;
 	int err = 0;
 #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
-	struct page **cache_pages = kmalloc_array(page_count,
-			sizeof(void *), GFP_KERNEL | __GFP_ZERO);
+	struct folio **cache_folios = kmalloc_array(page_count,
+			sizeof(*cache_folios), GFP_KERNEL | __GFP_ZERO);
 #endif

-	bio_for_each_segment_all(bv, fullbio, iter_all) {
-		struct page *page = bv->bv_page;
+	bio_for_each_folio_all(fi, fullbio) {
+		struct folio *folio = fi.folio;

-		if (page->mapping == cache_mapping) {
+		if (folio->mapping == cache_mapping) {
 			idx++;
 			continue;
 		}

@@ -111,13 +110,13 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
 		 * adjacent blocks.
 		 */
 		if (idx == 0 && index != read_start)
-			head_to_cache = page;
+			head_to_cache = folio;
 		else if (idx == page_count - 1 && index + length != read_end)
-			tail_to_cache = page;
+			tail_to_cache = folio;
 #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
 		/* Cache all pages in the BIO for repeated reads */
-		else if (cache_pages)
-			cache_pages[idx] = page;
+		else if (cache_folios)
+			cache_folios[idx] = folio;
 #endif

 		if (!bio || idx != end_idx) {

@@ -150,45 +149,45 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
 		return err;

 	if (head_to_cache) {
-		int ret = add_to_page_cache_lru(head_to_cache, cache_mapping,
+		int ret = filemap_add_folio(cache_mapping, head_to_cache,
 						read_start >> PAGE_SHIFT,
 						GFP_NOIO);

 		if (!ret) {
-			SetPageUptodate(head_to_cache);
-			unlock_page(head_to_cache);
+			folio_mark_uptodate(head_to_cache);
+			folio_unlock(head_to_cache);
 		}

 	}

 	if (tail_to_cache) {
-		int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping,
+		int ret = filemap_add_folio(cache_mapping, tail_to_cache,
 						(read_end >> PAGE_SHIFT) - 1,
 						GFP_NOIO);

 		if (!ret) {
-			SetPageUptodate(tail_to_cache);
-			unlock_page(tail_to_cache);
+			folio_mark_uptodate(tail_to_cache);
+			folio_unlock(tail_to_cache);
 		}
 	}

 #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
-	if (!cache_pages)
+	if (!cache_folios)
 		goto out;

 	for (idx = 0; idx < page_count; idx++) {
-		if (!cache_pages[idx])
+		if (!cache_folios[idx])
 			continue;
-		int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping,
+		int ret = filemap_add_folio(cache_mapping, cache_folios[idx],
 						(read_start >> PAGE_SHIFT) + idx,
 						GFP_NOIO);

 		if (!ret) {
-			SetPageUptodate(cache_pages[idx]);
-			unlock_page(cache_pages[idx]);
+			folio_mark_uptodate(cache_folios[idx]);
+			folio_unlock(cache_folios[idx]);
 		}
 	}
-	kfree(cache_pages);
+	kfree(cache_folios);
 out:
 #endif
 	return 0;
@@ -493,10 +493,9 @@ out:
 	return res;
 }

-static int squashfs_readahead_fragment(struct page **page,
+static int squashfs_readahead_fragment(struct inode *inode, struct page **page,
 	unsigned int pages, unsigned int expected, loff_t start)
 {
-	struct inode *inode = page[0]->mapping->host;
 	struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
 		squashfs_i(inode)->fragment_block,
 		squashfs_i(inode)->fragment_size);

@@ -605,8 +604,8 @@ static void squashfs_readahead(struct readahead_control *ractl)

 		if (start >> msblk->block_log == file_end &&
 		    squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) {
-			res = squashfs_readahead_fragment(pages, nr_pages,
-							  expected, start);
+			res = squashfs_readahead_fragment(inode, pages,
+							  nr_pages, expected, start);
 			if (res)
 				goto skip_pages;
 			continue;
@@ -13,10 +13,23 @@
  */
 extern struct resource crashk_res;
 extern struct resource crashk_low_res;
+extern struct range crashk_cma_ranges[];
+#if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION)
+#define CRASHKERNEL_CMA
+#define CRASHKERNEL_CMA_RANGES_MAX 4
+extern int crashk_cma_cnt;
+#else
+#define crashk_cma_cnt 0
+#define CRASHKERNEL_CMA_RANGES_MAX 0
+#endif

 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base,
-		unsigned long long *low_size, bool *high);
+		unsigned long long *low_size, unsigned long long *cma_size,
+		bool *high);
+
+void __init reserve_crashkernel_cma(unsigned long long cma_size);

 #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
 #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
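As a usage sketch of the new signature (mirroring the x86 call site shown
elsewhere in this commit; error handling trimmed), an architecture adopting
the extra out-parameter would do:

	unsigned long long crash_size, crash_base, low_size = 0, cma_size = 0;
	bool high = false;
	int ret;

	/* cma_size may be passed as NULL by arches without CMA support */
	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&crash_size, &crash_base, &low_size,
				&cma_size, &high);
	if (!ret)
		reserve_crashkernel_cma(cma_size);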
@@ -3,6 +3,9 @@
 #define _GCD_H

 #include <linux/compiler.h>
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_TRUE(efficient_ffs_key);

 unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__;
@@ -23,13 +23,13 @@
  * Type encoding:
  * 00 - Blocked on mutex			(BLOCKER_TYPE_MUTEX)
  * 01 - Blocked on semaphore			(BLOCKER_TYPE_SEM)
- * 10 - Blocked on rt-mutex			(BLOCKER_TYPE_RTMUTEX)
- * 11 - Blocked on rw-semaphore			(BLOCKER_TYPE_RWSEM)
+ * 10 - Blocked on rw-semaphore as READER	(BLOCKER_TYPE_RWSEM_READER)
+ * 11 - Blocked on rw-semaphore as WRITER	(BLOCKER_TYPE_RWSEM_WRITER)
  */
 #define BLOCKER_TYPE_MUTEX		0x00UL
 #define BLOCKER_TYPE_SEM		0x01UL
-#define BLOCKER_TYPE_RTMUTEX		0x02UL
-#define BLOCKER_TYPE_RWSEM		0x03UL
+#define BLOCKER_TYPE_RWSEM_READER	0x02UL
+#define BLOCKER_TYPE_RWSEM_WRITER	0x03UL

 #define BLOCKER_TYPE_MASK		0x03UL
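A sketch of the two-bit encoding this header describes (the helper names are
invented for illustration; it relies on lock pointers being at least 4-byte
aligned so the two low bits are free to carry the type):

	static unsigned long blocker_encode(struct rw_semaphore *sem)
	{
		/* stash the blocker type in the low bits of the pointer */
		return (unsigned long)sem | BLOCKER_TYPE_RWSEM_READER;
	}

	static struct rw_semaphore *blocker_decode(unsigned long blocker)
	{
		/* mask the type bits off to recover the lock pointer */
		return (struct rw_semaphore *)(blocker & ~BLOCKER_TYPE_MASK);
	}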
@@ -24,7 +24,7 @@
  * Jozsef
  */
 #include <linux/bitops.h>
-#include <linux/unaligned/packed_struct.h>
+#include <linux/unaligned.h>

 /* Best hash sizes are of power of two */
 #define jhash_size(n)   ((u32)1<<(n))

@@ -77,9 +77,9 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)

 	/* All but the last block: affect some 32 bits of (a,b,c) */
 	while (length > 12) {
-		a += __get_unaligned_cpu32(k);
-		b += __get_unaligned_cpu32(k + 4);
-		c += __get_unaligned_cpu32(k + 8);
+		a += get_unaligned((u32 *)k);
+		b += get_unaligned((u32 *)(k + 4));
+		c += get_unaligned((u32 *)(k + 8));
 		__jhash_mix(a, b, c);
 		length -= 12;
 		k += 12;
@@ -79,6 +79,12 @@ extern note_buf_t __percpu *crash_notes;

 typedef unsigned long kimage_entry_t;

+/*
+ * This is a copy of the UAPI struct kexec_segment and must be identical
+ * to it because it gets copied straight from user space into kernel
+ * memory. Do not modify this structure unless you change the way segments
+ * get ingested from user space.
+ */
 struct kexec_segment {
 	/*
 	 * This pointer can point to user memory if kexec_load() system

@@ -172,6 +178,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image);
 * @buf_align:	Minimum alignment needed.
 * @buf_min:	The buffer can't be placed below this address.
 * @buf_max:	The buffer can't be placed above this address.
+ * @cma:	CMA page if the buffer is backed by CMA.
 * @top_down:	Allocate from top of memory.
 * @random:	Place the buffer at a random position.
 */

@@ -184,6 +191,7 @@ struct kexec_buf {
 	unsigned long buf_align;
 	unsigned long buf_min;
 	unsigned long buf_max;
+	struct page *cma;
 	bool top_down;
 #ifdef CONFIG_CRASH_DUMP
 	bool random;

@@ -340,6 +348,7 @@ struct kimage {

 	unsigned long nr_segments;
 	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
+	struct page *segment_cma[KEXEC_SEGMENT_MAX];

 	struct list_head control_pages;
 	struct list_head dest_pages;

@@ -361,6 +370,7 @@ struct kimage {
 	 */
 	unsigned int hotplug_support:1;
 #endif
+	unsigned int no_cma:1;

 #ifdef ARCH_HAS_KIMAGE_ARCH
 	struct kimage_arch arch;
@@ -11,8 +11,13 @@
 #ifdef __KERNEL__

 #include <linux/blkdev.h>
+#include <linux/mm.h>

-extern const char raid6_empty_zero_page[PAGE_SIZE];
+/* This should be const but the raid6 code is too convoluted for that. */
+static inline void *raid6_get_zero_page(void)
+{
+	return page_address(ZERO_PAGE(0));
+}

 #else /* ! __KERNEL__ */
 /* Used for testing in user space */

@@ -191,6 +196,11 @@ static inline uint32_t raid6_jiffies(void)
 	return tv.tv_sec*1000 + tv.tv_usec/1000;
 }

+static inline void *raid6_get_zero_page(void)
+{
+	return raid6_empty_zero_page;
+}
+
 #endif /* ! __KERNEL__ */

 #endif /* LINUX_RAID_RAID6_H */
@@ -28,6 +28,22 @@
  */
 #define RELAYFS_CHANNEL_VERSION		7

+/*
+ * Relay buffer statistics
+ */
+enum {
+	RELAY_STATS_BUF_FULL = (1 << 0),
+	RELAY_STATS_WRT_BIG = (1 << 1),
+
+	RELAY_STATS_LAST = RELAY_STATS_WRT_BIG,
+};
+
+struct rchan_buf_stats
+{
+	unsigned int full_count;	/* counter for buffer full */
+	unsigned int big_count;		/* counter for too big to write */
+};
+
 /*
  * Per-cpu relay channel buffer
  */

@@ -43,11 +59,11 @@ struct rchan_buf
 	struct irq_work wakeup_work;	/* reader wakeup */
 	struct dentry *dentry;		/* channel file dentry */
 	struct kref kref;		/* channel buffer refcount */
+	struct rchan_buf_stats stats;	/* buffer stats */
 	struct page **page_array;	/* array of current buffer pages */
 	unsigned int page_count;	/* number of current buffer pages */
 	unsigned int finalized;		/* buffer has been finalized */
 	size_t *padding;		/* padding counts per sub-buffer */
-	size_t prev_padding;		/* temporary variable */
 	size_t bytes_consumed;		/* bytes consumed in cur read subbuf */
 	size_t early_bytes;		/* bytes consumed before VFS inited */
 	unsigned int cpu;		/* this buf's cpu */

@@ -65,7 +81,6 @@ struct rchan
 	const struct rchan_callbacks *cb; /* client callbacks */
 	struct kref kref;		/* channel refcount */
 	void *private_data;		/* for user-defined data */
-	size_t last_toobig;		/* tried to log event > subbuf size */
 	struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */
 	int is_global;			/* One global buffer ? */
 	struct list_head list;		/* for channel list */

@@ -84,7 +99,6 @@ struct rchan_callbacks
 	 * @buf: the channel buffer containing the new sub-buffer
 	 * @subbuf: the start of the new sub-buffer
 	 * @prev_subbuf: the start of the previous sub-buffer
-	 * @prev_padding: unused space at the end of previous sub-buffer
 	 *
 	 * The client should return 1 to continue logging, 0 to stop
 	 * logging.

@@ -100,8 +114,7 @@ struct rchan_callbacks
 	 */
 	int (*subbuf_start) (struct rchan_buf *buf,
 			     void *subbuf,
-			     void *prev_subbuf,
-			     size_t prev_padding);
+			     void *prev_subbuf);

 	/*
 	 * create_buf_file - create file to represent a relay channel buffer

@@ -161,6 +174,7 @@ struct rchan *relay_open(const char *base_filename,
 			 void *private_data);
 extern void relay_close(struct rchan *chan);
 extern void relay_flush(struct rchan *chan);
+size_t relay_stats(struct rchan *chan, int flags);
 extern void relay_subbufs_consumed(struct rchan *chan,
 				   unsigned int cpu,
 				   size_t consumed);
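Under the new prototype, a minimal client callback reduces to something like
this sketch (the function name is invented; the drivers patched elsewhere in
this commit have the same shape, and the kerneldoc above defines the
return-1-to-continue contract):

	static int my_subbuf_start(struct rchan_buf *buf, void *subbuf,
				   void *prev_subbuf)
	{
		/* no-overwrite mode: stop accepting data once the buffer fills */
		return !relay_buf_full(buf);
	}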
@@ -132,6 +132,18 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem)
 	return !list_empty(&sem->wait_list);
 }

+#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
+/*
+ * Return just the real task structure pointer of the owner
+ */
+extern struct task_struct *rwsem_owner(struct rw_semaphore *sem);
+
+/*
+ * Return true if the rwsem is owned by a reader.
+ */
+extern bool is_rwsem_reader_owned(struct rw_semaphore *sem);
+#endif
+
 #else /* !CONFIG_PREEMPT_RT */

 #include <linux/rwbase_rt.h>
include/linux/sys_info.h | 28 (new file)

@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SYS_INFO_H
+#define _LINUX_SYS_INFO_H
+
+#include <linux/sysctl.h>
+
+/*
+ * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special
+ * handling which only fits panic case.
+ */
+#define SYS_INFO_TASKS			0x00000001
+#define SYS_INFO_MEM			0x00000002
+#define SYS_INFO_TIMERS			0x00000004
+#define SYS_INFO_LOCKS			0x00000008
+#define SYS_INFO_FTRACE			0x00000010
+#define SYS_INFO_PANIC_CONSOLE_REPLAY	0x00000020
+#define SYS_INFO_ALL_CPU_BT		0x00000040
+#define SYS_INFO_BLOCKED_TASKS		0x00000080
+
+void sys_info(unsigned long si_mask);
+unsigned long sys_info_parse_param(char *str);
+
+#ifdef CONFIG_SYSCTL
+int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
+			    void *buffer, size_t *lenp,
+			    loff_t *ppos);
+#endif
+#endif /* _LINUX_SYS_INFO_H */
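A caller-side sketch of this new interface (the call site is invented for
illustration; the mask values are the ones defined above):

	#include <linux/sys_info.h>

	/* dump task and memory state, e.g. from a panic path */
	sys_info(SYS_INFO_TASKS | SYS_INFO_MEM);

sys_info_parse_param() appears to be the piece that turns a string such as
"tasks,mem" from the panic_sys_info= parameter into this mask.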
@@ -177,32 +177,6 @@ struct xxh64_state {
  */
 void xxh32_reset(struct xxh32_state *state, uint32_t seed);

-/**
- * xxh32_update() - hash the data given and update the xxh32 state
- *
- * @state: The xxh32 state to update.
- * @input: The data to hash.
- * @length: The length of the data to hash.
- *
- * After calling xxh32_reset() call xxh32_update() as many times as necessary.
- *
- * Return: Zero on success, otherwise an error code.
- */
-int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
-
-/**
- * xxh32_digest() - produce the current xxh32 hash
- *
- * @state: Produce the current xxh32 hash of this state.
- *
- * A hash value can be produced at any time. It is still possible to continue
- * inserting input into the hash state after a call to xxh32_digest(), and
- * generate new hashes later on, by calling xxh32_digest() again.
- *
- * Return: The xxh32 hash stored in the state.
- */
-uint32_t xxh32_digest(const struct xxh32_state *state);
-
 /**
  * xxh64_reset() - reset the xxh64 state to start a new hashing operation
  *
@@ -27,6 +27,7 @@
 #define KEXEC_FILE_ON_CRASH	0x00000002
 #define KEXEC_FILE_NO_INITRAMFS	0x00000004
 #define KEXEC_FILE_DEBUG	0x00000008
+#define KEXEC_FILE_NO_CMA	0x00000010

 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
@@ -178,7 +178,7 @@ int xenbus_printf(struct xenbus_transaction t,
  * sprintf-style type string, and pointer. Returns 0 or errno.*/
 int xenbus_gather(struct xenbus_transaction t, const char *dir, ...);

-/* notifer routines for when the xenstore comes up */
+/* notifier routines for when the xenstore comes up */
 extern int xenstored_ready;
 int register_xenstore_notifier(struct notifier_block *nb);
 void unregister_xenstore_notifier(struct notifier_block *nb);
@@ -172,6 +172,10 @@ menu "General setup"

 config BROKEN
 	bool
+	help
+	  This option allows you to choose whether you want to try to
+	  compile (and fix) old drivers that haven't been updated to
+	  new infrastructure.

 config BROKEN_ON_SMP
 	bool
@@ -1587,7 +1587,11 @@ static noinline void __init kernel_init_freeable(void)
 	 * check if there is an early userspace init. If yes, let it do all
 	 * the work
 	 */
-	if (init_eaccess(ramdisk_execute_command) != 0) {
+	int ramdisk_command_access;
+	ramdisk_command_access = init_eaccess(ramdisk_execute_command);
+	if (ramdisk_command_access != 0) {
+		pr_warn("check access for rdinit=%s failed: %i, ignoring\n",
+			ramdisk_execute_command, ramdisk_command_access);
 		ramdisk_execute_command = NULL;
 		prepare_namespace();
 	}
|
|||
#include <linux/reboot.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/objtool.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/sections.h>
|
||||
|
@ -33,6 +34,11 @@
|
|||
/* Per cpu memory for storing cpu states in case of system crash. */
|
||||
note_buf_t __percpu *crash_notes;
|
||||
|
||||
/* time to wait for possible DMA to finish before starting the kdump kernel
|
||||
* when a CMA reservation is used
|
||||
*/
|
||||
#define CMA_DMA_TIMEOUT_SEC 10
|
||||
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
|
||||
int kimage_crash_copy_vmcoreinfo(struct kimage *image)
|
||||
|
@ -97,6 +103,14 @@ int kexec_crash_loaded(void)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kexec_crash_loaded);
|
||||
|
||||
static void crash_cma_clear_pending_dma(void)
|
||||
{
|
||||
if (!crashk_cma_cnt)
|
||||
return;
|
||||
|
||||
mdelay(CMA_DMA_TIMEOUT_SEC * 1000);
|
||||
}
|
||||
|
||||
/*
|
||||
* No panic_cpu check version of crash_kexec(). This function is called
|
||||
* only when panic_cpu holds the current CPU number; this is the only CPU
|
||||
|
@ -119,6 +133,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
|
|||
crash_setup_regs(&fixed_regs, regs);
|
||||
crash_save_vmcoreinfo();
|
||||
machine_crash_shutdown(&fixed_regs);
|
||||
crash_cma_clear_pending_dma();
|
||||
machine_kexec(kexec_crash_image);
|
||||
}
|
||||
kexec_unlock();
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
#include <linux/cpuhotplug.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/cma.h>
|
||||
#include <linux/crash_reserve.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/sections.h>
|
||||
|
@ -172,17 +174,19 @@ static int __init parse_crashkernel_simple(char *cmdline,
|
|||
|
||||
#define SUFFIX_HIGH 0
|
||||
#define SUFFIX_LOW 1
|
||||
#define SUFFIX_NULL 2
|
||||
#define SUFFIX_CMA 2
|
||||
#define SUFFIX_NULL 3
|
||||
static __initdata char *suffix_tbl[] = {
|
||||
[SUFFIX_HIGH] = ",high",
|
||||
[SUFFIX_LOW] = ",low",
|
||||
[SUFFIX_CMA] = ",cma",
|
||||
[SUFFIX_NULL] = NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
* That function parses "suffix" crashkernel command lines like
|
||||
*
|
||||
* crashkernel=size,[high|low]
|
||||
* crashkernel=size,[high|low|cma]
|
||||
*
|
||||
* It returns 0 on success and -EINVAL on failure.
|
||||
*/
|
||||
|
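As illustration only (all sizes invented), the suffixes can be combined on one boot command line; the ,cma entry feeds the new reserve_crashkernel_cma() path shown below:

    crashkernel=256M,high crashkernel=64M,low crashkernel=512M,cma
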
@@ -298,9 +302,11 @@ int __init parse_crashkernel(char *cmdline,
                 unsigned long long *crash_size,
                 unsigned long long *crash_base,
                 unsigned long long *low_size,
                 unsigned long long *cma_size,
                 bool *high)
{
    int ret;
    unsigned long long __always_unused cma_base;

    /* crashkernel=X[@offset] */
    ret = __parse_crashkernel(cmdline, system_ram, crash_size,

@@ -331,6 +337,14 @@ int __init parse_crashkernel(char *cmdline,

        *high = true;
    }

    /*
     * optional CMA reservation
     * cma_base is ignored
     */
    if (cma_size)
        __parse_crashkernel(cmdline, 0, cma_size,
                    &cma_base, suffix_tbl[SUFFIX_CMA]);
#endif
    if (!*crash_size)
        ret = -EINVAL;

@@ -457,6 +471,56 @@ retry:
#endif
}

struct range crashk_cma_ranges[CRASHKERNEL_CMA_RANGES_MAX];
#ifdef CRASHKERNEL_CMA
int crashk_cma_cnt;
void __init reserve_crashkernel_cma(unsigned long long cma_size)
{
    unsigned long long request_size = roundup(cma_size, PAGE_SIZE);
    unsigned long long reserved_size = 0;

    if (!cma_size)
        return;

    while (cma_size > reserved_size &&
           crashk_cma_cnt < CRASHKERNEL_CMA_RANGES_MAX) {

        struct cma *res;

        if (cma_declare_contiguous(0, request_size, 0, 0, 0, false,
                       "crashkernel", &res)) {
            /* reservation failed, try half-sized blocks */
            if (request_size <= PAGE_SIZE)
                break;

            request_size = roundup(request_size / 2, PAGE_SIZE);
            continue;
        }

        crashk_cma_ranges[crashk_cma_cnt].start = cma_get_base(res);
        crashk_cma_ranges[crashk_cma_cnt].end =
            crashk_cma_ranges[crashk_cma_cnt].start +
            cma_get_size(res) - 1;
        ++crashk_cma_cnt;
        reserved_size += request_size;
    }

    if (cma_size > reserved_size)
        pr_warn("crashkernel CMA reservation failed: %lld MB requested, %lld MB reserved in %d ranges\n",
            cma_size >> 20, reserved_size >> 20, crashk_cma_cnt);
    else
        pr_info("crashkernel CMA reserved: %lld MB in %d ranges\n",
            reserved_size >> 20, crashk_cma_cnt);
}

#else /* CRASHKERNEL_CMA */
void __init reserve_crashkernel_cma(unsigned long long cma_size)
{
    if (cma_size)
        pr_warn("crashkernel CMA reservation not supported\n");
}
#endif

#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
@@ -580,8 +580,8 @@ retry:

out:
    /* Revert back reference counter if instruction update failed. */
    if (ret < 0 && is_register && ref_ctr_updated)
        update_ref_ctr(uprobe, mm, -1);
    if (ret < 0 && ref_ctr_updated)
        update_ref_ctr(uprobe, mm, is_register ? -1 : 1);

    /* try collapse pmd for compound page */
    if (ret > 0)

@@ -693,12 +693,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
}

/*
 * This does two things:
 *
 * A. Make init inherit all the child processes
 * B. Check to see if any process groups have become orphaned
 *    as a result of our exiting, and if they have any stopped
 *    jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
 * Make init inherit all the child processes
 */
static void forget_original_parent(struct task_struct *father,
                   struct list_head *dead)

@@ -189,33 +189,33 @@ static inline void free_task_struct(struct task_struct *tsk)
    kmem_cache_free(task_struct_cachep, tsk);
}

/*
 * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
 * kmemcache based allocator.
 */
# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)

# ifdef CONFIG_VMAP_STACK
#ifdef CONFIG_VMAP_STACK
/*
 * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
 * flush. Try to minimize the number of calls by caching stacks.
 */
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
/*
 * Allocated stacks are cached and later reused by new threads, so memcg
 * accounting is performed by the code assigning/releasing stacks to tasks.
 * We need a zeroed memory without __GFP_ACCOUNT.
 */
#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO)

struct vm_stack {
    struct rcu_head rcu;
    struct vm_struct *stack_vm_area;
};

static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area)
{
    unsigned int i;

    for (i = 0; i < NR_CACHED_STACKS; i++) {
        struct vm_struct *tmp = NULL;

        if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm))
        if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area))
            return true;
    }
    return false;

@@ -224,11 +224,12 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
static void thread_stack_free_rcu(struct rcu_head *rh)
{
    struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
    struct vm_struct *vm_area = vm_stack->stack_vm_area;

    if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
        return;

    vfree(vm_stack);
    vfree(vm_area->addr);
}

static void thread_stack_delayed_free(struct task_struct *tsk)

@@ -241,32 +242,32 @@ static void thread_stack_delayed_free(struct task_struct *tsk)

static int free_vm_stack_cache(unsigned int cpu)
{
    struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
    struct vm_struct **cached_vm_stack_areas = per_cpu_ptr(cached_stacks, cpu);
    int i;

    for (i = 0; i < NR_CACHED_STACKS; i++) {
        struct vm_struct *vm_stack = cached_vm_stacks[i];
        struct vm_struct *vm_area = cached_vm_stack_areas[i];

        if (!vm_stack)
        if (!vm_area)
            continue;

        vfree(vm_stack->addr);
        cached_vm_stacks[i] = NULL;
        vfree(vm_area->addr);
        cached_vm_stack_areas[i] = NULL;
    }

    return 0;
}

static int memcg_charge_kernel_stack(struct vm_struct *vm)
static int memcg_charge_kernel_stack(struct vm_struct *vm_area)
{
    int i;
    int ret;
    int nr_charged = 0;

    BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
    BUG_ON(vm_area->nr_pages != THREAD_SIZE / PAGE_SIZE);

    for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
        ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
        ret = memcg_kmem_charge_page(vm_area->pages[i], GFP_KERNEL, 0);
        if (ret)
            goto err;
        nr_charged++;

@@ -274,55 +275,47 @@ static int memcg_charge_kernel_stack(struct vm_struct *vm)
    return 0;
err:
    for (i = 0; i < nr_charged; i++)
        memcg_kmem_uncharge_page(vm->pages[i], 0);
        memcg_kmem_uncharge_page(vm_area->pages[i], 0);
    return ret;
}

static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
    struct vm_struct *vm;
    struct vm_struct *vm_area;
    void *stack;
    int i;

    for (i = 0; i < NR_CACHED_STACKS; i++) {
        struct vm_struct *s;

        s = this_cpu_xchg(cached_stacks[i], NULL);

        if (!s)
        vm_area = this_cpu_xchg(cached_stacks[i], NULL);
        if (!vm_area)
            continue;

        /* Reset stack metadata. */
        kasan_unpoison_range(s->addr, THREAD_SIZE);
        kasan_unpoison_range(vm_area->addr, THREAD_SIZE);

        stack = kasan_reset_tag(s->addr);
        stack = kasan_reset_tag(vm_area->addr);

        /* Clear stale pointers from reused stack. */
        memset(stack, 0, THREAD_SIZE);

        if (memcg_charge_kernel_stack(s)) {
            vfree(s->addr);
        if (memcg_charge_kernel_stack(vm_area)) {
            vfree(vm_area->addr);
            return -ENOMEM;
        }

        tsk->stack_vm_area = s;
        tsk->stack_vm_area = vm_area;
        tsk->stack = stack;
        return 0;
    }

    /*
     * Allocated stacks are cached and later reused by new threads,
     * so memcg accounting is performed manually on assigning/releasing
     * stacks to tasks. Drop __GFP_ACCOUNT.
     */
    stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
                   THREADINFO_GFP & ~__GFP_ACCOUNT,
                   GFP_VMAP_STACK,
                   node, __builtin_return_address(0));
    if (!stack)
        return -ENOMEM;

    vm = find_vm_area(stack);
    if (memcg_charge_kernel_stack(vm)) {
    vm_area = find_vm_area(stack);
    if (memcg_charge_kernel_stack(vm_area)) {
        vfree(stack);
        return -ENOMEM;
    }

@@ -331,7 +324,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
     * free_thread_stack() can be called in interrupt context,
     * so cache the vm_struct.
     */
    tsk->stack_vm_area = vm;
    tsk->stack_vm_area = vm_area;
    stack = kasan_reset_tag(stack);
    tsk->stack = stack;
    return 0;

@@ -346,7 +339,13 @@ static void free_thread_stack(struct task_struct *tsk)
    tsk->stack_vm_area = NULL;
}

# else /* !CONFIG_VMAP_STACK */
#else /* !CONFIG_VMAP_STACK */

/*
 * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
 * kmemcache based allocator.
 */
#if THREAD_SIZE >= PAGE_SIZE

static void thread_stack_free_rcu(struct rcu_head *rh)
{

@@ -378,8 +377,7 @@ static void free_thread_stack(struct task_struct *tsk)
    tsk->stack = NULL;
}

# endif /* CONFIG_VMAP_STACK */
# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
#else /* !(THREAD_SIZE >= PAGE_SIZE) */

static struct kmem_cache *thread_stack_cache;

@@ -418,7 +416,8 @@ void thread_stack_cache_init(void)
    BUG_ON(thread_stack_cache == NULL);
}

# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
#endif /* THREAD_SIZE >= PAGE_SIZE */
#endif /* CONFIG_VMAP_STACK */

/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;

@@ -438,11 +437,11 @@ static struct kmem_cache *mm_cachep;
static void account_kernel_stack(struct task_struct *tsk, int account)
{
    if (IS_ENABLED(CONFIG_VMAP_STACK)) {
        struct vm_struct *vm = task_stack_vm_area(tsk);
        struct vm_struct *vm_area = task_stack_vm_area(tsk);
        int i;

        for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
            mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
            mod_lruvec_page_state(vm_area->pages[i], NR_KERNEL_STACK_KB,
                          account * (PAGE_SIZE / 1024));
    } else {
        void *stack = task_stack_page(tsk);

@@ -458,12 +457,12 @@ void exit_task_stack_account(struct task_struct *tsk)
    account_kernel_stack(tsk, -1);

    if (IS_ENABLED(CONFIG_VMAP_STACK)) {
        struct vm_struct *vm;
        struct vm_struct *vm_area;
        int i;

        vm = task_stack_vm_area(tsk);
        vm_area = task_stack_vm_area(tsk);
        for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
            memcg_kmem_uncharge_page(vm->pages[i], 0);
            memcg_kmem_uncharge_page(vm_area->pages[i], 0);
    }
}
@@ -23,6 +23,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/hung_task.h>
#include <linux/rwsem.h>

#include <trace/events/sched.h>

@@ -100,6 +101,7 @@ static void debug_show_blocker(struct task_struct *task)
{
    struct task_struct *g, *t;
    unsigned long owner, blocker, blocker_type;
    const char *rwsem_blocked_by, *rwsem_blocked_as;

    RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");

@@ -111,12 +113,20 @@ static void debug_show_blocker(struct task_struct *task)

    switch (blocker_type) {
    case BLOCKER_TYPE_MUTEX:
        owner = mutex_get_owner(
            (struct mutex *)hung_task_blocker_to_lock(blocker));
        owner = mutex_get_owner(hung_task_blocker_to_lock(blocker));
        break;
    case BLOCKER_TYPE_SEM:
        owner = sem_last_holder(
            (struct semaphore *)hung_task_blocker_to_lock(blocker));
        owner = sem_last_holder(hung_task_blocker_to_lock(blocker));
        break;
    case BLOCKER_TYPE_RWSEM_READER:
    case BLOCKER_TYPE_RWSEM_WRITER:
        owner = (unsigned long)rwsem_owner(
            hung_task_blocker_to_lock(blocker));
        rwsem_blocked_as = (blocker_type == BLOCKER_TYPE_RWSEM_READER) ?
            "reader" : "writer";
        rwsem_blocked_by = is_rwsem_reader_owned(
            hung_task_blocker_to_lock(blocker)) ?
            "reader" : "writer";
        break;
    default:
        WARN_ON_ONCE(1);

@@ -134,6 +144,11 @@ static void debug_show_blocker(struct task_struct *task)
            pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
                task->comm, task->pid);
            break;
        case BLOCKER_TYPE_RWSEM_READER:
        case BLOCKER_TYPE_RWSEM_WRITER:
            pr_err("INFO: task %s:%d is blocked on an rw-semaphore, but the owner is not found.\n",
                task->comm, task->pid);
            break;
        }
        return;
    }

@@ -152,6 +167,12 @@ static void debug_show_blocker(struct task_struct *task)
            pr_err("INFO: task %s:%d blocked on a semaphore likely last held by task %s:%d\n",
                task->comm, task->pid, t->comm, t->pid);
            break;
        case BLOCKER_TYPE_RWSEM_READER:
        case BLOCKER_TYPE_RWSEM_WRITER:
            pr_err("INFO: task %s:%d <%s> blocked on an rw-semaphore likely owned by task %s:%d <%s>\n",
                task->comm, task->pid, rwsem_blocked_as, t->comm,
                t->pid, rwsem_blocked_by);
            break;
        }
        sched_show_task(t);
        return;

@@ -552,7 +552,7 @@ static int kcov_get_mode(unsigned long arg)

/*
 * Fault in a lazily-faulted vmalloc area before it can be used by
 * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the
 * __sanitizer_cov_trace_pc(), to avoid recursion issues if any code on the
 * vmalloc fault handling path is instrumented.
 */
static void kcov_fault_in_area(struct kcov *kcov)
@@ -152,7 +152,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
        goto out;

    for (i = 0; i < nr_segments; i++) {
        ret = kimage_load_segment(image, &image->segment[i]);
        ret = kimage_load_segment(image, i);
        if (ret)
            goto out;
    }

@@ -40,6 +40,7 @@
#include <linux/hugetlb.h>
#include <linux/objtool.h>
#include <linux/kmsg_dump.h>
#include <linux/dma-map-ops.h>

#include <asm/page.h>
#include <asm/sections.h>

@@ -553,6 +554,24 @@ static void kimage_free_entry(kimage_entry_t entry)
    kimage_free_pages(page);
}

static void kimage_free_cma(struct kimage *image)
{
    unsigned long i;

    for (i = 0; i < image->nr_segments; i++) {
        struct page *cma = image->segment_cma[i];
        u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT;

        if (!cma)
            continue;

        arch_kexec_pre_free_pages(page_address(cma), nr_pages);
        dma_release_from_contiguous(NULL, cma, nr_pages);
        image->segment_cma[i] = NULL;
    }

}

void kimage_free(struct kimage *image)
{
    kimage_entry_t *ptr, entry;

@@ -591,6 +610,9 @@ void kimage_free(struct kimage *image)
    /* Free the kexec control pages... */
    kimage_free_page_list(&image->control_pages);

    /* Free CMA allocations */
    kimage_free_cma(image);

    /*
     * Free up any temporary buffers allocated. This might hit if
     * error occurred much later after buffer allocation.

@@ -716,9 +738,69 @@ static struct page *kimage_alloc_page(struct kimage *image,
    return page;
}

static int kimage_load_normal_segment(struct kimage *image,
                      struct kexec_segment *segment)
static int kimage_load_cma_segment(struct kimage *image, int idx)
{
    struct kexec_segment *segment = &image->segment[idx];
    struct page *cma = image->segment_cma[idx];
    char *ptr = page_address(cma);
    unsigned long maddr;
    size_t ubytes, mbytes;
    int result = 0;
    unsigned char __user *buf = NULL;
    unsigned char *kbuf = NULL;

    if (image->file_mode)
        kbuf = segment->kbuf;
    else
        buf = segment->buf;
    ubytes = segment->bufsz;
    mbytes = segment->memsz;
    maddr = segment->mem;

    /* Then copy from source buffer to the CMA one */
    while (mbytes) {
        size_t uchunk, mchunk;

        ptr += maddr & ~PAGE_MASK;
        mchunk = min_t(size_t, mbytes,
                PAGE_SIZE - (maddr & ~PAGE_MASK));
        uchunk = min(ubytes, mchunk);

        if (uchunk) {
            /* For file based kexec, source pages are in kernel memory */
            if (image->file_mode)
                memcpy(ptr, kbuf, uchunk);
            else
                result = copy_from_user(ptr, buf, uchunk);
            ubytes -= uchunk;
            if (image->file_mode)
                kbuf += uchunk;
            else
                buf += uchunk;
        }

        if (result) {
            result = -EFAULT;
            goto out;
        }

        ptr += mchunk;
        maddr += mchunk;
        mbytes -= mchunk;

        cond_resched();
    }

    /* Clear any remainder */
    memset(ptr, 0, mbytes);

out:
    return result;
}

static int kimage_load_normal_segment(struct kimage *image, int idx)
{
    struct kexec_segment *segment = &image->segment[idx];
    unsigned long maddr;
    size_t ubytes, mbytes;
    int result;

@@ -733,6 +815,9 @@ static int kimage_load_normal_segment(struct kimage *image,
    mbytes = segment->memsz;
    maddr = segment->mem;

    if (image->segment_cma[idx])
        return kimage_load_cma_segment(image, idx);

    result = kimage_set_destination(image, maddr);
    if (result < 0)
        goto out;

@@ -787,13 +872,13 @@ out:
}

#ifdef CONFIG_CRASH_DUMP
static int kimage_load_crash_segment(struct kimage *image,
                     struct kexec_segment *segment)
static int kimage_load_crash_segment(struct kimage *image, int idx)
{
    /* For crash dumps kernels we simply copy the data from
     * user space to it's destination.
     * We do things a page at a time for the sake of kmap.
     */
    struct kexec_segment *segment = &image->segment[idx];
    unsigned long maddr;
    size_t ubytes, mbytes;
    int result;

@@ -858,18 +943,17 @@ out:
}
#endif

int kimage_load_segment(struct kimage *image,
            struct kexec_segment *segment)
int kimage_load_segment(struct kimage *image, int idx)
{
    int result = -ENOMEM;

    switch (image->type) {
    case KEXEC_TYPE_DEFAULT:
        result = kimage_load_normal_segment(image, segment);
        result = kimage_load_normal_segment(image, idx);
        break;
#ifdef CONFIG_CRASH_DUMP
    case KEXEC_TYPE_CRASH:
        result = kimage_load_crash_segment(image, segment);
        result = kimage_load_crash_segment(image, idx);
        break;
#endif
    }

@@ -26,6 +26,7 @@
#include <linux/kernel_read_file.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/dma-map-ops.h>
#include "kexec_internal.h"

#ifdef CONFIG_KEXEC_SIG

@@ -253,6 +254,8 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
        ret = 0;
    }

    image->no_cma = !!(flags & KEXEC_FILE_NO_CMA);

    if (cmdline_len) {
        image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len);
        if (IS_ERR(image->cmdline_buf)) {

@@ -434,7 +437,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
             i, ksegment->buf, ksegment->bufsz, ksegment->mem,
             ksegment->memsz);

        ret = kimage_load_segment(image, &image->segment[i]);
        ret = kimage_load_segment(image, i);
        if (ret)
            goto out;
    }

@@ -663,6 +666,43 @@ static int kexec_walk_resources(struct kexec_buf *kbuf,
        return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
}

static int kexec_alloc_contig(struct kexec_buf *kbuf)
{
    size_t nr_pages = kbuf->memsz >> PAGE_SHIFT;
    unsigned long mem;
    struct page *p;

    /* User space disabled CMA allocations, bail out. */
    if (kbuf->image->no_cma)
        return -EPERM;

    /* Skip CMA logic for crash kernel */
    if (kbuf->image->type == KEXEC_TYPE_CRASH)
        return -EPERM;

    p = dma_alloc_from_contiguous(NULL, nr_pages, get_order(kbuf->buf_align), true);
    if (!p)
        return -ENOMEM;

    pr_debug("allocated %zu DMA pages at 0x%lx", nr_pages, page_to_boot_pfn(p));

    mem = page_to_boot_pfn(p) << PAGE_SHIFT;

    if (kimage_is_destination_range(kbuf->image, mem, mem + kbuf->memsz)) {
        /* Our region is already in use by a statically defined one. Bail out. */
        pr_debug("CMA overlaps existing mem: 0x%lx+0x%lx\n", mem, kbuf->memsz);
        dma_release_from_contiguous(NULL, p, nr_pages);
        return -EBUSY;
    }

    kbuf->mem = page_to_boot_pfn(p) << PAGE_SHIFT;
    kbuf->cma = p;

    arch_kexec_post_alloc_pages(page_address(p), (int)nr_pages, 0);

    return 0;
}

/**
 * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel
 * @kbuf: Parameters for the memory search.

@@ -687,6 +727,13 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
    if (ret <= 0)
        return ret;

    /*
     * Try to find a free physically contiguous block of memory first. With that, we
     * can avoid any copying at kexec time.
     */
    if (!kexec_alloc_contig(kbuf))
        return 0;

    if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
        ret = kexec_walk_resources(kbuf, locate_mem_hole_callback);
    else

@@ -732,6 +779,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
    /* Ensure minimum alignment needed for segments. */
    kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
    kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
    kbuf->cma = NULL;

    /* Walk the RAM ranges and allocate a suitable range for the buffer */
    ret = arch_kexec_locate_mem_hole(kbuf);

@@ -744,6 +792,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
    ksegment->bufsz = kbuf->bufsz;
    ksegment->mem = kbuf->mem;
    ksegment->memsz = kbuf->memsz;
    kbuf->image->segment_cma[kbuf->image->nr_segments] = kbuf->cma;
    kbuf->image->nr_segments++;
    return 0;
}

@@ -10,7 +10,7 @@ struct kimage *do_kimage_alloc_init(void);
int sanity_check_segment_list(struct kimage *image);
void kimage_free_page_list(struct list_head *list);
void kimage_free(struct kimage *image);
int kimage_load_segment(struct kimage *image, struct kexec_segment *segment);
int kimage_load_segment(struct kimage *image, int idx);
void kimage_terminate(struct kimage *image);
int kimage_is_destination_range(struct kimage *image,
                unsigned long start, unsigned long end);
@@ -88,13 +88,12 @@ static inline struct kthread *to_kthread(struct task_struct *k)
/*
 * Variant of to_kthread() that doesn't assume @p is a kthread.
 *
 * Per construction; when:
 * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
 * always remain a kthread. For kthreads p->worker_private always
 * points to a struct kthread. For tasks that are not kthreads
 * p->worker_private is used to point to other things.
 *
 * (p->flags & PF_KTHREAD) && p->worker_private
 *
 * the task is both a kthread and struct kthread is persistent. However
 * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and
 * begin_new_exec()).
 * Return NULL for any task that is not a kthread.
 */
static inline struct kthread *__to_kthread(struct task_struct *p)
{

@@ -27,6 +27,7 @@
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <linux/hung_task.h>
#include <trace/events/lock.h>

#ifndef CONFIG_PREEMPT_RT

@@ -181,11 +182,11 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
    __rwsem_set_reader_owned(sem, current);
}

#ifdef CONFIG_DEBUG_RWSEMS
#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
    return (struct task_struct *)
        (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);

@@ -194,7 +195,7 @@ static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
    /*
     * Check the count to see if it is write-locked.

@@ -207,10 +208,10 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
}

/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in owner of a reader-owned rwsem, it will be the
 * real owner or one of the real owners. The only exception is when the
 * unlock is done by up_read_non_owner().
 * With CONFIG_DEBUG_RWSEMS or CONFIG_DETECT_HUNG_TASK_BLOCKER configured,
 * it will make sure that the owner field of a reader-owned rwsem either
 * points to a real reader-owner(s) or gets cleared. The only exception is
 * when the unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{

@@ -1063,10 +1064,13 @@ queue:
    wake_up_q(&wake_q);

    trace_contention_begin(sem, LCB_F_READ);
    set_current_state(state);

    if (state == TASK_UNINTERRUPTIBLE)
        hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_READER);

    /* wait to be given the lock */
    for (;;) {
        set_current_state(state);
        if (!smp_load_acquire(&waiter.task)) {
            /* Matches rwsem_mark_wake()'s smp_store_release(). */
            break;

@@ -1081,8 +1085,12 @@ queue:
        }
        schedule_preempt_disabled();
        lockevent_inc(rwsem_sleep_reader);
        set_current_state(state);
    }

    if (state == TASK_UNINTERRUPTIBLE)
        hung_task_clear_blocker();

    __set_current_state(TASK_RUNNING);
    lockevent_inc(rwsem_rlock);
    trace_contention_end(sem, 0);

@@ -1144,6 +1152,9 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
    set_current_state(state);
    trace_contention_begin(sem, LCB_F_WRITE);

    if (state == TASK_UNINTERRUPTIBLE)
        hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_WRITER);

    for (;;) {
        if (rwsem_try_write_lock(sem, &waiter)) {
            /* rwsem_try_write_lock() implies ACQUIRE on success */

@@ -1177,6 +1188,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
trylock_again:
        raw_spin_lock_irq(&sem->wait_lock);
    }

    if (state == TASK_UNINTERRUPTIBLE)
        hung_task_clear_blocker();

    __set_current_state(TASK_RUNNING);
    raw_spin_unlock_irq(&sem->wait_lock);
    lockevent_inc(rwsem_wlock);
@@ -36,6 +36,7 @@
#include <linux/sysfs.h>
#include <linux/context_tracking.h>
#include <linux/seq_buf.h>
#include <linux/sys_info.h>
#include <trace/events/error_report.h>
#include <asm/sections.h>

@@ -63,20 +64,13 @@ int panic_on_warn __read_mostly;
unsigned long panic_on_taint;
bool panic_on_taint_nousertaint = false;
static unsigned int warn_limit __read_mostly;
static bool panic_console_replay;

bool panic_triggering_all_cpu_backtrace;

int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);

#define PANIC_PRINT_TASK_INFO		0x00000001
#define PANIC_PRINT_MEM_INFO		0x00000002
#define PANIC_PRINT_TIMER_INFO		0x00000004
#define PANIC_PRINT_LOCK_INFO		0x00000008
#define PANIC_PRINT_FTRACE_INFO		0x00000010
#define PANIC_PRINT_ALL_PRINTK_MSG	0x00000020
#define PANIC_PRINT_ALL_CPU_BT		0x00000040
#define PANIC_PRINT_BLOCKED_TASKS	0x00000080
unsigned long panic_print;

ATOMIC_NOTIFIER_HEAD(panic_notifier_list);

@@ -128,6 +122,13 @@ static int proc_taint(const struct ctl_table *table, int write,
    return err;
}

static int sysctl_panic_print_handler(const struct ctl_table *table, int write,
                      void *buffer, size_t *lenp, loff_t *ppos)
{
    pr_info_once("Kernel: 'panic_print' sysctl interface will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n");
    return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}

static const struct ctl_table kern_panic_table[] = {
#ifdef CONFIG_SMP
    {

@@ -165,7 +166,7 @@ static const struct ctl_table kern_panic_table[] = {
        .data           = &panic_print,
        .maxlen         = sizeof(unsigned long),
        .mode           = 0644,
        .proc_handler   = proc_doulongvec_minmax,
        .proc_handler   = sysctl_panic_print_handler,
    },
    {
        .procname       = "panic_on_warn",

@@ -193,6 +194,13 @@ static const struct ctl_table kern_panic_table[] = {
        .proc_handler   = proc_dointvec,
    },
#endif
    {
        .procname       = "panic_sys_info",
        .data           = &panic_print,
        .maxlen         = sizeof(panic_print),
        .mode           = 0644,
        .proc_handler   = sysctl_sys_info_handler,
    },
};

static __init int kernel_panic_sysctls_init(void)

@@ -203,6 +211,15 @@ static __init int kernel_panic_sysctls_init(void)
late_initcall(kernel_panic_sysctls_init);
#endif

/* The format is "panic_sys_info=tasks,mem,locks,ftrace,..." */
static int __init setup_panic_sys_info(char *buf)
{
    /* There is no risk of race in kernel boot phase */
    panic_print = sys_info_parse_param(buf);
    return 1;
}
__setup("panic_sys_info=", setup_panic_sys_info);

static atomic_t warn_count = ATOMIC_INIT(0);

#ifdef CONFIG_SYSFS
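A hypothetical boot command line exercising the new interfaces might read as follows. The token names are taken from the format comment above (the full list lives in lib/sys_info.c), and panic_console_replay is the core_param added later in this file:

    panic_sys_info=tasks,mem,locks,ftrace panic_console_replay=1
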
@@ -298,33 +315,6 @@ void nmi_panic(struct pt_regs *regs, const char *msg)
}
EXPORT_SYMBOL(nmi_panic);

static void panic_print_sys_info(bool console_flush)
{
    if (console_flush) {
        if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG)
            console_flush_on_panic(CONSOLE_REPLAY_ALL);
        return;
    }

    if (panic_print & PANIC_PRINT_TASK_INFO)
        show_state();

    if (panic_print & PANIC_PRINT_MEM_INFO)
        show_mem();

    if (panic_print & PANIC_PRINT_TIMER_INFO)
        sysrq_timer_list_show();

    if (panic_print & PANIC_PRINT_LOCK_INFO)
        debug_show_all_locks();

    if (panic_print & PANIC_PRINT_FTRACE_INFO)
        ftrace_dump(DUMP_ALL);

    if (panic_print & PANIC_PRINT_BLOCKED_TASKS)
        show_state_filter(TASK_UNINTERRUPTIBLE);
}

void check_panic_on_warn(const char *origin)
{
    unsigned int limit;

@@ -345,7 +335,7 @@ void check_panic_on_warn(const char *origin)
 */
static void panic_other_cpus_shutdown(bool crash_kexec)
{
    if (panic_print & PANIC_PRINT_ALL_CPU_BT) {
    if (panic_print & SYS_INFO_ALL_CPU_BT) {
        /* Temporary allow non-panic CPUs to write their backtraces. */
        panic_triggering_all_cpu_backtrace = true;
        trigger_all_cpu_backtrace();

@@ -468,7 +458,7 @@ void vpanic(const char *fmt, va_list args)
     */
    atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

    panic_print_sys_info(false);
    sys_info(panic_print);

    kmsg_dump_desc(KMSG_DUMP_PANIC, buf);

@@ -497,7 +487,9 @@ void vpanic(const char *fmt, va_list args)
    debug_locks_off();
    console_flush_on_panic(CONSOLE_FLUSH_PENDING);

    panic_print_sys_info(true);
    if ((panic_print & SYS_INFO_PANIC_CONSOLE_REPLAY) ||
        panic_console_replay)
        console_flush_on_panic(CONSOLE_REPLAY_ALL);

    if (!panic_blink)
        panic_blink = no_blink;

@@ -949,6 +941,7 @@ core_param(panic_print, panic_print, ulong, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);
core_param(panic_on_warn, panic_on_warn, int, 0644);
core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);
core_param(panic_console_replay, panic_console_replay, bool, 0644);

static int __init oops_setup(char *s)
{
@@ -118,7 +118,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size)
        return NULL;

    for (i = 0; i < n_pages; i++) {
        buf->page_array[i] = alloc_page(GFP_KERNEL);
        buf->page_array[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (unlikely(!buf->page_array[i]))
            goto depopulate;
        set_page_private(buf->page_array[i], (unsigned long)buf);

@@ -127,7 +127,6 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size)
    if (!mem)
        goto depopulate;

    memset(mem, 0, *size);
    buf->page_count = n_pages;
    return mem;

@@ -250,13 +249,18 @@ EXPORT_SYMBOL_GPL(relay_buf_full);
 */

static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf,
                  void *prev_subbuf, size_t prev_padding)
                  void *prev_subbuf)
{
    int full = relay_buf_full(buf);

    if (full)
        buf->stats.full_count++;

    if (!buf->chan->cb->subbuf_start)
        return !relay_buf_full(buf);
        return !full;

    return buf->chan->cb->subbuf_start(buf, subbuf,
                       prev_subbuf, prev_padding);
                       prev_subbuf);
}

/**

@@ -298,11 +302,13 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
    buf->finalized = 0;
    buf->data = buf->start;
    buf->offset = 0;
    buf->stats.full_count = 0;
    buf->stats.big_count = 0;

    for (i = 0; i < buf->chan->n_subbufs; i++)
        buf->padding[i] = 0;

    relay_subbuf_start(buf, buf->data, NULL, 0);
    relay_subbuf_start(buf, buf->data, NULL);
}

/**

@@ -555,9 +561,11 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
        goto toobig;

    if (buf->offset != buf->chan->subbuf_size + 1) {
        buf->prev_padding = buf->chan->subbuf_size - buf->offset;
        size_t prev_padding;

        prev_padding = buf->chan->subbuf_size - buf->offset;
        old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
        buf->padding[old_subbuf] = buf->prev_padding;
        buf->padding[old_subbuf] = prev_padding;
        buf->subbufs_produced++;
        if (buf->dentry)
            d_inode(buf->dentry)->i_size +=

@@ -582,7 +590,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
    new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
    new = buf->start + new_subbuf * buf->chan->subbuf_size;
    buf->offset = 0;
    if (!relay_subbuf_start(buf, new, old, buf->prev_padding)) {
    if (!relay_subbuf_start(buf, new, old)) {
        buf->offset = buf->chan->subbuf_size + 1;
        return 0;
    }

@@ -595,7 +603,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
    return length;

toobig:
    buf->chan->last_toobig = length;
    buf->stats.big_count++;
    return 0;
}
EXPORT_SYMBOL_GPL(relay_switch_subbuf);

@@ -655,11 +663,6 @@ void relay_close(struct rchan *chan)
        if ((buf = *per_cpu_ptr(chan->buf, i)))
            relay_close_buf(buf);

    if (chan->last_toobig)
        printk(KERN_WARNING "relay: one or more items not logged "
               "[item size (%zd) > sub-buffer size (%zd)]\n",
               chan->last_toobig, chan->subbuf_size);

    list_del(&chan->list);
    kref_put(&chan->kref, relay_destroy_channel);
    mutex_unlock(&relay_channels_mutex);

@@ -693,6 +696,42 @@ void relay_flush(struct rchan *chan)
}
EXPORT_SYMBOL_GPL(relay_flush);

/**
 * relay_stats - get channel buffer statistics
 * @chan: the channel
 * @flags: select particular information to get
 *
 * Returns the count of certain field that caller specifies.
 */
size_t relay_stats(struct rchan *chan, int flags)
{
    unsigned int i, count = 0;
    struct rchan_buf *rbuf;

    if (!chan || flags > RELAY_STATS_LAST)
        return 0;

    if (chan->is_global) {
        rbuf = *per_cpu_ptr(chan->buf, 0);
        if (flags & RELAY_STATS_BUF_FULL)
            count = rbuf->stats.full_count;
        else if (flags & RELAY_STATS_WRT_BIG)
            count = rbuf->stats.big_count;
    } else {
        for_each_online_cpu(i) {
            rbuf = *per_cpu_ptr(chan->buf, i);
            if (rbuf) {
                if (flags & RELAY_STATS_BUF_FULL)
                    count += rbuf->stats.full_count;
                else if (flags & RELAY_STATS_WRT_BIG)
                    count += rbuf->stats.big_count;
            }
        }
    }

    return count;
}

/**
 * relay_file_open - open file op for relay files
 * @inode: the inode
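A minimal sketch of a client-side use of the new accessor, modelled on the blktrace conversion that follows (the wrapper function name is invented):

    static size_t chan_dropped_events(struct rchan *chan)
    {
        /* sub-buffer switches rejected because the buffer was full */
        return relay_stats(chan, RELAY_STATS_BUF_FULL);
    }
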
@@ -415,9 +415,10 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
                size_t count, loff_t *ppos)
{
    struct blk_trace *bt = filp->private_data;
    size_t dropped = relay_stats(bt->rchan, RELAY_STATS_BUF_FULL);
    char buf[16];

    snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
    snprintf(buf, sizeof(buf), "%zu\n", dropped);

    return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
}

@@ -456,23 +457,6 @@ static const struct file_operations blk_msg_fops = {
    .llseek =       noop_llseek,
};

/*
 * Keep track of how many times we encountered a full subbuffer, to aid
 * the user space app in telling how many lost events there were.
 */
static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
                     void *prev_subbuf, size_t prev_padding)
{
    struct blk_trace *bt;

    if (!relay_buf_full(buf))
        return 1;

    bt = buf->chan->private_data;
    atomic_inc(&bt->dropped);
    return 0;
}

static int blk_remove_buf_file_callback(struct dentry *dentry)
{
    debugfs_remove(dentry);

@@ -491,7 +475,6 @@ static struct dentry *blk_create_buf_file_callback(const char *filename,
}

static const struct rchan_callbacks blk_relay_callbacks = {
    .subbuf_start           = blk_subbuf_start_callback,
    .create_buf_file        = blk_create_buf_file_callback,
    .remove_buf_file        = blk_remove_buf_file_callback,
};

@@ -580,7 +563,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
    }

    bt->dev = dev;
    atomic_set(&bt->dropped, 0);
    INIT_LIST_HEAD(&bt->running_list);

    ret = -EIO;
@@ -199,18 +199,16 @@ void put_ucounts(struct ucounts *ucounts)
    }
}

static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
static inline bool atomic_long_inc_below(atomic_long_t *v, long u)
{
    long c, old;
    c = atomic_long_read(v);
    for (;;) {
    long c = atomic_long_read(v);

    do {
        if (unlikely(c >= u))
            return false;
        old = atomic_long_cmpxchg(v, c, c+1);
        if (likely(old == c))
    } while (!atomic_long_try_cmpxchg(v, &c, c+1));

    return true;
            return true;
        c = old;
    }
}

struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
@@ -3214,6 +3214,26 @@ config TEST_OBJPOOL

	  If unsure, say N.

config TEST_KEXEC_HANDOVER
	bool "Test for Kexec HandOver"
	default n
	depends on KEXEC_HANDOVER
	help
	  This option enables test for Kexec HandOver (KHO).
	  The test consists of two parts: saving kernel data before kexec and
	  restoring the data after kexec and verifying that it was properly
	  handed over. This test module creates and saves data on the boot of
	  the first kernel and restores and verifies the data on the boot of
	  kexec'ed kernel.

	  For detailed documentation about KHO, see Documentation/core-api/kho.

	  To run the test run:

	  tools/testing/selftests/kho/vmtest.sh -h

	  If unsure, say N.

config RATELIMIT_KUNIT_TEST
	tristate "KUnit Test for correctness and stress of ratelimit" if !KUNIT_ALL_TESTS
	depends on KUNIT

@@ -40,7 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
	 nmi_backtrace.o win_minmax.o memcat_p.o \
	 buildid.o objpool.o iomem_copy.o
	 buildid.o objpool.o iomem_copy.o sys_info.o

lib-$(CONFIG_UNION_FIND) += union_find.o
lib-$(CONFIG_PRINTK) += dump_stack.o

@@ -102,6 +102,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o
obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o
obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o
obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o
obj-$(CONFIG_TEST_KEXEC_HANDOVER) += test_kho.o

obj-$(CONFIG_TEST_FPU) += test_fpu.o
test_fpu-y := test_fpu_glue.o test_fpu_impl.o
@@ -212,12 +212,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)

#endif

    /* make sure c is not zero, trigger exception otherwise */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdiv-by-zero"
    if (unlikely(c == 0))
        return 1/0;
#pragma GCC diagnostic pop
    /* make sure c is not zero, trigger runtime exception otherwise */
    if (unlikely(c == 0)) {
        unsigned long zero = 0;

        OPTIMIZER_HIDE_VAR(zero);
        return ~0UL/zero;
    }

    int shift = __builtin_ctzll(c);

@@ -11,22 +11,16 @@
 * has decent hardware division.
 */

DEFINE_STATIC_KEY_TRUE(efficient_ffs_key);

#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS)

/* If __ffs is available, the even/odd algorithm benchmarks slower. */

/**
 * gcd - calculate and return the greatest common divisor of 2 unsigned longs
 * @a: first value
 * @b: second value
 */
unsigned long gcd(unsigned long a, unsigned long b)
static unsigned long binary_gcd(unsigned long a, unsigned long b)
{
    unsigned long r = a | b;

    if (!a || !b)
        return r;

    b >>= __ffs(b);
    if (b == 1)
        return r & -r;

@@ -44,9 +38,15 @@ unsigned long gcd(unsigned long a, unsigned long b)
    }
}

#else
#endif

/* If normalization is done by loops, the even/odd algorithm is a win. */

/**
 * gcd - calculate and return the greatest common divisor of 2 unsigned longs
 * @a: first value
 * @b: second value
 */
unsigned long gcd(unsigned long a, unsigned long b)
{
    unsigned long r = a | b;

@@ -54,6 +54,11 @@ unsigned long gcd(unsigned long a, unsigned long b)
    if (!a || !b)
        return r;

#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS)
    if (static_branch_likely(&efficient_ffs_key))
        return binary_gcd(a, b);
#endif

    /* Isolate lsbit of r */
    r &= -r;

@@ -80,6 +85,4 @@ unsigned long gcd(unsigned long a, unsigned long b)
    }
}

#endif

EXPORT_SYMBOL_GPL(gcd);
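The static branch lets an architecture opt out at boot when its __ffs() is slow. A hypothetical arch-init sketch, assuming such a decision point exists (the key name is real; the probe function is invented for illustration):

    #include <linux/jump_label.h>

    extern struct static_key_true efficient_ffs_key;

    static void __init arch_tune_gcd(void)
    {
        /* fall back to the even/odd loop when ffs is emulated */
        if (!cpu_has_fast_ffs())    /* invented probe */
            static_branch_disable(&efficient_ffs_key);
    }
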
@@ -18,9 +18,6 @@
#else
#include <linux/module.h>
#include <linux/gfp.h>
/* In .bss so it's zeroed */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
EXPORT_SYMBOL(raid6_empty_zero_page);
#endif

struct raid6_calls raid6_call;

@@ -31,10 +31,10 @@ static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila,
       Use the dead data pages as temporary storage for
       delta p and delta q */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -72,7 +72,7 @@ static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila,
    /* Compute syndrome with zero for the missing data page
       Use the dead data page as temporary storage for delta q */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);
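raid6_get_zero_page() replaces the library's private raid6_empty_zero_page array throughout these recovery paths. A plausible definition, assuming the helper simply hands back the kernel's shared zero page (the real helper lives in the raid6 headers and may differ):

    static inline void *raid6_get_zero_page(void)
    {
        return page_address(ZERO_PAGE(0));
    }
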
@@ -28,10 +28,10 @@ static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
       Use the dead data pages as temporary storage for
       delta p and delta q */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -196,7 +196,7 @@ static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
    /* Compute syndrome with zero for the missing data page
       Use the dead data page as temporary storage for delta q */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -37,10 +37,10 @@ static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
     */

    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -238,7 +238,7 @@ static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
     */

    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -42,10 +42,10 @@ static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
     * delta p and delta q
     */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -197,7 +197,7 @@ static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
     * Use the dead data page as temporary storage for delta q
     */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -316,10 +316,10 @@ static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
     * delta p and delta q
     */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -436,7 +436,7 @@ static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
     * Use the dead data page as temporary storage for delta q
     */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -36,10 +36,10 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
     * delta p and delta q
     */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -74,7 +74,7 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
     * Use the dead data page as temporary storage for delta q
     */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -165,10 +165,10 @@ static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila,
     * delta p and delta q
     */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -203,7 +203,7 @@ static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila,
     * Use the dead data page as temporary storage for delta q
     */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks - 1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -34,10 +34,10 @@ static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
       Use the dead data pages as temporary storage for
       delta p and delta q */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -81,7 +81,7 @@ static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
    /* Compute syndrome with zero for the missing data page
       Use the dead data page as temporary storage for delta q */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -30,10 +30,10 @@ static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila,
       Use the dead data pages as temporary storage for
       delta p and delta q */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[failb] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

@@ -203,7 +203,7 @@ static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila,
    /* Compute syndrome with zero for the missing data page
       Use the dead data page as temporary storage for delta q */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[faila] = raid6_get_zero_page();
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);
@ -36,11 +36,11 @@
|
|||
#include <linux/memblock.h>
|
||||
#include <linux/kasan-enabled.h>
|
||||
|
||||
#define DEPOT_POOLS_CAP 8192
|
||||
/* The pool_index is offset by 1 so the first record does not have a 0 handle. */
|
||||
#define DEPOT_MAX_POOLS \
|
||||
(((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \
|
||||
(1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP)
|
||||
/*
|
||||
* The pool_index is offset by 1 so the first record does not have a 0 handle.
|
||||
*/
|
||||
static unsigned int stack_max_pools __read_mostly =
|
||||
MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192);
|
||||
|
||||
static bool stack_depot_disabled;
|
||||
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
|
||||
|
@@ -62,7 +62,7 @@ static unsigned int stack_bucket_number_order;
static unsigned int stack_hash_mask;

/* Array of memory regions that store stack records. */
-static void *stack_pools[DEPOT_MAX_POOLS];
+static void **stack_pools;
/* Newly allocated pool that is not yet added to stack_pools. */
static void *new_pool;
/* Number of pools in stack_pools. */
@@ -101,6 +101,34 @@ static int __init disable_stack_depot(char *str)
}
early_param("stack_depot_disable", disable_stack_depot);

+static int __init parse_max_pools(char *str)
+{
+	const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1;
+	unsigned int max_pools;
+	int rv;
+
+	rv = kstrtouint(str, 0, &max_pools);
+	if (rv)
+		return rv;
+
+	if (max_pools < 1024) {
+		pr_err("stack_depot_max_pools below 1024, using default of %u\n",
+		       stack_max_pools);
+		goto out;
+	}
+
+	if (max_pools > limit) {
+		pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n",
+		       limit, stack_max_pools);
+		goto out;
+	}
+
+	stack_max_pools = max_pools;
+out:
+	return 0;
+}
+early_param("stack_depot_max_pools", parse_max_pools);
+
void __init stack_depot_request_early_init(void)
{
	/* Too late to request early init now. */
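For reference, the new cap is set on the kernel command line, e.g.:

	stack_depot_max_pools=4096

As parse_max_pools() above shows, values below 1024 or above the pool-index limit are rejected and the compiled-in default is kept.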
@@ -182,6 +210,17 @@ int __init stack_depot_early_init(void)
	}
	init_stack_table(entries);

+	pr_info("allocating space for %u stack pools via memblock\n",
+		stack_max_pools);
+	stack_pools =
+		memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE);
+	if (!stack_pools) {
+		pr_err("stack pools allocation failed, disabling\n");
+		memblock_free(stack_table, entries * sizeof(struct list_head));
+		stack_depot_disabled = true;
+		return -ENOMEM;
+	}
+
	return 0;
}
@@ -231,6 +270,16 @@ int stack_depot_init(void)
		stack_hash_mask = entries - 1;
		init_stack_table(entries);

+		pr_info("allocating space for %u stack pools via kvcalloc\n",
+			stack_max_pools);
+		stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL);
+		if (!stack_pools) {
+			pr_err("stack pools allocation failed, disabling\n");
+			kvfree(stack_table);
+			stack_depot_disabled = true;
+			ret = -ENOMEM;
+		}
+
out_unlock:
	mutex_unlock(&stack_depot_init_mutex);
@@ -245,9 +294,9 @@ static bool depot_init_pool(void **prealloc)
{
	lockdep_assert_held(&pool_lock);

-	if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
+	if (unlikely(pools_num >= stack_max_pools)) {
		/* Bail out if we reached the pool limit. */
-		WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
+		WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */
		WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
		WARN_ONCE(1, "Stack depot reached limit capacity");
		return false;
@@ -273,7 +322,7 @@ static bool depot_init_pool(void **prealloc)
	 * NULL; do not reset to NULL if we have reached the maximum number of
	 * pools.
	 */
-	if (pools_num < DEPOT_MAX_POOLS)
+	if (pools_num < stack_max_pools)
		WRITE_ONCE(new_pool, NULL);
	else
		WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
122
lib/sys_info.c
Normal file
@@ -0,0 +1,122 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/sched/debug.h>
#include <linux/console.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/nmi.h>

#include <linux/sys_info.h>

struct sys_info_name {
	unsigned long bit;
	const char *name;
};

/*
 * When 'si_names' gets updated, please make sure the 'sys_info_avail'
 * below is updated accordingly.
 */
static const struct sys_info_name si_names[] = {
	{ SYS_INFO_TASKS, "tasks" },
	{ SYS_INFO_MEM, "mem" },
	{ SYS_INFO_TIMERS, "timers" },
	{ SYS_INFO_LOCKS, "locks" },
	{ SYS_INFO_FTRACE, "ftrace" },
	{ SYS_INFO_ALL_CPU_BT, "all_bt" },
	{ SYS_INFO_BLOCKED_TASKS, "blocked_tasks" },
};

/* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." */
unsigned long sys_info_parse_param(char *str)
{
	unsigned long si_bits = 0;
	char *s, *name;
	int i;

	s = str;
	while ((name = strsep(&s, ",")) && *name) {
		for (i = 0; i < ARRAY_SIZE(si_names); i++) {
			if (!strcmp(name, si_names[i].name)) {
				si_bits |= si_names[i].bit;
				break;
			}
		}
	}

	return si_bits;
}

#ifdef CONFIG_SYSCTL

static const char sys_info_avail[] __maybe_unused = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks";

int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
			    void *buffer, size_t *lenp,
			    loff_t *ppos)
{
	char names[sizeof(sys_info_avail) + 1];
	struct ctl_table table;
	unsigned long *si_bits_global;

	si_bits_global = ro_table->data;

	if (write) {
		unsigned long si_bits;
		int ret;

		table = *ro_table;
		table.data = names;
		table.maxlen = sizeof(names);
		ret = proc_dostring(&table, write, buffer, lenp, ppos);
		if (ret)
			return ret;

		si_bits = sys_info_parse_param(names);
		/* The access to the global value is not synchronized. */
		WRITE_ONCE(*si_bits_global, si_bits);
		return 0;
	} else {
		/* for 'read' operation */
		char *delim = "";
		int i, len = 0;

		for (i = 0; i < ARRAY_SIZE(si_names); i++) {
			if (*si_bits_global & si_names[i].bit) {
				len += scnprintf(names + len, sizeof(names) - len,
						 "%s%s", delim, si_names[i].name);
				delim = ",";
			}
		}

		table = *ro_table;
		table.data = names;
		table.maxlen = sizeof(names);
		return proc_dostring(&table, write, buffer, lenp, ppos);
	}
}
#endif

void sys_info(unsigned long si_mask)
{
	if (si_mask & SYS_INFO_TASKS)
		show_state();

	if (si_mask & SYS_INFO_MEM)
		show_mem();

	if (si_mask & SYS_INFO_TIMERS)
		sysrq_timer_list_show();

	if (si_mask & SYS_INFO_LOCKS)
		debug_show_all_locks();

	if (si_mask & SYS_INFO_FTRACE)
		ftrace_dump(DUMP_ALL);

	if (si_mask & SYS_INFO_ALL_CPU_BT)
		trigger_all_cpu_backtrace();

	if (si_mask & SYS_INFO_BLOCKED_TASKS)
		show_state_filter(TASK_UNINTERRUPTIBLE);
}
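A minimal caller sketch (the function below is hypothetical; the helpers and mask bits are the ones defined above and in linux/sys_info.h):

	/* Hypothetical caller: dump task and memory state on an error path. */
	static void example_splat(void)
	{
		char opts[] = "tasks,mem";	/* as a sysctl or boot param would supply */
		unsigned long mask = sys_info_parse_param(opts);

		sys_info(mask);	/* invokes show_state() and show_mem() per the mask */
	}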
305
lib/test_kho.c
Normal file
@@ -0,0 +1,305 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Test module for KHO
 * Copyright (c) 2025 Microsoft Corporation.
 *
 * Authors:
 *   Saurabh Sengar <ssengar@microsoft.com>
 *   Mike Rapoport <rppt@kernel.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/vmalloc.h>
#include <linux/kexec_handover.h>

#include <net/checksum.h>

#define KHO_TEST_MAGIC	0x4b484f21 /* KHO! */
#define KHO_TEST_FDT	"kho_test"
#define KHO_TEST_COMPAT "kho-test-v1"

static long max_mem = (PAGE_SIZE << MAX_PAGE_ORDER) * 2;
module_param(max_mem, long, 0644);

struct kho_test_state {
	unsigned int nr_folios;
	struct folio **folios;
	struct folio *fdt;
	__wsum csum;
};

static struct kho_test_state kho_test_state;

static int kho_test_notifier(struct notifier_block *self, unsigned long cmd,
			     void *v)
{
	struct kho_test_state *state = &kho_test_state;
	struct kho_serialization *ser = v;
	int err = 0;

	switch (cmd) {
	case KEXEC_KHO_ABORT:
		return NOTIFY_DONE;
	case KEXEC_KHO_FINALIZE:
		/* Handled below */
		break;
	default:
		return NOTIFY_BAD;
	}

	err |= kho_preserve_folio(state->fdt);
	err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt));

	return err ? NOTIFY_BAD : NOTIFY_DONE;
}

static struct notifier_block kho_test_nb = {
	.notifier_call = kho_test_notifier,
};

static int kho_test_save_data(struct kho_test_state *state, void *fdt)
{
	phys_addr_t *folios_info __free(kvfree) = NULL;
	int err = 0;

	folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info),
				     GFP_KERNEL);
	if (!folios_info)
		return -ENOMEM;

	for (int i = 0; i < state->nr_folios; i++) {
		struct folio *folio = state->folios[i];
		unsigned int order = folio_order(folio);

		folios_info[i] = virt_to_phys(folio_address(folio)) | order;

		err = kho_preserve_folio(folio);
		if (err)
			return err;
	}

	err |= fdt_begin_node(fdt, "data");
	err |= fdt_property(fdt, "nr_folios", &state->nr_folios,
			    sizeof(state->nr_folios));
	err |= fdt_property(fdt, "folios_info", folios_info,
			    state->nr_folios * sizeof(*folios_info));
	err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum));
	err |= fdt_end_node(fdt);

	return err;
}

static int kho_test_prepare_fdt(struct kho_test_state *state)
{
	const char compatible[] = KHO_TEST_COMPAT;
	unsigned int magic = KHO_TEST_MAGIC;
	ssize_t fdt_size;
	int err = 0;
	void *fdt;

	fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE;
	state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size));
	if (!state->fdt)
		return -ENOMEM;

	fdt = folio_address(state->fdt);

	err |= fdt_create(fdt, fdt_size);
	err |= fdt_finish_reservemap(fdt);

	err |= fdt_begin_node(fdt, "");
	err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible));
	err |= fdt_property(fdt, "magic", &magic, sizeof(magic));
	err |= kho_test_save_data(state, fdt);
	err |= fdt_end_node(fdt);

	err |= fdt_finish(fdt);

	if (err)
		folio_put(state->fdt);

	return err;
}

static int kho_test_generate_data(struct kho_test_state *state)
{
	size_t alloc_size = 0;
	__wsum csum = 0;

	while (alloc_size < max_mem) {
		int order = get_random_u32() % NR_PAGE_ORDERS;
		struct folio *folio;
		unsigned int size;
		void *addr;

		/* cap allocation so that we won't exceed max_mem */
		if (alloc_size + (PAGE_SIZE << order) > max_mem) {
			order = get_order(max_mem - alloc_size);
			if (order)
				order--;
		}
		size = PAGE_SIZE << order;

		folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order);
		if (!folio)
			goto err_free_folios;

		state->folios[state->nr_folios++] = folio;
		addr = folio_address(folio);
		get_random_bytes(addr, size);
		csum = csum_partial(addr, size, csum);
		alloc_size += size;
	}

	state->csum = csum;
	return 0;

err_free_folios:
	for (int i = 0; i < state->nr_folios; i++)
		folio_put(state->folios[i]);
	return -ENOMEM;
}

static int kho_test_save(void)
{
	struct kho_test_state *state = &kho_test_state;
	struct folio **folios __free(kvfree) = NULL;
	unsigned long max_nr;
	int err;

	max_mem = PAGE_ALIGN(max_mem);
	max_nr = max_mem >> PAGE_SHIFT;

	folios = kvmalloc_array(max_nr, sizeof(*state->folios), GFP_KERNEL);
	if (!folios)
		return -ENOMEM;
	state->folios = folios;

	err = kho_test_generate_data(state);
	if (err)
		return err;

	err = kho_test_prepare_fdt(state);
	if (err)
		return err;

	return register_kho_notifier(&kho_test_nb);
}

static int kho_test_restore_data(const void *fdt, int node)
{
	const unsigned int *nr_folios;
	const phys_addr_t *folios_info;
	const __wsum *old_csum;
	__wsum csum = 0;
	int len;

	node = fdt_path_offset(fdt, "/data");

	nr_folios = fdt_getprop(fdt, node, "nr_folios", &len);
	if (!nr_folios || len != sizeof(*nr_folios))
		return -EINVAL;

	old_csum = fdt_getprop(fdt, node, "csum", &len);
	if (!old_csum || len != sizeof(*old_csum))
		return -EINVAL;

	folios_info = fdt_getprop(fdt, node, "folios_info", &len);
	if (!folios_info || len != sizeof(*folios_info) * *nr_folios)
		return -EINVAL;

	for (int i = 0; i < *nr_folios; i++) {
		unsigned int order = folios_info[i] & ~PAGE_MASK;
		phys_addr_t phys = folios_info[i] & PAGE_MASK;
		unsigned int size = PAGE_SIZE << order;
		struct folio *folio;

		folio = kho_restore_folio(phys);
		if (!folio)
			break;

		if (folio_order(folio) != order)
			break;

		csum = csum_partial(folio_address(folio), size, csum);
		folio_put(folio);
	}

	if (csum != *old_csum)
		return -EINVAL;

	return 0;
}

static int kho_test_restore(phys_addr_t fdt_phys)
{
	void *fdt = phys_to_virt(fdt_phys);
	const unsigned int *magic;
	int node, len, err;

	node = fdt_path_offset(fdt, "/");
	if (node < 0)
		return -EINVAL;

	if (fdt_node_check_compatible(fdt, node, KHO_TEST_COMPAT))
		return -EINVAL;

	magic = fdt_getprop(fdt, node, "magic", &len);
	if (!magic || len != sizeof(*magic))
		return -EINVAL;

	if (*magic != KHO_TEST_MAGIC)
		return -EINVAL;

	err = kho_test_restore_data(fdt, node);
	if (err)
		return err;

	pr_info("KHO restore succeeded\n");
	return 0;
}

static int __init kho_test_init(void)
{
	phys_addr_t fdt_phys;
	int err;

	err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys);
	if (!err)
		return kho_test_restore(fdt_phys);

	if (err != -ENOENT) {
		pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, err);
		return err;
	}

	return kho_test_save();
}
module_init(kho_test_init);

static void kho_test_cleanup(void)
{
	for (int i = 0; i < kho_test_state.nr_folios; i++)
		folio_put(kho_test_state.folios[i]);

	kvfree(kho_test_state.folios);
}

static void __exit kho_test_exit(void)
{
	unregister_kho_notifier(&kho_test_nb);
	kho_test_cleanup();
}
module_exit(kho_test_exit);

MODULE_AUTHOR("Mike Rapoport <rppt@kernel.org>");
MODULE_DESCRIPTION("KHO test module");
MODULE_LICENSE("GPL");
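Usage note (inferred from the module logic above, not from separate documentation): on the first boot the module takes the save path, allocating up to max_mem bytes of randomized folios and registering its KHO notifier; after a kexec handover into the next kernel, loading the module again takes the restore path and prints "KHO restore succeeded" once the checksum over the restored folios matches the one preserved in the FDT.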
107
lib/xxhash.c
@@ -267,113 +267,6 @@ void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
}
EXPORT_SYMBOL(xxh64_reset);

-int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
-{
-	const uint8_t *p = (const uint8_t *)input;
-	const uint8_t *const b_end = p + len;
-
-	if (input == NULL)
-		return -EINVAL;
-
-	state->total_len_32 += (uint32_t)len;
-	state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
-
-	if (state->memsize + len < 16) { /* fill in tmp buffer */
-		memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
-		state->memsize += (uint32_t)len;
-		return 0;
-	}
-
-	if (state->memsize) { /* some data left from previous update */
-		const uint32_t *p32 = state->mem32;
-
-		memcpy((uint8_t *)(state->mem32) + state->memsize, input,
-			16 - state->memsize);
-
-		state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
-		p32++;
-		state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
-		p32++;
-		state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
-		p32++;
-		state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
-		p32++;
-
-		p += 16-state->memsize;
-		state->memsize = 0;
-	}
-
-	if (p <= b_end - 16) {
-		const uint8_t *const limit = b_end - 16;
-		uint32_t v1 = state->v1;
-		uint32_t v2 = state->v2;
-		uint32_t v3 = state->v3;
-		uint32_t v4 = state->v4;
-
-		do {
-			v1 = xxh32_round(v1, get_unaligned_le32(p));
-			p += 4;
-			v2 = xxh32_round(v2, get_unaligned_le32(p));
-			p += 4;
-			v3 = xxh32_round(v3, get_unaligned_le32(p));
-			p += 4;
-			v4 = xxh32_round(v4, get_unaligned_le32(p));
-			p += 4;
-		} while (p <= limit);
-
-		state->v1 = v1;
-		state->v2 = v2;
-		state->v3 = v3;
-		state->v4 = v4;
-	}
-
-	if (p < b_end) {
-		memcpy(state->mem32, p, (size_t)(b_end-p));
-		state->memsize = (uint32_t)(b_end-p);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(xxh32_update);
-
-uint32_t xxh32_digest(const struct xxh32_state *state)
-{
-	const uint8_t *p = (const uint8_t *)state->mem32;
-	const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
-		state->memsize;
-	uint32_t h32;
-
-	if (state->large_len) {
-		h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
-			xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
-	} else {
-		h32 = state->v3 /* == seed */ + PRIME32_5;
-	}
-
-	h32 += state->total_len_32;
-
-	while (p + 4 <= b_end) {
-		h32 += get_unaligned_le32(p) * PRIME32_3;
-		h32 = xxh_rotl32(h32, 17) * PRIME32_4;
-		p += 4;
-	}
-
-	while (p < b_end) {
-		h32 += (*p) * PRIME32_5;
-		h32 = xxh_rotl32(h32, 11) * PRIME32_1;
-		p++;
-	}
-
-	h32 ^= h32 >> 15;
-	h32 *= PRIME32_2;
-	h32 ^= h32 >> 13;
-	h32 *= PRIME32_3;
-	h32 ^= h32 >> 16;
-
-	return h32;
-}
-EXPORT_SYMBOL(xxh32_digest);
-
int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
{
	const uint8_t *p = (const uint8_t *)input;
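Only the unused 32-bit streaming helpers go away; the one-shot xxh32() in include/linux/xxhash.h (and the whole 64-bit API) remains. A minimal sketch of the surviving interface:

	#include <linux/xxhash.h>

	/* Hypothetical caller: a single one-shot call replaces the
	 * removed reset/update/digest sequence. */
	static uint32_t example_hash(const void *buf, size_t len)
	{
		return xxh32(buf, len, 0);	/* 0 is the seed */
	}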
@@ -54,7 +54,7 @@ config SAMPLE_FTRACE_OPS
	  measures the time taken to invoke one function a number of times.

config SAMPLE_TRACE_ARRAY
-	tristate "Build sample module for kernel access to Ftrace instancess"
+	tristate "Build sample module for kernel access to Ftrace instances"
	depends on EVENT_TRACING && m
	help
	  This builds a module that demonstrates the use of various APIs to
@@ -316,10 +316,9 @@ config SAMPLE_HUNG_TASK
	depends on DETECT_HUNG_TASK && DEBUG_FS
	help
	  Build a module that provides debugfs files (e.g., mutex, semaphore,
-	  etc.) under <debugfs>/hung_task. If user reads one of these files,
-	  it will sleep long time (256 seconds) with holding a lock. Thus,
-	  if 2 or more processes read the same file concurrently, it will
-	  be detected by the hung_task watchdog.
+	  rw_semaphore_read, rw_semaphore_write) under <debugfs>/hung_task.
+	  Reading these files with multiple processes triggers hung task
+	  detection by holding locks for a long time (256 seconds).

source "samples/rust/Kconfig"
@@ -4,11 +4,12 @@
 * semaphore, etc.
 *
 * Usage: Load this module and read `<debugfs>/hung_task/mutex`,
-* `<debugfs>/hung_task/semaphore`, etc., with 2 or more processes.
+* `<debugfs>/hung_task/semaphore`, `<debugfs>/hung_task/rw_semaphore_read`,
+* `<debugfs>/hung_task/rw_semaphore_write`, etc., with 2 or more processes.
 *
 * This is for testing kernel hung_task error messages with various locking
-* mechanisms (e.g., mutex, semaphore, etc.). Note that this may freeze
-* your system or cause a panic. Use only for testing purposes.
+* mechanisms (e.g., mutex, semaphore, rw_semaphore_read, rw_semaphore_write, etc.).
+* Note that this may freeze your system or cause a panic. Use only for testing purposes.
 */

#include <linux/debugfs.h>
@@ -17,21 +18,29 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/semaphore.h>
+#include <linux/rwsem.h>

#define HUNG_TASK_DIR		"hung_task"
#define HUNG_TASK_MUTEX_FILE	"mutex"
#define HUNG_TASK_SEM_FILE	"semaphore"
+#define HUNG_TASK_RWSEM_READ_FILE	"rw_semaphore_read"
+#define HUNG_TASK_RWSEM_WRITE_FILE	"rw_semaphore_write"
#define SLEEP_SECOND		256

static const char dummy_string[] = "This is a dummy string.";
static DEFINE_MUTEX(dummy_mutex);
static DEFINE_SEMAPHORE(dummy_sem, 1);
+static DECLARE_RWSEM(dummy_rwsem);
static struct dentry *hung_task_dir;

/* Mutex-based read function */
static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf,
				size_t count, loff_t *ppos)
{
+	/* Check if data is already read */
+	if (*ppos >= sizeof(dummy_string))
+		return 0;
+
	/* Second task waits on mutex, entering uninterruptible sleep */
	guard(mutex)(&dummy_mutex);
@@ -46,6 +55,10 @@ static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf,
static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf,
				    size_t count, loff_t *ppos)
{
+	/* Check if data is already read */
+	if (*ppos >= sizeof(dummy_string))
+		return 0;
+
	/* Second task waits on semaphore, entering uninterruptible sleep */
	down(&dummy_sem);
@@ -58,6 +71,46 @@ static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf,
				       sizeof(dummy_string));
}

+/* Read-write semaphore read function */
+static ssize_t read_dummy_rwsem_read(struct file *file, char __user *user_buf,
+				     size_t count, loff_t *ppos)
+{
+	/* Check if data is already read */
+	if (*ppos >= sizeof(dummy_string))
+		return 0;
+
+	/* Acquires read lock, allowing concurrent readers but blocks if write lock is held */
+	down_read(&dummy_rwsem);
+
+	/* Sleeps here, potentially triggering hung task detection if lock is held too long */
+	msleep_interruptible(SLEEP_SECOND * 1000);
+
+	up_read(&dummy_rwsem);
+
+	return simple_read_from_buffer(user_buf, count, ppos, dummy_string,
+				       sizeof(dummy_string));
+}
+
+/* Read-write semaphore write function */
+static ssize_t read_dummy_rwsem_write(struct file *file, char __user *user_buf,
+				      size_t count, loff_t *ppos)
+{
+	/* Check if data is already read */
+	if (*ppos >= sizeof(dummy_string))
+		return 0;
+
+	/* Acquires exclusive write lock, blocking all other readers and writers */
+	down_write(&dummy_rwsem);
+
+	/* Sleeps here, potentially triggering hung task detection if lock is held too long */
+	msleep_interruptible(SLEEP_SECOND * 1000);
+
+	up_write(&dummy_rwsem);
+
+	return simple_read_from_buffer(user_buf, count, ppos, dummy_string,
+				       sizeof(dummy_string));
+}
+
/* File operations for mutex */
static const struct file_operations hung_task_mutex_fops = {
	.read = read_dummy_mutex,
@@ -68,6 +121,16 @@ static const struct file_operations hung_task_sem_fops = {
	.read = read_dummy_semaphore,
};

+/* File operations for rw_semaphore read */
+static const struct file_operations hung_task_rwsem_read_fops = {
+	.read = read_dummy_rwsem_read,
+};
+
+/* File operations for rw_semaphore write */
+static const struct file_operations hung_task_rwsem_write_fops = {
+	.read = read_dummy_rwsem_write,
+};
+
static int __init hung_task_tests_init(void)
{
	hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, NULL);
@@ -79,6 +142,10 @@ static int __init hung_task_tests_init(void)
			    &hung_task_mutex_fops);
	debugfs_create_file(HUNG_TASK_SEM_FILE, 0400, hung_task_dir, NULL,
			    &hung_task_sem_fops);
+	debugfs_create_file(HUNG_TASK_RWSEM_READ_FILE, 0400, hung_task_dir, NULL,
+			    &hung_task_rwsem_read_fops);
+	debugfs_create_file(HUNG_TASK_RWSEM_WRITE_FILE, 0400, hung_task_dir, NULL,
+			    &hung_task_rwsem_write_fops);

	return 0;
}
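To exercise the new files, two readers must hit the same lock. A user-space sketch (hypothetical test program; it assumes debugfs is mounted at /sys/kernel/debug) forks twice so the second child blocks uninterruptibly behind the write-locked rwsem and trips the hung_task watchdog:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/kernel/debug/hung_task/rw_semaphore_write";
		char buf[64];

		for (int i = 0; i < 2; i++) {
			if (fork() == 0) {
				int fd = open(path, O_RDONLY);

				if (fd < 0)
					return 1;
				/* Each read sleeps 256s in the kernel while holding
				 * the lock; the second child waits in down_write(). */
				read(fd, buf, sizeof(buf));
				close(fd);
				return 0;
			}
		}
		while (wait(NULL) > 0)
			;
		return 0;
	}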
@@ -685,6 +685,9 @@ our $tracing_logging_tags = qr{(?xi:
	[\.\!:\s]*
)};

+# Device ID types like found in include/linux/mod_devicetable.h.
+our $dev_id_types = qr{\b[a-z]\w*_device_id\b};
+
sub edit_distance_min {
	my (@arr) = @_;
	my $len = scalar @arr;
@@ -3500,9 +3503,10 @@ sub process {
# Check for various typo / spelling mistakes
		if (defined($misspellings) &&
		    ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
-			while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) {
+			my $rawline_utf8 = decode("utf8", $rawline);
+			while ($rawline_utf8 =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) {
				my $typo = $1;
-				my $blank = copy_spacing($rawline);
+				my $blank = copy_spacing($rawline_utf8);
				my $ptr = substr($blank, 0, $-[1]) . "^" x length($typo);
				my $hereptr = "$hereline$ptr\n";
				my $typo_fix = $spelling_fix{lc($typo)};
@@ -7688,6 +7692,31 @@ sub process {
			WARN("DUPLICATED_SYSCTL_CONST",
			     "duplicated sysctl range checking value '$1', consider using the shared one in include/linux/sysctl.h\n" . $herecurr);
		}

+# Check that *_device_id tables have sentinel entries.
+		if (defined $stat && $line =~ /struct\s+$dev_id_types\s+\w+\s*\[\s*\]\s*=\s*\{/) {
+			my $stripped = $stat;
+
+			# Strip diff line prefixes.
+			$stripped =~ s/(^|\n)./$1/g;
+			# Line continuations.
+			$stripped =~ s/\\\n/\n/g;
+			# Strip whitespace, empty strings, zeroes, and commas.
+			$stripped =~ s/""//g;
+			$stripped =~ s/0x0//g;
+			$stripped =~ s/[\s$;,0]//g;
+			# Strip field assignments.
+			$stripped =~ s/\.$Ident=//g;
+
+			if (!(substr($stripped, -4) eq "{}};" ||
+			      substr($stripped, -6) eq "{{}}};" ||
+			      $stripped =~ /ISAPNP_DEVICE_SINGLE_END}};$/ ||
+			      $stripped =~ /ISAPNP_CARD_END}};$/ ||
+			      $stripped =~ /NULL};$/ ||
+			      $stripped =~ /PCMCIA_DEVICE_NULL};$/)) {
+				ERROR("MISSING_SENTINEL", "missing sentinel in ID array\n" . "$here\n$stat\n");
+			}
+		}
	}

# If we have no input at all, then there is nothing to report on
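What the new check enforces, illustrated on a hypothetical driver table: the array must end with an all-zero sentinel entry so that ID-matching loops know where to stop:

	#include <linux/mod_devicetable.h>

	/* Hypothetical example; the empty { } entry at the end is the
	 * sentinel that MISSING_SENTINEL looks for. */
	static const struct of_device_id example_of_match[] = {
		{ .compatible = "vendor,example-dev" },
		{ }	/* sentinel */
	};
	MODULE_DEVICE_TABLE(of, example_of_match);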
@@ -7,26 +7,65 @@
 // Confidence: High
 // Copyright: (C) 2024 Easwar Hariharan, Microsoft
 // Keywords: secs, seconds, jiffies
+//
+// Options: --include-headers

 virtual patch
+virtual report
+virtual context

-@depends on patch@ constant C; @@
+@pconst depends on patch@ constant C; @@

 - msecs_to_jiffies(C * 1000)
 + secs_to_jiffies(C)

-@depends on patch@ constant C; @@
+@pconstms depends on patch@ constant C; @@

 - msecs_to_jiffies(C * MSEC_PER_SEC)
 + secs_to_jiffies(C)

-@depends on patch@ expression E; @@
+@pexpr depends on patch@ expression E; @@

 - msecs_to_jiffies(E * 1000)
 + secs_to_jiffies(E)

-@depends on patch@ expression E; @@
+@pexprms depends on patch@ expression E; @@

 - msecs_to_jiffies(E * MSEC_PER_SEC)
 + secs_to_jiffies(E)
+
+@r depends on report && !patch@
+constant C;
+expression E;
+position p;
+@@
+
+(
+msecs_to_jiffies(C@p * 1000)
+|
+msecs_to_jiffies(C@p * MSEC_PER_SEC)
+|
+msecs_to_jiffies(E@p * 1000)
+|
+msecs_to_jiffies(E@p * MSEC_PER_SEC)
+)
+
+@c depends on context && !patch@
+constant C;
+expression E;
+@@
+
+(
+* msecs_to_jiffies(C * 1000)
+|
+* msecs_to_jiffies(C * MSEC_PER_SEC)
+|
+* msecs_to_jiffies(E * 1000)
+|
+* msecs_to_jiffies(E * MSEC_PER_SEC)
+)
+
+@script:python depends on report@
+p << r.p;
+@@
+
+coccilib.report.print_report(p[0], "WARNING opportunity for secs_to_jiffies()")
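The conversion the script performs, sketched on a made-up driver snippet (names hypothetical):

	#include <linux/jiffies.h>

	#define POLL_SECS 5	/* hypothetical constant */

	static unsigned long example_timeout(void)
	{
		/* Before the script: msecs_to_jiffies(POLL_SECS * 1000); after: */
		return secs_to_jiffies(POLL_SECS);
	}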
@@ -74,12 +74,12 @@ if IS_BUILTIN(CONFIG_MODULES):
    LX_GDBPARSED(MOD_RO_AFTER_INIT)

/* linux/mount.h */
-LX_VALUE(MNT_NOSUID)
-LX_VALUE(MNT_NODEV)
-LX_VALUE(MNT_NOEXEC)
-LX_VALUE(MNT_NOATIME)
-LX_VALUE(MNT_NODIRATIME)
-LX_VALUE(MNT_RELATIME)
+LX_GDBPARSED(MNT_NOSUID)
+LX_GDBPARSED(MNT_NODEV)
+LX_GDBPARSED(MNT_NOEXEC)
+LX_GDBPARSED(MNT_NOATIME)
+LX_GDBPARSED(MNT_NODIRATIME)
+LX_GDBPARSED(MNT_RELATIME)

/* linux/threads.h */
LX_VALUE(NR_CPUS)
@@ -1099,6 +1099,7 @@ notication||notification
notications||notifications
notifcations||notifications
notifed||notified
+notifer||notifier
notity||notify
notfify||notify
nubmer||number
@@ -2,7 +2,7 @@
CC := $(CROSS_COMPILE)gcc
CFLAGS := -I../../usr/include

-PROGS := getdelays procacct
+PROGS := getdelays procacct delaytop

all: $(PROGS)
862
tools/accounting/delaytop.c
Normal file
@ -0,0 +1,862 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* delaytop.c - system-wide delay monitoring tool.
|
||||
*
|
||||
* This tool provides real-time monitoring and statistics of
|
||||
* system, container, and task-level delays, including CPU,
|
||||
* memory, IO, and IRQ. It supports both interactive (top-like),
|
||||
* and can output delay information for the whole system, specific
|
||||
* containers (cgroups), or individual tasks (PIDs).
|
||||
*
|
||||
* Key features:
|
||||
* - Collects per-task delay accounting statistics via taskstats.
|
||||
* - Collects system-wide PSI information.
|
||||
* - Supports sorting, filtering.
|
||||
* - Supports both interactive (screen refresh).
|
||||
*
|
||||
* Copyright (C) Fan Yu, ZTE Corp. 2025
|
||||
* Copyright (C) Wang Yaxin, ZTE Corp. 2025
|
||||
*
|
||||
* Compile with
|
||||
* gcc -I/usr/src/linux/include delaytop.c -o delaytop
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <getopt.h>
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
#include <dirent.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/select.h>
|
||||
#include <termios.h>
|
||||
#include <limits.h>
|
||||
#include <linux/genetlink.h>
|
||||
#include <linux/taskstats.h>
|
||||
#include <linux/cgroupstats.h>
|
||||
|
||||
#define PSI_CPU_SOME "/proc/pressure/cpu"
|
||||
#define PSI_CPU_FULL "/proc/pressure/cpu"
|
||||
#define PSI_MEMORY_SOME "/proc/pressure/memory"
|
||||
#define PSI_MEMORY_FULL "/proc/pressure/memory"
|
||||
#define PSI_IO_SOME "/proc/pressure/io"
|
||||
#define PSI_IO_FULL "/proc/pressure/io"
|
||||
#define PSI_IRQ_FULL "/proc/pressure/irq"
|
||||
|
||||
#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
|
||||
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
|
||||
#define NLA_PAYLOAD(len) (len - NLA_HDRLEN)
|
||||
|
||||
#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
|
||||
#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
|
||||
|
||||
#define TASK_COMM_LEN 16
|
||||
#define MAX_MSG_SIZE 1024
|
||||
#define MAX_TASKS 1000
|
||||
#define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
|
||||
#define BOOL_FPRINT(stream, fmt, ...) \
|
||||
({ \
|
||||
int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
|
||||
ret >= 0; \
|
||||
})
|
||||
#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
|
||||
|
||||
/* Program settings structure */
|
||||
struct config {
|
||||
int delay; /* Update interval in seconds */
|
||||
int iterations; /* Number of iterations, 0 == infinite */
|
||||
int max_processes; /* Maximum number of processes to show */
|
||||
char sort_field; /* Field to sort by */
|
||||
int output_one_time; /* Output once and exit */
|
||||
int monitor_pid; /* Monitor specific PID */
|
||||
char *container_path; /* Path to container cgroup */
|
||||
};
|
||||
|
||||
/* PSI statistics structure */
|
||||
struct psi_stats {
|
||||
double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300;
|
||||
unsigned long long cpu_some_total;
|
||||
double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300;
|
||||
unsigned long long cpu_full_total;
|
||||
double memory_some_avg10, memory_some_avg60, memory_some_avg300;
|
||||
unsigned long long memory_some_total;
|
||||
double memory_full_avg10, memory_full_avg60, memory_full_avg300;
|
||||
unsigned long long memory_full_total;
|
||||
double io_some_avg10, io_some_avg60, io_some_avg300;
|
||||
unsigned long long io_some_total;
|
||||
double io_full_avg10, io_full_avg60, io_full_avg300;
|
||||
unsigned long long io_full_total;
|
||||
double irq_full_avg10, irq_full_avg60, irq_full_avg300;
|
||||
unsigned long long irq_full_total;
|
||||
};
|
||||
|
||||
/* Task delay information structure */
|
||||
struct task_info {
|
||||
int pid;
|
||||
int tgid;
|
||||
char command[TASK_COMM_LEN];
|
||||
unsigned long long cpu_count;
|
||||
unsigned long long cpu_delay_total;
|
||||
unsigned long long blkio_count;
|
||||
unsigned long long blkio_delay_total;
|
||||
unsigned long long swapin_count;
|
||||
unsigned long long swapin_delay_total;
|
||||
unsigned long long freepages_count;
|
||||
unsigned long long freepages_delay_total;
|
||||
unsigned long long thrashing_count;
|
||||
unsigned long long thrashing_delay_total;
|
||||
unsigned long long compact_count;
|
||||
unsigned long long compact_delay_total;
|
||||
unsigned long long wpcopy_count;
|
||||
unsigned long long wpcopy_delay_total;
|
||||
unsigned long long irq_count;
|
||||
unsigned long long irq_delay_total;
|
||||
};
|
||||
|
||||
/* Container statistics structure */
|
||||
struct container_stats {
|
||||
int nr_sleeping; /* Number of sleeping processes */
|
||||
int nr_running; /* Number of running processes */
|
||||
int nr_stopped; /* Number of stopped processes */
|
||||
int nr_uninterruptible; /* Number of uninterruptible processes */
|
||||
int nr_io_wait; /* Number of processes in IO wait */
|
||||
};
|
||||
|
||||
/* Global variables */
|
||||
static struct config cfg;
|
||||
static struct psi_stats psi;
|
||||
static struct task_info tasks[MAX_TASKS];
|
||||
static int task_count;
|
||||
static int running = 1;
|
||||
static struct container_stats container_stats;
|
||||
|
||||
/* Netlink socket variables */
|
||||
static int nl_sd = -1;
|
||||
static int family_id;
|
||||
|
||||
/* Set terminal to non-canonical mode for q-to-quit */
|
||||
static struct termios orig_termios;
|
||||
static void enable_raw_mode(void)
|
||||
{
|
||||
struct termios raw;
|
||||
|
||||
tcgetattr(STDIN_FILENO, &orig_termios);
|
||||
raw = orig_termios;
|
||||
raw.c_lflag &= ~(ICANON | ECHO);
|
||||
tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw);
|
||||
}
|
||||
static void disable_raw_mode(void)
|
||||
{
|
||||
tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
|
||||
}
|
||||
|
||||
/* Display usage information and command line options */
|
||||
static void usage(void)
|
||||
{
|
||||
printf("Usage: delaytop [Options]\n"
|
||||
"Options:\n"
|
||||
" -h, --help Show this help message and exit\n"
|
||||
" -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
|
||||
" -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
|
||||
" -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
|
||||
" -o, --once Display once and exit\n"
|
||||
" -p, --pid=PID Monitor only the specified PID\n"
|
||||
" -C, --container=PATH Monitor the container at specified cgroup path\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Parse command line arguments and set configuration */
|
||||
static void parse_args(int argc, char **argv)
|
||||
{
|
||||
int c;
|
||||
struct option long_options[] = {
|
||||
{"help", no_argument, 0, 'h'},
|
||||
{"delay", required_argument, 0, 'd'},
|
||||
{"iterations", required_argument, 0, 'n'},
|
||||
{"pid", required_argument, 0, 'p'},
|
||||
{"once", no_argument, 0, 'o'},
|
||||
{"processes", required_argument, 0, 'P'},
|
||||
{"container", required_argument, 0, 'C'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
/* Set defaults */
|
||||
cfg.delay = 2;
|
||||
cfg.iterations = 0;
|
||||
cfg.max_processes = 20;
|
||||
cfg.sort_field = 'c'; /* Default sort by CPU delay */
|
||||
cfg.output_one_time = 0;
|
||||
cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
|
||||
cfg.container_path = NULL;
|
||||
|
||||
while (1) {
|
||||
int option_index = 0;
|
||||
|
||||
c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
|
||||
if (c == -1)
|
||||
break;
|
||||
|
||||
switch (c) {
|
||||
case 'h':
|
||||
usage();
|
||||
break;
|
||||
case 'd':
|
||||
cfg.delay = atoi(optarg);
|
||||
if (cfg.delay < 1) {
|
||||
fprintf(stderr, "Error: delay must be >= 1.\n");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
cfg.iterations = atoi(optarg);
|
||||
if (cfg.iterations < 0) {
|
||||
fprintf(stderr, "Error: iterations must be >= 0.\n");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
cfg.monitor_pid = atoi(optarg);
|
||||
if (cfg.monitor_pid < 1) {
|
||||
fprintf(stderr, "Error: pid must be >= 1.\n");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
cfg.output_one_time = 1;
|
||||
break;
|
||||
case 'P':
|
||||
cfg.max_processes = atoi(optarg);
|
||||
if (cfg.max_processes < 1) {
|
||||
fprintf(stderr, "Error: processes must be >= 1.\n");
|
||||
exit(1);
|
||||
}
|
||||
if (cfg.max_processes > MAX_TASKS) {
|
||||
fprintf(stderr, "Warning: processes capped to %d.\n",
|
||||
MAX_TASKS);
|
||||
cfg.max_processes = MAX_TASKS;
|
||||
}
|
||||
break;
|
||||
case 'C':
|
||||
cfg.container_path = strdup(optarg);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Try 'delaytop --help' for more information.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Create a raw netlink socket and bind */
|
||||
static int create_nl_socket(void)
|
||||
{
|
||||
int fd;
|
||||
struct sockaddr_nl local;
|
||||
|
||||
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
|
||||
if (fd < 0)
|
||||
return -1;
|
||||
|
||||
memset(&local, 0, sizeof(local));
|
||||
local.nl_family = AF_NETLINK;
|
||||
|
||||
if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) {
|
||||
fprintf(stderr, "Failed to bind socket when create nl_socket\n");
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
/* Send a command via netlink */
|
||||
static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
|
||||
__u8 genl_cmd, __u16 nla_type,
|
||||
void *nla_data, int nla_len)
|
||||
{
|
||||
struct sockaddr_nl nladdr;
|
||||
struct nlattr *na;
|
||||
int r, buflen;
|
||||
char *buf;
|
||||
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct genlmsghdr g;
|
||||
char buf[MAX_MSG_SIZE];
|
||||
} msg;
|
||||
|
||||
msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
|
||||
msg.n.nlmsg_type = nlmsg_type;
|
||||
msg.n.nlmsg_flags = NLM_F_REQUEST;
|
||||
msg.n.nlmsg_seq = 0;
|
||||
msg.n.nlmsg_pid = nlmsg_pid;
|
||||
msg.g.cmd = genl_cmd;
|
||||
msg.g.version = 0x1;
|
||||
na = (struct nlattr *) GENLMSG_DATA(&msg);
|
||||
na->nla_type = nla_type;
|
||||
na->nla_len = nla_len + NLA_HDRLEN;
|
||||
memcpy(NLA_DATA(na), nla_data, nla_len);
|
||||
msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
|
||||
|
||||
buf = (char *) &msg;
|
||||
buflen = msg.n.nlmsg_len;
|
||||
memset(&nladdr, 0, sizeof(nladdr));
|
||||
nladdr.nl_family = AF_NETLINK;
|
||||
while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
|
||||
sizeof(nladdr))) < buflen) {
|
||||
if (r > 0) {
|
||||
buf += r;
|
||||
buflen -= r;
|
||||
} else if (errno != EAGAIN)
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get family ID for taskstats via netlink */
|
||||
static int get_family_id(int sd)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct genlmsghdr g;
|
||||
char buf[256];
|
||||
} ans;
|
||||
|
||||
int id = 0, rc;
|
||||
struct nlattr *na;
|
||||
int rep_len;
|
||||
char name[100];
|
||||
|
||||
strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1);
|
||||
name[sizeof(name) - 1] = '\0';
|
||||
rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
|
||||
CTRL_ATTR_FAMILY_NAME, (void *)name,
|
||||
strlen(TASKSTATS_GENL_NAME)+1);
|
||||
if (rc < 0) {
|
||||
fprintf(stderr, "Failed to send cmd for family id\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
rep_len = recv(sd, &ans, sizeof(ans), 0);
|
||||
if (ans.n.nlmsg_type == NLMSG_ERROR ||
|
||||
(rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) {
|
||||
fprintf(stderr, "Failed to receive response for family id\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
na = (struct nlattr *) GENLMSG_DATA(&ans);
|
||||
na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
|
||||
if (na->nla_type == CTRL_ATTR_FAMILY_ID)
|
||||
id = *(__u16 *) NLA_DATA(na);
|
||||
return id;
|
||||
}
|
||||
|
||||
static void read_psi_stats(void)
|
||||
{
|
||||
FILE *fp;
|
||||
char line[256];
|
||||
int ret = 0;
|
||||
/* Zero all fields */
|
||||
memset(&psi, 0, sizeof(psi));
|
||||
/* CPU pressure */
|
||||
fp = fopen(PSI_CPU_SOME, "r");
|
||||
if (fp) {
|
||||
while (fgets(line, sizeof(line), fp)) {
|
||||
if (strncmp(line, "some", 4) == 0) {
|
||||
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
|
||||
&psi.cpu_some_avg10, &psi.cpu_some_avg60,
|
||||
&psi.cpu_some_avg300, &psi.cpu_some_total);
|
||||
if (ret != 4)
|
||||
fprintf(stderr, "Failed to parse CPU some PSI data\n");
|
||||
} else if (strncmp(line, "full", 4) == 0) {
|
||||
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
|
||||
&psi.cpu_full_avg10, &psi.cpu_full_avg60,
|
||||
&psi.cpu_full_avg300, &psi.cpu_full_total);
|
||||
if (ret != 4)
|
||||
fprintf(stderr, "Failed to parse CPU full PSI data\n");
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
/* Memory pressure */
|
||||
fp = fopen(PSI_MEMORY_SOME, "r");
|
||||
if (fp) {
|
||||
while (fgets(line, sizeof(line), fp)) {
|
||||
if (strncmp(line, "some", 4) == 0) {
|
||||
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
|
||||
&psi.memory_some_avg10, &psi.memory_some_avg60,
|
||||
&psi.memory_some_avg300, &psi.memory_some_total);
|
||||
if (ret != 4)
|
||||
fprintf(stderr, "Failed to parse Memory some PSI data\n");
|
||||
} else if (strncmp(line, "full", 4) == 0) {
|
||||
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
|
||||
&psi.memory_full_avg10, &psi.memory_full_avg60,
|
||||
&psi.memory_full_avg300, &psi.memory_full_total);
|
||||
}
|
||||
if (ret != 4)
|
||||
fprintf(stderr, "Failed to parse Memory full PSI data\n");
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
/* IO pressure */
|
||||
fp = fopen(PSI_IO_SOME, "r");
|
||||
if (fp) {
|
||||
while (fgets(line, sizeof(line), fp)) {
|
||||
if (strncmp(line, "some", 4) == 0) {
|
||||
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
|
||||
&psi.io_some_avg10, &psi.io_some_avg60,
|
||||
&psi.io_some_avg300, &psi.io_some_total);
|
||||
if (ret != 4)
|
||||
fprintf(stderr, "Failed to parse IO some PSI data\n");
|
||||
} else if (strncmp(line, "full", 4) == 0) {
|
||||
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
|
||||
&psi.io_full_avg10, &psi.io_full_avg60,
|
||||
&psi.io_full_avg300, &psi.io_full_total);
|
||||
if (ret != 4)
|
||||
fprintf(stderr, "Failed to parse IO full PSI data\n");
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
/* IRQ pressure (only full) */
|
||||
fp = fopen(PSI_IRQ_FULL, "r");
|
||||
if (fp) {
|
||||
while (fgets(line, sizeof(line), fp)) {
|
||||
if (strncmp(line, "full", 4) == 0) {
|
||||
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
|
||||
&psi.irq_full_avg10, &psi.irq_full_avg60,
|
||||
&psi.irq_full_avg300, &psi.irq_full_total);
|
||||
if (ret != 4)
|
||||
fprintf(stderr, "Failed to parse IRQ full PSI data\n");
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
static int read_comm(int pid, char *comm_buf, size_t buf_size)
|
||||
{
|
||||
char path[64];
|
||||
int ret = -1;
|
||||
size_t len;
|
||||
FILE *fp;
|
||||
|
||||
snprintf(path, sizeof(path), "/proc/%d/comm", pid);
|
||||
fp = fopen(path, "r");
|
||||
if (!fp) {
|
||||
fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (fgets(comm_buf, buf_size, fp)) {
|
||||
len = strlen(comm_buf);
|
||||
if (len > 0 && comm_buf[len - 1] == '\n')
|
||||
comm_buf[len - 1] = '\0';
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void fetch_and_fill_task_info(int pid, const char *comm)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct genlmsghdr g;
|
||||
char buf[MAX_MSG_SIZE];
|
||||
} resp;
|
||||
struct taskstats stats;
|
||||
struct nlattr *nested;
|
||||
struct nlattr *na;
|
||||
int nested_len;
|
||||
int nl_len;
|
||||
int rc;
|
||||
|
||||
/* Send request for task stats */
|
||||
if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET,
|
||||
TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) {
|
||||
fprintf(stderr, "Failed to send request for task stats\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Receive response */
|
||||
rc = recv(nl_sd, &resp, sizeof(resp), 0);
|
||||
if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
|
||||
fprintf(stderr, "Failed to receive response for task stats\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Parse response */
|
||||
nl_len = GENLMSG_PAYLOAD(&resp.n);
|
||||
na = (struct nlattr *) GENLMSG_DATA(&resp);
|
||||
while (nl_len > 0) {
|
||||
if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) {
|
||||
nested = (struct nlattr *) NLA_DATA(na);
|
||||
nested_len = NLA_PAYLOAD(na->nla_len);
|
||||
while (nested_len > 0) {
|
||||
if (nested->nla_type == TASKSTATS_TYPE_STATS) {
|
||||
memcpy(&stats, NLA_DATA(nested), sizeof(stats));
|
||||
if (task_count < MAX_TASKS) {
|
||||
tasks[task_count].pid = pid;
|
||||
tasks[task_count].tgid = pid;
|
||||
strncpy(tasks[task_count].command, comm,
|
||||
TASK_COMM_LEN - 1);
|
||||
tasks[task_count].command[TASK_COMM_LEN - 1] = '\0';
|
||||
SET_TASK_STAT(task_count, cpu_count);
|
||||
SET_TASK_STAT(task_count, cpu_delay_total);
|
||||
SET_TASK_STAT(task_count, blkio_count);
|
||||
SET_TASK_STAT(task_count, blkio_delay_total);
|
||||
SET_TASK_STAT(task_count, swapin_count);
|
||||
SET_TASK_STAT(task_count, swapin_delay_total);
|
||||
SET_TASK_STAT(task_count, freepages_count);
|
||||
SET_TASK_STAT(task_count, freepages_delay_total);
|
||||
SET_TASK_STAT(task_count, thrashing_count);
|
||||
SET_TASK_STAT(task_count, thrashing_delay_total);
|
||||
SET_TASK_STAT(task_count, compact_count);
|
||||
SET_TASK_STAT(task_count, compact_delay_total);
|
||||
SET_TASK_STAT(task_count, wpcopy_count);
|
||||
SET_TASK_STAT(task_count, wpcopy_delay_total);
|
||||
SET_TASK_STAT(task_count, irq_count);
|
||||
SET_TASK_STAT(task_count, irq_delay_total);
|
||||
task_count++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
nested_len -= NLA_ALIGN(nested->nla_len);
|
||||
nested = NLA_NEXT(nested);
|
||||
}
|
||||
}
|
||||
nl_len -= NLA_ALIGN(na->nla_len);
|
||||
na = NLA_NEXT(na);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static void get_task_delays(void)
|
||||
{
|
||||
char comm[TASK_COMM_LEN];
|
||||
struct dirent *entry;
|
||||
DIR *dir;
|
||||
int pid;
|
||||
|
||||
task_count = 0;
|
||||
if (cfg.monitor_pid > 0) {
|
||||
if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0)
|
||||
fetch_and_fill_task_info(cfg.monitor_pid, comm);
|
||||
return;
|
||||
}
|
||||
|
||||
dir = opendir("/proc");
|
||||
if (!dir) {
|
||||
fprintf(stderr, "Error opening /proc directory\n");
|
||||
return;
|
||||
}
|
||||
|
||||
while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) {
|
||||
if (!isdigit(entry->d_name[0]))
|
||||
continue;
|
||||
pid = atoi(entry->d_name);
|
||||
if (pid == 0)
|
||||
continue;
|
||||
if (read_comm(pid, comm, sizeof(comm)) != 0)
|
||||
continue;
|
||||
fetch_and_fill_task_info(pid, comm);
|
||||
}
|
||||
closedir(dir);
|
||||
}
|
||||
|
||||
/* Calculate average delay in milliseconds */
|
||||
static double average_ms(unsigned long long total, unsigned long long count)
|
||||
{
|
||||
if (count == 0)
|
||||
return 0;
|
||||
return (double)total / 1000000.0 / count;
|
||||
}
|
||||
|
||||
/* Comparison function for sorting tasks */
|
||||
static int compare_tasks(const void *a, const void *b)
|
||||
{
|
||||
const struct task_info *t1 = (const struct task_info *)a;
|
||||
const struct task_info *t2 = (const struct task_info *)b;
|
||||
double avg1, avg2;
|
||||
|
||||
switch (cfg.sort_field) {
|
||||
case 'c': /* CPU */
|
||||
avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
|
||||
avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
|
||||
if (avg1 != avg2)
|
||||
return avg2 > avg1 ? 1 : -1;
|
||||
return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
|
||||
|
||||
default:
|
||||
return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort tasks by selected field */
|
||||
static void sort_tasks(void)
|
||||
{
|
||||
if (task_count > 0)
|
||||
qsort(tasks, task_count, sizeof(struct task_info), compare_tasks);
|
||||
}
|
||||
|
||||
/* Get container statistics via cgroupstats */
|
||||
static void get_container_stats(void)
|
||||
{
|
||||
int rc, cfd;
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct genlmsghdr g;
|
||||
char buf[MAX_MSG_SIZE];
|
||||
} req, resp;
|
||||
struct nlattr *na;
|
||||
int nl_len;
|
||||
struct cgroupstats stats;
|
||||
|
||||
/* Check if container path is set */
|
||||
if (!cfg.container_path)
|
||||
return;
|
||||
|
||||
/* Open container cgroup */
|
||||
cfd = open(cfg.container_path, O_RDONLY);
|
||||
if (cfd < 0) {
|
||||
fprintf(stderr, "Error opening container path: %s\n", cfg.container_path);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Send request for container stats */
|
||||
if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET,
|
||||
CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) {
|
||||
fprintf(stderr, "Failed to send request for container stats\n");
|
||||
close(cfd);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Receive response */
|
||||
rc = recv(nl_sd, &resp, sizeof(resp), 0);
|
||||
if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
|
||||
fprintf(stderr, "Failed to receive response for container stats\n");
|
||||
close(cfd);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Parse response */
|
||||
nl_len = GENLMSG_PAYLOAD(&resp.n);
|
||||
na = (struct nlattr *) GENLMSG_DATA(&resp);
|
||||
while (nl_len > 0) {
|
||||
if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) {
|
||||
/* Get the cgroupstats structure */
|
||||
memcpy(&stats, NLA_DATA(na), sizeof(stats));
|
||||
|
||||
/* Fill container stats */
|
||||
container_stats.nr_sleeping = stats.nr_sleeping;
|
||||
container_stats.nr_running = stats.nr_running;
|
||||
container_stats.nr_stopped = stats.nr_stopped;
|
||||
container_stats.nr_uninterruptible = stats.nr_uninterruptible;
|
||||
container_stats.nr_io_wait = stats.nr_io_wait;
|
||||
break;
|
||||
}
|
||||
nl_len -= NLA_ALIGN(na->nla_len);
|
||||
na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
|
||||
}
|
||||
|
||||
close(cfd);
|
||||
}
|
||||
|
||||
/* Display results to stdout or log file */
|
||||
static void display_results(void)
|
||||
{
|
||||
time_t now = time(NULL);
|
||||
struct tm *tm_now = localtime(&now);
|
||||
FILE *out = stdout;
|
||||
char timestamp[32];
|
||||
bool suc = true;
	int i, count;

	/* Clear terminal screen */
	suc &= BOOL_FPRINT(out, "\033[H\033[J");

	/* PSI output (one-line, no cat style) */
	suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, "CPU some:",
			   psi.cpu_some_avg10, psi.cpu_some_avg60,
			   psi.cpu_some_avg300, psi.cpu_some_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, "CPU full:",
			   psi.cpu_full_avg10, psi.cpu_full_avg60,
			   psi.cpu_full_avg300, psi.cpu_full_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, "Memory full:",
			   psi.memory_full_avg10, psi.memory_full_avg60,
			   psi.memory_full_avg300, psi.memory_full_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, "Memory some:",
			   psi.memory_some_avg10, psi.memory_some_avg60,
			   psi.memory_some_avg300, psi.memory_some_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, "IO full:",
			   psi.io_full_avg10, psi.io_full_avg60,
			   psi.io_full_avg300, psi.io_full_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, "IO some:",
			   psi.io_some_avg10, psi.io_some_avg60,
			   psi.io_some_avg300, psi.io_some_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, "IRQ full:",
			   psi.irq_full_avg10, psi.irq_full_avg60,
			   psi.irq_full_avg300, psi.irq_full_total / 1000);

	if (cfg.container_path) {
		suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
		suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ",
				   container_stats.nr_running, container_stats.nr_sleeping);
		suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n",
				   container_stats.nr_stopped, container_stats.nr_uninterruptible,
				   container_stats.nr_io_wait);
	}

	suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
			   cfg.max_processes);
	suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND");
	suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
			   "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",
			   "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)");

	suc &= BOOL_FPRINT(out, "-----------------------------------------------");
	suc &= BOOL_FPRINT(out, "----------------------------------------------\n");

	count = task_count < cfg.max_processes ? task_count : cfg.max_processes;

	for (i = 0; i < count; i++) {
		suc &= BOOL_FPRINT(out, "%5d %5d %-15s",
				   tasks[i].pid, tasks[i].tgid, tasks[i].command);
		suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n",
				   average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count),
				   average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count),
				   average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count),
				   average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count),
				   average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count),
				   average_ms(tasks[i].compact_delay_total, tasks[i].compact_count),
				   average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count),
				   average_ms(tasks[i].irq_delay_total, tasks[i].irq_count));
	}

	suc &= BOOL_FPRINT(out, "\n");

	if (!suc)
		perror("Error writing to output");
}
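For context: the psi.* fields printed above come from the kernel's pressure-stall-information files (/proc/pressure/cpu, /proc/pressure/memory, /proc/pressure/io, /proc/pressure/irq), whose "total" fields are cumulative stall time in microseconds — which is presumably why display_results() divides them by 1000 before printing. delaytop's own read_psi_stats() is defined earlier in the file, outside this excerpt; the following is only a minimal standalone sketch of parsing one such file, with illustrative names (read_pressure, struct psi_vals), not the tool's verbatim code:

#include <stdio.h>

struct psi_vals {
	double avg10, avg60, avg300;
	unsigned long long total;	/* cumulative stall time, microseconds */
};

/* Read the "some" and (where present) "full" lines of one PSI resource. */
static int read_pressure(const char *res, struct psi_vals *some,
			 struct psi_vals *full)
{
	char path[64];
	FILE *fp;
	int ok;

	snprintf(path, sizeof(path), "/proc/pressure/%s", res);
	fp = fopen(path, "r");
	if (!fp)
		return -1;

	/* file format: "some avg10=0.00 avg60=0.00 avg300=0.00 total=0" */
	ok = fscanf(fp, "some avg10=%lf avg60=%lf avg300=%lf total=%llu ",
		    &some->avg10, &some->avg60, &some->avg300,
		    &some->total) == 4;
	if (ok && full)
		ok = fscanf(fp, "full avg10=%lf avg60=%lf avg300=%lf total=%llu ",
			    &full->avg10, &full->avg60, &full->avg300,
			    &full->total) == 4;
	fclose(fp);
	return ok ? 0 : -1;
}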
/* Main function */
int main(int argc, char **argv)
{
	int iterations = 0;
	int use_q_quit = 0;

	/* Parse command line arguments */
	parse_args(argc, argv);

	/* Setup netlink socket */
	nl_sd = create_nl_socket();
	if (nl_sd < 0) {
		fprintf(stderr, "Error creating netlink socket\n");
		exit(1);
	}

	/* Get family ID for taskstats via netlink */
	family_id = get_family_id(nl_sd);
	if (!family_id) {
		fprintf(stderr, "Error getting taskstats family ID\n");
		close(nl_sd);
		exit(1);
	}

	if (!cfg.output_one_time) {
		use_q_quit = 1;
		enable_raw_mode();
		printf("Press 'q' to quit.\n");
		fflush(stdout);
	}

	/* Main loop */
	while (running) {
		/* Read PSI statistics */
		read_psi_stats();

		/* Get container stats if container path provided */
		if (cfg.container_path)
			get_container_stats();

		/* Get task delays */
		get_task_delays();

		/* Sort tasks */
		sort_tasks();

		/* Display results to stdout or log file */
		display_results();

		/* Check for iterations */
		if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
			break;

		/* Exit if output_one_time is set */
		if (cfg.output_one_time)
			break;

		/* Check for 'q' key to quit */
		if (use_q_quit) {
			struct timeval tv = {cfg.delay, 0};
			fd_set readfds;

			FD_ZERO(&readfds);
			FD_SET(STDIN_FILENO, &readfds);
			int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv);

			if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
				char ch = 0;

				read(STDIN_FILENO, &ch, 1);
				if (ch == 'q' || ch == 'Q') {
					running = 0;
					break;
				}
			}
		} else {
			sleep(cfg.delay);
		}
	}

	/* Restore terminal mode */
	if (use_q_quit)
		disable_raw_mode();

	/* Cleanup */
	close(nl_sd);
	if (cfg.container_path)
		free(cfg.container_path);

	return 0;
}
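Two of the helpers main() calls — enable_raw_mode()/disable_raw_mode() and get_family_id() — are defined earlier in delaytop.c, above this excerpt. For readers without the full file, here are minimal self-contained sketches of the conventional implementations; every name and detail below is an illustrative reconstruction, not the file's verbatim code.

Raw-mode toggling is usually a two-call termios dance that clears canonical mode and echo, so a single 'q' keypress is delivered to the select()/read() pair in the main loop without waiting for Enter:

#include <termios.h>
#include <unistd.h>

static struct termios orig_termios;

static void enable_raw_mode(void)
{
	struct termios raw;

	tcgetattr(STDIN_FILENO, &orig_termios);	/* remember original state */
	raw = orig_termios;
	raw.c_lflag &= ~(ICANON | ECHO);	/* byte-at-a-time input, no echo */
	tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw);
}

static void disable_raw_mode(void)
{
	tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
}

And resolving the "TASKSTATS" generic-netlink family id is the same CTRL_CMD_GETFAMILY round-trip that tools/accounting/getdelays.c has long used; resolve_family_id() and struct msgtemplate are illustrative names here:

#include <linux/genetlink.h>
#include <linux/netlink.h>
#include <linux/taskstats.h>
#include <string.h>
#include <sys/socket.h>

#define GENLMSG_DATA(glh)	((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))

struct msgtemplate {
	struct nlmsghdr n;
	struct genlmsghdr g;
	char buf[256];
};

static __u16 resolve_family_id(int sd)
{
	struct msgtemplate req, ans;
	struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
	struct nlattr *na;
	int len;

	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
	req.n.nlmsg_type = GENL_ID_CTRL;	/* ask the genl controller */
	req.n.nlmsg_flags = NLM_F_REQUEST;
	req.g.cmd = CTRL_CMD_GETFAMILY;
	req.g.version = 0x1;

	/* attribute: CTRL_ATTR_FAMILY_NAME = "TASKSTATS" */
	na = (struct nlattr *)GENLMSG_DATA(&req);
	na->nla_type = CTRL_ATTR_FAMILY_NAME;
	na->nla_len = NLA_HDRLEN + sizeof(TASKSTATS_GENL_NAME);
	memcpy(NLA_DATA(na), TASKSTATS_GENL_NAME, sizeof(TASKSTATS_GENL_NAME));
	req.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);

	if (sendto(sd, &req, req.n.nlmsg_len, 0,
		   (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 0;

	len = recv(sd, &ans, sizeof(ans), 0);
	if (len < 0 || ans.n.nlmsg_type == NLMSG_ERROR ||
	    !NLMSG_OK(&ans.n, len))
		return 0;

	/* reply: skip the FAMILY_NAME attribute, then read FAMILY_ID */
	na = (struct nlattr *)GENLMSG_DATA(&ans);
	na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
	if (na->nla_type != CTRL_ATTR_FAMILY_ID)
		return 0;
	return *(__u16 *)NLA_DATA(na);
}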
@@ -194,75 +194,108 @@ static int get_family_id(int sd)
 #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
 #define delay_ms(t) (t / 1000000ULL)
 
+/*
+ * Version compatibility note:
+ * Field availability depends on taskstats version (t->version),
+ * corresponding to TASKSTATS_VERSION in kernel headers;
+ * see include/uapi/linux/taskstats.h
+ *
+ * Version feature mapping:
+ * version >= 11 - supports COMPACT statistics
+ * version >= 13 - supports WPCOPY statistics
+ * version >= 14 - supports IRQ statistics
+ * version >= 16 - supports *_max and *_min delay statistics
+ *
+ * Always verify version before accessing version-dependent fields
+ * to maintain backward compatibility.
+ */
+#define PRINT_CPU_DELAY(version, t) \
+do { \
+	if (version >= 16) { \
+		printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \
+		       "CPU", "count", "real total", "virtual total", \
+		       "delay total", "delay average", "delay max", "delay min"); \
+		printf("          %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n", \
+		       (unsigned long long)(t)->cpu_count, \
+		       (unsigned long long)(t)->cpu_run_real_total, \
+		       (unsigned long long)(t)->cpu_run_virtual_total, \
+		       (unsigned long long)(t)->cpu_delay_total, \
+		       average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \
+		       delay_ms((double)(t)->cpu_delay_max), \
+		       delay_ms((double)(t)->cpu_delay_min)); \
+	} else { \
+		printf("%-10s%15s%15s%15s%15s%15s\n", \
+		       "CPU", "count", "real total", "virtual total", \
+		       "delay total", "delay average"); \
+		printf("          %15llu%15llu%15llu%15llu%15.3fms\n", \
+		       (unsigned long long)(t)->cpu_count, \
+		       (unsigned long long)(t)->cpu_run_real_total, \
+		       (unsigned long long)(t)->cpu_run_virtual_total, \
+		       (unsigned long long)(t)->cpu_delay_total, \
+		       average_ms((double)(t)->cpu_delay_total, (t)->cpu_count)); \
+	} \
+} while (0)
+#define PRINT_FILED_DELAY(name, version, t, count, total, max, min) \
+do { \
+	if (version >= 16) { \
+		printf("%-10s%15s%15s%15s%15s%15s\n", \
+		       name, "count", "delay total", "delay average", \
+		       "delay max", "delay min"); \
+		printf("          %15llu%15llu%15.3fms%13.6fms%13.6fms\n", \
+		       (unsigned long long)(t)->count, \
+		       (unsigned long long)(t)->total, \
+		       average_ms((double)(t)->total, (t)->count), \
+		       delay_ms((double)(t)->max), \
+		       delay_ms((double)(t)->min)); \
+	} else { \
+		printf("%-10s%15s%15s%15s\n", \
+		       name, "count", "delay total", "delay average"); \
+		printf("          %15llu%15llu%15.3fms\n", \
+		       (unsigned long long)(t)->count, \
+		       (unsigned long long)(t)->total, \
+		       average_ms((double)(t)->total, (t)->count)); \
+	} \
+} while (0)
+
 static void print_delayacct(struct taskstats *t)
 {
-	printf("\n\nCPU   %15s%15s%15s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n"
-	       "IO    %15s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
-	       "SWAP  %15s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
-	       "RECLAIM  %12s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
-	       "THRASHING%12s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
-	       "COMPACT  %12s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
-	       "WPCOPY   %12s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
-	       "IRQ   %15s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n",
-	       "count", "real total", "virtual total",
-	       "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->cpu_count,
-	       (unsigned long long)t->cpu_run_real_total,
-	       (unsigned long long)t->cpu_run_virtual_total,
-	       (unsigned long long)t->cpu_delay_total,
-	       average_ms((double)t->cpu_delay_total, t->cpu_count),
-	       delay_ms((double)t->cpu_delay_max),
-	       delay_ms((double)t->cpu_delay_min),
-	       "count", "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->blkio_count,
-	       (unsigned long long)t->blkio_delay_total,
-	       average_ms((double)t->blkio_delay_total, t->blkio_count),
-	       delay_ms((double)t->blkio_delay_max),
-	       delay_ms((double)t->blkio_delay_min),
-	       "count", "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->swapin_count,
-	       (unsigned long long)t->swapin_delay_total,
-	       average_ms((double)t->swapin_delay_total, t->swapin_count),
-	       delay_ms((double)t->swapin_delay_max),
-	       delay_ms((double)t->swapin_delay_min),
-	       "count", "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->freepages_count,
-	       (unsigned long long)t->freepages_delay_total,
-	       average_ms((double)t->freepages_delay_total, t->freepages_count),
-	       delay_ms((double)t->freepages_delay_max),
-	       delay_ms((double)t->freepages_delay_min),
-	       "count", "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->thrashing_count,
-	       (unsigned long long)t->thrashing_delay_total,
-	       average_ms((double)t->thrashing_delay_total, t->thrashing_count),
-	       delay_ms((double)t->thrashing_delay_max),
-	       delay_ms((double)t->thrashing_delay_min),
-	       "count", "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->compact_count,
-	       (unsigned long long)t->compact_delay_total,
-	       average_ms((double)t->compact_delay_total, t->compact_count),
-	       delay_ms((double)t->compact_delay_max),
-	       delay_ms((double)t->compact_delay_min),
-	       "count", "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->wpcopy_count,
-	       (unsigned long long)t->wpcopy_delay_total,
-	       average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
-	       delay_ms((double)t->wpcopy_delay_max),
-	       delay_ms((double)t->wpcopy_delay_min),
-	       "count", "delay total", "delay average", "delay max", "delay min",
-	       (unsigned long long)t->irq_count,
-	       (unsigned long long)t->irq_delay_total,
-	       average_ms((double)t->irq_delay_total, t->irq_count),
-	       delay_ms((double)t->irq_delay_max),
-	       delay_ms((double)t->irq_delay_min));
+	printf("\n\n");
+
+	PRINT_CPU_DELAY(t->version, t);
+
+	PRINT_FILED_DELAY("IO", t->version, t,
+			  blkio_count, blkio_delay_total,
+			  blkio_delay_max, blkio_delay_min);
+
+	PRINT_FILED_DELAY("SWAP", t->version, t,
+			  swapin_count, swapin_delay_total,
+			  swapin_delay_max, swapin_delay_min);
+
+	PRINT_FILED_DELAY("RECLAIM", t->version, t,
+			  freepages_count, freepages_delay_total,
+			  freepages_delay_max, freepages_delay_min);
+
+	PRINT_FILED_DELAY("THRASHING", t->version, t,
+			  thrashing_count, thrashing_delay_total,
+			  thrashing_delay_max, thrashing_delay_min);
+
+	if (t->version >= 11) {
+		PRINT_FILED_DELAY("COMPACT", t->version, t,
+				  compact_count, compact_delay_total,
+				  compact_delay_max, compact_delay_min);
+	}
+
+	if (t->version >= 13) {
+		PRINT_FILED_DELAY("WPCOPY", t->version, t,
+				  wpcopy_count, wpcopy_delay_total,
+				  wpcopy_delay_max, wpcopy_delay_min);
+	}
+
+	if (t->version >= 14) {
+		PRINT_FILED_DELAY("IRQ", t->version, t,
+				  irq_count, irq_delay_total,
+				  irq_delay_max, irq_delay_min);
+	}
 }
 
 static void task_context_switch_counts(struct taskstats *t)
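For reference, taskstats reports the delay counters in nanoseconds; average_ms() in the hunk above converts to milliseconds and divides by the event count, substituting 1 when the count is zero so an idle task never causes a division by zero. A minimal standalone illustration of that arithmetic, reusing the two macros exactly as quoted above (the counter values are invented for the example):

#include <stdio.h>

#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
#define delay_ms(t) (t / 1000000ULL)

int main(void)
{
	unsigned long long cpu_delay_total = 3500000000ULL;	/* 3.5 s in ns */
	unsigned long long cpu_count = 7;

	/* 3500000000 / 1000000 / 7 = 500 ms average per scheduling delay */
	printf("avg: %.3f ms\n", average_ms((double)cpu_delay_total, cpu_count));
	/* with a zero count the guard divides by 1 instead of faulting */
	printf("avg: %.3f ms\n", average_ms((double)cpu_delay_total, 0ULL));
	/* delay_ms() is the plain ns -> ms conversion used for max/min */
	printf("max: %.6f ms\n", delay_ms((double)cpu_delay_total));
	return 0;
}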