linux/drivers/acpi/processor_perflib.c
Rafael J. Wysocki d405ec23df ACPI: processor: perflib: Move problematic pr->performance check
Commit d33bd88ac0 ("ACPI: processor: perflib: Fix initial _PPC limit
application") added a pr->performance check that prevents the frequency
QoS request from being added when the given processor has no performance
object.  Unfortunately, this causes a WARN() in freq_qos_remove_request()
to trigger on an attempt to take the given CPU offline later because the
frequency QoS object has not been added for it due to the missing
performance object.

Address this by moving the pr->performance check before calling
acpi_processor_get_platform_limit() so it only prevents a limit from
being set for the CPU if the performance object is not present.  This
way, the frequency QoS request is added as it was before the above
commit and it is present all the time along with the CPU's cpufreq
policy regardless of whether or not the CPU is online.

Fixes: d33bd88ac0 ("ACPI: processor: perflib: Fix initial _PPC limit application")
Tested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 5.4+ <stable@vger.kernel.org> # 5.4+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/2801421.mvXUDI8C0e@rafael.j.wysocki
2025-08-12 15:33:58 +02:00
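
In code terms, the fix is just a reordering inside acpi_processor_ppc_init(); a minimal sketch of the per-CPU loop after the change (simplified, not the verbatim diff):

	pr->performance_platform_limit = 0;
	ret = freq_qos_add_request(&policy->constraints, &pr->perflib_req,
				   FREQ_QOS_MAX, FREQ_QOS_MAX_DEFAULT_VALUE);
	/* the QoS request now always exists, so ppc_exit() can remove it */
	if (!pr->performance)
		continue;	/* skip only the _PPC limit, not the request */
	ret = acpi_processor_get_platform_limit(pr);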


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * processor_perflib.c - ACPI Processor P-States Library ($Revision: 71 $)
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004       Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *			- Added processor hotplug support
 */

#define pr_fmt(fmt) "ACPI: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <acpi/processor.h>
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
#include <asm/msr.h>
#endif

#define ACPI_PROCESSOR_FILE_PERFORMANCE	"performance"

/*
 * _PPC support is implemented as a CPUfreq policy notifier:
 * each time a CPUfreq driver that is also registered with the ACPI core
 * is asked to change the speed policy, the maximum value is adjusted so
 * that it stays within the platform limit.
 *
 * Also, when a new platform limit value is detected, the CPUfreq policy
 * is adjusted accordingly.
 */

/* ignore_ppc:
 * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet
 *       ignore _PPC
 *  0 -> cpufreq low level drivers initialized -> consider _PPC values
 *  1 -> ignore _PPC totally -> forced by user through boot param
 */
static int ignore_ppc = -1;
module_param(ignore_ppc, int, 0644);
MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly " \
		 "limited by BIOS, this should help");

static bool acpi_processor_ppc_in_use;

static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
{
	acpi_status status = 0;
	unsigned long long ppc = 0;
	s32 qos_value;
	int index;
	int ret;

	if (!pr)
		return -EINVAL;

	/*
	 * _PPC indicates the maximum state currently supported by the platform
	 * (e.g. 0 = states 0..n; 1 = states 1..n; etc.)
	 */
	status = acpi_evaluate_integer(pr->handle, "_PPC", NULL, &ppc);
	if (status != AE_NOT_FOUND) {
		acpi_processor_ppc_in_use = true;

		if (ACPI_FAILURE(status)) {
			acpi_evaluation_failure_warn(pr->handle, "_PPC", status);
			return -ENODEV;
		}
	}

	index = ppc;

	if (pr->performance_platform_limit == index ||
	    ppc >= pr->performance->state_count)
		return 0;

	pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id,
		 index, index ? "is" : "is not");

	pr->performance_platform_limit = index;

	if (unlikely(!freq_qos_request_active(&pr->perflib_req)))
		return 0;

	/*
	 * If _PPC returns 0, it means that all of the available states can be
	 * used ("no limit").
	 */
	if (index == 0)
		qos_value = FREQ_QOS_MAX_DEFAULT_VALUE;
	else
		qos_value = pr->performance->states[index].core_frequency * 1000;

	ret = freq_qos_update_request(&pr->perflib_req, qos_value);
	if (ret < 0) {
		pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n",
			pr->id, ret);
	}

	return 0;
}
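
/*
 * Worked example (hypothetical numbers): if _PPC returns 1 and
 * states[1].core_frequency is 2400 (MHz), the request above is updated to
 * 2400 * 1000 = 2400000 kHz, capping the policy's maximum frequency there.
 */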

#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE	0x80

/*
 * acpi_processor_ppc_ost: Notify firmware of the _PPC evaluation status
 * @handle: ACPI processor handle
 * @status: the status code of the _PPC evaluation
 *	0: success. OSPM is now using the performance state specified.
 *	1: failure. OSPM has not changed the number of P-states in use.
 */
static void acpi_processor_ppc_ost(acpi_handle handle, int status)
{
	if (acpi_has_method(handle, "_OST"))
		acpi_evaluate_ost(handle, ACPI_PROCESSOR_NOTIFY_PERFORMANCE,
				  status, NULL);
}

void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag)
{
	int ret;

	if (ignore_ppc || !pr->performance) {
		/*
		 * Evaluate the _OST object only for notification events;
		 * skip it otherwise.
		 */
		if (event_flag)
			acpi_processor_ppc_ost(pr->handle, 1);
		return;
	}

	ret = acpi_processor_get_platform_limit(pr);
	/*
	 * Evaluate the _OST object only for notification events;
	 * skip it otherwise.
	 */
	if (event_flag) {
		if (ret < 0)
			acpi_processor_ppc_ost(pr->handle, 1);
		else
			acpi_processor_ppc_ost(pr->handle, 0);
	}
	if (ret >= 0)
		cpufreq_update_limits(pr->id);
}

int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
{
	struct acpi_processor *pr;

	pr = per_cpu(processors, cpu);
	if (!pr || !pr->performance || !pr->performance->state_count)
		return -ENODEV;

	*limit = pr->performance->states[pr->performance_platform_limit].
		core_frequency * 1000;
	return 0;
}
EXPORT_SYMBOL(acpi_processor_get_bios_limit);

void acpi_processor_ignore_ppc_init(void)
{
	if (ignore_ppc < 0)
		ignore_ppc = 0;
}

void acpi_processor_ppc_init(struct cpufreq_policy *policy)
{
	unsigned int cpu;

	if (ignore_ppc == 1)
		return;

	for_each_cpu(cpu, policy->related_cpus) {
		struct acpi_processor *pr = per_cpu(processors, cpu);
		int ret;

		if (!pr)
			continue;

		/*
		 * Reset performance_platform_limit in case there is a stale
		 * value in it, so as to make it match the "no limit" QoS value
		 * below.
		 */
		pr->performance_platform_limit = 0;

		ret = freq_qos_add_request(&policy->constraints,
					   &pr->perflib_req, FREQ_QOS_MAX,
					   FREQ_QOS_MAX_DEFAULT_VALUE);
		if (ret < 0)
			pr_err("Failed to add freq constraint for CPU%d (%d)\n",
			       cpu, ret);

		if (!pr->performance)
			continue;

		ret = acpi_processor_get_platform_limit(pr);
		if (ret)
			pr_err("Failed to update freq constraint for CPU%d (%d)\n",
			       cpu, ret);
	}
}

void acpi_processor_ppc_exit(struct cpufreq_policy *policy)
{
	unsigned int cpu;

	for_each_cpu(cpu, policy->related_cpus) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (pr)
			freq_qos_remove_request(&pr->perflib_req);
	}
}

#ifdef CONFIG_X86

static DEFINE_MUTEX(performance_mutex);

static int acpi_processor_get_performance_control(struct acpi_processor *pr)
{
	int result = 0;
	acpi_status status = 0;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *pct = NULL;
	union acpi_object obj = { 0 };

	status = acpi_evaluate_object(pr->handle, "_PCT", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		acpi_evaluation_failure_warn(pr->handle, "_PCT", status);
		return -ENODEV;
	}

	pct = (union acpi_object *)buffer.pointer;
	if (!pct || pct->type != ACPI_TYPE_PACKAGE || pct->package.count != 2) {
		pr_err("Invalid _PCT data\n");
		result = -EFAULT;
		goto end;
	}

	/*
	 * control_register
	 */
	obj = pct->package.elements[0];
	if (!obj.buffer.pointer || obj.type != ACPI_TYPE_BUFFER ||
	    obj.buffer.length < sizeof(struct acpi_pct_register)) {
		pr_err("Invalid _PCT data (control_register)\n");
		result = -EFAULT;
		goto end;
	}
	memcpy(&pr->performance->control_register, obj.buffer.pointer,
	       sizeof(struct acpi_pct_register));

	/*
	 * status_register
	 */
	obj = pct->package.elements[1];
	if (!obj.buffer.pointer || obj.type != ACPI_TYPE_BUFFER ||
	    obj.buffer.length < sizeof(struct acpi_pct_register)) {
		pr_err("Invalid _PCT data (status_register)\n");
		result = -EFAULT;
		goto end;
	}
	memcpy(&pr->performance->status_register, obj.buffer.pointer,
	       sizeof(struct acpi_pct_register));

end:
	kfree(buffer.pointer);

	return result;
}

/*
 * Some AMD CPUs have 50 MHz frequency multiples, but only provide 100 MHz
 * rounding in their ACPI data. Calculate the real values and fix up the
 * _PSS data.
 */
static void amd_fixup_frequency(struct acpi_processor_px *px, int i)
{
	u32 hi, lo, fid, did;
	int index = px->control & 0x00000007;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return;

	if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) ||
	    boot_cpu_data.x86 == 0x11) {
		rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi);
		/*
		 * MSR C001_0064+:
		 * Bit 63: PstateEn. Read-write. If set, the P-state is valid.
		 */
		if (!(hi & BIT(31)))
			return;

		fid = lo & 0x3f;
		did = (lo >> 6) & 7;
		if (boot_cpu_data.x86 == 0x10)
			px->core_frequency = (100 * (fid + 0x10)) >> did;
		else
			px->core_frequency = (100 * (fid + 8)) >> did;
	}
}
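
/*
 * Worked example (hypothetical MSR contents): on family 0x10, fid = 0x6 and
 * did = 0 yield core_frequency = (100 * (0x6 + 0x10)) >> 0 = 2200 MHz; the
 * same fid with did = 1 would halve that to 1100 MHz.
 */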

static int acpi_processor_get_performance_states(struct acpi_processor *pr)
{
	int result = 0;
	acpi_status status = AE_OK;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	struct acpi_buffer format = { sizeof("NNNNNN"), "NNNNNN" };
	struct acpi_buffer state = { 0, NULL };
	union acpi_object *pss = NULL;
	int i;
	int last_invalid = -1;

	status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		acpi_evaluation_failure_warn(pr->handle, "_PSS", status);
		return -ENODEV;
	}

	pss = buffer.pointer;
	if (!pss || pss->type != ACPI_TYPE_PACKAGE) {
		pr_err("Invalid _PSS data\n");
		result = -EFAULT;
		goto end;
	}

	acpi_handle_debug(pr->handle, "Found %d performance states\n",
			  pss->package.count);

	pr->performance->state_count = pss->package.count;
	pr->performance->states =
	    kmalloc_array(pss->package.count,
			  sizeof(struct acpi_processor_px),
			  GFP_KERNEL);
	if (!pr->performance->states) {
		result = -ENOMEM;
		goto end;
	}

	for (i = 0; i < pr->performance->state_count; i++) {

		struct acpi_processor_px *px = &(pr->performance->states[i]);

		state.length = sizeof(struct acpi_processor_px);
		state.pointer = px;

		acpi_handle_debug(pr->handle, "Extracting state %d\n", i);

		status = acpi_extract_package(&(pss->package.elements[i]),
					      &format, &state);
		if (ACPI_FAILURE(status)) {
			acpi_handle_warn(pr->handle, "Invalid _PSS data: %s\n",
					 acpi_format_exception(status));
			result = -EFAULT;
			kfree(pr->performance->states);
			goto end;
		}

		amd_fixup_frequency(px, i);

		acpi_handle_debug(pr->handle,
				  "State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n",
				  i,
				  (u32) px->core_frequency,
				  (u32) px->power,
				  (u32) px->transition_latency,
				  (u32) px->bus_master_latency,
				  (u32) px->control, (u32) px->status);

		/*
		 * Check that ACPI's u64 MHz will be valid as u32 kHz in cpufreq
		 */
		if (!px->core_frequency ||
		    (u32)(px->core_frequency * 1000) != px->core_frequency * 1000) {
			pr_err(FW_BUG
			       "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n",
			       pr->id, px->core_frequency);
			if (last_invalid == -1)
				last_invalid = i;
		} else {
			if (last_invalid != -1) {
				/*
				 * Copy this valid entry over the last_invalid entry
				 */
				memcpy(&(pr->performance->states[last_invalid]),
				       px, sizeof(struct acpi_processor_px));
				++last_invalid;
			}
		}
	}

	if (last_invalid == 0) {
		pr_err(FW_BUG
		       "No valid BIOS _PSS frequency found for processor %d\n", pr->id);
		result = -EFAULT;
		kfree(pr->performance->states);
		pr->performance->states = NULL;
	}

	if (last_invalid > 0)
		pr->performance->state_count = last_invalid;

end:
	kfree(buffer.pointer);

	return result;
}

int acpi_processor_get_performance_info(struct acpi_processor *pr)
{
	int result = 0;

	if (!pr || !pr->performance || !pr->handle)
		return -EINVAL;

	if (!acpi_has_method(pr->handle, "_PCT")) {
		acpi_handle_debug(pr->handle,
				  "ACPI-based processor performance control unavailable\n");
		return -ENODEV;
	}

	result = acpi_processor_get_performance_control(pr);
	if (result)
		goto update_bios;

	result = acpi_processor_get_performance_states(pr);
	if (result)
		goto update_bios;

	/* We need to call _PPC once when cpufreq starts */
	if (ignore_ppc != 1)
		result = acpi_processor_get_platform_limit(pr);

	return result;

	/*
	 * Having _PPC but missing frequencies (_PSS, _PCT) is a very good hint
	 * that the BIOS is older than the CPU and does not know its frequencies.
	 */
update_bios:
	if (acpi_has_method(pr->handle, "_PPC")) {
		if (boot_cpu_has(X86_FEATURE_EST))
			pr_warn(FW_BUG "BIOS needs update for CPU "
				"frequency support\n");
	}
	return result;
}
EXPORT_SYMBOL_GPL(acpi_processor_get_performance_info);

int acpi_processor_pstate_control(void)
{
	acpi_status status;

	if (!acpi_gbl_FADT.smi_command || !acpi_gbl_FADT.pstate_control)
		return 0;

	pr_debug("Writing pstate_control [0x%x] to smi_command [0x%x]\n",
		 acpi_gbl_FADT.pstate_control, acpi_gbl_FADT.smi_command);

	status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
				    (u32)acpi_gbl_FADT.pstate_control, 8);
	if (ACPI_SUCCESS(status))
		return 1;

	pr_warn("Failed to write pstate_control [0x%x] to smi_command [0x%x]: %s\n",
		acpi_gbl_FADT.pstate_control, acpi_gbl_FADT.smi_command,
		acpi_format_exception(status));
	return -EIO;
}

int acpi_processor_notify_smm(struct module *calling_module)
{
	static int is_done;
	int result = 0;

	if (!acpi_processor_cpufreq_init)
		return -EBUSY;

	if (!try_module_get(calling_module))
		return -EINVAL;

	/*
	 * is_done is set to negative if an error occurs and to 1 if no error
	 * occurs, but SMM has been notified already. This avoids repeated
	 * notification which might lead to unexpected results.
	 */
	if (is_done != 0) {
		if (is_done < 0)
			result = is_done;

		goto out_put;
	}

	result = acpi_processor_pstate_control();
	if (result <= 0) {
		if (result) {
			is_done = result;
		} else {
			pr_debug("No SMI port or pstate_control\n");
			is_done = 1;
		}
		goto out_put;
	}

	is_done = 1;
	/*
	 * Success. If there is _PPC, unloading the cpufreq driver would be
	 * risky, so disallow it in that case.
	 */
	if (acpi_processor_ppc_in_use)
		return 0;

out_put:
	module_put(calling_module);
	return result;
}
EXPORT_SYMBOL(acpi_processor_notify_smm);
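
/*
 * For reference: the _PSD package extracted below holds five integers
 * (hence the "NNNNN" format string): NumEntries, Revision, Domain,
 * CoordType and NumProcessors, as defined by the ACPI specification.
 */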
int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain)
{
	int result = 0;
	acpi_status status = AE_OK;
	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
	struct acpi_buffer format = {sizeof("NNNNN"), "NNNNN"};
	struct acpi_buffer state = {0, NULL};
	union acpi_object *psd = NULL;

	status = acpi_evaluate_object(handle, "_PSD", NULL, &buffer);
	if (ACPI_FAILURE(status))
		return -ENODEV;

	psd = buffer.pointer;
	if (!psd || psd->type != ACPI_TYPE_PACKAGE) {
		pr_err("Invalid _PSD data\n");
		result = -EFAULT;
		goto end;
	}

	if (psd->package.count != 1) {
		pr_err("Invalid _PSD data\n");
		result = -EFAULT;
		goto end;
	}

	state.length = sizeof(struct acpi_psd_package);
	state.pointer = pdomain;

	status = acpi_extract_package(&(psd->package.elements[0]), &format, &state);
	if (ACPI_FAILURE(status)) {
		pr_err("Invalid _PSD data\n");
		result = -EFAULT;
		goto end;
	}

	if (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) {
		pr_err("Unknown _PSD:num_entries\n");
		result = -EFAULT;
		goto end;
	}

	if (pdomain->revision != ACPI_PSD_REV0_REVISION) {
		pr_err("Unknown _PSD:revision\n");
		result = -EFAULT;
		goto end;
	}

	if (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL &&
	    pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY &&
	    pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL) {
		pr_err("Invalid _PSD:coord_type\n");
		result = -EFAULT;
		goto end;
	}
end:
	kfree(buffer.pointer);
	return result;
}
EXPORT_SYMBOL(acpi_processor_get_psd);

int acpi_processor_preregister_performance(
		struct acpi_processor_performance __percpu *performance)
{
	int count_target;
	int retval = 0;
	unsigned int i, j;
	cpumask_var_t covered_cpus;
	struct acpi_processor *pr;
	struct acpi_psd_package *pdomain;
	struct acpi_processor *match_pr;
	struct acpi_psd_package *match_pdomain;

	if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
		return -ENOMEM;

	mutex_lock(&performance_mutex);

	/*
	 * Check if another driver has already registered, and abort before
	 * changing pr->performance if it has. Check input data as well.
	 */
	for_each_possible_cpu(i) {
		pr = per_cpu(processors, i);
		if (!pr) {
			/* Look only at processors in ACPI namespace */
			continue;
		}

		if (pr->performance) {
			retval = -EBUSY;
			goto err_out;
		}

		if (!performance || !per_cpu_ptr(performance, i)) {
			retval = -EINVAL;
			goto err_out;
		}
	}

	/* Call _PSD for all CPUs */
	for_each_possible_cpu(i) {
		pr = per_cpu(processors, i);
		if (!pr)
			continue;

		pr->performance = per_cpu_ptr(performance, i);
		pdomain = &(pr->performance->domain_info);
		if (acpi_processor_get_psd(pr->handle, pdomain)) {
			retval = -EINVAL;
			continue;
		}
	}
	if (retval)
		goto err_ret;

	/*
	 * Now that we have _PSD data from all CPUs, let's set up the
	 * P-state domain info.
	 */
	for_each_possible_cpu(i) {
		pr = per_cpu(processors, i);
		if (!pr)
			continue;

		if (cpumask_test_cpu(i, covered_cpus))
			continue;

		pdomain = &(pr->performance->domain_info);
		cpumask_set_cpu(i, pr->performance->shared_cpu_map);
		cpumask_set_cpu(i, covered_cpus);
		if (pdomain->num_processors <= 1)
			continue;

		/* Validate the Domain info */
		count_target = pdomain->num_processors;
		if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
			pr->performance->shared_type = CPUFREQ_SHARED_TYPE_ALL;
		else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
			pr->performance->shared_type = CPUFREQ_SHARED_TYPE_HW;
		else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
			pr->performance->shared_type = CPUFREQ_SHARED_TYPE_ANY;

		for_each_possible_cpu(j) {
			if (i == j)
				continue;

			match_pr = per_cpu(processors, j);
			if (!match_pr)
				continue;

			match_pdomain = &(match_pr->performance->domain_info);
			if (match_pdomain->domain != pdomain->domain)
				continue;

			/* Here i and j are in the same domain */

			if (match_pdomain->num_processors != count_target) {
				retval = -EINVAL;
				goto err_ret;
			}

			if (pdomain->coord_type != match_pdomain->coord_type) {
				retval = -EINVAL;
				goto err_ret;
			}

			cpumask_set_cpu(j, covered_cpus);
			cpumask_set_cpu(j, pr->performance->shared_cpu_map);
		}

		for_each_possible_cpu(j) {
			if (i == j)
				continue;

			match_pr = per_cpu(processors, j);
			if (!match_pr)
				continue;

			match_pdomain = &(match_pr->performance->domain_info);
			if (match_pdomain->domain != pdomain->domain)
				continue;

			match_pr->performance->shared_type =
					pr->performance->shared_type;
			cpumask_copy(match_pr->performance->shared_cpu_map,
				     pr->performance->shared_cpu_map);
		}
	}

err_ret:
	for_each_possible_cpu(i) {
		pr = per_cpu(processors, i);
		if (!pr || !pr->performance)
			continue;

		/* Assume no coordination on any error parsing domain info */
		if (retval) {
			cpumask_clear(pr->performance->shared_cpu_map);
			cpumask_set_cpu(i, pr->performance->shared_cpu_map);
			pr->performance->shared_type = CPUFREQ_SHARED_TYPE_NONE;
		}
		pr->performance = NULL; /* Will be set for real in register */
	}

err_out:
	mutex_unlock(&performance_mutex);
	free_cpumask_var(covered_cpus);
	return retval;
}
EXPORT_SYMBOL(acpi_processor_preregister_performance);

int acpi_processor_register_performance(struct acpi_processor_performance
					*performance, unsigned int cpu)
{
	struct acpi_processor *pr;

	if (!acpi_processor_cpufreq_init)
		return -EINVAL;

	mutex_lock(&performance_mutex);

	pr = per_cpu(processors, cpu);
	if (!pr) {
		mutex_unlock(&performance_mutex);
		return -ENODEV;
	}

	if (pr->performance) {
		mutex_unlock(&performance_mutex);
		return -EBUSY;
	}

	WARN_ON(!performance);

	pr->performance = performance;

	if (acpi_processor_get_performance_info(pr)) {
		pr->performance = NULL;
		mutex_unlock(&performance_mutex);
		return -EIO;
	}

	mutex_unlock(&performance_mutex);
	return 0;
}
EXPORT_SYMBOL(acpi_processor_register_performance);
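
/*
 * Typical call sequence in a cpufreq driver (a sketch mirroring what
 * acpi-cpufreq does): acpi_processor_preregister_performance() once to
 * resolve _PSD domains, then acpi_processor_register_performance(perf, cpu)
 * from the policy init callback, balanced by
 * acpi_processor_unregister_performance(cpu) at policy exit.
 */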

void acpi_processor_unregister_performance(unsigned int cpu)
{
	struct acpi_processor *pr;

	mutex_lock(&performance_mutex);

	pr = per_cpu(processors, cpu);
	if (!pr)
		goto unlock;

	if (pr->performance)
		kfree(pr->performance->states);

	pr->performance = NULL;

unlock:
	mutex_unlock(&performance_mutex);
}
EXPORT_SYMBOL(acpi_processor_unregister_performance);
#endif