genirq: Introduce common irq_force_complete_move() implementation

CONFIG_GENERIC_PENDING_IRQ requires an architecture specific implementation
of irq_force_complete_move() for CPU hotplug. At the moment, only x86
implements this unconditionally, but for RISC-V irq_force_complete_move()
is only needed when the RISC-V IMSIC driver is in use and not needed
otherwise.

To allow runtime configuration of this mechanism, introduce a common
irq_force_complete_move() implementation in the interrupt core code, which
only invokes the completion function, when a interrupt chip in the
hierarchy implements it.

Switch X86 over to the new mechanism. No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250217085657.789309-5-apatel@ventanamicro.com
This commit is contained in:
Thomas Gleixner 2025-02-17 14:26:50 +05:30
parent fe35ecee8e
commit 751dc837da
4 changed files with 123 additions and 125 deletions

View file

@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
return err ? err : IRQ_SET_MASK_OK;
}
static void free_moved_vector(struct apic_chip_data *apicd)
{
unsigned int vector = apicd->prev_vector;
unsigned int cpu = apicd->prev_cpu;
bool managed = apicd->is_managed;
/*
* Managed interrupts are usually not migrated away
* from an online CPU, but CPU isolation 'managed_irq'
* can make that happen.
* 1) Activation does not take the isolation into account
* to keep the code simple
* 2) Migration away from an isolated CPU can happen when
* a non-isolated CPU which is in the calculated
* affinity mask comes online.
*/
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
irq_matrix_free(vector_matrix, cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
hlist_del_init(&apicd->clist);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
}
/*
* Called from fixup_irqs() with @desc->lock held and interrupts disabled.
*/
static void apic_force_complete_move(struct irq_data *irqd)
{
unsigned int cpu = smp_processor_id();
struct apic_chip_data *apicd;
unsigned int vector;
guard(raw_spinlock)(&vector_lock);
apicd = apic_chip_data(irqd);
if (!apicd)
return;
/*
* If prev_vector is empty or the descriptor is neither currently
* nor previously on the outgoing CPU no action required.
*/
vector = apicd->prev_vector;
if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
return;
/*
* This is tricky. If the cleanup of the old vector has not been
* done yet, then the following setaffinity call will fail with
* -EBUSY. This can leave the interrupt in a stale state.
*
* All CPUs are stuck in stop machine with interrupts disabled so
* calling __irq_complete_move() would be completely pointless.
*
* 1) The interrupt is in move_in_progress state. That means that we
* have not seen an interrupt since the io_apic was reprogrammed to
* the new vector.
*
* 2) The interrupt has fired on the new vector, but the cleanup IPIs
* have not been processed yet.
*/
if (apicd->move_in_progress) {
/*
* In theory there is a race:
*
* set_ioapic(new_vector) <-- Interrupt is raised before update
* is effective, i.e. it's raised on
* the old vector.
*
* So if the target cpu cannot handle that interrupt before
* the old vector is cleaned up, we get a spurious interrupt
* and in the worst case the ioapic irq line becomes stale.
*
* But in case of cpu hotplug this should be a non issue
* because if the affinity update happens right before all
* cpus rendezvous in stop machine, there is no way that the
* interrupt can be blocked on the target cpu because all cpus
* loops first with interrupts enabled in stop machine, so the
* old vector is not yet cleaned up when the interrupt fires.
*
* So the only way to run into this issue is if the delivery
* of the interrupt on the apic/system bus would be delayed
* beyond the point where the target cpu disables interrupts
* in stop machine. I doubt that it can happen, but at least
* there is a theoretical chance. Virtualization might be
* able to expose this, but AFAICT the IOAPIC emulation is not
* as stupid as the real hardware.
*
* Anyway, there is nothing we can do about that at this point
* w/o refactoring the whole fixup_irq() business completely.
* We print at least the irq number and the old vector number,
* so we have the necessary information when a problem in that
* area arises.
*/
pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
irqd->irq, vector);
}
free_moved_vector(apicd);
}
#else
# define apic_set_affinity NULL
# define apic_force_complete_move NULL
#endif
static int apic_retrigger_irq(struct irq_data *irqd)
@ -927,35 +1028,12 @@ static struct irq_chip lapic_controller = {
.irq_ack = apic_ack_edge,
.irq_set_affinity = apic_set_affinity,
.irq_compose_msi_msg = x86_vector_msi_compose_msg,
.irq_force_complete_move = apic_force_complete_move,
.irq_retrigger = apic_retrigger_irq,
};
#ifdef CONFIG_SMP
static void free_moved_vector(struct apic_chip_data *apicd)
{
unsigned int vector = apicd->prev_vector;
unsigned int cpu = apicd->prev_cpu;
bool managed = apicd->is_managed;
/*
* Managed interrupts are usually not migrated away
* from an online CPU, but CPU isolation 'managed_irq'
* can make that happen.
* 1) Activation does not take the isolation into account
* to keep the code simple
* 2) Migration away from an isolated CPU can happen when
* a non-isolated CPU which is in the calculated
* affinity mask comes online.
*/
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
irq_matrix_free(vector_matrix, cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
hlist_del_init(&apicd->clist);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
}
static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
{
struct apic_chip_data *apicd;
@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
__vector_schedule_cleanup(apicd);
}
/*
* Called from fixup_irqs() with @desc->lock held and interrupts disabled.
*/
void irq_force_complete_move(struct irq_desc *desc)
{
unsigned int cpu = smp_processor_id();
struct apic_chip_data *apicd;
struct irq_data *irqd;
unsigned int vector;
/*
* The function is called for all descriptors regardless of which
* irqdomain they belong to. For example if an IRQ is provided by
* an irq_chip as part of a GPIO driver, the chip data for that
* descriptor is specific to the irq_chip in question.
*
* Check first that the chip_data is what we expect
* (apic_chip_data) before touching it any further.
*/
irqd = irq_domain_get_irq_data(x86_vector_domain,
irq_desc_get_irq(desc));
if (!irqd)
return;
raw_spin_lock(&vector_lock);
apicd = apic_chip_data(irqd);
if (!apicd)
goto unlock;
/*
* If prev_vector is empty or the descriptor is neither currently
* nor previously on the outgoing CPU no action required.
*/
vector = apicd->prev_vector;
if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
goto unlock;
/*
* This is tricky. If the cleanup of the old vector has not been
* done yet, then the following setaffinity call will fail with
* -EBUSY. This can leave the interrupt in a stale state.
*
* All CPUs are stuck in stop machine with interrupts disabled so
* calling __irq_complete_move() would be completely pointless.
*
* 1) The interrupt is in move_in_progress state. That means that we
* have not seen an interrupt since the io_apic was reprogrammed to
* the new vector.
*
* 2) The interrupt has fired on the new vector, but the cleanup IPIs
* have not been processed yet.
*/
if (apicd->move_in_progress) {
/*
* In theory there is a race:
*
* set_ioapic(new_vector) <-- Interrupt is raised before update
* is effective, i.e. it's raised on
* the old vector.
*
* So if the target cpu cannot handle that interrupt before
* the old vector is cleaned up, we get a spurious interrupt
* and in the worst case the ioapic irq line becomes stale.
*
* But in case of cpu hotplug this should be a non issue
* because if the affinity update happens right before all
* cpus rendezvous in stop machine, there is no way that the
* interrupt can be blocked on the target cpu because all cpus
* loops first with interrupts enabled in stop machine, so the
* old vector is not yet cleaned up when the interrupt fires.
*
* So the only way to run into this issue is if the delivery
* of the interrupt on the apic/system bus would be delayed
* beyond the point where the target cpu disables interrupts
* in stop machine. I doubt that it can happen, but at least
* there is a theoretical chance. Virtualization might be
* able to expose this, but AFAICT the IOAPIC emulation is not
* as stupid as the real hardware.
*
* Anyway, there is nothing we can do about that at this point
* w/o refactoring the whole fixup_irq() business completely.
* We print at least the irq number and the old vector number,
* so we have the necessary information when a problem in that
* area arises.
*/
pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
irqd->irq, vector);
}
free_moved_vector(apicd);
unlock:
raw_spin_unlock(&vector_lock);
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* Note, this is not accurate accounting, but at least good enough to

View file

@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* @ipi_send_mask: send an IPI to destination cpus in cpumask
* @irq_nmi_setup: function called from core code before enabling an NMI
* @irq_nmi_teardown: function called from core code after disabling an NMI
* @irq_force_complete_move: optional function to force complete pending irq move
* @flags: chip specific flags
*/
struct irq_chip {
@ -537,6 +538,8 @@ struct irq_chip {
int (*irq_nmi_setup)(struct irq_data *data);
void (*irq_nmi_teardown)(struct irq_data *data);
void (*irq_force_complete_move)(struct irq_data *data);
unsigned long flags;
};
@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data)
__irq_move_irq(data);
}
void irq_move_masked_irq(struct irq_data *data);
void irq_force_complete_move(struct irq_desc *desc);
#else
static inline void irq_move_irq(struct irq_data *data) { }
static inline void irq_move_masked_irq(struct irq_data *data) { }
static inline void irq_force_complete_move(struct irq_desc *desc) { }
#endif
extern int no_irq_affinity;

View file

@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
return desc->pending_mask;
}
bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
void irq_force_complete_move(struct irq_desc *desc);
#else /* CONFIG_GENERIC_PENDING_IRQ */
static inline bool irq_can_move_pcntxt(struct irq_data *data)
{
@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
{
return false;
}
static inline void irq_force_complete_move(struct irq_desc *desc) { }
#endif /* !CONFIG_GENERIC_PENDING_IRQ */
#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)

View file

@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
return true;
}
void irq_force_complete_move(struct irq_desc *desc)
{
for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
if (d->chip && d->chip->irq_force_complete_move) {
d->chip->irq_force_complete_move(d);
return;
}
}
}
void irq_move_masked_irq(struct irq_data *idata)
{
struct irq_desc *desc = irq_data_to_desc(idata);