genirq: Introduce common irq_force_complete_move() implementation
CONFIG_GENERIC_PENDING_IRQ requires an architecture specific implementation of irq_force_complete_move() for CPU hotplug. At the moment, only x86 implements this unconditionally, but for RISC-V irq_force_complete_move() is only needed when the RISC-V IMSIC driver is in use, and not otherwise.

To allow runtime configuration of this mechanism, introduce a common irq_force_complete_move() implementation in the interrupt core code, which only invokes the completion function when an interrupt chip in the hierarchy implements it.

Switch x86 over to the new mechanism.

No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250217085657.789309-5-apatel@ventanamicro.com
This commit is contained in:
parent fe35ecee8e
commit 751dc837da

4 changed files with 123 additions and 125 deletions
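Before the diff itself, a minimal sketch of what the driver side looks like under the new scheme. The chip name, the chip-data type and the cleanup body below are hypothetical placeholders, not part of this commit; only the .irq_force_complete_move member and its void (*)(struct irq_data *) signature come from the patch:

    /* Sketch only: a hypothetical irqchip opting in to the new callback. */
    #include <linux/irq.h>

    struct my_chip_data {
            unsigned int prev_vec;  /* vector of an in-flight move, 0 if none */
    };

    /* Invoked by the new core helper when a pending move must be forced
     * to complete on CPU unplug. What "completing" means is entirely
     * chip specific; this body is a placeholder.
     */
    static void my_force_complete_move(struct irq_data *d)
    {
            struct my_chip_data *cd = irq_data_get_irq_chip_data(d);

            if (cd && cd->prev_vec)
                    cd->prev_vec = 0;       /* stand-in for the real cleanup */
    }

    static struct irq_chip my_chip = {
            .name                    = "MY-CHIP",
            /* Optional callback introduced by this commit: */
            .irq_force_complete_move = my_force_complete_move,
    };

Chips that do not set the callback need no change at all; for them the new core helper is a no-op.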
arch/x86/kernel/apic/vector.c

@@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
 	return err ? err : IRQ_SET_MASK_OK;
 }
 
+static void free_moved_vector(struct apic_chip_data *apicd)
+{
+	unsigned int vector = apicd->prev_vector;
+	unsigned int cpu = apicd->prev_cpu;
+	bool managed = apicd->is_managed;
+
+	/*
+	 * Managed interrupts are usually not migrated away
+	 * from an online CPU, but CPU isolation 'managed_irq'
+	 * can make that happen.
+	 * 1) Activation does not take the isolation into account
+	 *    to keep the code simple
+	 * 2) Migration away from an isolated CPU can happen when
+	 *    a non-isolated CPU which is in the calculated
+	 *    affinity mask comes online.
+	 */
+	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
+	irq_matrix_free(vector_matrix, cpu, vector, managed);
+	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+	hlist_del_init(&apicd->clist);
+	apicd->prev_vector = 0;
+	apicd->move_in_progress = 0;
+}
+
+/*
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
+ */
+static void apic_force_complete_move(struct irq_data *irqd)
+{
+	unsigned int cpu = smp_processor_id();
+	struct apic_chip_data *apicd;
+	unsigned int vector;
+
+	guard(raw_spinlock)(&vector_lock);
+	apicd = apic_chip_data(irqd);
+	if (!apicd)
+		return;
+
+	/*
+	 * If prev_vector is empty or the descriptor is neither currently
+	 * nor previously on the outgoing CPU no action required.
+	 */
+	vector = apicd->prev_vector;
+	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+		return;
+
+	/*
+	 * This is tricky. If the cleanup of the old vector has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * All CPUs are stuck in stop machine with interrupts disabled so
+	 * calling __irq_complete_move() would be completely pointless.
+	 *
+	 * 1) The interrupt is in move_in_progress state. That means that we
+	 *    have not seen an interrupt since the io_apic was reprogrammed to
+	 *    the new vector.
+	 *
+	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+	 *    have not been processed yet.
+	 */
+	if (apicd->move_in_progress) {
+		/*
+		 * In theory there is a race:
+		 *
+		 * set_ioapic(new_vector) <-- Interrupt is raised before update
+		 *                            is effective, i.e. it's raised on
+		 *                            the old vector.
+		 *
+		 * So if the target cpu cannot handle that interrupt before
+		 * the old vector is cleaned up, we get a spurious interrupt
+		 * and in the worst case the ioapic irq line becomes stale.
+		 *
+		 * But in case of cpu hotplug this should be a non issue
+		 * because if the affinity update happens right before all
+		 * cpus rendezvous in stop machine, there is no way that the
+		 * interrupt can be blocked on the target cpu because all cpus
+		 * loops first with interrupts enabled in stop machine, so the
+		 * old vector is not yet cleaned up when the interrupt fires.
+		 *
+		 * So the only way to run into this issue is if the delivery
+		 * of the interrupt on the apic/system bus would be delayed
+		 * beyond the point where the target cpu disables interrupts
+		 * in stop machine. I doubt that it can happen, but at least
+		 * there is a theoretical chance. Virtualization might be
+		 * able to expose this, but AFAICT the IOAPIC emulation is not
+		 * as stupid as the real hardware.
+		 *
+		 * Anyway, there is nothing we can do about that at this point
+		 * w/o refactoring the whole fixup_irq() business completely.
+		 * We print at least the irq number and the old vector number,
+		 * so we have the necessary information when a problem in that
+		 * area arises.
+		 */
+		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+			irqd->irq, vector);
+	}
+	free_moved_vector(apicd);
+}
+
 #else
 # define apic_set_affinity	NULL
+# define apic_force_complete_move NULL
 #endif
 
 static int apic_retrigger_irq(struct irq_data *irqd)
@@ -927,35 +1028,12 @@ static struct irq_chip lapic_controller = {
 	.irq_ack		= apic_ack_edge,
 	.irq_set_affinity	= apic_set_affinity,
 	.irq_compose_msi_msg	= x86_vector_msi_compose_msg,
+	.irq_force_complete_move = apic_force_complete_move,
 	.irq_retrigger		= apic_retrigger_irq,
 };
 
 #ifdef CONFIG_SMP
 
-static void free_moved_vector(struct apic_chip_data *apicd)
-{
-	unsigned int vector = apicd->prev_vector;
-	unsigned int cpu = apicd->prev_cpu;
-	bool managed = apicd->is_managed;
-
-	/*
-	 * Managed interrupts are usually not migrated away
-	 * from an online CPU, but CPU isolation 'managed_irq'
-	 * can make that happen.
-	 * 1) Activation does not take the isolation into account
-	 *    to keep the code simple
-	 * 2) Migration away from an isolated CPU can happen when
-	 *    a non-isolated CPU which is in the calculated
-	 *    affinity mask comes online.
-	 */
-	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
-	irq_matrix_free(vector_matrix, cpu, vector, managed);
-	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
-	hlist_del_init(&apicd->clist);
-	apicd->prev_vector = 0;
-	apicd->move_in_progress = 0;
-}
-
 static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
 {
 	struct apic_chip_data *apicd;
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
 	__vector_schedule_cleanup(apicd);
 }
 
-/*
- * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
- */
-void irq_force_complete_move(struct irq_desc *desc)
-{
-	unsigned int cpu = smp_processor_id();
-	struct apic_chip_data *apicd;
-	struct irq_data *irqd;
-	unsigned int vector;
-
-	/*
-	 * The function is called for all descriptors regardless of which
-	 * irqdomain they belong to. For example if an IRQ is provided by
-	 * an irq_chip as part of a GPIO driver, the chip data for that
-	 * descriptor is specific to the irq_chip in question.
-	 *
-	 * Check first that the chip_data is what we expect
-	 * (apic_chip_data) before touching it any further.
-	 */
-	irqd = irq_domain_get_irq_data(x86_vector_domain,
-				       irq_desc_get_irq(desc));
-	if (!irqd)
-		return;
-
-	raw_spin_lock(&vector_lock);
-	apicd = apic_chip_data(irqd);
-	if (!apicd)
-		goto unlock;
-
-	/*
-	 * If prev_vector is empty or the descriptor is neither currently
-	 * nor previously on the outgoing CPU no action required.
-	 */
-	vector = apicd->prev_vector;
-	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
-		goto unlock;
-
-	/*
-	 * This is tricky. If the cleanup of the old vector has not been
-	 * done yet, then the following setaffinity call will fail with
-	 * -EBUSY. This can leave the interrupt in a stale state.
-	 *
-	 * All CPUs are stuck in stop machine with interrupts disabled so
-	 * calling __irq_complete_move() would be completely pointless.
-	 *
-	 * 1) The interrupt is in move_in_progress state. That means that we
-	 *    have not seen an interrupt since the io_apic was reprogrammed to
-	 *    the new vector.
-	 *
-	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
-	 *    have not been processed yet.
-	 */
-	if (apicd->move_in_progress) {
-		/*
-		 * In theory there is a race:
-		 *
-		 * set_ioapic(new_vector) <-- Interrupt is raised before update
-		 *                            is effective, i.e. it's raised on
-		 *                            the old vector.
-		 *
-		 * So if the target cpu cannot handle that interrupt before
-		 * the old vector is cleaned up, we get a spurious interrupt
-		 * and in the worst case the ioapic irq line becomes stale.
-		 *
-		 * But in case of cpu hotplug this should be a non issue
-		 * because if the affinity update happens right before all
-		 * cpus rendezvous in stop machine, there is no way that the
-		 * interrupt can be blocked on the target cpu because all cpus
-		 * loops first with interrupts enabled in stop machine, so the
-		 * old vector is not yet cleaned up when the interrupt fires.
-		 *
-		 * So the only way to run into this issue is if the delivery
-		 * of the interrupt on the apic/system bus would be delayed
-		 * beyond the point where the target cpu disables interrupts
-		 * in stop machine. I doubt that it can happen, but at least
-		 * there is a theoretical chance. Virtualization might be
-		 * able to expose this, but AFAICT the IOAPIC emulation is not
-		 * as stupid as the real hardware.
-		 *
-		 * Anyway, there is nothing we can do about that at this point
-		 * w/o refactoring the whole fixup_irq() business completely.
-		 * We print at least the irq number and the old vector number,
-		 * so we have the necessary information when a problem in that
-		 * area arises.
-		 */
-		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
-			irqd->irq, vector);
-	}
-	free_moved_vector(apicd);
-unlock:
-	raw_spin_unlock(&vector_lock);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Note, this is not accurate accounting, but at least good enough to
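Two simplifications fall out of the x86 conversion above: the callback receives the vector domain's irq_data directly, so the old lookup through irq_domain_get_irq_data(x86_vector_domain, ...) and its chip_data sanity check are no longer needed, and guard(raw_spinlock)(&vector_lock) drops the lock on every return path, which turns the goto unlock / raw_spin_unlock() pairs into plain early returns.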
include/linux/irq.h

@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @ipi_send_mask:	send an IPI to destination cpus in cpumask
  * @irq_nmi_setup:	function called from core code before enabling an NMI
  * @irq_nmi_teardown:	function called from core code after disabling an NMI
+ * @irq_force_complete_move:	optional function to force complete pending irq move
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
 	int		(*irq_nmi_setup)(struct irq_data *data);
 	void		(*irq_nmi_teardown)(struct irq_data *data);
 
+	void		(*irq_force_complete_move)(struct irq_data *data);
+
 	unsigned long	flags;
 };
 
@@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data)
 	__irq_move_irq(data);
 }
 void irq_move_masked_irq(struct irq_data *data);
-void irq_force_complete_move(struct irq_desc *desc);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
-static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif
 
 extern int no_irq_affinity;
kernel/irq/internals.h

@@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
 	return desc->pending_mask;
 }
 bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+void irq_force_complete_move(struct irq_desc *desc);
 #else /* CONFIG_GENERIC_PENDING_IRQ */
 static inline bool irq_can_move_pcntxt(struct irq_data *data)
 {
@@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 {
 	return false;
 }
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
kernel/irq/migration.c

@@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
 	return true;
 }
 
+void irq_force_complete_move(struct irq_desc *desc)
+{
+	for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
+		if (d->chip && d->chip->irq_force_complete_move) {
+			d->chip->irq_force_complete_move(d);
+			return;
+		}
+	}
+}
+
 void irq_move_masked_irq(struct irq_data *idata)
 {
 	struct irq_desc *desc = irq_data_to_desc(idata);
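For reference, the common helper above is what the CPU hotplug fixup path now reaches. A sketch of the call site, assuming the pre-existing migrate_one_irq() in kernel/irq/cpuhotplug.c (the caller is not part of this diff):

    /* Sketch only: runs on the outgoing CPU with interrupts disabled. */
    static bool migrate_one_irq(struct irq_desc *desc)
    {
            /*
             * Complete a possibly pending vector move first. With this
             * commit the call lands in the common helper, which walks
             * d->parent_data until a chip providing
             * ->irq_force_complete_move is found (on x86 the vector
             * domain's lapic_controller); if no chip in the hierarchy
             * implements it, the call is a no-op.
             */
            irq_force_complete_move(desc);

            /* ... the affinity is then moved off the outgoing CPU ... */
            return true;
    }

Because the walk stops at the first chip that implements the callback, an architecture like RISC-V can leave the hook unset everywhere except the IMSIC driver, which is exactly the runtime configurability the commit message describes.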