Skip to content

Commit

Permalink
iommu/vt-d: Enable posted mode for device MSIs
Browse files Browse the repository at this point in the history
With posted MSI feature enabled on the CPU side, iommu interrupt
remapping table entries (IRTEs) for device MSI/x can be allocated,
activated, and programed in posted mode. This means that IRTEs are
linked with their respective PIDs of the target CPU.

Handlers for the posted MSI notification vector will de-multiplex
device MSI handlers. CPU notifications are coalesced if interrupts
arrive at a high frequency.

Posted interrupts are only used for device MSI and not for legacy devices
(IO/APIC, HPET).

Introduce a new irq_chip for posted MSIs, which has a dummy irq_ack()
callback as EOI is performed in the notification handler once.

When posted MSI is enabled, MSI domain/chip hierarchy will look like
this example:

domain:  IR-PCI-MSIX-0000:50:00.0-12
 hwirq:   0x29
 chip:    IR-PCI-MSIX-0000:50:00.0
  flags:   0x430
             IRQCHIP_SKIP_SET_WAKE
             IRQCHIP_ONESHOT_SAFE
 parent:
    domain:  INTEL-IR-10-13
     hwirq:   0x2d0000
     chip:    INTEL-IR-POST
      flags:   0x0
     parent:
        domain:  VECTOR
         hwirq:   0x77
         chip:    APIC

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240423174114.526704-13-jacob.jun.pan@linux.intel.com
  • Loading branch information
Jacob Pan authored and Thomas Gleixner committed Apr 29, 2024
1 parent be9be07 commit ed1e48e
Showing 1 changed file with 109 additions and 4 deletions.
113 changes: 109 additions & 4 deletions drivers/iommu/intel/irq_remapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <asm/cpu.h>
#include <asm/irq_remapping.h>
#include <asm/pci-direct.h>
#include <asm/posted_intr.h>

#include "iommu.h"
#include "../irq_remapping.h"
Expand Down Expand Up @@ -49,6 +50,7 @@ struct irq_2_iommu {
u16 sub_handle;
u8 irte_mask;
enum irq_mode mode;
bool posted_msi;
};

struct intel_ir_data {
Expand Down Expand Up @@ -1118,6 +1120,14 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
irte->redir_hint = 1;
}

static void prepare_irte_posted(struct irte *irte)
{
memset(irte, 0, sizeof(*irte));

irte->present = 1;
irte->p_pst = 1;
}

struct irq_remap_ops intel_irq_remap_ops = {
.prepare = intel_prepare_irq_remapping,
.enable = intel_enable_irq_remapping,
Expand All @@ -1126,6 +1136,47 @@ struct irq_remap_ops intel_irq_remap_ops = {
.enable_faulting = enable_drhd_fault_handling,
};

#ifdef CONFIG_X86_POSTED_MSI

static phys_addr_t get_pi_desc_addr(struct irq_data *irqd)
{
int cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));

if (WARN_ON(cpu >= nr_cpu_ids))
return 0;

return __pa(per_cpu_ptr(&posted_msi_pi_desc, cpu));
}

static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
{
struct intel_ir_data *ir_data = irqd->chip_data;
struct irte *irte = &ir_data->irte_entry;
struct irte irte_pi;
u64 pid_addr;

pid_addr = get_pi_desc_addr(irqd);

if (!pid_addr) {
pr_warn("Failed to setup IRQ %d for posted mode", irqd->irq);
return;
}

memset(&irte_pi, 0, sizeof(irte_pi));

/* The shared IRTE already be set up as posted during alloc_irte */
dmar_copy_shared_irte(&irte_pi, irte);

irte_pi.pda_l = (pid_addr >> (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
irte_pi.pda_h = (pid_addr >> 32) & ~(-1UL << PDA_HIGH_BIT);

modify_irte(&ir_data->irq_2_iommu, &irte_pi);
}

#else
static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {}
#endif

static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
{
struct intel_ir_data *ir_data = irqd->chip_data;
Expand All @@ -1139,8 +1190,9 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
irte->vector = cfg->vector;
irte->dest_id = IRTE_DEST(cfg->dest_apicid);

/* Update the hardware only if the interrupt is in remapped mode. */
if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
if (ir_data->irq_2_iommu.posted_msi)
intel_ir_reconfigure_irte_posted(irqd);
else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
modify_irte(&ir_data->irq_2_iommu, irte);
}

Expand Down Expand Up @@ -1194,7 +1246,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
struct intel_ir_data *ir_data = data->chip_data;
struct vcpu_data *vcpu_pi_info = info;

/* stop posting interrupts, back to remapping mode */
/* stop posting interrupts, back to the default mode */
if (!vcpu_pi_info) {
modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
} else {
Expand Down Expand Up @@ -1233,6 +1285,49 @@ static struct irq_chip intel_ir_chip = {
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
};

/*
* With posted MSIs, all vectors are multiplexed into a single notification
* vector. Devices MSIs are then dispatched in a demux loop where
* EOIs can be coalesced as well.
*
* "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
* function. Instead EOI is performed by the posted interrupt notification
* handler.
*
* For the example below, 3 MSIs are coalesced into one CPU notification. Only
* one apic_eoi() is needed.
*
* __sysvec_posted_msi_notification()
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
* dummy(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
* dummy(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
* dummy(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
* irq_exit()
*/

static void dummy_ack(struct irq_data *d) { }

static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
.irq_ack = dummy_ack,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
};

static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle)
{
memset(msg, 0, sizeof(*msg));
Expand Down Expand Up @@ -1274,6 +1369,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
break;
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
if (posted_msi_supported()) {
prepare_irte_posted(irte);
data->irq_2_iommu.posted_msi = 1;
}

set_msi_sid(irte,
pci_real_dma_dev(msi_desc_to_pci_dev(info->desc)));
break;
Expand Down Expand Up @@ -1361,7 +1461,12 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,

irq_data->hwirq = (index << 16) + i;
irq_data->chip_data = ird;
irq_data->chip = &intel_ir_chip;
if (posted_msi_supported() &&
((info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) ||
(info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX)))
irq_data->chip = &intel_ir_chip_post_msi;
else
irq_data->chip = &intel_ir_chip;
intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
}
Expand Down

0 comments on commit ed1e48e

Please sign in to comment.