Skip to content

Commit a2f67d5

Browse files
Ian Munsie authored and mpe committed
cxl: Add support for interrupts on the Mellanox CX4
The Mellanox CX4 in cxl mode uses a hybrid interrupt model, where interrupts are routed from the networking hardware to the XSL using the MSIX table, and from there will be transformed back into an MSIX interrupt using the cxl style interrupts (i.e. using IVTE entries and ranges to map a PE and AFU interrupt number to an MSIX address).

We want to hide the implementation details of cxl interrupts as much as possible. To this end, we use a special version of the MSI setup & teardown routines in the PHB while in cxl mode to allocate the cxl interrupts and configure the IVTE entries in the process element.

This function does not configure the MSIX table - the CX4 card uses a custom format in that table and it would not be appropriate to fill that out in generic code. The rest of the functionality is similar to the "Full MSI-X mode" described in the CAIA, and this could be easily extended to support other adapters that use that mode in the future.

The interrupts will be associated with the default context. If the maximum number of interrupts per context has been limited (e.g. by the mlx5 driver), it will automatically allocate additional kernel contexts to associate extra interrupts as required. These contexts will be started using the same WED that was used to start the default context.

Signed-off-by: Ian Munsie <[email protected]>
Reviewed-by: Andrew Donnellan <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
1 parent cbce091 commit a2f67d5

File tree

8 files changed

+202
-0
lines changed

8 files changed

+202
-0
lines changed

arch/powerpc/platforms/powernv/pci-cxl.c

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*/
99

1010
#include <linux/module.h>
11+
#include <linux/msi.h>
1112
#include <asm/pci-bridge.h>
1213
#include <asm/pnv-pci.h>
1314
#include <asm/opal.h>
@@ -281,3 +282,86 @@ void pnv_cxl_disable_device(struct pci_dev *dev)
281282
cxl_pci_disable_device(dev);
282283
cxl_afu_put(afu);
283284
}
285+
286+
/*
287+
* This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This
288+
* function handles setting up the IVTE entries for the XSL to use.
289+
*
290+
* We are currently not filling out the MSIX table, since the only currently
291+
* supported adapter (CX4) uses a custom MSIX table format in cxl mode and it
292+
* is up to their driver to fill that out. In the future we may fill out the
293+
* MSIX table (and change the IVTE entries to be an index to the MSIX table)
294+
* for adapters implementing the Full MSI-X mode described in the CAIA.
295+
*/
296+
int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
297+
{
298+
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
299+
struct pnv_phb *phb = hose->private_data;
300+
struct msi_desc *entry;
301+
struct cxl_context *ctx = NULL;
302+
unsigned int virq;
303+
int hwirq;
304+
int afu_irq = 0;
305+
int rc;
306+
307+
if (WARN_ON(!phb) || !phb->msi_bmp.bitmap)
308+
return -ENODEV;
309+
310+
if (pdev->no_64bit_msi && !phb->msi32_support)
311+
return -ENODEV;
312+
313+
rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type);
314+
if (rc)
315+
return rc;
316+
317+
for_each_pci_msi_entry(entry, pdev) {
318+
if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
319+
pr_warn("%s: Supports only 64-bit MSIs\n",
320+
pci_name(pdev));
321+
return -ENXIO;
322+
}
323+
324+
hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq);
325+
if (WARN_ON(hwirq <= 0))
326+
return (hwirq ? hwirq : -ENOMEM);
327+
328+
virq = irq_create_mapping(NULL, hwirq);
329+
if (virq == NO_IRQ) {
330+
pr_warn("%s: Failed to map cxl mode MSI to linux irq\n",
331+
pci_name(pdev));
332+
return -ENOMEM;
333+
}
334+
335+
rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq);
336+
if (rc) {
337+
pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev));
338+
irq_dispose_mapping(virq);
339+
return rc;
340+
}
341+
342+
irq_set_msi_desc(virq, entry);
343+
}
344+
345+
return 0;
346+
}
347+
348+
void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
349+
{
350+
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
351+
struct pnv_phb *phb = hose->private_data;
352+
struct msi_desc *entry;
353+
irq_hw_number_t hwirq;
354+
355+
if (WARN_ON(!phb))
356+
return;
357+
358+
for_each_pci_msi_entry(entry, pdev) {
359+
if (entry->irq == NO_IRQ)
360+
continue;
361+
hwirq = virq_to_hw(entry->irq);
362+
irq_set_msi_desc(entry->irq, NULL);
363+
irq_dispose_mapping(entry->irq);
364+
}
365+
366+
cxl_cx4_teardown_msi_irqs(pdev);
367+
}

arch/powerpc/platforms/powernv/pci-ioda.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3465,6 +3465,10 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
34653465
const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
34663466
.dma_dev_setup = pnv_pci_dma_dev_setup,
34673467
.dma_bus_setup = pnv_pci_dma_bus_setup,
3468+
#ifdef CONFIG_PCI_MSI
3469+
.setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs,
3470+
.teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs,
3471+
#endif
34683472
.enable_device_hook = pnv_cxl_enable_device_hook,
34693473
.disable_device = pnv_cxl_disable_device,
34703474
.release_device = pnv_pci_release_device,

arch/powerpc/platforms/powernv/pci.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
247247
/* cxl functions */
248248
extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev);
249249
extern void pnv_cxl_disable_device(struct pci_dev *dev);
250+
extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
251+
extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
250252

251253

252254
/* phb ops (cxl switches these when enabling the kernel api on the phb) */

drivers/misc/cxl/api.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <misc/cxl.h>
1515
#include <linux/fs.h>
1616
#include <asm/pnv-pci.h>
17+
#include <linux/msi.h>
#include <linux/err.h>
1718

1819
#include "cxl.h"
1920

@@ -489,3 +490,73 @@ int cxl_get_max_irqs_per_process(struct pci_dev *dev)
489490
return afu->irqs_max;
490491
}
491492
EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);
493+
494+
/*
495+
* This is a special interrupt allocation routine called from the PHB's MSI
496+
* setup function. When capi interrupts are allocated in this manner they must
497+
* still be associated with a running context, but since the MSI APIs have no
498+
* way to specify this we use the default context associated with the device.
499+
*
500+
* The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
501+
* interrupt number, so in order to overcome this their driver informs us of
502+
* the restriction by setting the maximum interrupts per context, and we
503+
* allocate additional contexts as necessary so that we can keep the AFU
504+
* interrupt number within the supported range.
505+
*/
506+
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
507+
{
508+
struct cxl_context *ctx, *new_ctx, *default_ctx;
509+
int remaining;
510+
int rc;
511+
512+
ctx = default_ctx = cxl_get_context(pdev);
513+
if (WARN_ON(!default_ctx))
514+
return -ENODEV;
515+
516+
remaining = nvec;
517+
while (remaining > 0) {
518+
rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
519+
if (rc) {
520+
pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
521+
return rc;
522+
}
523+
remaining -= ctx->afu->irqs_max;
524+
525+
if (ctx != default_ctx && default_ctx->status == STARTED) {
526+
WARN_ON(cxl_start_context(ctx,
527+
be64_to_cpu(default_ctx->elem->common.wed),
528+
NULL));
529+
}
530+
531+
if (remaining > 0) {
532+
new_ctx = cxl_dev_context_init(pdev);
533+
if (!new_ctx) {
534+
pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
535+
return -ENOSPC;
536+
}
537+
list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
538+
ctx = new_ctx;
539+
}
540+
}
541+
542+
return 0;
543+
}
544+
/* Exported via cxl_base */
545+
546+
void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
547+
{
548+
struct cxl_context *ctx, *pos, *tmp;
549+
550+
ctx = cxl_get_context(pdev);
551+
if (WARN_ON(!ctx))
552+
return;
553+
554+
cxl_free_afu_irqs(ctx);
555+
list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
556+
cxl_stop_context(pos);
557+
cxl_free_afu_irqs(pos);
558+
list_del(&pos->extra_irq_contexts);
559+
cxl_release_context(pos);
560+
}
561+
}
562+
/* Exported via cxl_base */

drivers/misc/cxl/base.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,37 @@ int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_
158158
}
159159
EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq);
160160

161+
int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
162+
{
163+
int ret;
164+
struct cxl_calls *calls;
165+
166+
calls = cxl_calls_get();
167+
if (!calls)
168+
return false;
169+
170+
ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type);
171+
172+
cxl_calls_put(calls);
173+
174+
return ret;
175+
}
176+
EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs);
177+
178+
void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
179+
{
180+
struct cxl_calls *calls;
181+
182+
calls = cxl_calls_get();
183+
if (!calls)
184+
return;
185+
186+
calls->cxl_cx4_teardown_msi_irqs(pdev);
187+
188+
cxl_calls_put(calls);
189+
}
190+
EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs);
191+
161192
static int __init cxl_base_init(void)
162193
{
163194
struct device_node *np;

drivers/misc/cxl/cxl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,12 +731,16 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
731731
bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
732732
void _cxl_pci_disable_device(struct pci_dev *dev);
733733
int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
734+
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
735+
void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
734736

735737
/*
 * Table of entry points into the cxl driver. The wrappers in base.c obtain
 * it with cxl_calls_get() and call through these pointers; main.c fills it
 * with the corresponding _cxl_* implementations and sets owner to
 * THIS_MODULE.
 */
struct cxl_calls {
736738
void (*cxl_slbia)(struct mm_struct *mm);
737739
bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu);
738740
void (*cxl_pci_disable_device)(struct pci_dev *dev);
739741
int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
742+
int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type);
743+
void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev);
740744

741745
struct module *owner;
742746
};

drivers/misc/cxl/main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ static struct cxl_calls cxl_calls = {
113113
.cxl_pci_associate_default_context = _cxl_pci_associate_default_context,
114114
.cxl_pci_disable_device = _cxl_pci_disable_device,
115115
.cxl_next_msi_hwirq = _cxl_next_msi_hwirq,
116+
.cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs,
117+
.cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs,
116118
.owner = THIS_MODULE,
117119
};
118120

include/misc/cxl-base.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ void cxl_afu_put(struct cxl_afu *afu);
4343
void cxl_slbia(struct mm_struct *mm);
4444
bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
4545
void cxl_pci_disable_device(struct pci_dev *dev);
46+
int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
47+
void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
4648

4749
#else /* CONFIG_CXL_BASE */
4850

@@ -52,6 +54,8 @@ static inline void cxl_afu_put(struct cxl_afu *afu) {}
5254
/*
 * Stubs used when CONFIG_CXL_BASE is not set: the void ones do nothing, the
 * others report failure (false or -ENODEV).
 */
static inline void cxl_slbia(struct mm_struct *mm) {}
5355
static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; }
5456
static inline void cxl_pci_disable_device(struct pci_dev *dev) {}
57+
static inline int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { return -ENODEV; }
58+
static inline void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) {}
5559

5660
#endif /* CONFIG_CXL_BASE */
5761

0 commit comments

Comments
 (0)