From 51906e779f2b13b38f8153774c4c7163d412ffd9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 19 Nov 2012 16:01:29 +0100 Subject: x86/MSI: Support multiple MSIs in presence of IRQ remapping The MSI specification has several constraints in comparison with MSI-X, the most notable of which is the inability to configure MSIs independently. As a result, it is impossible to dispatch interrupts from different queues to different CPUs. This largely devalues the support of multiple MSIs in SMP systems. Also, the necessity to allocate a contiguous block of vector numbers for devices capable of multiple MSIs might cause considerable pressure on the x86 interrupt vector allocator and could lead to fragmentation of the interrupt vector space. This patch overcomes both drawbacks in the presence of IRQ remapping and lets devices take advantage of multiple queues and per-IRQ affinity assignments. Signed-off-by: Alexander Gordeev Cc: Bjorn Helgaas Cc: Suresh Siddha Cc: Yinghai Lu Cc: Matthew Wilcox Cc: Jeff Garzik Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c8bd86ff56b5fc118257436768aaa04489ac0a4c.1353324359.git.agordeev@redhat.com Signed-off-by: Ingo Molnar --- include/linux/irq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index fdf2c4a238cc..1eab99111e94 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -528,6 +528,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); +extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry); extern struct irq_data *irq_get_irq_data(unsigned int irq); static inline struct irq_chip *irq_get_chip(unsigned int irq) @@ -590,6 +592,9 @@ int 
__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, #define irq_alloc_desc_from(from, node) \ irq_alloc_descs(-1, from, 1, node) +#define irq_alloc_descs_from(from, cnt, node) \ + irq_alloc_descs(-1, from, cnt, node) + void irq_free_descs(unsigned int irq, unsigned int cnt); int irq_reserve_irqs(unsigned int from, unsigned int cnt); -- cgit v1.2.3 From 08261d87f7d1b6253ab3223756625a5c74532293 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 19 Nov 2012 16:02:10 +0100 Subject: PCI/MSI: Enable multiple MSIs with pci_enable_msi_block_auto() The new function pci_enable_msi_block_auto() tries to allocate the maximum possible number of MSIs, up to the number the device supports. It generalizes the pattern in which pci_enable_msi_block() is called repeatedly until it succeeds or fails. Unlike pci_enable_msi_block(), which takes the number of MSIs to allocate as an input parameter, pci_enable_msi_block_auto() can be used by device drivers to obtain the number of assigned MSIs and the number of MSIs the device supports. Signed-off-by: Alexander Gordeev Acked-by: Bjorn Helgaas Cc: Suresh Siddha Cc: Yinghai Lu Cc: Matthew Wilcox Cc: Jeff Garzik Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c3de2419df94a0f95ca1a6f755afc421486455e6.1353324359.git.agordeev@redhat.com Signed-off-by: Ingo Molnar --- Documentation/PCI/MSI-HOWTO.txt | 37 ++++++++++++++++++++++++++++++++----- drivers/pci/msi.c | 26 ++++++++++++++++++++++++++ include/linux/pci.h | 7 +++++++ 3 files changed, 65 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 53e6fca146d7..a09178086c30 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -127,15 +127,42 @@ on the number of vectors that can be allocated; pci_enable_msi_block() returns as soon as it finds any constraint that doesn't allow the call to succeed. 
-4.2.3 pci_disable_msi +4.2.3 pci_enable_msi_block_auto + +int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count) + +This variation on pci_enable_msi() call allows a device driver to request +the maximum possible number of MSIs. The MSI specification only allows +interrupts to be allocated in powers of two, up to a maximum of 2^5 (32). + +If this function returns a positive number, it indicates that it has +succeeded and the returned value is the number of allocated interrupts. In +this case, the function enables MSI on this device and updates dev->irq to +be the lowest of the new interrupts assigned to it. The other interrupts +assigned to the device are in the range dev->irq to dev->irq + returned +value - 1. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to request any more MSI interrupts for +this device. + +If the device driver needs to know the number of interrupts the device +supports it can pass the pointer count where that number is stored. The +device driver must decide what action to take if pci_enable_msi_block_auto() +succeeds, but returns a value less than the number of interrupts supported. +If the device driver does not need to know the number of interrupts +supported, it can set the pointer count to NULL. + +4.2.4 pci_disable_msi void pci_disable_msi(struct pci_dev *dev) This function should be used to undo the effect of pci_enable_msi() or -pci_enable_msi_block(). Calling it restores dev->irq to the pin-based -interrupt number and frees the previously allocated message signaled -interrupt(s). The interrupt may subsequently be assigned to another -device, so drivers should not cache the value of dev->irq. +pci_enable_msi_block() or pci_enable_msi_block_auto(). Calling it restores +dev->irq to the pin-based interrupt number and frees the previously +allocated message signaled interrupt(s). 
The interrupt may subsequently be +assigned to another device, so drivers should not cache the value of +dev->irq. Before calling this function, a device driver must always call free_irq() on any interrupt for which it previously called request_irq(). diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5099636a6e5f..00cc78c7aa04 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -845,6 +845,32 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) } EXPORT_SYMBOL(pci_enable_msi_block); +int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec) +{ + int ret, pos, nvec; + u16 msgctl; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + + if (maxvec) + *maxvec = ret; + + do { + nvec = ret; + ret = pci_enable_msi_block(dev, nvec); + } while (ret > 0); + + if (ret < 0) + return ret; + return nvec; +} +EXPORT_SYMBOL(pci_enable_msi_block_auto); + void pci_msi_shutdown(struct pci_dev *dev) { struct msi_desc *desc; diff --git a/include/linux/pci.h b/include/linux/pci.h index 15472d691ee6..6fa4dd2a3b9e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) return -1; } +static inline int +pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec) +{ + return -1; +} + static inline void pci_msi_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msi(struct pci_dev *dev) @@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void) } #else extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); +extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); extern int pci_msix_table_size(struct pci_dev *dev); -- cgit v1.2.3 From 
5afba62cc8a16716508605e02c1b02ee5f969184 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:38 +0200 Subject: x86, msi: Use IRQ remapping specific setup_msi_irqs routine Use separate routines to set up MSI IRQs for both irq_remapping_enabled cases. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/irq_remapping.h | 12 ---- arch/x86/include/asm/pci.h | 3 + arch/x86/kernel/apic/io_apic.c | 104 ++++---------------------------- drivers/iommu/irq_remapping.c | 112 ++++++++++++++++++++++++++++++++++- include/linux/irq.h | 3 + 5 files changed, 125 insertions(+), 109 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5fb9bbbd2f14..0ee1e88bd17a 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -47,9 +47,6 @@ extern void free_remapped_irq(int irq); extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); -extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); -extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); #else /* CONFIG_IRQ_REMAP */ @@ -83,15 +80,6 @@ static inline void compose_remapped_msi_msg(struct pci_dev *pdev, struct msi_msg *msg, u8 hpet_id) { } -static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) -{ - return -ENODEV; -} -static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) -{ - return -ENODEV; -} static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { return -ENODEV; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index dba7805176bf..c28fd02f4bf7 100644 --- a/arch/x86/include/asm/pci.h +++ 
b/arch/x86/include/asm/pci.h @@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) #define arch_teardown_msi_irq x86_teardown_msi_irq #define arch_restore_msi_irqs x86_restore_msi_irqs /* implemented in arch/x86/kernel/apic/io_apic. */ +struct msi_desc; int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev, int irq); +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset); /* default to the implementation in drivers/lib/msi.c */ #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS #define HAVE_DEFAULT_MSI_RESTORE_IRQS diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e7b87630c13d..d4b045e018fb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3066,7 +3066,7 @@ void destroy_irq(unsigned int irq) free_irq_at(irq, cfg); } -static inline void destroy_irqs(unsigned int irq, unsigned int count) +void destroy_irqs(unsigned int irq, unsigned int count) { unsigned int i; @@ -3165,8 +3165,8 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) { struct irq_chip *chip = &msi_chip; struct msi_msg msg; @@ -3198,44 +3198,28 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, return 0; } -int setup_msix_irqs(struct pci_dev *dev, int nvec) +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; + int node, ret; + + /* Multiple MSI vectors only supported with interrupt remapping */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; 
node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; - sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want, node); if (irq == 0) return -ENOSPC; + irq_want = irq + 1; - if (!irq_remapping_enabled) - goto no_ir; - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_remapped_irq(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq, index, - sub_handle); - if (ret < 0) - goto error; - } -no_ir: ret = setup_msi_irq(dev, msidesc, irq, 0); if (ret < 0) goto error; - sub_handle++; } return 0; @@ -3244,74 +3228,6 @@ error: return ret; } -int setup_msi_irqs(struct pci_dev *dev, int nvec) -{ - int node, ret, sub_handle, index = 0; - unsigned int irq; - struct msi_desc *msidesc; - - if (nvec > 1 && !irq_remapping_enabled) - return 1; - - nvec = __roundup_pow_of_two(nvec); - - WARN_ON(!list_is_singular(&dev->msi_list)); - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - WARN_ON(msidesc->irq); - WARN_ON(msidesc->msi_attrib.multiple); - - node = dev_to_node(&dev->dev); - irq = __create_irqs(nr_irqs_gsi, nvec, node); - if (irq == 0) - return -ENOSPC; - - if (!irq_remapping_enabled) { - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) - goto error; - return 0; - } - - msidesc->msi_attrib.multiple = ilog2(nvec); - for (sub_handle = 0; sub_handle < nvec; sub_handle++) { - if (!sub_handle) { - index = msi_alloc_remapped_irq(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq + sub_handle, - index, sub_handle); - if (ret < 0) - goto error; - } - ret = setup_msi_irq(dev, msidesc, irq, sub_handle); - if (ret < 0) - goto error; - } - return 0; - -error: - destroy_irqs(irq, nvec); - - /* - * Restore altered MSI descriptor fields and prevent just destroyed - * IRQs from tearing down again in default_teardown_msi_irqs() - */ - 
msidesc->irq = 0; - msidesc->msi_attrib.multiple = 0; - - return ret; -} - -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - if (type == PCI_CAP_ID_MSI) - return setup_msi_irqs(dev, nvec); - return setup_msix_irqs(dev, nvec); -} - void native_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 0baad3b9ecba..20f04b67efd2 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -21,6 +23,10 @@ int no_x2apic_optout; static struct irq_remap_ops *remap_ops; +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); + static void irq_remapping_disable_io_apic(void) { /* @@ -34,9 +40,109 @@ static void irq_remapping_disable_io_apic(void) disconnect_bsp_APIC(0); } +static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + nvec = __roundup_pow_of_two(nvec); + + WARN_ON(!list_is_singular(&dev->msi_list)); + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + WARN_ON(msidesc->irq); + WARN_ON(msidesc->msi_attrib.multiple); + + node = dev_to_node(&dev->dev); + irq = __create_irqs(get_nr_irqs_gsi(), nvec, node); + if (irq == 0) + return -ENOSPC; + + msidesc->msi_attrib.multiple = ilog2(nvec); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + destroy_irqs(irq, nvec); + + /* + * Restore 
altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + msidesc->msi_attrib.multiple = 0; + + return ret; +} + +static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + struct msi_desc *msidesc; + unsigned int irq; + + node = dev_to_node(&dev->dev); + irq = get_nr_irqs_gsi(); + sub_handle = 0; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + + irq = create_irq_nr(irq, node); + if (irq == 0) + return -1; + + if (sub_handle == 0) + ret = index = msi_alloc_remapped_irq(dev, irq, nvec); + else + ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); + + if (ret < 0) + goto error; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + + sub_handle += 1; + irq += 1; + } + + return 0; + +error: + destroy_irq(irq); + return ret; +} + +static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, + int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return do_setup_msi_irqs(dev, nvec); + else + return do_setup_msix_irqs(dev, nvec); +} + static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; } @@ -186,7 +292,7 @@ void compose_remapped_msi_msg(struct pci_dev *pdev, remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); } -int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) { if (!remap_ops || !remap_ops->msi_alloc_irq) return -ENODEV; @@ -194,8 +300,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) return remap_ops->msi_alloc_irq(pdev, irq, nvec); } -int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + 
int index, int sub_handle) { if (!remap_ops || !remap_ops->msi_setup_irq) return -ENODEV; diff --git a/include/linux/irq.h b/include/linux/irq.h index 1eab99111e94..bc4e06611958 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq) /* Handle dynamic irq creation and destruction */ extern unsigned int create_irq_nr(unsigned int irq_want, int node); +extern unsigned int __create_irqs(unsigned int from, unsigned int count, + int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); +extern void destroy_irqs(unsigned int irq, unsigned int count); /* * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and -- cgit v1.2.3