summaryrefslogtreecommitdiff
path: root/drivers/pci
diff options
context:
space:
mode:
author: Leon Romanovsky <leonro@nvidia.com>, 2021-04-04 10:22:18 +0300
committer: Leon Romanovsky <leonro@nvidia.com>, 2021-04-04 10:26:30 +0300
commit: c3d5c2d96d69f2578d6fbf66e39cc2cf840d9812 (patch)
tree: 04d5f784853bafe12c034f35a9a013431a746217 /drivers/pci
parent: 26bf30902c10473ba38f220d3401a61c56d8db3b (diff)
PCI/IOV: Add sysfs MSI-X vector assignment interface
A typical cloud provider SR-IOV use case is to create many VFs for use by guest VMs. The VFs may not be assigned to a VM until a customer requests a VM of a certain size, e.g., number of CPUs. A VF may need MSI-X vectors proportional to the number of CPUs in the VM, but there is no standard way to change the number of MSI-X vectors supported by a VF.

Some Mellanox ConnectX devices support dynamic assignment of MSI-X vectors to SR-IOV VFs. This can be done by the PF driver after VFs are enabled, and it can be done without affecting VFs that are already in use. The hardware supports a limited pool of MSI-X vectors that can be assigned to the PF or to individual VFs. This is device-specific behavior that requires support in the PF driver.

Add a read-only "sriov_vf_total_msix" sysfs file for the PF and a writable "sriov_vf_msix_count" file for each VF. Management software may use these to learn how many MSI-X vectors are available and to dynamically assign them to VFs before the VFs are passed through to a VM.

If the PF driver implements the ->sriov_get_vf_total_msix() callback, "sriov_vf_total_msix" contains the total number of MSI-X vectors available for distribution among VFs.

If no driver is bound to the VF, writing "N" to "sriov_vf_msix_count" uses the PF driver ->sriov_set_msix_vec_count() callback to assign "N" MSI-X vectors to the VF. When a VF driver subsequently reads the MSI-X Message Control register, it will see the new Table Size "N".

Link: https://lore.kernel.org/linux-pci/20210314124256.70253-2-leon@kernel.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Diffstat (limited to 'drivers/pci')
-rw-r--r-- drivers/pci/iov.c 102
-rw-r--r-- drivers/pci/pci-sysfs.c 3
-rw-r--r-- drivers/pci/pci.h 3
3 files changed, 100 insertions, 8 deletions
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 4afd4ee4f7f0..afc06e6ce115 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
return (dev->devfn + dev->sriov->offset +
dev->sriov->stride * vf_id) & 0xff;
}
+EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
/*
* Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
@@ -157,6 +158,92 @@ failed:
return rc;
}
+#ifdef CONFIG_PCI_MSI
+static ssize_t sriov_vf_total_msix_show(struct device *dev, /* sysfs show handler: prints the value reported by the bound driver's ->sriov_get_vf_total_msix(), or 0 */
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u32 vf_total_msix = 0; /* value emitted when no driver is bound or the driver lacks the callback */
+
+ device_lock(dev); /* held across both the pdev->driver check and the callback invocation */
+ if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
+ goto unlock; /* nothing to query: fall through to emit the initial 0 */
+
+ vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
+unlock:
+ device_unlock(dev);
+ return sysfs_emit(buf, "%u\n", vf_total_msix); /* emitted after the lock is dropped; only the local copy is used */
+}
+static DEVICE_ATTR_RO(sriov_vf_total_msix); /* defines dev_attr_sriov_vf_total_msix, referenced by the PF attribute array */
+
+static ssize_t sriov_vf_msix_count_store(struct device *dev, /* sysfs store handler: asks the PF driver to assign the written number of MSI-X vectors to this VF */
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *vf_dev = to_pci_dev(dev); /* the device whose attribute was written */
+ struct pci_dev *pdev = pci_physfn(vf_dev); /* its physical function; the callback is taken from this device's driver */
+ int val, ret;
+
+ ret = kstrtoint(buf, 0, &val); /* parse failures are returned to the writer unchanged */
+ if (ret)
+ return ret;
+
+ if (val < 0) /* a negative vector count is rejected before any lock is taken */
+ return -EINVAL;
+
+ device_lock(&pdev->dev); /* lock order: PF first, then VF; released in reverse at the labels below */
+ if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
+ ret = -EOPNOTSUPP; /* no PF driver bound, or it does not implement the callback */
+ goto err_pdev;
+ }
+
+ device_lock(&vf_dev->dev); /* held across the vf_dev->driver check and the callback */
+ if (vf_dev->driver) {
+ /*
+ * A driver is already attached to this VF and has configured
+ * itself based on the current MSI-X vector count. Changing
+ * the vector size could mess up the driver, so block it.
+ */
+ ret = -EBUSY;
+ goto err_dev;
+ }
+
+ ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val); /* PF driver callback, invoked with the VF device and the requested count */
+
+err_dev:
+ device_unlock(&vf_dev->dev);
+err_pdev:
+ device_unlock(&pdev->dev);
+ return ret ? : count; /* GNU "?:" extension: a non-zero ret is returned as-is, otherwise count */
+}
+static DEVICE_ATTR_WO(sriov_vf_msix_count); /* defines dev_attr_sriov_vf_msix_count, referenced by sriov_vf_dev_attrs */
+#endif
+
+static struct attribute *sriov_vf_dev_attrs[] = { /* attributes of the VF group; NULL-terminated */
+#ifdef CONFIG_PCI_MSI
+ &dev_attr_sriov_vf_msix_count.attr, /* only defined (and listed) when CONFIG_PCI_MSI is set */
+#endif
+ NULL, /* without CONFIG_PCI_MSI the array holds only this terminator */
+};
+
+static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj, /* .is_visible callback of the VF group: returns the mode to use for attribute a */
+ struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (!pdev->is_virtfn) /* device is not a VF */
+ return 0; /* mode 0 — presumably hides the attribute per the sysfs is_visible convention; confirm */
+
+ return a->mode; /* VF: keep the attribute's declared mode; the index n is unused */
+}
+
+const struct attribute_group sriov_vf_dev_attr_group = { /* non-static: declared extern in pci.h and listed in pci_dev_attr_groups[] */
+ .attrs = sriov_vf_dev_attrs,
+ .is_visible = sriov_vf_attrs_are_visible, /* mode filter: 0 for devices without is_virtfn set */
+};
+
int pci_iov_add_virtfn(struct pci_dev *dev, int id)
{
int i;
@@ -400,18 +487,21 @@ static DEVICE_ATTR_RO(sriov_stride);
static DEVICE_ATTR_RO(sriov_vf_device);
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
-static struct attribute *sriov_dev_attrs[] = {
+static struct attribute *sriov_pf_dev_attrs[] = {
&dev_attr_sriov_totalvfs.attr,
&dev_attr_sriov_numvfs.attr,
&dev_attr_sriov_offset.attr,
&dev_attr_sriov_stride.attr,
&dev_attr_sriov_vf_device.attr,
&dev_attr_sriov_drivers_autoprobe.attr,
+#ifdef CONFIG_PCI_MSI
+ &dev_attr_sriov_vf_total_msix.attr,
+#endif
NULL,
};
-static umode_t sriov_attrs_are_visible(struct kobject *kobj,
- struct attribute *a, int n)
+static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
@@ -421,9 +511,9 @@ static umode_t sriov_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
-const struct attribute_group sriov_dev_attr_group = {
- .attrs = sriov_dev_attrs,
- .is_visible = sriov_attrs_are_visible,
+const struct attribute_group sriov_pf_dev_attr_group = {
+ .attrs = sriov_pf_dev_attrs,
+ .is_visible = sriov_pf_attrs_are_visible,
};
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index f8afd54ca3e1..a6b8fbbba6d2 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1567,7 +1567,8 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
&pci_dev_attr_group,
&pci_dev_hp_attr_group,
#ifdef CONFIG_PCI_IOV
- &sriov_dev_attr_group,
+ &sriov_pf_dev_attr_group,
+ &sriov_vf_dev_attr_group,
#endif
&pci_bridge_attr_group,
&pcie_dev_attr_group,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index ef7c4661314f..afb87b917f07 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -501,7 +501,8 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno);
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
void pci_restore_iov_state(struct pci_dev *dev);
int pci_iov_bus_range(struct pci_bus *bus);
-extern const struct attribute_group sriov_dev_attr_group;
+extern const struct attribute_group sriov_pf_dev_attr_group;
+extern const struct attribute_group sriov_vf_dev_attr_group;
#else
static inline int pci_iov_init(struct pci_dev *dev)
{