summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx4/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx4/main.c')
-rw-r--r--drivers/infiniband/hw/mlx4/main.c891
1 files changed, 400 insertions, 491 deletions
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8be6db816460..efecdf0216d8 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -45,6 +45,9 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include <net/bonding.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
@@ -74,13 +77,6 @@ static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
-struct update_gid_work {
- struct work_struct work;
- union ib_gid gids[128];
- struct mlx4_ib_dev *dev;
- int port;
-};
-
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static struct workqueue_struct *wq;
@@ -93,8 +89,6 @@ static void init_query_mad(struct ib_smp *mad)
mad->method = IB_MGMT_METHOD_GET;
}
-static union ib_gid zgid;
-
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
@@ -131,6 +125,237 @@ static int num_ib_ports(struct mlx4_dev *dev)
return ib_ports;
}
+static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
+
+ if (dev) {
+ if (mlx4_is_bonded(ibdev->dev)) {
+ struct net_device *upper = NULL;
+
+ upper = netdev_master_upper_dev_get_rcu(dev);
+ if (upper) {
+ struct net_device *active;
+
+ active = bond_option_active_slave_get_rcu(netdev_priv(upper));
+ if (active)
+ dev = active;
+ }
+ }
+ }
+ if (dev)
+ dev_hold(dev);
+
+ rcu_read_unlock();
+ return dev;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ union ib_gid *gid_tbl;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ gid_tbl = mailbox->buf;
+
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
+ memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | port_num,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (mlx4_is_bonded(dev))
+ err += mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | 2,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_add_gid(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ int free = -1, found = -1;
+ int ret = 0;
+ int hw_update = 0;
+ int i;
+ struct gid_entry *gids = NULL;
+
+ if (!rdma_cap_roce_gid_table(device, port_num))
+ return -EINVAL;
+
+ if (port_num > MLX4_MAX_PORTS)
+ return -EINVAL;
+
+ if (!context)
+ return -EINVAL;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_bh(&iboe->lock);
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+ if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+ found = i;
+ break;
+ }
+ if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
+ free = i; /* HW has space */
+ }
+
+ if (found < 0) {
+ if (free < 0) {
+ ret = -ENOSPC;
+ } else {
+ port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
+ if (!port_gid_table->gids[free].ctx) {
+ ret = -ENOMEM;
+ } else {
+ *context = port_gid_table->gids[free].ctx;
+ memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].ctx->real_index = free;
+ port_gid_table->gids[free].ctx->refcount = 1;
+ hw_update = 1;
+ }
+ }
+ } else {
+ struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
+ *context = ctx;
+ ctx->refcount++;
+ }
+ if (!ret && hw_update) {
+ gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+ if (!gids) {
+ ret = -ENOMEM;
+ } else {
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ }
+ }
+ spin_unlock_bh(&iboe->lock);
+
+ if (!ret && hw_update) {
+ ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ kfree(gids);
+ }
+
+ return ret;
+}
+
+static int mlx4_ib_del_gid(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ void **context)
+{
+ struct gid_cache_context *ctx = *context;
+ struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ int ret = 0;
+ int hw_update = 0;
+ struct gid_entry *gids = NULL;
+
+ if (!rdma_cap_roce_gid_table(device, port_num))
+ return -EINVAL;
+
+ if (port_num > MLX4_MAX_PORTS)
+ return -EINVAL;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_bh(&iboe->lock);
+ if (ctx) {
+ ctx->refcount--;
+ if (!ctx->refcount) {
+ unsigned int real_index = ctx->real_index;
+
+ memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
+ kfree(port_gid_table->gids[real_index].ctx);
+ port_gid_table->gids[real_index].ctx = NULL;
+ hw_update = 1;
+ }
+ }
+ if (!ret && hw_update) {
+ int i;
+
+ gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+ if (!gids) {
+ ret = -ENOMEM;
+ } else {
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ }
+ }
+ spin_unlock_bh(&iboe->lock);
+
+ if (!ret && hw_update) {
+ ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ kfree(gids);
+ }
+ return ret;
+}
+
+int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
+ u8 port_num, int index)
+{
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct gid_cache_context *ctx = NULL;
+ union ib_gid gid;
+ struct mlx4_port_gid_table *port_gid_table;
+ int real_index = -EINVAL;
+ int i;
+ int ret;
+ unsigned long flags;
+
+ if (port_num > MLX4_MAX_PORTS)
+ return -EINVAL;
+
+ if (mlx4_is_bonded(ibdev->dev))
+ port_num = 1;
+
+ if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
+ return index;
+
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid);
+ if (ret)
+ return ret;
+
+ if (!memcmp(&gid, &zgid, sizeof(gid)))
+ return -EINVAL;
+
+ spin_lock_irqsave(&iboe->lock, flags);
+ port_gid_table = &iboe->gids[port_num - 1];
+
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
+ if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+ ctx = port_gid_table->gids[i].ctx;
+ break;
+ }
+ if (ctx)
+ real_index = ctx->real_index;
+ spin_unlock_irqrestore(&iboe->lock, flags);
+ return real_index;
+}
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
@@ -229,6 +454,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
+ props->max_sge_rd = props->max_sge;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
@@ -414,12 +640,13 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
- if (is_bonded)
- rtnl_lock(); /* required to get upper dev */
spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
- if (ndev && is_bonded)
- ndev = netdev_master_upper_dev_get(ndev);
+ if (ndev && is_bonded) {
+ rcu_read_lock(); /* required to get upper dev */
+ ndev = netdev_master_upper_dev_get_rcu(ndev);
+ rcu_read_unlock();
+ }
if (!ndev)
goto out_unlock;
@@ -431,8 +658,6 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock_bh(&iboe->lock);
- if (is_bonded)
- rtnl_unlock();
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
@@ -515,23 +740,27 @@ out:
return err;
}
-static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *gid)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibdev);
-
- *gid = dev->iboe.gid_table[port - 1][index];
-
- return 0;
-}
-
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
- if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ int ret;
+
+ if (rdma_protocol_ib(ibdev, port))
return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
- else
- return iboe_query_gid(ibdev, port, index, gid);
+
+ if (!rdma_protocol_roce(ibdev, port))
+ return -ENODEV;
+
+ if (!rdma_cap_roce_gid_table(ibdev, port))
+ return -ENODEV;
+
+ ret = ib_get_cached_gid(ibdev, port, index, gid);
+ if (ret == -EAGAIN) {
+ memcpy(gid, &zgid, sizeof(*gid));
+ return 0;
+ }
+
+ return ret;
}
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@@ -692,7 +921,7 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
resp.cqe_size = dev->dev->caps.cqe_size;
}
- context = kmalloc(sizeof *context, GFP_KERNEL);
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -729,21 +958,143 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
+static void mlx4_ib_vma_open(struct vm_area_struct *area)
+{
+ /* vma_open is called when a new VMA is created on top of our VMA.
+ * This is done through either mremap flow or split_vma (usually due
+ * to mlock, madvise, munmap, etc.). We do not support a clone of the
+ * vma, as this VMA is strongly hardware related. Therefore we set the
+ * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
+ * calling us again and trying to do incorrect actions. We assume that
+ * the original vma size is exactly a single page that there will be no
+ * "splitting" operations on.
+ */
+ area->vm_ops = NULL;
+}
+
+static void mlx4_ib_vma_close(struct vm_area_struct *area)
+{
+ struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
+
+ /* It's guaranteed that all VMAs opened on a FD are closed before the
+ * file itself is closed, therefore no sync is needed with the regular
+ * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
+ * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
+ * The close operation is usually called under mm->mmap_sem except when
+ * process is exiting. The exiting case is handled explicitly as part
+ * of mlx4_ib_disassociate_ucontext.
+ */
+ mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
+ area->vm_private_data;
+
+ /* set the vma context pointer to null in the mlx4_ib driver's private
+ * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
+ */
+ mlx4_ib_vma_priv_data->vma = NULL;
+}
+
+static const struct vm_operations_struct mlx4_ib_vm_ops = {
+ .open = mlx4_ib_vma_open,
+ .close = mlx4_ib_vma_close
+};
+
+static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ int i;
+ int ret = 0;
+ struct vm_area_struct *vma;
+ struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
+ struct task_struct *owning_process = NULL;
+ struct mm_struct *owning_mm = NULL;
+
+ owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+ if (!owning_process)
+ return;
+
+ owning_mm = get_task_mm(owning_process);
+ if (!owning_mm) {
+ pr_info("no mm, disassociate ucontext is pending task termination\n");
+ while (1) {
+ /* make sure that task is dead before returning, it may
+ * prevent a rare case of module down in parallel to a
+ * call to mlx4_ib_vma_close.
+ */
+ put_task_struct(owning_process);
+ msleep(1);
+ owning_process = get_pid_task(ibcontext->tgid,
+ PIDTYPE_PID);
+ if (!owning_process ||
+ owning_process->state == TASK_DEAD) {
+ pr_info("disassociate ucontext done, task was terminated\n");
+ /* in case task was dead need to release the task struct */
+ if (owning_process)
+ put_task_struct(owning_process);
+ return;
+ }
+ }
+ }
+
+ /* need to protect from a race on closing the vma as part of
+ * mlx4_ib_vma_close().
+ */
+ down_read(&owning_mm->mmap_sem);
+ for (i = 0; i < HW_BAR_COUNT; i++) {
+ vma = context->hw_bar_info[i].vma;
+ if (!vma)
+ continue;
+
+ ret = zap_vma_ptes(context->hw_bar_info[i].vma,
+ context->hw_bar_info[i].vma->vm_start,
+ PAGE_SIZE);
+ if (ret) {
+ pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
+ BUG_ON(1);
+ }
+
+ /* context going to be destroyed, should not access ops any more */
+ context->hw_bar_info[i].vma->vm_ops = NULL;
+ }
+
+ up_read(&owning_mm->mmap_sem);
+ mmput(owning_mm);
+ put_task_struct(owning_process);
+}
+
+static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
+ struct mlx4_ib_vma_private_data *vma_private_data)
+{
+ vma_private_data->vma = vma;
+ vma->vm_private_data = vma_private_data;
+ vma->vm_ops = &mlx4_ib_vm_ops;
+}
+
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
+ struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
if (vma->vm_pgoff == 0) {
+ /* We prevent double mmaping on same context */
+ if (mucontext->hw_bar_info[HW_BAR_DB].vma)
+ return -EINVAL;
+
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
+
+ mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
+
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
+ /* We prevent double mmaping on same context */
+ if (mucontext->hw_bar_info[HW_BAR_BF].vma)
+ return -EINVAL;
+
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
@@ -751,9 +1102,18 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
+
+ mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
+
} else if (vma->vm_pgoff == 3) {
struct mlx4_clock_params params;
- int ret = mlx4_get_internal_clock_params(dev->dev, &params);
+ int ret;
+
+ /* We prevent double mmaping on same context */
+ if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
+ return -EINVAL;
+
+ ret = mlx4_get_internal_clock_params(dev->dev, &params);
if (ret)
return ret;
@@ -766,6 +1126,9 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
>> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
+
+ mlx4_ib_set_vma_data(vma,
+ &mucontext->hw_bar_info[HW_BAR_CLOCK]);
} else {
return -EINVAL;
}
@@ -1547,272 +1910,6 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
- struct net_device *dev)
-{
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
- if (vlan_id < 0x1000) {
- eui[3] = vlan_id >> 8;
- eui[4] = vlan_id & 0xff;
- } else {
- eui[3] = 0xff;
- eui[4] = 0xfe;
- }
- eui[0] ^= 2;
-}
-
-static void update_gids_task(struct work_struct *work)
-{
- struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
- struct mlx4_cmd_mailbox *mailbox;
- union ib_gid *gids;
- int err;
- struct mlx4_dev *dev = gw->dev->dev;
- int is_bonded = mlx4_is_bonded(dev);
-
- if (!gw->dev->ib_active)
- return;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox)) {
- pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
- return;
- }
-
- gids = mailbox->buf;
- memcpy(gids, gw->gids, sizeof gw->gids);
-
- err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
- if (err)
- pr_warn("set port command failed\n");
- else
- if ((gw->port == 1) || !is_bonded)
- mlx4_ib_dispatch_event(gw->dev,
- is_bonded ? 1 : gw->port,
- IB_EVENT_GID_CHANGE);
-
- mlx4_free_cmd_mailbox(dev, mailbox);
- kfree(gw);
-}
-
-static void reset_gids_task(struct work_struct *work)
-{
- struct update_gid_work *gw =
- container_of(work, struct update_gid_work, work);
- struct mlx4_cmd_mailbox *mailbox;
- union ib_gid *gids;
- int err;
- struct mlx4_dev *dev = gw->dev->dev;
-
- if (!gw->dev->ib_active)
- return;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox)) {
- pr_warn("reset gid table failed\n");
- goto free;
- }
-
- gids = mailbox->buf;
- memcpy(gids, gw->gids, sizeof(gw->gids));
-
- if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
- IB_LINK_LAYER_ETHERNET) {
- err = mlx4_cmd(dev, mailbox->dma,
- MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B,
- MLX4_CMD_WRAPPED);
- if (err)
- pr_warn("set port %d command failed\n", gw->port);
- }
-
- mlx4_free_cmd_mailbox(dev, mailbox);
-free:
- kfree(gw);
-}
-
-static int update_gid_table(struct mlx4_ib_dev *dev, int port,
- union ib_gid *gid, int clear,
- int default_gid)
-{
- struct update_gid_work *work;
- int i;
- int need_update = 0;
- int free = -1;
- int found = -1;
- int max_gids;
-
- if (default_gid) {
- free = 0;
- } else {
- max_gids = dev->dev->caps.gid_table_len[port];
- for (i = 1; i < max_gids; ++i) {
- if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
- sizeof(*gid)))
- found = i;
-
- if (clear) {
- if (found >= 0) {
- need_update = 1;
- dev->iboe.gid_table[port - 1][found] =
- zgid;
- break;
- }
- } else {
- if (found >= 0)
- break;
-
- if (free < 0 &&
- !memcmp(&dev->iboe.gid_table[port - 1][i],
- &zgid, sizeof(*gid)))
- free = i;
- }
- }
- }
-
- if (found == -1 && !clear && free >= 0) {
- dev->iboe.gid_table[port - 1][free] = *gid;
- need_update = 1;
- }
-
- if (!need_update)
- return 0;
-
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
- INIT_WORK(&work->work, update_gids_task);
- work->port = port;
- work->dev = dev;
- queue_work(wq, &work->work);
-
- return 0;
-}
-
-static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid)
-{
- gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-
-static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
-{
- struct update_gid_work *work;
-
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
- memset(work->gids, 0, sizeof(work->gids));
- INIT_WORK(&work->work, reset_gids_task);
- work->dev = dev;
- work->port = port;
- queue_work(wq, &work->work);
- return 0;
-}
-
-static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
- struct mlx4_ib_dev *ibdev, union ib_gid *gid)
-{
- struct mlx4_ib_iboe *iboe;
- int port = 0;
- struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
- rdma_vlan_dev_real_dev(event_netdev) :
- event_netdev;
- union ib_gid default_gid;
-
- mlx4_make_default_gid(real_dev, &default_gid);
-
- if (!memcmp(gid, &default_gid, sizeof(*gid)))
- return 0;
-
- if (event != NETDEV_DOWN && event != NETDEV_UP)
- return 0;
-
- if ((real_dev != event_netdev) &&
- (event == NETDEV_DOWN) &&
- rdma_link_local_addr((struct in6_addr *)gid))
- return 0;
-
- iboe = &ibdev->iboe;
- spin_lock_bh(&iboe->lock);
-
- for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
- if ((netif_is_bond_master(real_dev) &&
- (real_dev == iboe->masters[port - 1])) ||
- (!netif_is_bond_master(real_dev) &&
- (real_dev == iboe->netdevs[port - 1])))
- update_gid_table(ibdev, port, gid,
- event == NETDEV_DOWN, 0);
-
- spin_unlock_bh(&iboe->lock);
- return 0;
-
-}
-
-static u8 mlx4_ib_get_dev_port(struct net_device *dev,
- struct mlx4_ib_dev *ibdev)
-{
- u8 port = 0;
- struct mlx4_ib_iboe *iboe;
- struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
- rdma_vlan_dev_real_dev(dev) : dev;
-
- iboe = &ibdev->iboe;
-
- for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
- if ((netif_is_bond_master(real_dev) &&
- (real_dev == iboe->masters[port - 1])) ||
- (!netif_is_bond_master(real_dev) &&
- (real_dev == iboe->netdevs[port - 1])))
- break;
-
- if ((port == 0) || (port > ibdev->dev->caps.num_ports))
- return 0;
- else
- return port;
-}
-
-static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct mlx4_ib_dev *ibdev;
- struct in_ifaddr *ifa = ptr;
- union ib_gid gid;
- struct net_device *event_netdev = ifa->ifa_dev->dev;
-
- ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
-
- mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
- return NOTIFY_DONE;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct mlx4_ib_dev *ibdev;
- struct inet6_ifaddr *ifa = ptr;
- union ib_gid *gid = (union ib_gid *)&ifa->addr;
- struct net_device *event_netdev = ifa->idev->dev;
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
-
- mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
- return NOTIFY_DONE;
-}
-#endif
-
#define MLX4_IB_INVALID_MAC ((u64)-1)
static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
@@ -1871,94 +1968,6 @@ unlock:
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
-static void mlx4_ib_get_dev_addr(struct net_device *dev,
- struct mlx4_ib_dev *ibdev, u8 port)
-{
- struct in_device *in_dev;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_dev *in6_dev;
- union ib_gid *pgid;
- struct inet6_ifaddr *ifp;
- union ib_gid default_gid;
-#endif
- union ib_gid gid;
-
-
- if ((port == 0) || (port > ibdev->dev->caps.num_ports))
- return;
-
- /* IPv4 gids */
- in_dev = in_dev_get(dev);
- if (in_dev) {
- for_ifa(in_dev) {
- /*ifa->ifa_address;*/
- ipv6_addr_set_v4mapped(ifa->ifa_address,
- (struct in6_addr *)&gid);
- update_gid_table(ibdev, port, &gid, 0, 0);
- }
- endfor_ifa(in_dev);
- in_dev_put(in_dev);
- }
-#if IS_ENABLED(CONFIG_IPV6)
- mlx4_make_default_gid(dev, &default_gid);
- /* IPv6 gids */
- in6_dev = in6_dev_get(dev);
- if (in6_dev) {
- read_lock_bh(&in6_dev->lock);
- list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
- pgid = (union ib_gid *)&ifp->addr;
- if (!memcmp(pgid, &default_gid, sizeof(*pgid)))
- continue;
- update_gid_table(ibdev, port, pgid, 0, 0);
- }
- read_unlock_bh(&in6_dev->lock);
- in6_dev_put(in6_dev);
- }
-#endif
-}
-
-static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
- struct net_device *dev, u8 port)
-{
- union ib_gid gid;
- mlx4_make_default_gid(dev, &gid);
- update_gid_table(ibdev, port, &gid, 0, 1);
-}
-
-static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
-{
- struct net_device *dev;
- struct mlx4_ib_iboe *iboe = &ibdev->iboe;
- int i;
- int err = 0;
-
- for (i = 1; i <= ibdev->num_ports; ++i) {
- if (rdma_port_get_link_layer(&ibdev->ib_dev, i) ==
- IB_LINK_LAYER_ETHERNET) {
- err = reset_gid_table(ibdev, i);
- if (err)
- goto out;
- }
- }
-
- read_lock(&dev_base_lock);
- spin_lock_bh(&iboe->lock);
-
- for_each_netdev(&init_net, dev) {
- u8 port = mlx4_ib_get_dev_port(dev, ibdev);
- /* port will be non-zero only for ETH ports */
- if (port) {
- mlx4_ib_set_default_gid(ibdev, dev, port);
- mlx4_ib_get_dev_addr(dev, ibdev, port);
- }
- }
-
- spin_unlock_bh(&iboe->lock);
- read_unlock(&dev_base_lock);
-out:
- return err;
-}
-
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
unsigned long event)
@@ -1968,81 +1977,22 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
int update_qps_port = -1;
int port;
+ ASSERT_RTNL();
+
iboe = &ibdev->iboe;
spin_lock_bh(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- enum ib_port_state port_state = IB_PORT_NOP;
- struct net_device *old_master = iboe->masters[port - 1];
- struct net_device *curr_netdev;
- struct net_device *curr_master;
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
- if (iboe->netdevs[port - 1])
- mlx4_ib_set_default_gid(ibdev,
- iboe->netdevs[port - 1], port);
- curr_netdev = iboe->netdevs[port - 1];
-
- if (iboe->netdevs[port - 1] &&
- netif_is_bond_slave(iboe->netdevs[port - 1])) {
- iboe->masters[port - 1] = netdev_master_upper_dev_get(
- iboe->netdevs[port - 1]);
- } else {
- iboe->masters[port - 1] = NULL;
- }
- curr_master = iboe->masters[port - 1];
if (dev == iboe->netdevs[port - 1] &&
(event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
event == NETDEV_UP || event == NETDEV_CHANGE))
update_qps_port = port;
- if (curr_netdev) {
- port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
- IB_PORT_ACTIVE : IB_PORT_DOWN;
- mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
- if (curr_master) {
- /* if using bonding/team and a slave port is down, we
- * don't want the bond IP based gids in the table since
- * flows that select port by gid may get the down port.
- */
- if (port_state == IB_PORT_DOWN &&
- !mlx4_is_bonded(ibdev->dev)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev,
- curr_netdev,
- port);
- } else {
- /* gids from the upper dev (bond/team)
- * should appear in port's gid table
- */
- mlx4_ib_get_dev_addr(curr_master,
- ibdev, port);
- }
- }
- /* if bonding is used it is possible that we add it to
- * masters only after IP address is assigned to the
- * net bonding interface.
- */
- if (curr_master && (old_master != curr_master)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev,
- curr_netdev, port);
- mlx4_ib_get_dev_addr(curr_master, ibdev, port);
- }
-
- if (!curr_master && (old_master != curr_master)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev,
- curr_netdev, port);
- mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
- }
- } else {
- reset_gid_table(ibdev, port);
- }
}
-
spin_unlock_bh(&iboe->lock);
if (update_qps_port > 0)
@@ -2225,6 +2175,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
+ ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
+ ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
+ ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2293,13 +2246,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
- ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+ ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
+ ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
@@ -2435,26 +2389,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
- if (!iboe->nb_inet.notifier_call) {
- iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
- err = register_inetaddr_notifier(&iboe->nb_inet);
- if (err) {
- iboe->nb_inet.notifier_call = NULL;
- goto err_notif;
- }
- }
-#if IS_ENABLED(CONFIG_IPV6)
- if (!iboe->nb_inet6.notifier_call) {
- iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
- err = register_inet6addr_notifier(&iboe->nb_inet6);
- if (err) {
- iboe->nb_inet6.notifier_call = NULL;
- goto err_notif;
- }
- }
-#endif
- if (mlx4_ib_init_gid_table(ibdev))
- goto err_notif;
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -2485,18 +2419,6 @@ err_notif:
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
- if (ibdev->iboe.nb_inet.notifier_call) {
- if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet.notifier_call = NULL;
- }
-#if IS_ENABLED(CONFIG_IPV6)
- if (ibdev->iboe.nb_inet6.notifier_call) {
- if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet6.notifier_call = NULL;
- }
-#endif
flush_workqueue(wq);
mlx4_ib_close_sriov(ibdev);
@@ -2622,19 +2544,6 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
kfree(ibdev->ib_uc_qpns_bitmap);
}
- if (ibdev->iboe.nb_inet.notifier_call) {
- if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet.notifier_call = NULL;
- }
-#if IS_ENABLED(CONFIG_IPV6)
- if (ibdev->iboe.nb_inet6.notifier_call) {
- if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet6.notifier_call = NULL;
- }
-#endif
-
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
if (ibdev->counters[p].index != -1 &&