diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-15 20:38:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-15 20:38:15 -0700 |
commit | 2a3c389a0fde49b241430df806a34276568cfb29 (patch) | |
tree | 9cf35829317e8cc2aaffc4341fb824dad63fce02 /include | |
parent | 8de262531f5fbb7458463224a7587429800c24bf (diff) | |
parent | 0b043644c0ca601cb19943a81aa1f1455dbe9461 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A smaller cycle this time. Notably we see another new driver, 'Soft
iWarp', and the deletion of an ancient unused driver for nes.
- Revise and simplify the signature offload RDMA MR APIs
- More progress on hoisting object allocation boiler plate code out
of the drivers
- Driver bug fixes and revisions for hns, hfi1, efa, cxgb4, qib,
i40iw
- Tree wide cleanups: struct_size, put_user_page, xarray, rst doc
conversion
- Removal of obsolete ib_ucm chardev and nes driver
- netlink based discovery of chardevs and autoloading of the modules
providing them
- Move more of the rdamvt/hfi1 uapi to include/uapi/rdma
- New driver 'siw' for software based iWarp running on top of netdev,
much like rxe's software RoCE.
- mlx5 feature to report events in their raw devx format to userspace
- Expose per-object counters through rdma tool
- Adaptive interrupt moderation for RDMA (DIM), sharing the DIM core
from netdev"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (194 commits)
RMDA/siw: Require a 64 bit arch
RDMA/siw: Mark expected switch fall-throughs
RDMA/core: Fix -Wunused-const-variable warnings
rdma/siw: Remove set but not used variable 's'
rdma/siw: Add missing dependencies on LIBCRC32C and DMA_VIRT_OPS
RDMA/siw: Add missing rtnl_lock around access to ifa
rdma/siw: Use proper enumerated type in map_cqe_status
RDMA/siw: Remove unnecessary kthread create/destroy printouts
IB/rdmavt: Fix variable shadowing issue in rvt_create_cq
RDMA/core: Fix race when resolving IP address
RDMA/core: Make rdma_counter.h compile stand alone
IB/core: Work on the caller socket net namespace in nldev_newlink()
RDMA/rxe: Fill in wc byte_len with IB_WC_RECV_RDMA_WITH_IMM
RDMA/mlx5: Set RDMA DIM to be enabled by default
RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink
RDMA/core: Provide RDMA DIM support for ULPs
linux/dim: Implement RDMA adaptive moderation (DIM)
IB/mlx5: Report correctly tag matching rendezvous capability
docs: infiniband: add it to the driver-api bookset
IB/mlx5: Implement VHCA tunnel mechanism in DEVX
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/dim.h | 23 | ||||
-rw-r--r-- | include/linux/mlx5/mlx5_ifc.h | 6 | ||||
-rw-r--r-- | include/linux/mlx5/qp.h | 4 | ||||
-rw-r--r-- | include/rdma/ib_umem.h | 19 | ||||
-rw-r--r-- | include/rdma/ib_umem_odp.h | 20 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 247 | ||||
-rw-r--r-- | include/rdma/mr_pool.h | 2 | ||||
-rw-r--r-- | include/rdma/rdma_counter.h | 65 | ||||
-rw-r--r-- | include/rdma/rdma_netlink.h | 8 | ||||
-rw-r--r-- | include/rdma/rdma_vt.h | 5 | ||||
-rw-r--r-- | include/rdma/rdmavt_cq.h | 25 | ||||
-rw-r--r-- | include/rdma/rdmavt_qp.h | 312 | ||||
-rw-r--r-- | include/rdma/restrack.h | 9 | ||||
-rw-r--r-- | include/rdma/rw.h | 9 | ||||
-rw-r--r-- | include/rdma/signature.h | 122 | ||||
-rw-r--r-- | include/uapi/rdma/ib_user_cm.h | 326 | ||||
-rw-r--r-- | include/uapi/rdma/mlx5_user_ioctl_cmds.h | 19 | ||||
-rw-r--r-- | include/uapi/rdma/mlx5_user_ioctl_verbs.h | 9 | ||||
-rw-r--r-- | include/uapi/rdma/rdma_netlink.h | 86 | ||||
-rw-r--r-- | include/uapi/rdma/rdma_user_ioctl_cmds.h | 1 | ||||
-rw-r--r-- | include/uapi/rdma/rvt-abi.h | 66 | ||||
-rw-r--r-- | include/uapi/rdma/siw-abi.h | 185 |
22 files changed, 980 insertions, 588 deletions
diff --git a/include/linux/dim.h b/include/linux/dim.h index aa9bdd47a648..d3a0fbfff2bb 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -82,6 +82,7 @@ struct dim_stats { * @prev_stats: Measured rates from previous iteration (for comparison) * @start_sample: Sampled data at start of current iteration * @work: Work to perform on action required + * @priv: A pointer to the struct that points to dim * @profile_ix: Current moderation profile * @mode: CQ period count mode * @tune_state: Algorithm tuning state (see below) @@ -95,6 +96,7 @@ struct dim { struct dim_sample start_sample; struct dim_sample measuring_sample; struct work_struct work; + void *priv; u8 profile_ix; u8 mode; u8 tune_state; @@ -363,4 +365,25 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); */ void net_dim(struct dim *dim, struct dim_sample end_sample); +/* RDMA DIM */ + +/* + * RDMA DIM profile: + * profile size must be of RDMA_DIM_PARAMS_NUM_PROFILES. + */ +#define RDMA_DIM_PARAMS_NUM_PROFILES 9 +#define RDMA_DIM_START_PROFILE 0 + +/** + * rdma_dim - Runs the adaptive moderation. + * @dim: The moderation struct. + * @completions: The number of completions collected in this round. + * + * Each call to rdma_dim takes the latest amount of completions that + * have been collected and counts them as a new event. + * Once enough events have been collected the algorithm decides a new + * moderation level. + */ +void rdma_dim(struct dim *dim, u64 completions); + #endif /* DIM_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 515624c66ce1..b3d5752657d9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1391,7 +1391,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_6c8[0x28]; u8 sf_base_id[0x10]; - u8 reserved_at_700[0x100]; + u8 reserved_at_700[0x80]; + u8 vhca_tunnel_commands[0x40]; + u8 reserved_at_7c0[0x40]; }; enum mlx5_flow_destination_type { @@ -9695,7 +9697,7 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 opcode[0x10]; u8 uid[0x10]; - u8 reserved_at_20[0x10]; + u8 vhca_tunnel_id[0x10]; u8 obj_type[0x10]; u8 obj_id[0x20]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 127d224443e3..ae63b1ae9004 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -37,7 +37,8 @@ #include <linux/mlx5/driver.h> #define MLX5_INVALID_LKEY 0x100 -#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) +/* UMR (3 WQE_BB's) + SIG (3 WQE_BB's) + PSV (mem) + PSV (wire) */ +#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 8) #define MLX5_DIF_SIZE 8 #define MLX5_STRIDE_BLOCK_OP 0x400 #define MLX5_CPY_GRD_MASK 0xc0 @@ -70,6 +71,7 @@ enum mlx5_qp_optpar { MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, MLX5_QP_OPTPAR_DC_HS = 1 << 20, MLX5_QP_OPTPAR_DC_KEY = 1 << 21, + MLX5_QP_OPTPAR_COUNTER_SET_ID = 1 << 25, }; enum mlx5_qp_state { diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 040d853077c6..1052d0d62be7 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -46,7 +46,6 @@ struct ib_umem { struct mm_struct *owning_mm; size_t length; unsigned long address; - int page_shift; u32 writable : 1; u32 is_odp : 1; struct work_struct work; @@ -58,24 +57,14 @@ struct ib_umem { /* Returns the offset of the umem start relative to the first page. */ static inline int ib_umem_offset(struct ib_umem *umem) { - return umem->address & (BIT(umem->page_shift) - 1); -} - -/* Returns the first page of an ODP umem. */ -static inline unsigned long ib_umem_start(struct ib_umem *umem) -{ - return umem->address - ib_umem_offset(umem); -} - -/* Returns the address of the page after the last one of an ODP umem. */ -static inline unsigned long ib_umem_end(struct ib_umem *umem) -{ - return ALIGN(umem->address + umem->length, BIT(umem->page_shift)); + return umem->address & ~PAGE_MASK; } static inline size_t ib_umem_num_pages(struct ib_umem *umem) { - return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift; + return (ALIGN(umem->address + umem->length, PAGE_SIZE) - + ALIGN_DOWN(umem->address, PAGE_SIZE)) >> + PAGE_SHIFT; } #ifdef CONFIG_INFINIBAND_USER_MEM diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index eeec4e53c448..479db5c98ff6 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -76,6 +76,7 @@ struct ib_umem_odp { struct completion notifier_completion; int dying; + unsigned int page_shift; struct work_struct work; }; @@ -84,6 +85,25 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) return container_of(umem, struct ib_umem_odp, umem); } +/* Returns the first page of an ODP umem. */ +static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp) +{ + return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift); +} + +/* Returns the address of the page after the last one of an ODP umem. */ +static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp) +{ + return ALIGN(umem_odp->umem.address + umem_odp->umem.length, + 1UL << umem_odp->page_shift); +} + +static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) +{ + return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >> + umem_odp->page_shift; +} + /* * The lower 2 bits of the DMA address signal the R/W permissions for * the entry. To upgrade the permissions, provide the appropriate diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 54873085f2da..c5f8a9f17063 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -61,8 +61,11 @@ #include <linux/cgroup_rdma.h> #include <linux/irqflags.h> #include <linux/preempt.h> +#include <linux/dim.h> #include <uapi/rdma/ib_user_verbs.h> +#include <rdma/rdma_counter.h> #include <rdma/restrack.h> +#include <rdma/signature.h> #include <uapi/rdma/rdma_user_ioctl.h> #include <uapi/rdma/ib_user_ioctl_verbs.h> @@ -132,17 +135,6 @@ struct ib_gid_attr { u8 port_num; }; -enum rdma_node_type { - /* IB values map to NodeInfo:NodeType. */ - RDMA_NODE_IB_CA = 1, - RDMA_NODE_IB_SWITCH, - RDMA_NODE_IB_ROUTER, - RDMA_NODE_RNIC, - RDMA_NODE_USNIC, - RDMA_NODE_USNIC_UDP, - RDMA_NODE_UNSPECIFIED, -}; - enum { /* set the local administered indication */ IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2, @@ -164,7 +156,7 @@ enum rdma_protocol_type { }; __attribute_const__ enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type); +rdma_node_get_transport(unsigned int node_type); enum rdma_network_type { RDMA_NETWORK_IB, @@ -263,7 +255,7 @@ enum ib_device_cap_flags { */ IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), - IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), + IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), @@ -275,17 +267,6 @@ enum ib_device_cap_flags { IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), }; -enum ib_signature_prot_cap { - IB_PROT_T10DIF_TYPE_1 = 1, - IB_PROT_T10DIF_TYPE_2 = 1 << 1, - IB_PROT_T10DIF_TYPE_3 = 1 << 2, -}; - -enum ib_signature_guard_cap { - IB_GUARD_T10DIF_CRC = 1, - IB_GUARD_T10DIF_CSUM = 1 << 1, -}; - enum ib_atomic_cap { IB_ATOMIC_NONE, IB_ATOMIC_HCA, @@ -327,8 +308,8 @@ struct ib_rss_caps { }; enum ib_tm_cap_flags { - /* Support tag matching on RC transport */ - IB_TM_CAP_RC = 1 << 0, + /* Support tag matching with rendezvous offload for RC transport */ + IB_TM_CAP_RNDV_RC = 1 << 0, }; struct ib_tm_caps { @@ -411,6 +392,7 @@ struct ib_device_attr { int max_srq_wr; int max_srq_sge; unsigned int max_fast_reg_page_list_len; + unsigned int max_pi_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; int sig_prot_cap; @@ -796,118 +778,26 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * enum ib_mr_type - memory region type * @IB_MR_TYPE_MEM_REG: memory region that is used for * normal registration - * @IB_MR_TYPE_SIGNATURE: memory region that is used for - * signature operations (data-integrity - * capable regions) * @IB_MR_TYPE_SG_GAPS: memory region that is capable to * register any arbitrary sg lists (without * the normal mr constraints - see * ib_map_mr_sg) + * @IB_MR_TYPE_DM: memory region that is used for device + * memory registration + * @IB_MR_TYPE_USER: memory region that is used for the user-space + * application + * @IB_MR_TYPE_DMA: memory region that is used for DMA operations + * without address translations (VA=PA) + * @IB_MR_TYPE_INTEGRITY: memory region that is used for + * data integrity operations */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, - IB_MR_TYPE_SIGNATURE, IB_MR_TYPE_SG_GAPS, -}; - -/** - * Signature types - * IB_SIG_TYPE_NONE: Unprotected. - * IB_SIG_TYPE_T10_DIF: Type T10-DIF - */ -enum ib_signature_type { - IB_SIG_TYPE_NONE, - IB_SIG_TYPE_T10_DIF, -}; - -/** - * Signature T10-DIF block-guard types - * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. - * IB_T10DIF_CSUM: Corresponds to IP checksum rules. - */ -enum ib_t10_dif_bg_type { - IB_T10DIF_CRC, - IB_T10DIF_CSUM -}; - -/** - * struct ib_t10_dif_domain - Parameters specific for T10-DIF - * domain. - * @bg_type: T10-DIF block guard type (CRC|CSUM) - * @pi_interval: protection information interval. - * @bg: seed of guard computation. - * @app_tag: application tag of guard block - * @ref_tag: initial guard block reference tag. - * @ref_remap: Indicate wethear the reftag increments each block - * @app_escape: Indicate to skip block check if apptag=0xffff - * @ref_escape: Indicate to skip block check if reftag=0xffffffff - * @apptag_check_mask: check bitmask of application tag. - */ -struct ib_t10_dif_domain { - enum ib_t10_dif_bg_type bg_type; - u16 pi_interval; - u16 bg; - u16 app_tag; - u32 ref_tag; - bool ref_remap; - bool app_escape; - bool ref_escape; - u16 apptag_check_mask; -}; - -/** - * struct ib_sig_domain - Parameters for signature domain - * @sig_type: specific signauture type - * @sig: union of all signature domain attributes that may - * be used to set domain layout. - */ -struct ib_sig_domain { - enum ib_signature_type sig_type; - union { - struct ib_t10_dif_domain dif; - } sig; -}; - -/** - * struct ib_sig_attrs - Parameters for signature handover operation - * @check_mask: bitmask for signature byte check (8 bytes) - * @mem: memory domain layout desciptor. - * @wire: wire domain layout desciptor. - */ -struct ib_sig_attrs { - u8 check_mask; - struct ib_sig_domain mem; - struct ib_sig_domain wire; -}; - -enum ib_sig_err_type { - IB_SIG_BAD_GUARD, - IB_SIG_BAD_REFTAG, - IB_SIG_BAD_APPTAG, -}; - -/** - * Signature check masks (8 bytes in total) according to the T10-PI standard: - * -------- -------- ------------ - * | GUARD | APPTAG | REFTAG | - * | 2B | 2B | 4B | - * -------- -------- ------------ - */ -enum { - IB_SIG_CHECK_GUARD = 0xc0, - IB_SIG_CHECK_APPTAG = 0x30, - IB_SIG_CHECK_REFTAG = 0x0f, -}; - -/** - * struct ib_sig_err - signature error descriptor - */ -struct ib_sig_err { - enum ib_sig_err_type err_type; - u32 expected; - u32 actual; - u64 sig_err_offset; - u32 key; + IB_MR_TYPE_DM, + IB_MR_TYPE_USER, + IB_MR_TYPE_DMA, + IB_MR_TYPE_INTEGRITY, }; enum ib_mr_status_check { @@ -1164,7 +1054,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_MANAGED_SEND = 1 << 3, IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, - IB_QP_CREATE_SIGNATURE_EN = 1 << 6, + IB_QP_CREATE_INTEGRITY_EN = 1 << 6, /* FREE = 1 << 7, */ IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, @@ -1343,7 +1233,7 @@ enum ib_wr_opcode { /* These are kernel only and can not be issued by userspace */ IB_WR_REG_MR = 0x20, - IB_WR_REG_SIG_MR, + IB_WR_REG_MR_INTEGRITY, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. @@ -1453,20 +1343,6 @@ static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr) return container_of(wr, struct ib_reg_wr, wr); } -struct ib_sig_handover_wr { - struct ib_send_wr wr; - struct ib_sig_attrs *sig_attrs; - struct ib_mr *sig_mr; - int access_flags; - struct ib_sge *prot; -}; - -static inline const struct ib_sig_handover_wr * -sig_handover_wr(const struct ib_send_wr *wr) -{ - return container_of(wr, struct ib_sig_handover_wr, wr); -} - struct ib_recv_wr { struct ib_recv_wr *next; union { @@ -1634,6 +1510,7 @@ struct ib_cq { struct work_struct work; }; struct workqueue_struct *comp_wq; + struct dim *dim; /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -1818,10 +1695,14 @@ struct ib_qp { struct ib_qp_security *qp_sec; u8 port; + bool integrity_en; /* * Implementation details of the RDMA core, don't use in drivers: */ struct rdma_restrack_entry res; + + /* The counter the qp is bind to */ + struct rdma_counter *counter; }; struct ib_dm { @@ -1840,6 +1721,7 @@ struct ib_mr { u64 iova; u64 length; unsigned int page_size; + enum ib_mr_type type; bool need_inval; union { struct ib_uobject *uobject; /* user */ @@ -1847,7 +1729,7 @@ struct ib_mr { }; struct ib_dm *dm; - + struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */ /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -2243,6 +2125,8 @@ struct ib_port_data { spinlock_t netdev_lock; struct net_device __rcu *netdev; struct hlist_node ndev_hash_link; + struct rdma_port_counter port_counter; + struct rdma_hw_stats *hw_stats; }; /* rdma netdev type - specifies protocol type */ @@ -2329,6 +2213,11 @@ struct iw_cm_conn_param; * need to define the supported operations, otherwise they will be set to null. */ struct ib_device_ops { + struct module *owner; + enum rdma_driver_id driver_id; + u32 uverbs_abi_ver; + unsigned int uverbs_no_driver_id_binding:1; + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, @@ -2454,11 +2343,10 @@ struct ib_device_ops { int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); - struct ib_cq *(*create_cq)(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); - int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); + void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, @@ -2470,6 +2358,9 @@ struct ib_device_ops { int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); + struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg); int (*advise_mr)(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, @@ -2516,7 +2407,7 @@ struct ib_device_ops { struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); - int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); + void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *(*create_rwq_ind_table)( @@ -2538,6 +2429,11 @@ struct ib_device_ops { int (*read_counters)(struct ib_counters *counters, struct ib_counters_read_attr *counters_read_attr, struct uverbs_attr_bundle *attrs); + int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset); + /** * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the * driver initialized data. The struct is kfree()'ed by the sysfs @@ -2595,8 +2491,34 @@ struct ib_device_ops { u8 pdata_len); int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog); int (*iw_destroy_listen)(struct iw_cm_id *cm_id); + /** + * counter_bind_qp - Bind a QP to a counter. + * @counter - The counter to be bound. If counter->id is zero then + * the driver needs to allocate a new counter and set counter->id + */ + int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp); + /** + * counter_unbind_qp - Unbind the qp from the dynamically-allocated + * counter and bind it onto the default one + */ + int (*counter_unbind_qp)(struct ib_qp *qp); + /** + * counter_dealloc -De-allocate the hw counter + */ + int (*counter_dealloc)(struct rdma_counter *counter); + /** + * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in + * the driver initialized data. + */ + struct rdma_hw_stats *(*counter_alloc_stats)( + struct rdma_counter *counter); + /** + * counter_update_stats - Query the stats value of this counter + */ + int (*counter_update_stats)(struct rdma_counter *counter); DECLARE_RDMA_OBJ_SIZE(ib_ah); + DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_pd); DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); @@ -2636,7 +2558,6 @@ struct ib_device { int num_comp_vectors; - struct module *owner; union { struct device dev; struct ib_core_device coredev; @@ -2648,7 +2569,6 @@ struct ib_device { */ const struct attribute_group *groups[3]; - int uverbs_abi_ver; u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; @@ -2658,6 +2578,8 @@ struct ib_device { u16 is_switch:1; /* Indicates kernel verbs support, should not be used in drivers */ u16 kverbs_provider:1; + /* CQ adaptive moderation (RDMA DIM) */ + u16 use_cq_dim:1; u8 node_type; u8 phys_port_cnt; struct ib_device_attr attrs; @@ -2672,7 +2594,6 @@ struct ib_device { struct rdma_restrack_root *res; const struct uapi_definition *driver_def; - enum rdma_driver_id driver_id; /* * Positive refcount indicates that the device is currently @@ -2694,11 +2615,15 @@ struct ib_device { u32 iw_driver_flags; }; +struct ib_client_nl_info; struct ib_client { const char *name; void (*add) (struct ib_device *); void (*remove)(struct ib_device *, void *client_data); void (*rename)(struct ib_device *dev, void *client_data); + int (*get_nl_info)(struct ib_device *ibdev, void *client_data, + struct ib_client_nl_info *res); + int (*get_global_nl_info)(struct ib_client_nl_info *res); /* Returns the net_dev belonging to this ib_client and matching the * given parameters. @@ -3859,9 +3784,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata); * * NOTE: for user cq use ib_destroy_cq_user with valid udata! */ -static inline int ib_destroy_cq(struct ib_cq *cq) +static inline void ib_destroy_cq(struct ib_cq *cq) { - return ib_destroy_cq_user(cq, NULL); + ib_destroy_cq_user(cq, NULL); } /** @@ -4148,6 +4073,10 @@ static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); } +struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg); + /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR * R_Key and L_Key. @@ -4332,6 +4261,10 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); +int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset, unsigned int page_size); static inline int ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, diff --git a/include/rdma/mr_pool.h b/include/rdma/mr_pool.h index 83763ef82354..e77123bcb43b 100644 --- a/include/rdma/mr_pool.h +++ b/include/rdma/mr_pool.h @@ -11,7 +11,7 @@ struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list); void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr); int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, - enum ib_mr_type type, u32 max_num_sg); + enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg); void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list); #endif /* _RDMA_MR_POOL_H */ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h new file mode 100644 index 000000000000..eb99856e8b30 --- /dev/null +++ b/include/rdma/rdma_counter.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2019 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_COUNTER_H_ +#define _RDMA_COUNTER_H_ + +#include <linux/mutex.h> +#include <linux/pid_namespace.h> + +#include <rdma/restrack.h> +#include <rdma/rdma_netlink.h> + +struct ib_device; +struct ib_qp; + +struct auto_mode_param { + int qp_type; +}; + +struct rdma_counter_mode { + enum rdma_nl_counter_mode mode; + enum rdma_nl_counter_mask mask; + struct auto_mode_param param; +}; + +struct rdma_port_counter { + struct rdma_counter_mode mode; + struct rdma_hw_stats *hstats; + unsigned int num_counters; + struct mutex lock; +}; + +struct rdma_counter { + struct rdma_restrack_entry res; + struct ib_device *device; + uint32_t id; + struct kref kref; + struct rdma_counter_mode mode; + struct mutex lock; + struct rdma_hw_stats *stats; + u8 port; +}; + +void rdma_counter_init(struct ib_device *dev); +void rdma_counter_release(struct ib_device *dev); +int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, + bool on, enum rdma_nl_counter_mask mask); +int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port); +int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); + +int rdma_counter_query_stats(struct rdma_counter *counter); +u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index); +int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, + u32 qp_num, u32 counter_id); +int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, + u32 qp_num, u32 *counter_id); +int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, + u32 qp_num, u32 counter_id); +int rdma_counter_get_mode(struct ib_device *dev, u8 port, + enum rdma_nl_counter_mode *mode, + enum rdma_nl_counter_mask *mask); + +#endif /* _RDMA_COUNTER_H_ */ diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 10732ab31ba2..6631624e4d7c 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -6,6 +6,12 @@ #include <linux/netlink.h> #include <uapi/rdma/rdma_netlink.h> +enum { + RDMA_NLDEV_ATTR_EMPTY_STRING = 1, + RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, + RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32, +}; + struct rdma_nl_cbs { int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); @@ -110,4 +116,6 @@ void rdma_link_register(struct rdma_link_ops *ops); void rdma_link_unregister(struct rdma_link_ops *ops); #define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type) +#define MODULE_ALIAS_RDMA_CLIENT(type) MODULE_ALIAS("rdma-client-" type) + #endif /* _RDMA_NETLINK_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index b9cd06db1a71..525848e227dc 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -2,7 +2,7 @@ #define DEF_RDMA_VT_H /* - * Copyright(c) 2016 - 2018 Intel Corporation. + * Copyright(c) 2016 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -202,7 +202,6 @@ struct rvt_pd { struct rvt_ah { struct ib_ah ibah; struct rdma_ah_attr attr; - atomic_t refcount; u8 vl; u8 log_pmtu; }; @@ -555,7 +554,7 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); -int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id); +int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h index 75dc65c0bfb8..04c519ef6d71 100644 --- a/include/rdma/rdmavt_cq.h +++ b/include/rdma/rdmavt_cq.h @@ -61,18 +61,27 @@ #define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1) /* + * Define read macro that apply smp_load_acquire memory barrier + * when reading indice of circular buffer that mmaped to user space. + */ +#define RDMA_READ_UAPI_ATOMIC(member) smp_load_acquire(&(member).val) + +/* + * Define write macro that uses smp_store_release memory barrier + * when writing indice of circular buffer that mmaped to user space. + */ +#define RDMA_WRITE_UAPI_ATOMIC(member, x) smp_store_release(&(member).val, x) +#include <rdma/rvt-abi.h> + +/* * This structure is used to contain the head pointer, tail pointer, * and completion queue entries as a single memory allocation so * it can be mmap'ed into user space. */ -struct rvt_cq_wc { +struct rvt_k_cq_wc { u32 head; /* index of next entry to fill */ u32 tail; /* index of next ib_poll_cq() entry */ - union { - /* these are actually size ibcq.cqe + 1 */ - struct ib_uverbs_wc uqueue[0]; - struct ib_wc kqueue[0]; - }; + struct ib_wc kqueue[]; }; /* @@ -84,10 +93,12 @@ struct rvt_cq { spinlock_t lock; /* protect changes in this struct */ u8 notify; u8 triggered; + u8 cq_full; int comp_vector_cpu; struct rvt_dev_info *rdi; struct rvt_cq_wc *queue; struct rvt_mmap_info *ip; + struct rvt_k_cq_wc *kqueue; }; static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq) @@ -95,6 +106,6 @@ static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq) return container_of(ibcq, struct rvt_cq, ibcq); } -void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited); +bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited); #endif /* DEF_RDMAVT_INCCQH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 68e38c20afc0..0eeea520a853 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2016 - 2018 Intel Corporation. + * Copyright(c) 2016 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -52,6 +52,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_verbs.h> #include <rdma/rdmavt_cq.h> +#include <rdma/rvt-abi.h> /* * Atomic bit definitions for r_aflags. */ @@ -156,6 +157,22 @@ #define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START #define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1) +/** + * rvt_ud_wr - IB UD work plus AH cache + * @wr: valid IB work request + * @attr: pointer to an allocated AH attribute + * + * Special case the UD WR so we can keep track of the AH attributes. + * + * NOTE: This data structure is stricly ordered wr then attr. I.e the attr + * MUST come after wr. The ib_ud_wr is sized and copied in rvt_post_one_wr. + * The copy assumes that wr is first. + */ +struct rvt_ud_wr { + struct ib_ud_wr wr; + struct rdma_ah_attr *attr; +}; + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -164,7 +181,7 @@ struct rvt_swqe { union { struct ib_send_wr wr; /* don't use wr.sg_list */ - struct ib_ud_wr ud_wr; + struct rvt_ud_wr ud_wr; struct ib_reg_wr reg_wr; struct ib_rdma_wr rdma_wr; struct ib_atomic_wr atomic_wr; @@ -177,33 +194,84 @@ struct rvt_swqe { struct rvt_sge sg_list[0]; }; -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). +/** + * struct rvt_krwq - kernel struct receive work request + * @p_lock: lock to protect producer of the kernel buffer + * @head: index of next entry to fill + * @c_lock:lock to protect consumer of the kernel buffer + * @tail: index of next entry to pull + * @count: count is aproximate of total receive enteries posted + * @rvt_rwqe: struct of receive work request queue entry + * + * This structure is used to contain the head pointer, + * tail pointer and receive work queue entries for kernel + * mode user. */ -struct rvt_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct rvt_rwq { +struct rvt_krwq { + spinlock_t p_lock; /* protect producer */ u32 head; /* new work requests posted to the head */ + + /* protect consumer */ + spinlock_t c_lock ____cacheline_aligned_in_smp; u32 tail; /* receives pull requests from here. */ - struct rvt_rwqe wq[0]; + u32 count; /* approx count of receive entries posted */ + struct rvt_rwqe *curr_wq; + struct rvt_rwqe wq[]; }; +/* + * rvt_get_swqe_ah - Return the pointer to the struct rvt_ah + * @swqe: valid Send WQE + * + */ +static inline struct rvt_ah *rvt_get_swqe_ah(struct rvt_swqe *swqe) +{ + return ibah_to_rvtah(swqe->ud_wr.wr.ah); +} + +/** + * rvt_get_swqe_ah_attr - Return the cached ah attribute information + * @swqe: valid Send WQE + * + */ +static inline struct rdma_ah_attr *rvt_get_swqe_ah_attr(struct rvt_swqe *swqe) +{ + return swqe->ud_wr.attr; +} + +/** + * rvt_get_swqe_remote_qpn - Access the remote QPN value + * @swqe: valid Send WQE + * + */ +static inline u32 rvt_get_swqe_remote_qpn(struct rvt_swqe *swqe) +{ + return swqe->ud_wr.wr.remote_qpn; +} + +/** + * rvt_get_swqe_remote_qkey - Acces the remote qkey value + * @swqe: valid Send WQE + * + */ +static inline u32 rvt_get_swqe_remote_qkey(struct rvt_swqe *swqe) +{ + return swqe->ud_wr.wr.remote_qkey; +} + +/** + * rvt_get_swqe_pkey_index - Access the pkey index + * @swqe: valid Send WQE + * + */ +static inline u16 rvt_get_swqe_pkey_index(struct rvt_swqe *swqe) +{ + return swqe->ud_wr.wr.pkey_index; +} + struct rvt_rq { struct rvt_rwq *wq; + struct rvt_krwq *kwq; u32 size; /* size of RWQE array */ u8 max_sge; /* protect changes in this struct */ @@ -472,7 +540,7 @@ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) { return (struct rvt_rwqe *) - ((char *)rq->wq->wq + + ((char *)rq->kwq->curr_wq + (sizeof(struct rvt_rwqe) + rq->max_sge * sizeof(struct ib_sge)) * n); } @@ -565,42 +633,6 @@ static inline void rvt_qp_wqe_unreserve( extern const enum ib_wc_opcode ib_rvt_wc_opcode[]; -/** - * rvt_qp_swqe_complete() - insert send completion - * @qp - the qp - * @wqe - the send wqe - * @status - completion status - * - * Insert a send completion into the completion - * queue if the qp indicates it should be done. - * - * See IBTA 10.7.3.1 for info on completion - * control. - */ -static inline void rvt_qp_swqe_complete( - struct rvt_qp *qp, - struct rvt_swqe *wqe, - enum ib_wc_opcode opcode, - enum ib_wc_status status) -{ - if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) - return; - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = opcode; - wc.qp = &qp->ibqp; - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } -} - /* * Compare the lower 24 bits of the msn values. * Returns an integer <, ==, or > than zero. @@ -734,7 +766,119 @@ static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe) { rvt_put_swqe(wqe); if (qp->allowed_ops == IB_OPCODE_UD) - atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); + rdma_destroy_ah_attr(wqe->ud_wr.attr); +} + +/** + * rvt_qp_sqwe_incr - increment ring index + * @qp: the qp + * @val: the starting value + * + * Return: the new value wrapping as appropriate + */ +static inline u32 +rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val) +{ + if (++val >= qp->s_size) + val = 0; + return val; +} + +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); + +/** + * rvt_recv_cq - add a new entry to completion queue + * by receive queue + * @qp: receive queue + * @wc: work completion entry to add + * @solicited: true if @entry is solicited + * + * This is wrapper function for rvt_enter_cq function call by + * receive queue. If rvt_cq_enter return false, it means cq is + * full and the qp is put into error state. + */ +static inline void rvt_recv_cq(struct rvt_qp *qp, struct ib_wc *wc, + bool solicited) +{ + struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.recv_cq); + + if (unlikely(!rvt_cq_enter(cq, wc, solicited))) + rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR); +} + +/** + * rvt_send_cq - add a new entry to completion queue + * by send queue + * @qp: send queue + * @wc: work completion entry to add + * @solicited: true if @entry is solicited + * + * This is wrapper function for rvt_enter_cq function call by + * send queue. If rvt_cq_enter return false, it means cq is + * full and the qp is put into error state. + */ +static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc, + bool solicited) +{ + struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.send_cq); + + if (unlikely(!rvt_cq_enter(cq, wc, solicited))) + rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR); +} + +/** + * rvt_qp_complete_swqe - insert send completion + * @qp - the qp + * @wqe - the send wqe + * @opcode - wc operation (driver dependent) + * @status - completion status + * + * Update the s_last information, and then insert a send + * completion into the completion + * queue if the qp indicates it should be done. + * + * See IBTA 10.7.3.1 for info on completion + * control. + * + * Return: new last + */ +static inline u32 +rvt_qp_complete_swqe(struct rvt_qp *qp, + struct rvt_swqe *wqe, + enum ib_wc_opcode opcode, + enum ib_wc_status status) +{ + bool need_completion; + u64 wr_id; + u32 byte_len, last; + int flags = wqe->wr.send_flags; + + rvt_put_qp_swqe(qp, wqe); + + need_completion = + !(flags & RVT_SEND_RESERVE_USED) && + (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || + (flags & IB_SEND_SIGNALED) || + status != IB_WC_SUCCESS); + if (need_completion) { + wr_id = wqe->wr.wr_id; + byte_len = wqe->length; + /* above fields required before writing s_last */ + } + last = rvt_qp_swqe_incr(qp, qp->s_last); + /* see rvt_qp_is_avail() */ + smp_store_release(&qp->s_last, last); + if (need_completion) { + struct ib_wc w = { + .wr_id = wr_id, + .status = status, + .opcode = opcode, + .qp = &qp->ibqp, + .byte_len = byte_len, + }; + rvt_send_cq(qp, &w, status != IB_WC_SUCCESS); + } + return last; } extern const int ib_rvt_state_ops[]; @@ -742,7 +886,6 @@ extern const int ib_rvt_state_ops[]; struct rvt_dev_info; int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only); void rvt_comm_est(struct rvt_qp *qp); -int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err); unsigned long rvt_rnr_tbl_to_usec(u32 index); enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t); @@ -784,6 +927,53 @@ struct rvt_qp_iter { int n; }; +/** + * ib_cq_tail - Return tail index of cq buffer + * @send_cq - The cq for send + * + * This is called in qp_iter_print to get tail + * of cq buffer. + */ +static inline u32 ib_cq_tail(struct ib_cq *send_cq) +{ + struct rvt_cq *cq = ibcq_to_rvtcq(send_cq); + + return ibcq_to_rvtcq(send_cq)->ip ? + RDMA_READ_UAPI_ATOMIC(cq->queue->tail) : + ibcq_to_rvtcq(send_cq)->kqueue->tail; +} + +/** + * ib_cq_head - Return head index of cq buffer + * @send_cq - The cq for send + * + * This is called in qp_iter_print to get head + * of cq buffer. + */ +static inline u32 ib_cq_head(struct ib_cq *send_cq) +{ + struct rvt_cq *cq = ibcq_to_rvtcq(send_cq); + + return ibcq_to_rvtcq(send_cq)->ip ? + RDMA_READ_UAPI_ATOMIC(cq->queue->head) : + ibcq_to_rvtcq(send_cq)->kqueue->head; +} + +/** + * rvt_free_rq - free memory allocated for rvt_rq struct + * @rvt_rq: request queue data structure + * + * This function should only be called if the rvt_mmap_info() + * has not succeeded. + */ +static inline void rvt_free_rq(struct rvt_rq *rq) +{ + kvfree(rq->kwq); + rq->kwq = NULL; + vfree(rq->wq); + rq->wq = NULL; +} + struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi, u64 v, void (*cb)(struct rvt_qp *qp, u64 v)); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index ecf3c7702a4f..b0fc6b26bdf5 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -14,6 +14,9 @@ #include <uapi/rdma/rdma_netlink.h> #include <linux/xarray.h> +struct ib_device; +struct sk_buff; + /** * enum rdma_restrack_type - HW objects to track */ @@ -43,13 +46,15 @@ enum rdma_restrack_type { */ RDMA_RESTRACK_CTX, /** + * @RDMA_RESTRACK_COUNTER: Statistic Counter + */ + RDMA_RESTRACK_COUNTER, + /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ RDMA_RESTRACK_MAX }; -struct ib_device; - /** * struct rdma_restrack_entry - metadata per-entry */ diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 494f79ca3e62..6ad9dc836c10 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -39,15 +39,6 @@ struct rdma_rw_ctx { struct ib_send_wr inv_wr; struct ib_mr *mr; } *reg; - - struct { - struct rdma_rw_reg_ctx data; - struct rdma_rw_reg_ctx prot; - struct ib_send_wr sig_inv_wr; - struct ib_mr *sig_mr; - struct ib_sge sig_sge; - struct ib_sig_handover_wr sig_wr; - } *sig; }; }; diff --git a/include/rdma/signature.h b/include/rdma/signature.h new file mode 100644 index 000000000000..f24cc2a1d3c5 --- /dev/null +++ b/include/rdma/signature.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */ +/* + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_SIGNATURE_H_ +#define _RDMA_SIGNATURE_H_ + +enum ib_signature_prot_cap { + IB_PROT_T10DIF_TYPE_1 = 1, + IB_PROT_T10DIF_TYPE_2 = 1 << 1, + IB_PROT_T10DIF_TYPE_3 = 1 << 2, +}; + +enum ib_signature_guard_cap { + IB_GUARD_T10DIF_CRC = 1, + IB_GUARD_T10DIF_CSUM = 1 << 1, +}; + +/** + * enum ib_signature_type - Signature types + * @IB_SIG_TYPE_NONE: Unprotected. + * @IB_SIG_TYPE_T10_DIF: Type T10-DIF + */ +enum ib_signature_type { + IB_SIG_TYPE_NONE, + IB_SIG_TYPE_T10_DIF, +}; + +/** + * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types + * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. + * @IB_T10DIF_CSUM: Corresponds to IP checksum rules. + */ +enum ib_t10_dif_bg_type { + IB_T10DIF_CRC, + IB_T10DIF_CSUM, +}; + +/** + * struct ib_t10_dif_domain - Parameters specific for T10-DIF + * domain. + * @bg_type: T10-DIF block guard type (CRC|CSUM) + * @pi_interval: protection information interval. + * @bg: seed of guard computation. + * @app_tag: application tag of guard block + * @ref_tag: initial guard block reference tag. + * @ref_remap: Indicate wethear the reftag increments each block + * @app_escape: Indicate to skip block check if apptag=0xffff + * @ref_escape: Indicate to skip block check if reftag=0xffffffff + * @apptag_check_mask: check bitmask of application tag. + */ +struct ib_t10_dif_domain { + enum ib_t10_dif_bg_type bg_type; + u16 pi_interval; + u16 bg; + u16 app_tag; + u32 ref_tag; + bool ref_remap; + bool app_escape; + bool ref_escape; + u16 apptag_check_mask; +}; + +/** + * struct ib_sig_domain - Parameters for signature domain + * @sig_type: specific signauture type + * @sig: union of all signature domain attributes that may + * be used to set domain layout. + */ +struct ib_sig_domain { + enum ib_signature_type sig_type; + union { + struct ib_t10_dif_domain dif; + } sig; +}; + +/** + * struct ib_sig_attrs - Parameters for signature handover operation + * @check_mask: bitmask for signature byte check (8 bytes) + * @mem: memory domain layout descriptor. + * @wire: wire domain layout descriptor. + * @meta_length: metadata length + */ +struct ib_sig_attrs { + u8 check_mask; + struct ib_sig_domain mem; + struct ib_sig_domain wire; + int meta_length; +}; + +enum ib_sig_err_type { + IB_SIG_BAD_GUARD, + IB_SIG_BAD_REFTAG, + IB_SIG_BAD_APPTAG, +}; + +/* + * Signature check masks (8 bytes in total) according to the T10-PI standard: + * -------- -------- ------------ + * | GUARD | APPTAG | REFTAG | + * | 2B | 2B | 4B | + * -------- -------- ------------ + */ +enum { + IB_SIG_CHECK_GUARD = 0xc0, + IB_SIG_CHECK_APPTAG = 0x30, + IB_SIG_CHECK_REFTAG = 0x0f, +}; + +/* + * struct ib_sig_err - signature error descriptor + */ +struct ib_sig_err { + enum ib_sig_err_type err_type; + u32 expected; + u32 actual; + u64 sig_err_offset; + u32 key; +}; + +#endif /* _RDMA_SIGNATURE_H_ */ diff --git a/include/uapi/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h deleted file mode 100644 index e2709bb8cb18..000000000000 --- a/include/uapi/rdma/ib_user_cm.h +++ /dev/null @@ -1,326 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IB_USER_CM_H -#define IB_USER_CM_H - -#include <linux/types.h> -#include <rdma/ib_user_sa.h> - -#define IB_USER_CM_ABI_VERSION 5 - -enum { - IB_USER_CM_CMD_CREATE_ID, - IB_USER_CM_CMD_DESTROY_ID, - IB_USER_CM_CMD_ATTR_ID, - - IB_USER_CM_CMD_LISTEN, - IB_USER_CM_CMD_NOTIFY, - - IB_USER_CM_CMD_SEND_REQ, - IB_USER_CM_CMD_SEND_REP, - IB_USER_CM_CMD_SEND_RTU, - IB_USER_CM_CMD_SEND_DREQ, - IB_USER_CM_CMD_SEND_DREP, - IB_USER_CM_CMD_SEND_REJ, - IB_USER_CM_CMD_SEND_MRA, - IB_USER_CM_CMD_SEND_LAP, - IB_USER_CM_CMD_SEND_APR, - IB_USER_CM_CMD_SEND_SIDR_REQ, - IB_USER_CM_CMD_SEND_SIDR_REP, - - IB_USER_CM_CMD_EVENT, - IB_USER_CM_CMD_INIT_QP_ATTR, -}; -/* - * command ABI structures. - */ -struct ib_ucm_cmd_hdr { - __u32 cmd; - __u16 in; - __u16 out; -}; - -struct ib_ucm_create_id { - __aligned_u64 uid; - __aligned_u64 response; -}; - -struct ib_ucm_create_id_resp { - __u32 id; -}; - -struct ib_ucm_destroy_id { - __aligned_u64 response; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_destroy_id_resp { - __u32 events_reported; -}; - -struct ib_ucm_attr_id { - __aligned_u64 response; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_attr_id_resp { - __be64 service_id; - __be64 service_mask; - __be32 local_id; - __be32 remote_id; -}; - -struct ib_ucm_init_qp_attr { - __aligned_u64 response; - __u32 id; - __u32 qp_state; -}; - -struct ib_ucm_listen { - __be64 service_id; - __be64 service_mask; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_notify { - __u32 id; - __u32 event; -}; - -struct ib_ucm_private_data { - __aligned_u64 data; - __u32 id; - __u8 len; - __u8 reserved[3]; -}; - -struct ib_ucm_req { - __u32 id; - __u32 qpn; - __u32 qp_type; - __u32 psn; - __be64 sid; - __aligned_u64 data; - __aligned_u64 primary_path; - __aligned_u64 alternate_path; - __u8 len; - __u8 peer_to_peer; - __u8 responder_resources; - __u8 initiator_depth; - __u8 remote_cm_response_timeout; - __u8 flow_control; - __u8 local_cm_response_timeout; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 max_cm_retries; - __u8 srq; - __u8 reserved[5]; -}; - -struct ib_ucm_rep { - __aligned_u64 uid; - __aligned_u64 data; - __u32 id; - __u32 qpn; - __u32 psn; - __u8 len; - __u8 responder_resources; - __u8 initiator_depth; - __u8 target_ack_delay; - __u8 failover_accepted; - __u8 flow_control; - __u8 rnr_retry_count; - __u8 srq; - __u8 reserved[4]; -}; - -struct ib_ucm_info { - __u32 id; - __u32 status; - __aligned_u64 info; - __aligned_u64 data; - __u8 info_len; - __u8 data_len; - __u8 reserved[6]; -}; - -struct ib_ucm_mra { - __aligned_u64 data; - __u32 id; - __u8 len; - __u8 timeout; - __u8 reserved[2]; -}; - -struct ib_ucm_lap { - __aligned_u64 path; - __aligned_u64 data; - __u32 id; - __u8 len; - __u8 reserved[3]; -}; - -struct ib_ucm_sidr_req { - __u32 id; - __u32 timeout; - __be64 sid; - __aligned_u64 data; - __aligned_u64 path; - __u16 reserved_pkey; - __u8 len; - __u8 max_cm_retries; - __u8 reserved[4]; -}; - -struct ib_ucm_sidr_rep { - __u32 id; - __u32 qpn; - __u32 qkey; - __u32 status; - __aligned_u64 info; - __aligned_u64 data; - __u8 info_len; - __u8 data_len; - __u8 reserved[6]; -}; -/* - * event notification ABI structures. - */ -struct ib_ucm_event_get { - __aligned_u64 response; - __aligned_u64 data; - __aligned_u64 info; - __u8 data_len; - __u8 info_len; - __u8 reserved[6]; -}; - -struct ib_ucm_req_event_resp { - struct ib_user_path_rec primary_path; - struct ib_user_path_rec alternate_path; - __be64 remote_ca_guid; - __u32 remote_qkey; - __u32 remote_qpn; - __u32 qp_type; - __u32 starting_psn; - __u8 responder_resources; - __u8 initiator_depth; - __u8 local_cm_response_timeout; - __u8 flow_control; - __u8 remote_cm_response_timeout; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 srq; - __u8 port; - __u8 reserved[7]; -}; - -struct ib_ucm_rep_event_resp { - __be64 remote_ca_guid; - __u32 remote_qkey; - __u32 remote_qpn; - __u32 starting_psn; - __u8 responder_resources; - __u8 initiator_depth; - __u8 target_ack_delay; - __u8 failover_accepted; - __u8 flow_control; - __u8 rnr_retry_count; - __u8 srq; - __u8 reserved[5]; -}; - -struct ib_ucm_rej_event_resp { - __u32 reason; - /* ari in ib_ucm_event_get info field. */ -}; - -struct ib_ucm_mra_event_resp { - __u8 timeout; - __u8 reserved[3]; -}; - -struct ib_ucm_lap_event_resp { - struct ib_user_path_rec path; -}; - -struct ib_ucm_apr_event_resp { - __u32 status; - /* apr info in ib_ucm_event_get info field. */ -}; - -struct ib_ucm_sidr_req_event_resp { - __u16 pkey; - __u8 port; - __u8 reserved; -}; - -struct ib_ucm_sidr_rep_event_resp { - __u32 status; - __u32 qkey; - __u32 qpn; - /* info in ib_ucm_event_get info field. */ -}; - -#define IB_UCM_PRES_DATA 0x01 -#define IB_UCM_PRES_INFO 0x02 -#define IB_UCM_PRES_PRIMARY 0x04 -#define IB_UCM_PRES_ALTERNATE 0x08 - -struct ib_ucm_event_resp { - __aligned_u64 uid; - __u32 id; - __u32 event; - __u32 present; - __u32 reserved; - union { - struct ib_ucm_req_event_resp req_resp; - struct ib_ucm_rep_event_resp rep_resp; - struct ib_ucm_rej_event_resp rej_resp; - struct ib_ucm_mra_event_resp mra_resp; - struct ib_ucm_lap_event_resp lap_resp; - struct ib_ucm_apr_event_resp apr_resp; - - struct ib_ucm_sidr_req_event_resp sidr_req_resp; - struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; - - __u32 send_status; - } u; -}; - -#endif /* IB_USER_CM_H */ diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index d404c951954c..d0da070cf0ab 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -51,6 +51,7 @@ enum mlx5_ib_devx_methods { MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_METHOD_DEVX_QUERY_UAR, MLX5_IB_METHOD_DEVX_QUERY_EQN, + MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT, }; enum mlx5_ib_devx_other_attrs { @@ -93,6 +94,14 @@ enum mlx5_ib_devx_obj_query_async_attrs { MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN, }; +enum mlx5_ib_devx_subscribe_event_attrs { + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE, +}; + enum mlx5_ib_devx_query_eqn_attrs { MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, @@ -127,16 +136,26 @@ enum mlx5_ib_devx_async_cmd_fd_alloc_attrs { MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), }; +enum mlx5_ib_devx_async_event_fd_alloc_attrs { + MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS, +}; + enum mlx5_ib_devx_async_cmd_fd_methods { MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT), }; +enum mlx5_ib_devx_async_event_fd_methods { + MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT), +}; + enum mlx5_ib_objects { MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_OBJECT_DEVX_OBJ, MLX5_IB_OBJECT_DEVX_UMEM, MLX5_IB_OBJECT_FLOW_MATCHER, MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, }; enum mlx5_ib_flow_matcher_create_attrs { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index a8f34c237458..7e9900b0e746 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -63,5 +63,14 @@ enum mlx5_ib_uapi_dm_type { MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM, }; +enum mlx5_ib_uapi_devx_create_event_channel_flags { + MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA = 1 << 0, +}; + +struct mlx5_ib_uapi_devx_async_event_hdr { + __aligned_u64 cookie; + __u8 out_data[]; +}; + #endif diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 41db51367efa..8e277783fa96 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -147,6 +147,18 @@ enum { IWPM_NLA_HELLO_MAX }; +/* For RDMA_NLDEV_ATTR_DEV_NODE_TYPE */ +enum { + /* IB values map to NodeInfo:NodeType. */ + RDMA_NODE_IB_CA = 1, + RDMA_NODE_IB_SWITCH, + RDMA_NODE_IB_ROUTER, + RDMA_NODE_RNIC, + RDMA_NODE_USNIC, + RDMA_NODE_USNIC_UDP, + RDMA_NODE_UNSPECIFIED, +}; + /* * Local service operations: * RESOLVE - The client requests the local service to resolve a path. @@ -267,11 +279,15 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */ - RDMA_NLDEV_NUM_OPS -}; + RDMA_NLDEV_CMD_GET_CHARDEV, -enum { - RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, + RDMA_NLDEV_CMD_STAT_SET, + + RDMA_NLDEV_CMD_STAT_GET, /* can dump */ + + RDMA_NLDEV_CMD_STAT_DEL, + + RDMA_NLDEV_NUM_OPS }; enum rdma_nldev_print_type { @@ -478,10 +494,72 @@ enum rdma_nldev_attr { * File descriptor handle of the net namespace object */ RDMA_NLDEV_NET_NS_FD, /* u32 */ + /* + * Information about a chardev. + * CHARDEV_TYPE is the name of the chardev ABI (ie uverbs, umad, etc) + * CHARDEV_ABI signals the ABI revision (historical) + * CHARDEV_NAME is the kernel name for the /dev/ file (no directory) + * CHARDEV is the 64 bit dev_t for the inode + */ + RDMA_NLDEV_ATTR_CHARDEV_TYPE, /* string */ + RDMA_NLDEV_ATTR_CHARDEV_NAME, /* string */ + RDMA_NLDEV_ATTR_CHARDEV_ABI, /* u64 */ + RDMA_NLDEV_ATTR_CHARDEV, /* u64 */ + RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, /* u64 */ + /* + * Counter-specific attributes. + */ + RDMA_NLDEV_ATTR_STAT_MODE, /* u32 */ + RDMA_NLDEV_ATTR_STAT_RES, /* u32 */ + RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, /* u32 */ + RDMA_NLDEV_ATTR_STAT_COUNTER, /* nested table */ + RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_STAT_COUNTER_ID, /* u32 */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS, /* nested table */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, /* string */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, /* u64 */ + + /* + * CQ adaptive moderatio (DIM) + */ + RDMA_NLDEV_ATTR_DEV_DIM, /* u8 */ /* * Always the end */ RDMA_NLDEV_ATTR_MAX }; + +/* + * Supported counter bind modes. All modes are mutual-exclusive. + */ +enum rdma_nl_counter_mode { + RDMA_COUNTER_MODE_NONE, + + /* + * A qp is bound with a counter automatically during initialization + * based on the auto mode (e.g., qp type, ...) + */ + RDMA_COUNTER_MODE_AUTO, + + /* + * Which qp are bound with which counter is explicitly specified + * by the user + */ + RDMA_COUNTER_MODE_MANUAL, + + /* + * Always the end + */ + RDMA_COUNTER_MODE_MAX, +}; + +/* + * Supported criteria in counter auto mode. + * Currently only "qp type" is supported + */ +enum rdma_nl_counter_mask { + RDMA_COUNTER_MASK_QP_TYPE = 1, +}; #endif /* _UAPI_RDMA_NETLINK_H */ diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 26213f49f5c8..64c14cb0022f 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -103,6 +103,7 @@ enum rdma_driver_id { RDMA_DRIVER_HFI1, RDMA_DRIVER_QIB, RDMA_DRIVER_EFA, + RDMA_DRIVER_SIW, }; #endif diff --git a/include/uapi/rdma/rvt-abi.h b/include/uapi/rdma/rvt-abi.h new file mode 100644 index 000000000000..7328293c715c --- /dev/null +++ b/include/uapi/rdma/rvt-abi.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ + +/* + * This file contains defines, structures, etc. that are used + * to communicate between kernel and user code. + */ + +#ifndef RVT_ABI_USER_H +#define RVT_ABI_USER_H + +#include <linux/types.h> +#include <rdma/ib_user_verbs.h> +#ifndef RDMA_ATOMIC_UAPI +#define RDMA_ATOMIC_UAPI(_type, _name) struct{ _type val; } _name +#endif + +struct rvt_wqe_sge { + __aligned_u64 addr; + __u32 length; + __u32 lkey; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct rvt_cq_wc { + /* index of next entry to fill */ + RDMA_ATOMIC_UAPI(__u32, head); + /* index of next ib_poll_cq() entry */ + RDMA_ATOMIC_UAPI(__u32, tail); + + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + __u64 wr_id; + __u8 num_sge; + __u8 padding[7]; + struct rvt_wqe_sge sg_list[]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() for user space and rvt_get_rwqe_ptr() + * for kernel space. + */ +struct rvt_rwq { + /* new work requests posted to the head */ + RDMA_ATOMIC_UAPI(__u32, head); + /* receives pull requests from here. */ + RDMA_ATOMIC_UAPI(__u32, tail); + struct rvt_rwqe wq[]; +}; +#endif /* RVT_ABI_USER_H */ diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h new file mode 100644 index 000000000000..3dd8071ace7b --- /dev/null +++ b/include/uapi/rdma/siw-abi.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#ifndef _SIW_USER_H +#define _SIW_USER_H + +#include <linux/types.h> + +#define SIW_NODE_DESC_COMMON "Software iWARP stack" +#define SIW_ABI_VERSION 1 +#define SIW_MAX_SGE 6 +#define SIW_UOBJ_MAX_KEY 0x08FFFF +#define SIW_INVAL_UOBJ_KEY (SIW_UOBJ_MAX_KEY + 1) + +struct siw_uresp_create_cq { + __u32 cq_id; + __u32 num_cqe; + __aligned_u64 cq_key; +}; + +struct siw_uresp_create_qp { + __u32 qp_id; + __u32 num_sqe; + __u32 num_rqe; + __u32 pad; + __aligned_u64 sq_key; + __aligned_u64 rq_key; +}; + +struct siw_ureq_reg_mr { + __u8 stag_key; + __u8 reserved[3]; + __u32 pad; +}; + +struct siw_uresp_reg_mr { + __u32 stag; + __u32 pad; +}; + +struct siw_uresp_create_srq { + __u32 num_rqe; + __u32 pad; + __aligned_u64 srq_key; +}; + +struct siw_uresp_alloc_ctx { + __u32 dev_id; + __u32 pad; +}; + +enum siw_opcode { + SIW_OP_WRITE, + SIW_OP_READ, + SIW_OP_READ_LOCAL_INV, + SIW_OP_SEND, + SIW_OP_SEND_WITH_IMM, + SIW_OP_SEND_REMOTE_INV, + + /* Unsupported */ + SIW_OP_FETCH_AND_ADD, + SIW_OP_COMP_AND_SWAP, + + SIW_OP_RECEIVE, + /* provider internal SQE */ + SIW_OP_READ_RESPONSE, + /* + * below opcodes valid for + * in-kernel clients only + */ + SIW_OP_INVAL_STAG, + SIW_OP_REG_MR, + SIW_NUM_OPCODES +}; + +/* Keep it same as ibv_sge to allow for memcpy */ +struct siw_sge { + __aligned_u64 laddr; + __u32 length; + __u32 lkey; +}; + +/* + * Inline data are kept within the work request itself occupying + * the space of sge[1] .. sge[n]. Therefore, inline data cannot be + * supported if SIW_MAX_SGE is below 2 elements. + */ +#define SIW_MAX_INLINE (sizeof(struct siw_sge) * (SIW_MAX_SGE - 1)) + +#if SIW_MAX_SGE < 2 +#error "SIW_MAX_SGE must be at least 2" +#endif + +enum siw_wqe_flags { + SIW_WQE_VALID = 1, + SIW_WQE_INLINE = (1 << 1), + SIW_WQE_SIGNALLED = (1 << 2), + SIW_WQE_SOLICITED = (1 << 3), + SIW_WQE_READ_FENCE = (1 << 4), + SIW_WQE_REM_INVAL = (1 << 5), + SIW_WQE_COMPLETED = (1 << 6) +}; + +/* Send Queue Element */ +struct siw_sqe { + __aligned_u64 id; + __u16 flags; + __u8 num_sge; + /* Contains enum siw_opcode values */ + __u8 opcode; + __u32 rkey; + union { + __aligned_u64 raddr; + __aligned_u64 base_mr; + }; + union { + struct siw_sge sge[SIW_MAX_SGE]; + __aligned_u64 access; + }; +}; + +/* Receive Queue Element */ +struct siw_rqe { + __aligned_u64 id; + __u16 flags; + __u8 num_sge; + /* + * only used by kernel driver, + * ignored if set by user + */ + __u8 opcode; + __u32 unused; + struct siw_sge sge[SIW_MAX_SGE]; +}; + +enum siw_notify_flags { + SIW_NOTIFY_NOT = (0), + SIW_NOTIFY_SOLICITED = (1 << 0), + SIW_NOTIFY_NEXT_COMPLETION = (1 << 1), + SIW_NOTIFY_MISSED_EVENTS = (1 << 2), + SIW_NOTIFY_ALL = SIW_NOTIFY_SOLICITED | SIW_NOTIFY_NEXT_COMPLETION | + SIW_NOTIFY_MISSED_EVENTS +}; + +enum siw_wc_status { + SIW_WC_SUCCESS, + SIW_WC_LOC_LEN_ERR, + SIW_WC_LOC_PROT_ERR, + SIW_WC_LOC_QP_OP_ERR, + SIW_WC_WR_FLUSH_ERR, + SIW_WC_BAD_RESP_ERR, + SIW_WC_LOC_ACCESS_ERR, + SIW_WC_REM_ACCESS_ERR, + SIW_WC_REM_INV_REQ_ERR, + SIW_WC_GENERAL_ERR, + SIW_NUM_WC_STATUS +}; + +struct siw_cqe { + __aligned_u64 id; + __u8 flags; + __u8 opcode; + __u16 status; + __u32 bytes; + union { + __aligned_u64 imm_data; + __u32 inval_stag; + }; + /* QP number or QP pointer */ + union { + struct ib_qp *base_qp; + __aligned_u64 qp_id; + }; +}; + +/* + * Shared structure between user and kernel + * to control CQ arming. + */ +struct siw_cq_ctrl { + __aligned_u64 notify; +}; +#endif |