diff options
Diffstat (limited to 'include/rdma')
-rw-r--r-- | include/rdma/ib_cm.h | 25 | ||||
-rw-r--r-- | include/rdma/ib_mad.h | 82 | ||||
-rw-r--r-- | include/rdma/ib_pack.h | 2 | ||||
-rw-r--r-- | include/rdma/ib_smi.h | 47 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 249 | ||||
-rw-r--r-- | include/rdma/opa_port_info.h | 433 | ||||
-rw-r--r-- | include/rdma/opa_smi.h | 47 | ||||
-rw-r--r-- | include/rdma/rdma_netlink.h | 7 |
8 files changed, 746 insertions, 146 deletions
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 39ed2d2fbd51..92a7d85917b4 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -105,14 +105,16 @@ enum ib_cm_data_size { IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, IB_CM_SIDR_REP_INFO_LENGTH = 72, - /* compare done u32 at a time */ - IB_CM_COMPARE_SIZE = (64 / sizeof(u32)) }; struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; + + /* P_Key that was used by the GMP's BTH header */ + u16 bth_pkey; + u8 port; struct ib_sa_path_rec *primary_path; @@ -223,6 +225,9 @@ struct ib_cm_apr_event_param { struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; + __be64 service_id; + /* P_Key that was used by the GMP's BTH header */ + u16 bth_pkey; u8 port; u16 pkey; }; @@ -337,11 +342,6 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); #define IB_SDP_SERVICE_ID cpu_to_be64(0x0000000000010000ULL) #define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL) -struct ib_cm_compare_data { - u32 data[IB_CM_COMPARE_SIZE]; - u32 mask[IB_CM_COMPARE_SIZE]; -}; - /** * ib_cm_listen - Initiates listening on the specified service ID for * connection and service ID resolution requests. @@ -354,12 +354,13 @@ struct ib_cm_compare_data { * range of service IDs. If set to 0, the service ID is matched * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. - * @compare_data: This parameter is optional. It specifies data that must - * appear in the private data of a connection request for the specified - * listen request. */ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, - struct ib_cm_compare_data *compare_data); +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, + __be64 service_mask); + +struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, + ib_cm_handler cm_handler, + __be64 service_id); struct ib_cm_req_param { struct ib_sa_path_rec *primary_path; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index c8422d5a5a91..188df91d5851 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -127,6 +127,23 @@ #define IB_DEFAULT_PKEY_PARTIAL 0x7FFF #define IB_DEFAULT_PKEY_FULL 0xFFFF +/* + * Generic trap/notice types + */ +#define IB_NOTICE_TYPE_FATAL 0x80 +#define IB_NOTICE_TYPE_URGENT 0x81 +#define IB_NOTICE_TYPE_SECURITY 0x82 +#define IB_NOTICE_TYPE_SM 0x83 +#define IB_NOTICE_TYPE_INFO 0x84 + +/* + * Generic trap/notice producers + */ +#define IB_NOTICE_PROD_CA cpu_to_be16(1) +#define IB_NOTICE_PROD_SWITCH cpu_to_be16(2) +#define IB_NOTICE_PROD_ROUTER cpu_to_be16(3) +#define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4) + enum { IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, @@ -240,6 +257,70 @@ struct ib_class_port_info { __be32 trap_qkey; }; +struct ib_mad_notice_attr { + u8 generic_type; + u8 prod_type_msb; + __be16 prod_type_lsb; + __be16 trap_num; + __be16 issuer_lid; + __be16 toggle_count; + + union { + struct { + u8 details[54]; + } raw_data; + + struct { + __be16 reserved; + __be16 lid; /* where violation happened */ + u8 port_num; /* where violation happened */ + } __packed ntc_129_131; + + struct { + __be16 reserved; + __be16 lid; /* LID where change occurred */ + u8 reserved2; + u8 local_changes; /* low bit - local changes */ + __be32 new_cap_mask; /* new capability mask */ + u8 reserved3; + u8 change_flags; /* low 3 bits only */ + } __packed ntc_144; + + struct { + __be16 reserved; + __be16 lid; /* lid where sys guid changed */ + __be16 reserved2; + __be64 new_sys_guid; + } __packed ntc_145; + + struct { + __be16 reserved; + __be16 lid; + __be16 dr_slid; + u8 method; + u8 reserved2; + __be16 attr_id; + __be32 attr_mod; + __be64 mkey; + u8 reserved3; + u8 dr_trunc_hop; + u8 dr_rtn_path[30]; + } __packed ntc_256; + + struct { + __be16 reserved; + __be16 lid1; + __be16 lid2; + __be32 key; + __be32 sl_qp1; /* SL: high 4 bits */ + __be32 qp2; /* high 8 bits reserved */ + union ib_gid gid1; + union ib_gid gid2; + } __packed ntc_257_258; + + } details; +}; + /** * ib_mad_send_buf - MAD data buffer and work request for sends. * @next: A pointer used to chain together MADs for posting. @@ -388,7 +469,6 @@ enum { struct ib_mad_agent { struct ib_device *device; struct ib_qp *qp; - struct ib_mr *mr; ib_mad_recv_handler recv_handler; ib_mad_send_handler send_handler; ib_mad_snoop_handler snoop_handler; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index b1f7592e02e4..709a5331e6b9 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -76,6 +76,8 @@ enum { IB_OPCODE_UC = 0x20, IB_OPCODE_RD = 0x40, IB_OPCODE_UD = 0x60, + /* per IBTA 3.1 Table 38, A10.3.2 */ + IB_OPCODE_CNP = 0x80, /* operations -- just used to define real constants */ IB_OPCODE_SEND_FIRST = 0x00, diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index 98b9086d769a..b439e988408e 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -119,10 +119,57 @@ struct ib_port_info { u8 link_roundtrip_latency[3]; }; +struct ib_node_info { + u8 base_version; + u8 class_version; + u8 node_type; + u8 num_ports; + __be64 sys_guid; + __be64 node_guid; + __be64 port_guid; + __be16 partition_cap; + __be16 device_id; + __be32 revision; + u8 local_port_num; + u8 vendor_id[3]; +} __packed; + +struct ib_vl_weight_elem { + u8 vl; /* IB: VL is low 4 bits, upper 4 bits reserved */ + /* OPA: VL is low 5 bits, upper 3 bits reserved */ + u8 weight; +}; + static inline u8 ib_get_smp_direction(struct ib_smp *smp) { return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); } +/* + * SM Trap/Notice numbers + */ +#define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129) +#define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130) +#define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131) +#define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144) +#define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145) +#define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256) +#define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257) +#define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258) + +/* + * Other local changes flags (trap 144). + */ +#define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */ +#define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */ +#define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01 + +/* + * M_Key volation flags in dr_trunc_hop (trap 256). + */ +#define IB_NOTICE_TRAP_DR_NOTICE 0x80 +#define IB_NOTICE_TRAP_DR_TRUNC 0x40 + + #endif /* IB_SMI_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b0f898e3b2e7..7845fae6f2df 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -48,6 +48,7 @@ #include <linux/rwsem.h> #include <linux/scatterlist.h> #include <linux/workqueue.h> +#include <linux/socket.h> #include <uapi/linux/if_ether.h> #include <linux/atomic.h> @@ -64,6 +65,12 @@ union ib_gid { } global; }; +extern union ib_gid zgid; + +struct ib_gid_attr { + struct net_device *ndev; +}; + enum rdma_node_type { /* IB values map to NodeInfo:NodeType. */ RDMA_NODE_IB_CA = 1, @@ -284,7 +291,7 @@ enum ib_port_cap_flags { IB_PORT_BOOT_MGMT_SUP = 1 << 23, IB_PORT_LINK_LATENCY_SUP = 1 << 24, IB_PORT_CLIENT_REG_SUP = 1 << 25, - IB_PORT_IP_BASED_GIDS = 1 << 26 + IB_PORT_IP_BASED_GIDS = 1 << 26, }; enum ib_port_width { @@ -556,20 +563,18 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate); */ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); -enum ib_mr_create_flags { - IB_MR_SIGNATURE_EN = 1, -}; /** - * ib_mr_init_attr - Memory region init attributes passed to routine - * ib_create_mr. - * @max_reg_descriptors: max number of registration descriptors that - * may be used with registration work requests. - * @flags: MR creation flags bit mask. + * enum ib_mr_type - memory region type + * @IB_MR_TYPE_MEM_REG: memory region that is used for + * normal registration + * @IB_MR_TYPE_SIGNATURE: memory region that is used for + * signature operations (data-integrity + * capable regions) */ -struct ib_mr_init_attr { - int max_reg_descriptors; - u32 flags; +enum ib_mr_type { + IB_MR_TYPE_MEM_REG, + IB_MR_TYPE_SIGNATURE, }; /** @@ -1252,9 +1257,11 @@ struct ib_udata { }; struct ib_pd { + u32 local_dma_lkey; struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ + struct ib_mr *local_mr; }; struct ib_xrcd { @@ -1488,7 +1495,7 @@ struct ib_cache { rwlock_t lock; struct ib_event_handler event_handler; struct ib_pkey_cache **pkey_cache; - struct ib_gid_cache **gid_cache; + struct ib_gid_table **gid_cache; u8 *lmc_cache; }; @@ -1550,6 +1557,8 @@ struct ib_device { spinlock_t client_data_lock; struct list_head core_list; + /* Access to the client_data_list is protected by the client_data_lock + * spinlock and the lists_rwsem read-write semaphore */ struct list_head client_data_list; struct ib_cache cache; @@ -1572,9 +1581,47 @@ struct ib_device { struct ib_port_attr *port_attr); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, u8 port_num); + /* When calling get_netdev, the HW vendor's driver should return the + * net device of device @device at port @port_num or NULL if such + * a net device doesn't exist. The vendor driver should call dev_hold + * on this net device. The HW vendor's device driver must guarantee + * that this function returns NULL before the net device reaches + * NETDEV_UNREGISTER_FINAL state. + */ + struct net_device *(*get_netdev)(struct ib_device *device, + u8 port_num); int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); + /* When calling add_gid, the HW vendor's driver should + * add the gid of device @device at gid index @index of + * port @port_num to be @gid. Meta-info of that gid (for example, + * the network device related to this gid is available + * at @attr. @context allows the HW vendor driver to store extra + * information together with a GID entry. The HW vendor may allocate + * memory to contain this information and store it in @context when a + * new GID entry is written to. Params are consistent until the next + * call of add_gid or delete_gid. The function should return 0 on + * success or error otherwise. The function could be called + * concurrently for different ports. This function is only called + * when roce_gid_table is used. + */ + int (*add_gid)(struct ib_device *device, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context); + /* When calling del_gid, the HW vendor's driver should delete the + * gid of device @device at gid index @index of port @port_num. + * Upon the deletion of a GID entry, the HW vendor must free any + * allocated memory. The caller will clear @context afterwards. + * This function is only called when roce_gid_table is used. + */ + int (*del_gid)(struct ib_device *device, + u8 port_num, + unsigned int index, + void **context); int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); int (*modify_device)(struct ib_device *device, @@ -1668,11 +1715,9 @@ struct ib_device { int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); - int (*destroy_mr)(struct ib_mr *mr); - struct ib_mr * (*create_mr)(struct ib_pd *pd, - struct ib_mr_init_attr *mr_init_attr); - struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, - int max_page_list_len); + struct ib_mr * (*alloc_mr)(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, int page_list_len); void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); @@ -1724,6 +1769,7 @@ struct ib_device { int (*destroy_flow)(struct ib_flow *flow_id); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); struct ib_dma_mapping_ops *dma_ops; @@ -1761,8 +1807,30 @@ struct ib_device { struct ib_client { char *name; void (*add) (struct ib_device *); - void (*remove)(struct ib_device *); - + void (*remove)(struct ib_device *, void *client_data); + + /* Returns the net_dev belonging to this ib_client and matching the + * given parameters. + * @dev: An RDMA device that the net_dev use for communication. + * @port: A physical port number on the RDMA device. + * @pkey: P_Key that the net_dev uses if applicable. + * @gid: A GID that the net_dev uses to communicate. + * @addr: An IP address the net_dev is configured with. + * @client_data: The device's client data set by ib_set_client_data(). + * + * An ib_client that implements a net_dev on top of RDMA devices + * (such as IP over IB) should implement this callback, allowing the + * rdma_cm module to find the right net_dev for a given request. + * + * The caller is responsible for calling dev_put on the returned + * netdev. */ + struct net_device *(*get_net_dev_by_params)( + struct ib_device *dev, + u8 port, + u16 pkey, + const union ib_gid *gid, + const struct sockaddr *addr, + void *client_data); struct list_head list; }; @@ -2071,34 +2139,6 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) } /** - * rdma_cap_read_multi_sge - Check if the port of device has the capability - * RDMA Read Multiple Scatter-Gather Entries. - * @device: Device to check - * @port_num: Port number to check - * - * iWARP has a restriction that RDMA READ requests may only have a single - * Scatter/Gather Entry (SGE) in the work request. - * - * NOTE: although the linux kernel currently assumes all devices are either - * single SGE RDMA READ devices or identical SGE maximums for RDMA READs and - * WRITEs, according to Tom Talpey, this is not accurate. There are some - * devices out there that support more than a single SGE on RDMA READ - * requests, but do not support the same number of SGEs as they do on - * RDMA WRITE requests. The linux kernel would need rearchitecting to - * support these imbalanced READ/WRITE SGEs allowed devices. So, for now, - * suffice with either the device supports the same READ/WRITE SGEs, or - * it only gets one READ sge. - * - * Return: true for any device that allows more than one SGE in RDMA READ - * requests. - */ -static inline bool rdma_cap_read_multi_sge(struct ib_device *device, - u8 port_num) -{ - return !(device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP); -} - -/** * rdma_max_mad_size - Return the max MAD size required by this RDMA Port. * * @device: Device @@ -2115,6 +2155,26 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n return device->port_immutable[port_num].max_mad_size; } +/** + * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table + * @device: Device to check + * @port_num: Port number to check + * + * RoCE GID table mechanism manages the various GIDs for a device. + * + * NOTE: if allocating the port's GID table has failed, this call will still + * return true, but any RoCE GID table API will fail. + * + * Return: true if the port uses RoCE GID table mechanism in order to manage + * its GIDs. + */ +static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, + u8 port_num) +{ + return rdma_protocol_roce(device, port_num) && + device->add_gid && device->del_gid; +} + int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); @@ -2135,20 +2195,9 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); -/** - * ib_alloc_pd - Allocates an unused protection domain. - * @device: The device on which to allocate the protection domain. - * - * A protection domain object provides an association between QPs, shared - * receive queues, address handles, memory regions, and memory windows. - */ struct ib_pd *ib_alloc_pd(struct ib_device *device); -/** - * ib_dealloc_pd - Deallocates a protection domain. - * @pd: The protection domain to deallocate. - */ -int ib_dealloc_pd(struct ib_pd *pd); +void ib_dealloc_pd(struct ib_pd *pd); /** * ib_create_ah - Creates an address handle for the given address vector. @@ -2760,52 +2809,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, } /** - * ib_reg_phys_mr - Prepares a virtually addressed memory region for use - * by an HCA. - * @pd: The protection domain associated assigned to the registered region. - * @phys_buf_array: Specifies a list of physical buffers to use in the - * memory region. - * @num_phys_buf: Specifies the size of the phys_buf_array. - * @mr_access_flags: Specifies the memory access rights. - * @iova_start: The offset of the region's starting I/O virtual address. - */ -struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); - -/** - * ib_rereg_phys_mr - Modifies the attributes of an existing memory region. - * Conceptually, this call performs the functions deregister memory region - * followed by register physical memory region. Where possible, - * resources are reused instead of deallocated and reallocated. - * @mr: The memory region to modify. - * @mr_rereg_mask: A bit-mask used to indicate which of the following - * properties of the memory region are being modified. - * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies - * the new protection domain to associated with the memory region, - * otherwise, this parameter is ignored. - * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this - * field specifies a list of physical buffers to use in the new - * translation, otherwise, this parameter is ignored. - * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this - * field specifies the size of the phys_buf_array, otherwise, this - * parameter is ignored. - * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this - * field specifies the new memory access rights, otherwise, this - * parameter is ignored. - * @iova_start: The offset of the region's starting I/O virtual address. - */ -int ib_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); - -/** * ib_query_mr - Retrieves information about a specific memory region. * @mr: The memory region to retrieve information about. * @mr_attr: The attributes of the specified memory region. @@ -2821,33 +2824,9 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); */ int ib_dereg_mr(struct ib_mr *mr); - -/** - * ib_create_mr - Allocates a memory region that may be used for - * signature handover operations. - * @pd: The protection domain associated with the region. - * @mr_init_attr: memory region init attributes. - */ -struct ib_mr *ib_create_mr(struct ib_pd *pd, - struct ib_mr_init_attr *mr_init_attr); - -/** - * ib_destroy_mr - Destroys a memory region that was created using - * ib_create_mr and removes it from HW translation tables. - * @mr: The memory region to destroy. - * - * This function can fail, if the memory region has memory windows bound to it. - */ -int ib_destroy_mr(struct ib_mr *mr); - -/** - * ib_alloc_fast_reg_mr - Allocates memory region usable with the - * IB_WR_FAST_REG_MR send work request. - * @pd: The protection domain associated with the region. - * @max_page_list_len: requested max physical buffer list length to be - * used with fast register work requests for this MR. - */ -struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); +struct ib_mr *ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); /** * ib_alloc_fast_reg_page_list - Allocates a page list array @@ -3040,4 +3019,8 @@ static inline int ib_check_mr_access(int flags) int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); +struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, + u16 pkey, const union ib_gid *gid, + const struct sockaddr *addr); + #endif /* IB_VERBS_H */ diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h new file mode 100644 index 000000000000..391dae1931c0 --- /dev/null +++ b/include/rdma/opa_port_info.h @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(OPA_PORT_INFO_H) +#define OPA_PORT_INFO_H + +/* Temporary until HFI driver is updated */ +#ifndef USE_PI_LED_ENABLE +#define USE_PI_LED_ENABLE 0 +#endif + +#define OPA_PORT_LINK_MODE_NOP 0 /* No change */ +#define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ + +#define OPA_PORT_PACKET_FORMAT_NOP 0 /* No change */ +#define OPA_PORT_PACKET_FORMAT_8B 1 /* Format 8B */ +#define OPA_PORT_PACKET_FORMAT_9B 2 /* Format 9B */ +#define OPA_PORT_PACKET_FORMAT_10B 4 /* Format 10B */ +#define OPA_PORT_PACKET_FORMAT_16B 8 /* Format 16B */ + +#define OPA_PORT_LTP_CRC_MODE_NONE 0 /* No change */ +#define OPA_PORT_LTP_CRC_MODE_14 1 /* 14-bit LTP CRC mode (optional) */ +#define OPA_PORT_LTP_CRC_MODE_16 2 /* 16-bit LTP CRC mode */ +#define OPA_PORT_LTP_CRC_MODE_48 4 /* 48-bit LTP CRC mode (optional) */ +#define OPA_PORT_LTP_CRC_MODE_PER_LANE 8 /* 12/16-bit per lane LTP CRC mode */ + +/* Link Down / Neighbor Link Down Reason; indicated as follows: */ +#define OPA_LINKDOWN_REASON_NONE 0 /* No specified reason */ +#define OPA_LINKDOWN_REASON_RCV_ERROR_0 1 +#define OPA_LINKDOWN_REASON_BAD_PKT_LEN 2 +#define OPA_LINKDOWN_REASON_PKT_TOO_LONG 3 +#define OPA_LINKDOWN_REASON_PKT_TOO_SHORT 4 +#define OPA_LINKDOWN_REASON_BAD_SLID 5 +#define OPA_LINKDOWN_REASON_BAD_DLID 6 +#define OPA_LINKDOWN_REASON_BAD_L2 7 +#define OPA_LINKDOWN_REASON_BAD_SC 8 +#define OPA_LINKDOWN_REASON_RCV_ERROR_8 9 +#define OPA_LINKDOWN_REASON_BAD_MID_TAIL 10 +#define OPA_LINKDOWN_REASON_RCV_ERROR_10 11 +#define OPA_LINKDOWN_REASON_PREEMPT_ERROR 12 +#define OPA_LINKDOWN_REASON_PREEMPT_VL15 13 +#define OPA_LINKDOWN_REASON_BAD_VL_MARKER 14 +#define OPA_LINKDOWN_REASON_RCV_ERROR_14 15 +#define OPA_LINKDOWN_REASON_RCV_ERROR_15 16 +#define OPA_LINKDOWN_REASON_BAD_HEAD_DIST 17 +#define OPA_LINKDOWN_REASON_BAD_TAIL_DIST 18 +#define OPA_LINKDOWN_REASON_BAD_CTRL_DIST 19 +#define OPA_LINKDOWN_REASON_BAD_CREDIT_ACK 20 +#define OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER 21 +#define OPA_LINKDOWN_REASON_BAD_PREEMPT 22 +#define OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT 23 +#define OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT 24 +#define OPA_LINKDOWN_REASON_RCV_ERROR_24 25 +#define OPA_LINKDOWN_REASON_RCV_ERROR_25 26 +#define OPA_LINKDOWN_REASON_RCV_ERROR_26 27 +#define OPA_LINKDOWN_REASON_RCV_ERROR_27 28 +#define OPA_LINKDOWN_REASON_RCV_ERROR_28 29 +#define OPA_LINKDOWN_REASON_RCV_ERROR_29 30 +#define OPA_LINKDOWN_REASON_RCV_ERROR_30 31 +#define OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN 32 +#define OPA_LINKDOWN_REASON_UNKNOWN 33 +/* 34 -reserved */ +#define OPA_LINKDOWN_REASON_REBOOT 35 +#define OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN 36 +/* 37-38 reserved */ +#define OPA_LINKDOWN_REASON_FM_BOUNCE 39 +#define OPA_LINKDOWN_REASON_SPEED_POLICY 40 +#define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 +/* 42-48 reserved */ +#define OPA_LINKDOWN_REASON_DISCONNECTED 49 +#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50 +#define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 +#define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 +/* 53 reserved */ +#define OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED 54 +/* 55 reserved */ +#define OPA_LINKDOWN_REASON_POWER_POLICY 56 +#define OPA_LINKDOWN_REASON_LINKSPEED_POLICY 57 +#define OPA_LINKDOWN_REASON_LINKWIDTH_POLICY 58 +/* 59 reserved */ +#define OPA_LINKDOWN_REASON_SWITCH_MGMT 60 +#define OPA_LINKDOWN_REASON_SMA_DISABLED 61 +/* 62 reserved */ +#define OPA_LINKDOWN_REASON_TRANSIENT 63 +/* 64-255 reserved */ + +/* OPA Link Init reason; indicated as follows: */ +/* 3-7; 11-15 reserved; 8-15 cleared on Polling->LinkUp */ +#define OPA_LINKINIT_REASON_NOP 0 +#define OPA_LINKINIT_REASON_LINKUP (1 << 4) +#define OPA_LINKINIT_REASON_FLAPPING (2 << 4) +#define OPA_LINKINIT_REASON_CLEAR (8 << 4) +#define OPA_LINKINIT_OUTSIDE_POLICY (8 << 4) +#define OPA_LINKINIT_QUARANTINED (9 << 4) +#define OPA_LINKINIT_INSUFIC_CAPABILITY (10 << 4) + +#define OPA_LINK_SPEED_NOP 0x0000 /* Reserved (1-5 Gbps) */ +#define OPA_LINK_SPEED_12_5G 0x0001 /* 12.5 Gbps */ +#define OPA_LINK_SPEED_25G 0x0002 /* 25.78125? Gbps (EDR) */ + +#define OPA_LINK_WIDTH_1X 0x0001 +#define OPA_LINK_WIDTH_2X 0x0002 +#define OPA_LINK_WIDTH_3X 0x0004 +#define OPA_LINK_WIDTH_4X 0x0008 + +#define OPA_CAP_MASK3_IsSnoopSupported (1 << 7) +#define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6) +#define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5) +#define OPA_CAP_MASK3_IsPassThroughSupported (1 << 4) +#define OPA_CAP_MASK3_IsSharedSpaceSupported (1 << 3) +/* reserved (1 << 2) */ +#define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1) +#define OPA_CAP_MASK3_IsVLrSupported (1 << 0) + +/** + * new MTU values + */ +enum { + OPA_MTU_8192 = 6, + OPA_MTU_10240 = 7, +}; + +enum { + OPA_PORT_PHYS_CONF_DISCONNECTED = 0, + OPA_PORT_PHYS_CONF_STANDARD = 1, + OPA_PORT_PHYS_CONF_FIXED = 2, + OPA_PORT_PHYS_CONF_VARIABLE = 3, + OPA_PORT_PHYS_CONF_SI_PHOTO = 4 +}; + +enum port_info_field_masks { + /* vl.cap */ + OPA_PI_MASK_VL_CAP = 0x1F, + /* port_states.ledenable_offlinereason */ + OPA_PI_MASK_OFFLINE_REASON = 0x0F, + OPA_PI_MASK_LED_ENABLE = 0x40, + /* port_states.unsleepstate_downdefstate */ + OPA_PI_MASK_UNSLEEP_STATE = 0xF0, + OPA_PI_MASK_DOWNDEF_STATE = 0x0F, + /* port_states.portphysstate_portstate */ + OPA_PI_MASK_PORT_PHYSICAL_STATE = 0xF0, + OPA_PI_MASK_PORT_STATE = 0x0F, + /* port_phys_conf */ + OPA_PI_MASK_PORT_PHYSICAL_CONF = 0x0F, + /* collectivemask_multicastmask */ + OPA_PI_MASK_COLLECT_MASK = 0x38, + OPA_PI_MASK_MULTICAST_MASK = 0x07, + /* mkeyprotect_lmc */ + OPA_PI_MASK_MKEY_PROT_BIT = 0xC0, + OPA_PI_MASK_LMC = 0x0F, + /* smsl */ + OPA_PI_MASK_SMSL = 0x1F, + /* partenforce_filterraw */ + /* Filter Raw In/Out bits 1 and 2 were removed */ + OPA_PI_MASK_LINKINIT_REASON = 0xF0, + OPA_PI_MASK_PARTITION_ENFORCE_IN = 0x08, + OPA_PI_MASK_PARTITION_ENFORCE_OUT = 0x04, + /* operational_vls */ + OPA_PI_MASK_OPERATIONAL_VL = 0x1F, + /* sa_qp */ + OPA_PI_MASK_SA_QP = 0x00FFFFFF, + /* sm_trap_qp */ + OPA_PI_MASK_SM_TRAP_QP = 0x00FFFFFF, + /* localphy_overrun_errors */ + OPA_PI_MASK_LOCAL_PHY_ERRORS = 0xF0, + OPA_PI_MASK_OVERRUN_ERRORS = 0x0F, + /* clientrereg_subnettimeout */ + OPA_PI_MASK_CLIENT_REREGISTER = 0x80, + OPA_PI_MASK_SUBNET_TIMEOUT = 0x1F, + /* port_link_mode */ + OPA_PI_MASK_PORT_LINK_SUPPORTED = (0x001F << 10), + OPA_PI_MASK_PORT_LINK_ENABLED = (0x001F << 5), + OPA_PI_MASK_PORT_LINK_ACTIVE = (0x001F << 0), + /* port_link_crc_mode */ + OPA_PI_MASK_PORT_LINK_CRC_SUPPORTED = 0x0F00, + OPA_PI_MASK_PORT_LINK_CRC_ENABLED = 0x00F0, + OPA_PI_MASK_PORT_LINK_CRC_ACTIVE = 0x000F, + /* port_mode */ + OPA_PI_MASK_PORT_MODE_SECURITY_CHECK = 0x0001, + OPA_PI_MASK_PORT_MODE_16B_TRAP_QUERY = 0x0002, + OPA_PI_MASK_PORT_MODE_PKEY_CONVERT = 0x0004, + OPA_PI_MASK_PORT_MODE_SC2SC_MAPPING = 0x0008, + OPA_PI_MASK_PORT_MODE_VL_MARKER = 0x0010, + OPA_PI_MASK_PORT_PASS_THROUGH = 0x0020, + OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE = 0x0040, + /* flit_control.interleave */ + OPA_PI_MASK_INTERLEAVE_DIST_SUP = (0x0003 << 12), + OPA_PI_MASK_INTERLEAVE_DIST_ENABLE = (0x0003 << 10), + OPA_PI_MASK_INTERLEAVE_MAX_NEST_TX = (0x001F << 5), + OPA_PI_MASK_INTERLEAVE_MAX_NEST_RX = (0x001F << 0), + + /* port_error_action */ + OPA_PI_MASK_EX_BUFFER_OVERRUN = 0x80000000, + /* 7 bits reserved */ + OPA_PI_MASK_FM_CFG_ERR_EXCEED_MULTICAST_LIMIT = 0x00800000, + OPA_PI_MASK_FM_CFG_BAD_CONTROL_FLIT = 0x00400000, + OPA_PI_MASK_FM_CFG_BAD_PREEMPT = 0x00200000, + OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER = 0x00100000, + OPA_PI_MASK_FM_CFG_BAD_CRDT_ACK = 0x00080000, + OPA_PI_MASK_FM_CFG_BAD_CTRL_DIST = 0x00040000, + OPA_PI_MASK_FM_CFG_BAD_TAIL_DIST = 0x00020000, + OPA_PI_MASK_FM_CFG_BAD_HEAD_DIST = 0x00010000, + /* 2 bits reserved */ + OPA_PI_MASK_PORT_RCV_BAD_VL_MARKER = 0x00002000, + OPA_PI_MASK_PORT_RCV_PREEMPT_VL15 = 0x00001000, + OPA_PI_MASK_PORT_RCV_PREEMPT_ERROR = 0x00000800, + /* 1 bit reserved */ + OPA_PI_MASK_PORT_RCV_BAD_MidTail = 0x00000200, + /* 1 bit reserved */ + OPA_PI_MASK_PORT_RCV_BAD_SC = 0x00000080, + OPA_PI_MASK_PORT_RCV_BAD_L2 = 0x00000040, + OPA_PI_MASK_PORT_RCV_BAD_DLID = 0x00000020, + OPA_PI_MASK_PORT_RCV_BAD_SLID = 0x00000010, + OPA_PI_MASK_PORT_RCV_PKTLEN_TOOSHORT = 0x00000008, + OPA_PI_MASK_PORT_RCV_PKTLEN_TOOLONG = 0x00000004, + OPA_PI_MASK_PORT_RCV_BAD_PKTLEN = 0x00000002, + OPA_PI_MASK_PORT_RCV_BAD_LT = 0x00000001, + + /* pass_through.res_drctl */ + OPA_PI_MASK_PASS_THROUGH_DR_CONTROL = 0x01, + + /* buffer_units */ + OPA_PI_MASK_BUF_UNIT_VL15_INIT = (0x00000FFF << 11), + OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE = (0x0000001F << 6), + OPA_PI_MASK_BUF_UNIT_CREDIT_ACK = (0x00000003 << 3), + OPA_PI_MASK_BUF_UNIT_BUF_ALLOC = (0x00000003 << 0), + + /* neigh_mtu.pvlx_to_mtu */ + OPA_PI_MASK_NEIGH_MTU_PVL0 = 0xF0, + OPA_PI_MASK_NEIGH_MTU_PVL1 = 0x0F, + + /* neigh_mtu.vlstall_hoq_life */ + OPA_PI_MASK_VL_STALL = (0x03 << 5), + OPA_PI_MASK_HOQ_LIFE = (0x1F << 0), + + /* port_neigh_mode */ + OPA_PI_MASK_NEIGH_MGMT_ALLOWED = (0x01 << 3), + OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS = (0x01 << 2), + OPA_PI_MASK_NEIGH_NODE_TYPE = (0x03 << 0), + + /* resptime_value */ + OPA_PI_MASK_RESPONSE_TIME_VALUE = 0x1F, + + /* mtucap */ + OPA_PI_MASK_MTU_CAP = 0x0F, +}; + +#if USE_PI_LED_ENABLE +struct opa_port_states { + u8 reserved; + u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */ + u8 reserved2; + u8 portphysstate_portstate; /* 4 bits, 4 bits */ +}; +#define PI_LED_ENABLE_SUP 1 +#else +struct opa_port_states { + u8 reserved; + u8 offline_reason; /* 2 res, 6 bits */ + u8 reserved2; + u8 portphysstate_portstate; /* 4 bits, 4 bits */ +}; +#define PI_LED_ENABLE_SUP 0 +#endif + +struct opa_port_state_info { + struct opa_port_states port_states; + u16 link_width_downgrade_tx_active; + u16 link_width_downgrade_rx_active; +}; + +struct opa_port_info { + __be32 lid; + __be32 flow_control_mask; + + struct { + u8 res; /* was inittype */ + u8 cap; /* 3 res, 5 bits */ + __be16 high_limit; + __be16 preempt_limit; + u8 arb_high_cap; + u8 arb_low_cap; + } vl; + + struct opa_port_states port_states; + u8 port_phys_conf; /* 4 res, 4 bits */ + u8 collectivemask_multicastmask; /* 2 res, 3, 3 */ + u8 mkeyprotect_lmc; /* 2 bits, 2 res, 4 bits */ + u8 smsl; /* 3 res, 5 bits */ + + u8 partenforce_filterraw; /* bit fields */ + u8 operational_vls; /* 3 res, 5 bits */ + __be16 pkey_8b; + __be16 pkey_10b; + __be16 mkey_violations; + + __be16 pkey_violations; + __be16 qkey_violations; + __be32 sm_trap_qp; /* 8 bits, 24 bits */ + + __be32 sa_qp; /* 8 bits, 24 bits */ + u8 neigh_port_num; + u8 link_down_reason; + u8 neigh_link_down_reason; + u8 clientrereg_subnettimeout; /* 1 bit, 2 bits, 5 */ + + struct { + __be16 supported; + __be16 enabled; + __be16 active; + } link_speed; + struct { + __be16 supported; + __be16 enabled; + __be16 active; + } link_width; + struct { + __be16 supported; + __be16 enabled; + __be16 tx_active; + __be16 rx_active; + } link_width_downgrade; + __be16 port_link_mode; /* 1 res, 5 bits, 5 bits, 5 bits */ + __be16 port_ltp_crc_mode; /* 4 res, 4 bits, 4 bits, 4 bits */ + + __be16 port_mode; /* 9 res, bit fields */ + struct { + __be16 supported; + __be16 enabled; + } port_packet_format; + struct { + __be16 interleave; /* 2 res, 2,2,5,5 */ + struct { + __be16 min_initial; + __be16 min_tail; + u8 large_pkt_limit; + u8 small_pkt_limit; + u8 max_small_pkt_limit; + u8 preemption_limit; + } preemption; + } flit_control; + + __be32 reserved4; + __be32 port_error_action; /* bit field */ + + struct { + u8 egress_port; + u8 res_drctl; /* 7 res, 1 */ + } pass_through; + __be16 mkey_lease_period; + __be32 buffer_units; /* 9 res, 12, 5, 3, 3 */ + + __be32 reserved5; + __be32 sm_lid; + + __be64 mkey; + + __be64 subnet_prefix; + + struct { + u8 pvlx_to_mtu[OPA_MAX_VLS/2]; /* 4 bits, 4 bits */ + } neigh_mtu; + + struct { + u8 vlstall_hoqlife; /* 3 bits, 5 bits */ + } xmit_q[OPA_MAX_VLS]; + + struct { + u8 addr[16]; + } ipaddr_ipv6; + + struct { + u8 addr[4]; + } ipaddr_ipv4; + + u32 reserved6; + u32 reserved7; + u32 reserved8; + + __be64 neigh_node_guid; + + __be32 ib_cap_mask; + __be16 reserved9; /* was ib_cap_mask2 */ + __be16 opa_cap_mask; + + __be32 reserved10; /* was link_roundtrip_latency */ + __be16 overall_buffer_space; + __be16 reserved11; /* was max_credit_hint */ + + __be16 diag_code; + struct { + u8 buffer; + u8 wire; + } replay_depth; + u8 port_neigh_mode; + u8 mtucap; /* 4 res, 4 bits */ + + u8 resptimevalue; /* 3 res, 5 bits */ + u8 local_port_num; + u8 reserved12; + u8 reserved13; /* was guid_cap */ +} __attribute__ ((packed)); + +#endif /* OPA_PORT_INFO_H */ diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h index 29063e84c253..4a529ef47995 100644 --- a/include/rdma/opa_smi.h +++ b/include/rdma/opa_smi.h @@ -40,6 +40,10 @@ #define OPA_SMP_DR_DATA_SIZE 1872 #define OPA_SMP_MAX_PATH_HOPS 64 +#define OPA_MAX_VLS 32 +#define OPA_MAX_SLS 32 +#define OPA_MAX_SCS 32 + #define OPA_SMI_CLASS_VERSION 0x80 #define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF) @@ -73,6 +77,49 @@ struct opa_smp { } __packed; +/* Subnet management attributes */ +/* ... */ +#define OPA_ATTRIB_ID_NODE_DESCRIPTION cpu_to_be16(0x0010) +#define OPA_ATTRIB_ID_NODE_INFO cpu_to_be16(0x0011) +#define OPA_ATTRIB_ID_PORT_INFO cpu_to_be16(0x0015) +#define OPA_ATTRIB_ID_PARTITION_TABLE cpu_to_be16(0x0016) +#define OPA_ATTRIB_ID_SL_TO_SC_MAP cpu_to_be16(0x0017) +#define OPA_ATTRIB_ID_VL_ARBITRATION cpu_to_be16(0x0018) +#define OPA_ATTRIB_ID_SM_INFO cpu_to_be16(0x0020) +#define OPA_ATTRIB_ID_CABLE_INFO cpu_to_be16(0x0032) +#define OPA_ATTRIB_ID_AGGREGATE cpu_to_be16(0x0080) +#define OPA_ATTRIB_ID_SC_TO_SL_MAP cpu_to_be16(0x0082) +#define OPA_ATTRIB_ID_SC_TO_VLR_MAP cpu_to_be16(0x0083) +#define OPA_ATTRIB_ID_SC_TO_VLT_MAP cpu_to_be16(0x0084) +#define OPA_ATTRIB_ID_SC_TO_VLNT_MAP cpu_to_be16(0x0085) +/* ... */ +#define OPA_ATTRIB_ID_PORT_STATE_INFO cpu_to_be16(0x0087) +/* ... */ +#define OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE cpu_to_be16(0x008A) +/* ... */ + +struct opa_node_description { + u8 data[64]; +} __attribute__ ((packed)); + +struct opa_node_info { + u8 base_version; + u8 class_version; + u8 node_type; + u8 num_ports; + __be32 reserved; + __be64 system_image_guid; + __be64 node_guid; + __be64 port_guid; + __be16 partition_cap; + __be16 device_id; + __be32 revision; + u8 local_port_num; + u8 vendor_id[3]; /* network byte order */ +} __attribute__ ((packed)); + +#define OPA_PARTITION_TABLE_BLK_SIZE 32 + static inline u8 opa_get_smp_direction(struct opa_smp *smp) { diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 0790882e0c9b..585266144329 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -77,4 +77,11 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int group, gfp_t flags); +/** + * Check if there are any listeners to the netlink group + * @group: the netlink group ID + * Returns 0 on success or a negative for no listeners. + */ +int ibnl_chk_listeners(unsigned int group); + #endif /* _RDMA_NETLINK_H */ |