Home | History | Annotate | Download | only in rpc
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*
     23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef _IB_H
     28 #define	_IB_H
     29 
     30 #pragma ident	"@(#)ib.h	1.8	05/06/08 SMI"
     31 
     32 /*
     33  * ib.h, rpcib plugin interface.
     34  */
     35 
     36 #include <sys/types.h>
     37 #include <sys/ddi.h>
     38 #include <sys/sunddi.h>
     39 #include <sys/conf.h>
     40 #include <sys/stat.h>
     41 #include <rpc/rpc.h>
     42 #include <rpc/rpc_rdma.h>
     43 #include <sys/ib/ibtl/ibti.h>
     44 
     45 #ifdef __cplusplus
     46 extern "C" {
     47 #endif
     48 
     49 #define	MAX_BUFS	256	/* max no. of buffers per pool */
     50 #define	DEF_CQ_SIZE	4096 - 1	/* default CQ size */
     51 				/*
     52 				 * Tavor returns the next higher power of 2
     53 				 * CQ entries than the requested size.
     54 				 * For instance, if you request (2^12 - 1)
     55 				 * CQ entries, Tavor returns 2^12 entries.
     56 				 * 4K CQ entries suffice.  Hence, 4096 - 1.
     57 				 */
     58 #define	DEF_SQ_SIZE	128	/* default SendQ size */
     59 #define	DEF_RQ_SIZE	256	/* default RecvQ size */
     60 #define	DSEG_MAX	2
     61 #define	RQ_DSEG_MAX	1	/* default RQ data seg */
     62 #define	IBSRM_HB	0x8000	/* high order bit of pkey */
     63 #define	NFS_SEC_KEY0	0x6878	/* randomly selected NFS security key */
     64 #define	NFS_SEC_KEY1	0x8679
     65 
     66 /* max no. of refresh attempts on IBT_CM_CONN_STALE error */
     67 #define	REFRESH_ATTEMPTS	3
     68 
     69 typedef struct rib_hca_s rib_hca_t;
     70 typedef struct rib_qp_s rib_qp_t;
     71 typedef struct rib_cq_s rib_cq_t;
     72 
     73 /*
     74  * Notification for RDMA_DONE is based on xid
     75  */
     76 struct rdma_done_list {
     77 	uint32_t	xid;		/* XID waiting for RDMA_DONE */
     78 	kcondvar_t	rdma_done_cv;	/* cv for RDMA_DONE */
     79 	struct rdma_done_list	*next;
     80 	struct rdma_done_list	*prev;
     81 };
     82 
     83 /*
     84  * State of the plugin.
     85  * ACCEPT = accepting new connections and requests
     86  * NO_ACCEPT = not accepting new connection and requests
     87  */
     88 #define	ACCEPT		1
     89 #define	NO_ACCEPT	2
     90 
     91 /*
     92  * Send Wait states
     93  */
     94 #define	SEND_WAIT	-1
     95 
     96 /*
     97  * Reply states
     98  */
     99 #define	REPLY_WAIT	-1
    100 
    101 typedef void * rib_pvoid;
    102 typedef rib_pvoid RIB_SYNCMEM_HANDLE;
    103 
    104 /*
    105  * IB buffer pool management structure
    106  */
    107 
    108 /*
    109  * Buffer pool info
    110  */
    111 typedef struct {
    112 	kmutex_t	buflock;	/* lock for this structure */
    113 	caddr_t		buf;		/* pool address */
    114 	uint32_t	bufhandle;	/* rkey for this pool */
    115 	ulong_t		bufsize;	/* size of pool */
    116 	int		rsize;		/* size of each element */
    117 	int		numelems;	/* no. of elements allocated */
    118 	int		buffree;	/* no. of free elements */
    119 	void		*buflist[1];	/* free elements in pool */
    120 } bufpool_t;
    121 
    122 typedef struct {
    123 	bufpool_t	*bpool;
    124 	ibt_mr_hdl_t	*mr_hdl;
    125 	ibt_mr_desc_t	*mr_desc;	/* vaddr, lkey, rkey */
    126 } rib_bufpool_t;
    127 
    128 /*
    129  * ATS relsted defines and structures.
    130  */
    131 #define	ATS_AR_DATA_LEN	16
    132 #define	IBD_NAME	"ibd"
    133 #define	N_IBD_INSTANCES	4
    134 
    135 typedef struct rpcib_ats_s {
    136 	int			ras_inst;
    137 	ib_pkey_t		ras_pkey;
    138 	ib_gid_t		ras_port_gid;
    139 	sa_family_t		ras_inet_type;
    140 	union {
    141 		struct sockaddr_in	ras_sockaddr;
    142 		struct sockaddr_in6	ras_sockaddr6;
    143 	} ra_sin;
    144 #define	ras_sin			ra_sin.ras_sockaddr
    145 #define	ras_sin6		ra_sin.ras_sockaddr6
    146 } rpcib_ats_t;
    147 
    148 typedef struct rpcib_ibd_insts_s {
    149 	int			rib_ibd_alloc;
    150 	int			rib_ibd_cnt;
    151 	rpcib_ats_t		*rib_ats;
    152 } rpcib_ibd_insts_t;
    153 
    154 /*
    155  * Service types supported by RPCIB
    156  * For now only NFS is supported.
    157  */
    158 #define	NFS		1
    159 #define	NLM		2
    160 
    161 /*
    162  * Tracks consumer state (client or server).
    163  */
    164 typedef enum {
    165 	RIB_SERVER,
    166 	RIB_CLIENT
    167 } rib_mode_t;
    168 
    169 /*
    170  * CQ structure
    171  */
    172 struct rib_cq_s {
    173 	rib_hca_t		*rib_hca;
    174 	ibt_cq_hdl_t		rib_cq_hdl;
    175 };
    176 
    177 /*
    178  * RPCIB plugin state
    179  */
    180 typedef struct rpcib_state {
    181 	ibt_clnt_hdl_t		ibt_clnt_hdl;
    182 	uint32_t		hca_count;
    183 	uint32_t		nhca_inited;
    184 	ib_guid_t		*hca_guids;
    185 	rib_hca_t		*hcas;
    186 	int			refcount;
    187 	kmutex_t		open_hca_lock;
    188 	rib_hca_t		*hca;		/* the hca being used */
    189 	queue_t			*q;		/* up queue for a serv_type */
    190 	uint32_t		service_type;	/* NFS, NLM, etc */
    191 	void			*private;
    192 } rpcib_state_t;
    193 
    194 /*
    195  * Each registered service's data structure.
    196  * Each HCA has a list of these structures, which are the registered
    197  * services on this HCA.
    198  */
    199 typedef struct rib_service rib_service_t;
    200 struct rib_service {
    201 	uint32_t		srv_type;	/* i.e, NFS, NLM, v4CBD */
    202 
    203 	/*
    204 	 * service name, i.e, <IP>::NFS or <IP>::NLM. Since
    205 	 * each type of service can be registered with many
    206 	 * IP addrs(srv_name) and is running on all ports
    207 	 * for all HCAs.
    208 	 */
    209 	char			*srv_name;
    210 
    211 	uint32_t		srv_port;	/* port on which registered */
    212 	ib_svc_id_t		srv_id;		/* from ibt_register call */
    213 	ibt_srv_hdl_t		srv_hdl;	/* from ibt_register call */
    214 	ibt_sbind_hdl_t		*srv_sbind_hdl;	/* from ibt_bind call */
    215 	ibt_ar_t		srv_ar;
    216 
    217 	/*
    218 	 * pointer to the next service registered on this
    219 	 * particular HCA
    220 	 */
    221 	rib_service_t		*srv_next;
    222 };
    223 
    224 /*
    225  * Connection lists
    226  */
    227 typedef struct {
    228 	krwlock_t	conn_lock;	/* list lock */
    229 	CONN		*conn_hd;	/* list head */
    230 } rib_conn_list_t;
    231 
    232 enum hca_state {
    233 	HCA_INITED,		/* hca in up and running state */
    234 	HCA_DETACHED		/* hca in detached state */
    235 };
    236 
    237 /*
    238  * RPCIB per HCA structure
    239  */
    240 struct rib_hca_s {
    241 	ibt_clnt_hdl_t		ibt_clnt_hdl;
    242 
    243 	/*
    244 	 * per HCA.
    245 	 */
    246 	ibt_hca_hdl_t		hca_hdl;	/* HCA handle */
    247 	ibt_hca_attr_t		hca_attrs;	/* HCA attributes */
    248 	ibt_pd_hdl_t		pd_hdl;
    249 	ib_guid_t		hca_guid;
    250 	uint32_t		hca_nports;
    251 	ibt_hca_portinfo_t	*hca_ports;
    252 	size_t			hca_pinfosz;
    253 	enum hca_state		state;		/* state of HCA */
    254 	krwlock_t		state_lock;	/* protects state field */
    255 	bool_t			inuse;		/* indicates HCA usage */
    256 	kmutex_t		inuse_lock;	/* protects inuse field */
    257 	/*
    258 	 * List of services registered on all ports available
    259 	 * on this HCA. Only one consumer of KRPC can register
    260 	 * its services at one time or tear them down at one
    261 	 * time.
    262 	 */
    263 	rib_service_t	*service_list;
    264 	krwlock_t		service_list_lock;
    265 
    266 	rib_service_t	*ats_list;		/* Service list for ATS */
    267 
    268 	rib_conn_list_t		cl_conn_list;	/* client conn list */
    269 	rib_conn_list_t		srv_conn_list;	/* server conn list */
    270 
    271 	rib_cq_t		*clnt_scq;
    272 	rib_cq_t		*clnt_rcq;
    273 	rib_cq_t		*svc_scq;
    274 	rib_cq_t		*svc_rcq;
    275 	kmutex_t		cb_lock;
    276 	kcondvar_t		cb_cv;
    277 
    278 	rib_bufpool_t		*recv_pool;	/* recv buf pool */
    279 	rib_bufpool_t		*send_pool;	/* send buf pool */
    280 
    281 	void			*iblock;	/* interrupt cookie */
    282 };
    283 
    284 
    285 /*
    286  * Structure on wait state of a post send
    287  */
    288 struct send_wid {
    289 	uint32_t 	xid;
    290 	int		cv_sig;
    291 	kmutex_t	sendwait_lock;
    292 	kcondvar_t	wait_cv;
    293 	uint_t		status;
    294 	rib_qp_t	*qp;
    295 	int		nsbufs;			/* # of send buffers posted */
    296 	uint64_t	sbufaddr[DSEG_MAX];	/* posted send buffers */
    297 };
    298 
    299 /*
    300  * Structure on reply descriptor for recv queue.
    301  * Different from the above posting of a descriptor.
    302  */
    303 struct reply {
    304 	uint32_t 	xid;
    305 	uint_t		status;
    306 	uint64_t	vaddr_cq;	/* buf addr from CQ */
    307 	uint_t		bytes_xfer;
    308 	kcondvar_t	wait_cv;
    309 	struct reply	*next;
    310 	struct reply 	*prev;
    311 };
    312 
    313 struct svc_recv {
    314 	rib_qp_t	*qp;
    315 	uint64_t	vaddr;
    316 	uint_t		bytes_xfer;
    317 };
    318 
    319 struct recv_wid {
    320 	uint32_t 	xid;
    321 	rib_qp_t	*qp;
    322 	uint64_t	addr;	/* posted buf addr */
    323 };
    324 
    325 /*
    326  * Per QP data structure
    327  */
    328 struct rib_qp_s {
    329 	rib_hca_t		*hca;
    330 	rib_mode_t		mode;	/* RIB_SERVER or RIB_CLIENT */
    331 	CONN			rdmaconn;
    332 	ibt_channel_hdl_t	qp_hdl;
    333 	uint_t			port_num;
    334 	ib_qpn_t		qpn;
    335 	int			chan_flags;
    336 	clock_t			timeout;
    337 	ibt_rc_chan_query_attr_t	qp_q_attrs;
    338 	rib_cq_t		*send_cq;	/* send CQ */
    339 	rib_cq_t		*recv_cq;	/* recv CQ */
    340 
    341 	/*
    342 	 * Number of pre-posted rbufs
    343 	 */
    344 	uint_t			n_posted_rbufs;
    345 	kcondvar_t 		posted_rbufs_cv;
    346 	kmutex_t		posted_rbufs_lock;
    347 
    348 	/*
    349 	 * RPC reply
    350 	 */
    351 	uint_t			rep_list_size;
    352 	struct reply		*replylist;
    353 	kmutex_t		replylist_lock;
    354 
    355 	/*
    356 	 * server only, RDMA_DONE
    357 	 */
    358 	struct rdma_done_list	*rdlist;
    359 	kmutex_t		rdlist_lock;
    360 
    361 	kmutex_t		cb_lock;
    362 	kcondvar_t 		cb_conn_cv;
    363 
    364 	caddr_t			q;	/* upstream queue */
    365 };
    366 
    367 #define	ctoqp(conn)	((rib_qp_t *)((conn)->c_private))
    368 #define	qptoc(rqp)	((CONN *)&((rqp)->rdmaconn))
    369 
    370 /*
    371  * Timeout for various calls
    372  */
    373 #define	CONN_WAIT_TIME	40
    374 #define	SEND_WAIT_TIME	40	/* time for send completion */
    375 
    376 #define	REPLY_WAIT_TIME	40	/* time to get reply from remote QP */
    377 
    378 #ifdef __cplusplus
    379 }
    380 #endif
    381 
    382 #endif	/* !_IB_H */
    383