Home | History | Annotate | Download | only in rpc
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*
     23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef	_RPC_RPC_RDMA_H
     28 #define	_RPC_RPC_RDMA_H
     29 
     30 #pragma ident	"@(#)rpc_rdma.h	1.9	05/06/08 SMI"
     31 
     32 #include <rpc/rpc.h>
     33 #include <rpc/rpc_sztypes.h>
     34 #include <sys/sunddi.h>
     35 #include <sys/sunldi.h>
     36 
     37 #ifdef __cplusplus
     38 extern "C" {
     39 #endif
     40 
/*
 * Version numbers. RDMATF_VERS and RDMATF_VERS_1 currently carry the
 * same value (1).
 */
#define	RPCRDMA_VERS	0	/* Version of the RPC over RDMA protocol */
#define	RDMATF_VERS	1	/* Version of the API used by RPC for RDMA */
#define	RDMATF_VERS_1	1	/* Current version of RDMATF */

/*
 * The size of an RPC call or reply message
 */
#define	RPC_MSG_SZ  1024

/*
 * Storage for a chunk list
 */
#define	RPC_CL_SZ  1024

/*
 * Chunk size
 */
#define	MINCHUNK  1024

/*
 * Size of receive buffer
 */
#define	RPC_BUF_SIZE	2048

/*
 * Completion modes.
 * NOTE(review): presumably the values passed as the `wait' argument of
 * RDMA_READ()/RDMA_WRITE() -- confirm against callers.
 */
#define	NOWAIT	0	/* don't wait for operation to complete */
#define	WAIT	1	/* wait and ensure that operation is complete */

/*
 * RDMA xdr buffer control and other control flags. Add new flags here,
 * set them in private structure for xdr over RDMA in xdr_rdma.c
 */
#define	RDMA_NOCHUNK		0x1
     73 
/*
 * Return codes from RDMA operations.
 *
 * This is the return type of most entries in the rdmaops_t operations
 * vector. RDMA_SUCCESS is zero; all remaining values describe an error
 * and are numbered explicitly, grouped into general errors, connection
 * errors, transport errors, RDMATF registration errors and a generic
 * fallback.
 */
typedef enum {

	RDMA_SUCCESS = 0,	/* successful operation */

	RDMA_INVAL = 1,		/* invalid parameter */
	RDMA_TIMEDOUT = 2,	/* operation timed out */
	RDMA_INTR = 3,		/* operation interrupted */
	RDMA_NORESOURCE = 4,	/* insufficient resource */
	/*
	 * connection errors
	 */
	RDMA_REJECT = 5,	/* connection req rejected */
	RDMA_NOLISTENER = 6,	/* no listener on server */
	RDMA_UNREACHABLE = 7,	/* host unreachable */
	RDMA_CONNLOST = 8,	/* connection lost */

	RDMA_XPRTFAILED = 9,	/* RDMA transport failed */
	RDMA_PROTECTERR = 10,	/* memory protection error */
	RDMA_OVERRUN = 11,	/* transport overrun */
	RDMA_RECVQEMPTY = 12,	/* incoming pkt dropped, recv q empty */
	RDMA_PROTFAILED = 13,	/* RDMA protocol failed */
	RDMA_NOTSUPP = 14,	/* requested feature not supported */
	RDMA_REMOTERR = 15,	/* error at remote end */
	/*
	 * RDMATF errors
	 */
	RDMA_BADVERS = 16,	/* mismatch RDMATF versions */
	RDMA_REG_EXIST = 17,	/* RDMATF registration already exists */

	/*
	 * fallback error
	 */
	RDMA_FAILED = 18	/* generic error */
} rdma_stat;
    111 
/*
 * Memory region context. This is an RDMA provider generated
 * handle for a registered arbitrary size contiguous virtual
 * memory. The RDMA Interface Adapter needs this for local or
 * remote memory access.
 *
 * The mrc_rmr field holds the remote memory region context
 * which is sent over-the-wire to provide the remote host
 * with RDMA access to the memory region.
 *
 * The local-side information lives in the lhdl union, which
 * currently has a single member (struct mr).
 */
struct mrc {
	uint32_t	mrc_rmr;	/* Remote MR context, sent OTW */
	union {
		struct mr {
			uint32_t	lmr; 	/* Local MR context */
			uint64_t	linfo;	/* Local memory info */
		} mr;
	} lhdl;
};

/*
 * Shorthand member names for the local fields nested inside lhdl.mr,
 * so that they can be accessed as mrc.mrc_lmr and mrc.mrc_linfo.
 */
#define	mrc_lmr		lhdl.mr.lmr
#define	mrc_linfo	lhdl.mr.linfo
    134 
/*
 * Chunk list entry.
 *
 * The XDR offset value is used by the XDR
 * routine to identify the position in the
 * RPC message where the opaque object would
 * normally occur. Neither the data content
 * of the chunk, nor its size field are included
 * in the RPC message.  The XDR offset is calculated
 * as if the chunks were present.
 *
 * The remaining fields identify the chunk of data.
 * Each chunk carries a source and a destination
 * description: c_smemhandle/c_dmemhandle identify a
 * registered RDMA memory region, and c_saddr/c_daddr
 * together with c_len identify the chunk within it.
 * c_ssynchandle/c_dsynchandle hold the corresponding
 * sync handles. Entries are chained through c_next.
 */
struct clist {
	uint32		c_xdroff;	/* XDR offset */
	uint32		c_len;		/* Length */
	struct mrc	c_smemhandle;	/* src memory handle */
	uint64 		c_ssynchandle;	/* src sync handle */
	uint64		c_saddr;	/* src address */
	struct mrc	c_dmemhandle;	/* dst memory handle */
	uint64		c_dsynchandle;	/* dst sync handle */
	uint64		c_daddr;	/* dst address */
	struct clist	*c_next;	/* Next chunk */
};

typedef struct clist clist;
    162 
/*
 * RDMA procedure (message type) codes. The per-value comments describe
 * what accompanies a message of that type.
 */
enum rdma_proc {
	RDMA_MSG	= 0,	/* chunk list and RPC msg follow */
	RDMA_NOMSG	= 1,	/* only chunk list follows */
	RDMA_MSGP	= 2,	/* chunk list and RPC msg with padding follow */
	RDMA_DONE	= 3	/* signal completion of chunk transfer */
};
    169 
/*
 * Listener information for a service.
 *
 * This is the argument handed to the rdma_svc_listen and rdma_svc_stop
 * entries of the rdmaops_t operations vector.
 */
struct rdma_svc_data {
	queue_t		q;	/* queue_t to place incoming pkts */
	int		active;	/* If active, after registration startup */
	rdma_stat	err_code;	/* Error code from plugin layer */
	int32_t		svcid;		/* RDMA based service identifier */
};
    179 
/*
 * Per RDMA plugin module information.
 * Will be populated by each plugin
 * module during its initialization.
 *
 * rdma_ops points at the plugin's operations vector (rdmaops_t), which
 * is how the RDMA_* wrapper macros reach the plugin's entry points.
 */
typedef struct rdma_mod {
	char 		*rdma_api;		/* "kvipl", "ibtf", etc */
	uint_t 		rdma_version;		/* RDMATF API version */
	int		rdma_count;		/* # of devices */
	struct rdmaops 	*rdma_ops;		/* rdma op vector for api */
} rdma_mod_t;
    191 
/*
 * Registry of RDMA plugins.
 *
 * One node per registered plugin, singly linked through r_next.
 */
typedef struct rdma_registry {
	rdma_mod_t	*r_mod;		/* plugin mod info */
	struct rdma_registry *r_next;	/* next registered RDMA plugin */
} rdma_registry_t;
    199 
/*
 * RDMA transport information, as filled in through the rdma_getinfo
 * entry of the rdmaops_t operations vector.
 */
typedef struct rdma_info {
	uint_t	addrlen;	/* address length */
	uint_t  mts;		/* max transfer size */
	uint_t  mtu;		/* native mtu size of underlying network */
} rdma_info_t;
    208 
/*
 * RDMA Connection information.
 *
 * Connections are kept on a doubly linked list (c_next/c_prev).
 * c_lock protects c_state and c_ref; c_cv is signalled when a pending
 * operation is done.
 */
typedef struct conn {
	rdma_mod_t	*c_rdmamod;	/* RDMA transport info for conn */
	struct netbuf	c_raddr;	/* remote address */
	struct netbuf	c_laddr;	/* local address */
	int		c_ref;		/* no. of clients of connection */
	struct conn	*c_next;	/* next in list of connections */
	struct conn	*c_prev;	/* prev in list of connections */
	caddr_t		c_private;	/* transport specific stuff */

/*
 * Connection state bits.
 * NOTE(review): presumably the values stored in c_state below -- confirm
 * against the code that sets and tests c_state.
 */
#define	C_IDLE		0x80000000
#define	C_CONN_PEND	0x40000000
#define	C_CONNECTED	0x20000000
#define	C_ERROR		0x10000000
#define	C_DISCONN_PEND	0x08000000
#define	C_REMOTE_DOWN	0x04000000

	uint_t		c_state;	/* state of connection */
	kmutex_t	c_lock;		/* protect c_state and c_ref fields */
	kcondvar_t	c_cv;		/* to signal when pending is done */
} CONN;
    232 
    233 
/*
 * Memory management for the RDMA buffers
 */

/*
 * RDMA buffer types. The enumerators carry no explicit values, so they
 * are numbered consecutively starting at zero in declaration order.
 */
typedef enum {
	SEND_BUFFER,	/* buf for send msg */
	SEND_DESCRIPTOR, /* buf used for send msg descriptor in plugins only */
	RECV_BUFFER,	/* buf for recv msg */
	RECV_DESCRIPTOR, /* buf used for recv msg descriptor in plugins only */
	CHUNK_BUFFER	/* chunk buf used in RDMATF only and not in plugins */
} rdma_btype;
    247 
/*
 * RDMA buffer information.
 *
 * Describes one buffer: its kind, length, address and the memory
 * registration handle that goes with it. This is the buffer argument
 * of the rdma_buf_alloc and rdma_buf_free operations.
 */
typedef struct rdma_buf {
	rdma_btype	type;	/* buffer type */
	int		len;	/* length of buffer */
	caddr_t		addr;	/* buffer address */
	struct mrc	handle;	/* buffer registration handle */
} rdma_buf_t;
    257 
/*
 * Data transferred from plugin interrupt to svc_queuereq()
 */
struct recv_data {
	CONN		*conn;		/* connection the data belongs to */
	int		status;		/* NOTE(review): meaning not shown here */
	rdma_buf_t	rpcmsg;		/* buffer describing the RPC message */
};
    266 
/*
 * Operations vector for RDMA transports.
 *
 * Each plugin supplies one of these through rdma_mod_t.rdma_ops. The
 * entries are normally invoked through the RDMA_* wrapper macros rather
 * than directly. All entries return rdma_stat except rdma_svc_listen,
 * rdma_svc_stop and rdma_buf_free, which return nothing.
 */
typedef struct rdmaops {
	/* Network */
	rdma_stat	(*rdma_reachable)(int addr_type, struct netbuf *,
						void **handle);
	/* Connection */
	rdma_stat	(*rdma_get_conn)(struct netbuf *, int addr_type,
						void *, CONN **);
	rdma_stat	(*rdma_rel_conn)(CONN *);
	/* Server side listener start and stop routines */
	void		(*rdma_svc_listen)(struct rdma_svc_data *);
	void		(*rdma_svc_stop)(struct rdma_svc_data *);
	/* Memory */
	rdma_stat	(*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *);
	rdma_stat	(*rdma_deregmem)(CONN *, caddr_t, struct mrc);
	rdma_stat	(*rdma_regmemsync)(CONN *, caddr_t, uint_t,
				struct mrc *, void **);
	rdma_stat	(*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
				void *);
	rdma_stat	(*rdma_syncmem)(CONN *, void *, caddr_t, int, int);
	/* Buffer */
	rdma_stat	(*rdma_buf_alloc)(CONN *, rdma_buf_t *);
	void		(*rdma_buf_free)(CONN *, rdma_buf_t *);
	/* Transfer */
	rdma_stat	(*rdma_send)(CONN *, clist *, uint32_t);
	rdma_stat	(*rdma_send_resp)(CONN *, clist *, uint32_t);
	rdma_stat	(*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t);
	rdma_stat	(*rdma_svc_recvbuf)(CONN *, clist *);
	rdma_stat	(*rdma_recv)(CONN *, clist **, uint32_t);
	/* RDMA */
	rdma_stat	(*rdma_read)(CONN *, clist *, int);
	rdma_stat	(*rdma_write)(CONN *, clist *, int);
	/* INFO */
	rdma_stat	(*rdma_getinfo)(rdma_info_t *info);

} rdmaops_t;
    305 
    306 /*
    307  * RDMA operations.
    308  */
    309 #define	RDMA_REACHABLE(rdma_ops, addr_type, addr, handle)	\
    310 	(*(rdma_ops)->rdma_reachable)(addr_type, addr, handle)
    311 
    312 #define	RDMA_GET_CONN(rdma_ops, addr, addr_type, handle, conn)	\
    313 	(*(rdma_ops)->rdma_get_conn)(addr, addr_type, handle, conn)
    314 
    315 #define	RDMA_REL_CONN(conn)	\
    316 	(*(conn)->c_rdmamod->rdma_ops->rdma_rel_conn)(conn)
    317 
    318 #define	RDMA_REGMEM(conn, buff, len, handle)	\
    319 	(*(conn)->c_rdmamod->rdma_ops->rdma_regmem)(conn, buff, len, handle)
    320 
    321 #define	RDMA_DEREGMEM(conn, buff, handle)	\
    322 	(*(conn)->c_rdmamod->rdma_ops->rdma_deregmem)(conn, buff, handle)
    323 
    324 #define	RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle)	\
    325 	(*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, buff, \
    326 	    len, handle, synchandle)
    327 
    328 #define	RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle)	\
    329 	(*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \
    330 	    handle, synchandle)
    331 
    332 #define	RDMA_SYNCMEM(conn, handle, buff, len, direction)	\
    333 	(*(conn)->c_rdmamod->rdma_ops->rdma_syncmem)(conn, handle, \
    334 	    buff, len, direction)
    335 
    336 #define	RDMA_BUF_ALLOC(conn, rbuf)	\
    337 	(*(conn)->c_rdmamod->rdma_ops->rdma_buf_alloc)(conn, rbuf)
    338 
    339 #define	RDMA_BUF_FREE(conn, rbuf)	\
    340 	(*(conn)->c_rdmamod->rdma_ops->rdma_buf_free)(conn, rbuf)
    341 
    342 #define	RDMA_SEND(conn, sendlist, xid)	\
    343 	(*(conn)->c_rdmamod->rdma_ops->rdma_send)(conn, sendlist, xid)
    344 
    345 #define	RDMA_SEND_RESP(conn, sendlist, xid)	\
    346 	(*(conn)->c_rdmamod->rdma_ops->rdma_send_resp)(conn, sendlist, xid)
    347 
    348 #define	RDMA_CLNT_RECVBUF(conn, cl, xid)	\
    349 	(*(conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf)(conn, cl, xid)
    350 
    351 #define	RDMA_SVC_RECVBUF(conn, cl)	\
    352 	(*(conn)->c_rdmamod->rdma_ops->rdma_svc_recvbuf)(conn, cl)
    353 
    354 #define	RDMA_RECV(conn, recvlist, xid)	\
    355 	(*(conn)->c_rdmamod->rdma_ops->rdma_recv)(conn, recvlist, xid)
    356 
    357 #define	RDMA_READ(conn, cl, wait)	\
    358 	(*(conn)->c_rdmamod->rdma_ops->rdma_read)(conn, cl, wait)
    359 
    360 #define	RDMA_WRITE(conn, cl, wait)	\
    361 	(*(conn)->c_rdmamod->rdma_ops->rdma_write)(conn, cl, wait)
    362 
    363 #define	RDMA_GETINFO(rdma_mod, info)	\
    364 	(*(rdma_mod)->rdma_ops->rdma_getinfo)(info)
    365 
    366 #ifdef _KERNEL
    367 extern rdma_registry_t	*rdma_mod_head;
    368 extern krwlock_t rdma_lock;		/* protects rdma_mod_head list */
    369 extern int rdma_modloaded;		/* flag for loading RDMA plugins */
    370 extern int rdma_dev_available;		/* rdma device is loaded or not */
    371 extern kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
    372 extern uint_t rdma_minchunk;
    373 extern ldi_ident_t rpcmod_li; 		/* needed by layed driver framework */
    374 
    375 /*
    376  * General RDMA routines
    377  */
    378 extern void clist_add(struct clist **clp, uint32_t xdroff, int len,
    379 	struct mrc *shandle, caddr_t saddr,
    380 	struct mrc *dhandle, caddr_t daddr);
    381 extern void clist_free(struct clist *cl);
    382 extern int clist_register(CONN *conn, struct clist *cl, bool_t src);
    383 extern int clist_deregister(CONN *conn, struct clist *cl, bool_t src);
    384 rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid);
    385 rdma_stat rdma_svc_postrecv(CONN *conn);
    386 extern rdma_stat clist_syncmem(CONN *conn, struct clist *cl, bool_t src);
    387 extern rdma_stat rdma_register_mod(rdma_mod_t *mod);
    388 extern rdma_stat rdma_unregister_mod(rdma_mod_t *mod);
    389 extern void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf);
    390 extern int rdma_modload();
    391 
    392 /*
    393  * RDMA XDR
    394  */
    395 extern void xdrrdma_create(XDR *, caddr_t, uint_t, int, struct clist *,
    396 	enum xdr_op, CONN *);
    397 extern void xdrrdma_destroy(XDR *);
    398 extern struct clist *xdrrdma_clist(XDR *);
    399 extern uint_t xdrrdma_getpos(XDR *);
    400 extern bool_t xdrrdma_setpos(XDR *, uint_t);
    401 extern bool_t xdr_clist(XDR *, clist *);
    402 extern bool_t xdr_do_clist(XDR *, clist **);
    403 extern uint_t xdr_getbufsize(XDR *);
    404 unsigned int xdrrdma_sizeof(xdrproc_t func, void *data, int min_chunk);
    405 unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred, int min_chunk);
    406 #endif /* _KERNEL */
    407 
    408 #ifdef __cplusplus
    409 }
    410 #endif
    411 
    412 #endif	/* _RPC_RPC_RDMA_H */
    413