Home | History | Annotate | Download | only in ixgbe
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
      5  * The contents of this file are subject to the terms of the
      6  * Common Development and Distribution License (the "License").
      7  * You may not use this file except in compliance with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 
     23 /*
     24  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     25  * Use is subject to license terms.
     26  */
     27 
     28 #include "ixgbe_sw.h"
     29 
     30 /* function prototypes */
     31 static mblk_t *ixgbe_rx_bind(ixgbe_rx_data_t *, uint32_t, uint32_t);
     32 static mblk_t *ixgbe_rx_copy(ixgbe_rx_data_t *, uint32_t, uint32_t);
     33 static void ixgbe_rx_assoc_hcksum(mblk_t *, uint32_t);
     34 
     35 #ifndef IXGBE_DEBUG
     36 #pragma inline(ixgbe_rx_assoc_hcksum)
     37 #endif
     38 
     39 /*
     40  * ixgbe_rx_recycle - The call-back function to reclaim rx buffer.
     41  *
     42  * This function is called when an mp is freed by the user thru
     43  * freeb call (Only for mp constructed through desballoc call).
     44  * It returns back the freed buffer to the free list.
     45  */
     46 void
     47 ixgbe_rx_recycle(caddr_t arg)
     48 {
     49 	ixgbe_t *ixgbe;
     50 	ixgbe_rx_ring_t *rx_ring;
     51 	ixgbe_rx_data_t	*rx_data;
     52 	rx_control_block_t *recycle_rcb;
     53 	uint32_t free_index;
     54 	uint32_t ref_cnt;
     55 
     56 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
     57 	rx_data = recycle_rcb->rx_data;
     58 	rx_ring = rx_data->rx_ring;
     59 	ixgbe = rx_ring->ixgbe;
     60 
     61 	if (recycle_rcb->ref_cnt == 0) {
     62 		/*
     63 		 * This case only happens when rx buffers are being freed
     64 		 * in ixgbe_stop() and freemsg() is called.
     65 		 */
     66 		return;
     67 	}
     68 
     69 	ASSERT(recycle_rcb->mp == NULL);
     70 
     71 	/*
     72 	 * Using the recycled data buffer to generate a new mblk
     73 	 */
     74 	recycle_rcb->mp = desballoc((unsigned char *)
     75 	    recycle_rcb->rx_buf.address,
     76 	    recycle_rcb->rx_buf.size,
     77 	    0, &recycle_rcb->free_rtn);
     78 
     79 	/*
     80 	 * Put the recycled rx control block into free list
     81 	 */
     82 	mutex_enter(&rx_data->recycle_lock);
     83 
     84 	free_index = rx_data->rcb_tail;
     85 	ASSERT(rx_data->free_list[free_index] == NULL);
     86 
     87 	rx_data->free_list[free_index] = recycle_rcb;
     88 	rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
     89 
     90 	mutex_exit(&rx_data->recycle_lock);
     91 
     92 	/*
     93 	 * The atomic operation on the number of the available rx control
     94 	 * blocks in the free list is used to make the recycling mutual
     95 	 * exclusive with the receiving.
     96 	 */
     97 	atomic_inc_32(&rx_data->rcb_free);
     98 	ASSERT(rx_data->rcb_free <= rx_data->free_list_size);
     99 
    100 	/*
    101 	 * Considering the case that the interface is unplumbed
    102 	 * and there are still some buffers held by the upper layer.
    103 	 * When the buffer is returned back, we need to free it.
    104 	 */
    105 	ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
    106 	if (ref_cnt == 0) {
    107 		if (recycle_rcb->mp != NULL) {
    108 			freemsg(recycle_rcb->mp);
    109 			recycle_rcb->mp = NULL;
    110 		}
    111 
    112 		ixgbe_free_dma_buffer(&recycle_rcb->rx_buf);
    113 
    114 		mutex_enter(&ixgbe->rx_pending_lock);
    115 		atomic_dec_32(&rx_data->rcb_pending);
    116 		atomic_dec_32(&ixgbe->rcb_pending);
    117 
    118 		/*
    119 		 * When there is not any buffer belonging to this rx_data
    120 		 * held by the upper layer, the rx_data can be freed.
    121 		 */
    122 		if ((rx_data->flag & IXGBE_RX_STOPPED) &&
    123 		    (rx_data->rcb_pending == 0))
    124 			ixgbe_free_rx_ring_data(rx_data);
    125 
    126 		mutex_exit(&ixgbe->rx_pending_lock);
    127 	}
    128 }
    129 
    130 /*
    131  * ixgbe_rx_copy - Use copy to process the received packet.
    132  *
    133  * This function will use bcopy to process the packet
    134  * and send the copied packet upstream.
    135  */
    136 static mblk_t *
    137 ixgbe_rx_copy(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
    138 {
    139 	ixgbe_t *ixgbe;
    140 	rx_control_block_t *current_rcb;
    141 	mblk_t *mp;
    142 
    143 	ixgbe = rx_data->rx_ring->ixgbe;
    144 	current_rcb = rx_data->work_list[index];
    145 
    146 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
    147 
    148 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
    149 	    DDI_FM_OK) {
    150 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
    151 	}
    152 
    153 	/*
    154 	 * Allocate buffer to receive this packet
    155 	 */
    156 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
    157 	if (mp == NULL) {
    158 		ixgbe_log(ixgbe, "ixgbe_rx_copy: allocate buffer failed");
    159 		return (NULL);
    160 	}
    161 
    162 	/*
    163 	 * Copy the data received into the new cluster
    164 	 */
    165 	mp->b_rptr += IPHDR_ALIGN_ROOM;
    166 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
    167 	mp->b_wptr = mp->b_rptr + pkt_len;
    168 
    169 	return (mp);
    170 }
    171 
    172 /*
    173  * ixgbe_rx_bind - Use existing DMA buffer to build mblk for receiving.
    174  *
    175  * This function will use pre-bound DMA buffer to receive the packet
    176  * and build mblk that will be sent upstream.
    177  */
    178 static mblk_t *
    179 ixgbe_rx_bind(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
    180 {
    181 	rx_control_block_t *current_rcb;
    182 	rx_control_block_t *free_rcb;
    183 	uint32_t free_index;
    184 	mblk_t *mp;
    185 	ixgbe_t	*ixgbe = rx_data->rx_ring->ixgbe;
    186 
    187 	/*
    188 	 * If the free list is empty, we cannot proceed to send
    189 	 * the current DMA buffer upstream. We'll have to return
    190 	 * and use bcopy to process the packet.
    191 	 */
    192 	if (ixgbe_atomic_reserve(&rx_data->rcb_free, 1) < 0)
    193 		return (NULL);
    194 
    195 	current_rcb = rx_data->work_list[index];
    196 	/*
    197 	 * If the mp of the rx control block is NULL, try to do
    198 	 * desballoc again.
    199 	 */
    200 	if (current_rcb->mp == NULL) {
    201 		current_rcb->mp = desballoc((unsigned char *)
    202 		    current_rcb->rx_buf.address,
    203 		    current_rcb->rx_buf.size,
    204 		    0, &current_rcb->free_rtn);
    205 		/*
    206 		 * If it is failed to built a mblk using the current
    207 		 * DMA buffer, we have to return and use bcopy to
    208 		 * process the packet.
    209 		 */
    210 		if (current_rcb->mp == NULL) {
    211 			atomic_inc_32(&rx_data->rcb_free);
    212 			return (NULL);
    213 		}
    214 	}
    215 	/*
    216 	 * Sync up the data received
    217 	 */
    218 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
    219 
    220 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
    221 	    DDI_FM_OK) {
    222 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
    223 	}
    224 
    225 	mp = current_rcb->mp;
    226 	current_rcb->mp = NULL;
    227 	atomic_inc_32(&current_rcb->ref_cnt);
    228 
    229 	mp->b_wptr = mp->b_rptr + pkt_len;
    230 	mp->b_next = mp->b_cont = NULL;
    231 
    232 	/*
    233 	 * Strip off one free rx control block from the free list
    234 	 */
    235 	free_index = rx_data->rcb_head;
    236 	free_rcb = rx_data->free_list[free_index];
    237 	ASSERT(free_rcb != NULL);
    238 	rx_data->free_list[free_index] = NULL;
    239 	rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
    240 
    241 	/*
    242 	 * Put the rx control block to the work list
    243 	 */
    244 	rx_data->work_list[index] = free_rcb;
    245 
    246 	return (mp);
    247 }
    248 
    249 /*
    250  * ixgbe_rx_assoc_hcksum - Check the rx hardware checksum status and associate
    251  * the hcksum flags.
    252  */
    253 static void
    254 ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
    255 {
    256 	uint32_t hcksum_flags = 0;
    257 
    258 	/*
    259 	 * Check TCP/UDP checksum
    260 	 */
    261 	if ((status_error & IXGBE_RXD_STAT_L4CS) &&
    262 	    !(status_error & IXGBE_RXDADV_ERR_TCPE))
    263 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
    264 
    265 	/*
    266 	 * Check IP Checksum
    267 	 */
    268 	if ((status_error & IXGBE_RXD_STAT_IPCS) &&
    269 	    !(status_error & IXGBE_RXDADV_ERR_IPE))
    270 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
    271 
    272 	if (hcksum_flags != 0) {
    273 		(void) hcksum_assoc(mp,
    274 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
    275 	}
    276 }
    277 
    278 /*
    279  * ixgbe_ring_rx - Receive the data of one ring.
    280  *
    281  * This function goes throught h/w descriptor in one specified rx ring,
    282  * receives the data if the descriptor status shows the data is ready.
    283  * It returns a chain of mblks containing the received data, to be
    284  * passed up to mac_rx().
    285  */
    286 mblk_t *
    287 ixgbe_ring_rx(ixgbe_rx_ring_t *rx_ring, int poll_bytes)
    288 {
    289 	union ixgbe_adv_rx_desc *current_rbd;
    290 	rx_control_block_t *current_rcb;
    291 	mblk_t *mp;
    292 	mblk_t *mblk_head;
    293 	mblk_t **mblk_tail;
    294 	uint32_t rx_next;
    295 	uint32_t rx_tail;
    296 	uint32_t pkt_len;
    297 	uint32_t status_error;
    298 	uint32_t pkt_num;
    299 	uint32_t received_bytes;
    300 	ixgbe_t *ixgbe = rx_ring->ixgbe;
    301 	ixgbe_rx_data_t *rx_data = rx_ring->rx_data;
    302 
    303 	mblk_head = NULL;
    304 	mblk_tail = &mblk_head;
    305 
    306 	/*
    307 	 * Sync the receive descriptors before accepting the packets
    308 	 */
    309 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);
    310 
    311 	if (ixgbe_check_dma_handle(rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
    312 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
    313 	}
    314 
    315 	/*
    316 	 * Get the start point of rx bd ring which should be examined
    317 	 * during this cycle.
    318 	 */
    319 	rx_next = rx_data->rbd_next;
    320 
    321 	current_rbd = &rx_data->rbd_ring[rx_next];
    322 	received_bytes = 0;
    323 	pkt_num = 0;
    324 	status_error = current_rbd->wb.upper.status_error;
    325 	while (status_error & IXGBE_RXD_STAT_DD) {
    326 		/*
    327 		 * If adapter has found errors, but the error
    328 		 * is hardware checksum error, this does not discard the
    329 		 * packet: let upper layer compute the checksum;
    330 		 * Otherwise discard the packet.
    331 		 */
    332 		if ((status_error & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
    333 		    !(status_error & IXGBE_RXD_STAT_EOP)) {
    334 			IXGBE_DEBUG_STAT(rx_ring->stat_frame_error);
    335 			goto rx_discard;
    336 		}
    337 
    338 		IXGBE_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
    339 		    (status_error & IXGBE_RXDADV_ERR_TCPE) ||
    340 		    (status_error & IXGBE_RXDADV_ERR_IPE));
    341 
    342 		pkt_len = current_rbd->wb.upper.length;
    343 
    344 		if ((poll_bytes != IXGBE_POLL_NULL) &&
    345 		    ((received_bytes + pkt_len) > poll_bytes))
    346 			break;
    347 
    348 		received_bytes += pkt_len;
    349 
    350 		mp = NULL;
    351 		/*
    352 		 * For packets with length more than the copy threshold,
    353 		 * we'll first try to use the existing DMA buffer to build
    354 		 * an mblk and send the mblk upstream.
    355 		 *
    356 		 * If the first method fails, or the packet length is less
    357 		 * than the copy threshold, we'll allocate a new mblk and
    358 		 * copy the packet data to the new mblk.
    359 		 */
    360 		if (pkt_len > ixgbe->rx_copy_thresh)
    361 			mp = ixgbe_rx_bind(rx_data, rx_next, pkt_len);
    362 
    363 		if (mp == NULL)
    364 			mp = ixgbe_rx_copy(rx_data, rx_next, pkt_len);
    365 
    366 		if (mp != NULL) {
    367 			/*
    368 			 * Check h/w checksum offload status
    369 			 */
    370 			if (ixgbe->rx_hcksum_enable)
    371 				ixgbe_rx_assoc_hcksum(mp, status_error);
    372 
    373 			*mblk_tail = mp;
    374 			mblk_tail = &mp->b_next;
    375 		}
    376 
    377 rx_discard:
    378 		/*
    379 		 * Reset rx descriptor read bits
    380 		 */
    381 		current_rcb = rx_data->work_list[rx_next];
    382 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
    383 		current_rbd->read.hdr_addr = 0;
    384 
    385 		rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);
    386 
    387 		/*
    388 		 * The receive function is in interrupt context, so here
    389 		 * rx_limit_per_intr is used to avoid doing receiving too long
    390 		 * per interrupt.
    391 		 */
    392 		if (++pkt_num > ixgbe->rx_limit_per_intr) {
    393 			IXGBE_DEBUG_STAT(rx_ring->stat_exceed_pkt);
    394 			break;
    395 		}
    396 
    397 		current_rbd = &rx_data->rbd_ring[rx_next];
    398 		status_error = current_rbd->wb.upper.status_error;
    399 	}
    400 
    401 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);
    402 
    403 	rx_data->rbd_next = rx_next;
    404 
    405 	/*
    406 	 * Update the h/w tail accordingly
    407 	 */
    408 	rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);
    409 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail);
    410 
    411 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
    412 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
    413 	}
    414 
    415 	return (mblk_head);
    416 }
    417 
    418 mblk_t *
    419 ixgbe_ring_rx_poll(void *arg, int n_bytes)
    420 {
    421 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)arg;
    422 	mblk_t *mp = NULL;
    423 
    424 	ASSERT(n_bytes >= 0);
    425 
    426 	if (n_bytes == 0)
    427 		return (mp);
    428 
    429 	mutex_enter(&rx_ring->rx_lock);
    430 	mp = ixgbe_ring_rx(rx_ring, n_bytes);
    431 	mutex_exit(&rx_ring->rx_lock);
    432 
    433 	return (mp);
    434 }
    435