Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 	.file	"memset.s"
     28 
     29 /*
     30  * void *memset(sp, c, n)
     31  *
     32  * Set an array of n chars starting at sp to the character c.
     33  * Return sp.
     34  *
     35  * Fast assembler language version of the following C-program for memset
     36  * which represents the `standard' for the C-library.
     37  *
     38  *	void *
     39  *	memset(void *sp1, int c, size_t n)
     40  *	{
     41  *	    if (n != 0) {
     42  *		char *sp = sp1;
     43  *		do {
     44  *		    *sp++ = (char)c;
     45  *		} while (--n != 0);
     46  *	    }
     47  *	    return (sp1);
     48  *	}
     49  */
     50 
     51 #include <sys/asm_linkage.h>
     52 #include <sys/sun4asi.h>
     53 
     54 	ANSI_PRAGMA_WEAK(memset,function)
        /*
         * NOTE(review): ANSI_PRAGMA_WEAK is defined outside this file; it
         * presumably gives memset a weak binding -- confirm in the header.
         */
     55 
        /* Round X up to the next multiple of 8. */
     56 #define	ALIGN8(X)	(((X) + 7) & ~7)
     57 
        /*
         * Code goes in the text section, and the entry point that follows
         * starts on a 32-byte boundary.
         */
     58 	.section        ".text"
     59 	.align 32
     60 
     61 	ENTRY(memset)
        	! Register use in this routine:
        	!   %o0       sp1; never written, so it is still the return value
        	!   %o1       fill value; on the .wrbig path it is masked to one
        	!             byte and then replicated into all 8 bytes
        	!   %o2       bytes still to be set
        	!   %o3, %o4  scratch counters
        	!   %o5       running store pointer (starts as a copy of sp1)
        	!   %g1       saved %fprs bit 0x4 (block-store path only)
        	! The block-store path (more than 4095 bytes left once the
        	! pointer is 8-byte aligned) overwrites %d0-%d14 and does not
        	! save them.
     62 	cmp	%o2, 12			! if small counts, just write bytes
     63 	bgeu,pn	%ncc, .wrbig		! 12 or more (unsigned): use wide stores
     64 	mov	%o0, %o5		! copy sp1 before using it (delay slot, runs on both paths)
     65 
     66 .wrchar:
     67 	deccc   %o2			! byte clearing loop; carry is set once the count underflows
     68         inc     %o5
     69 	bgeu,a,pt %ncc, .wrchar		! annulling branch: the store below runs only when taken
     70         stb     %o1, [%o5 + -1]         ! we've already incremented the address
     71 
     72         retl
     73 	.empty	! next instruction is safe, %o0 still good
        	! (the delay slot of the retl is the andcc at .wrbig, which
        	! writes only %o3 and the condition codes)
     74 
     75 .wrbig:
     76         andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
     77         bz,pt	%ncc, .blkchk		! already double aligned
     78 	and	%o1, 0xff, %o1		! o1 is (char)c (delay slot, runs either way)
     79         sub	%o3, 8, %o3		! -(bytes till double aligned)
     80         add	%o2, %o3, %o2		! update o2 with new count
     81 
     82 	! Set -(%o3) bytes till sp1 double aligned
     83 1:	stb	%o1, [%o5]		! there is at least 1 byte to set
     84 	inccc	%o3			! byte clearing loop; %o3 counts up toward zero
     85         bl,pt	%ncc, 1b		! repeat while %o3 is still negative
     86         inc	%o5			! delay slot: advance the pointer on every pass
     87 
     88 
     89 	! Now sp1 is double aligned (sp1 is found in %o5)
     90 .blkchk:
     91 	sll     %o1, 8, %o3
     92         or      %o1, %o3, %o1		! now o1 has 2 bytes of c
     93 
     94         sll     %o1, 16, %o3
     95         or      %o1, %o3, %o1		! now o1 has 4 bytes of c
     96 
     97 	cmp     %o2, 4095		! if large count use Block ld/st
     98 
     99 	sllx	%o1, 32, %o3		! sllx and or do not touch the cmp result
    100 	or	%o1, %o3, %o1		! now o1 has 8 bytes of c
    101 
    102         bgu,a,pn %ncc, .blkwr		! Do block write for large count
    103         andcc   %o5, 63, %o3            ! is sp1 block aligned? (runs only when the branch is taken)
    104 
    105 	and	%o2, 24, %o3		! o3 is {0, 8, 16, 24}
    106 
        	! Store the 0-3 doublewords selected above, so that what is
        	! left is a multiple of 32 (for loop 2) plus a tail below 8
        	! (for loop 4).  On exit %o5 is 8 bytes beyond the next byte
        	! to set; the offsets used in loops 2 and 4 allow for that.
    107 1:	subcc	%o3, 8, %o3		! double-word loop
    108 	add	%o5, 8, %o5
    109 	bgeu,a,pt %ncc, 1b		! annulling branch: no store on the final pass
    110 	stx	%o1, [%o5 - 8]		! already incremented the address
    111 
    112 	andncc	%o2, 31, %o4		! o4 has 32 byte aligned count
    113 	bz,pn	%ncc, 3f		! First instruction of icache line
        	! The subcc at 2: doubles as the delay slot of the bz above;
        	! when the branch is taken its result is not used.
    114 2:
    115 	subcc	%o4, 32, %o4		! main loop, 32 bytes per iteration
    116 	stx	%o1, [%o5 - 8]		! offsets start at -8 because %o5 runs 8 ahead
    117 	stx	%o1, [%o5]
    118 	stx	%o1, [%o5 + 8]
    119 	stx	%o1, [%o5 + 16]
    120 	bnz,pt	%ncc, 2b		! tests the subcc at the top of the loop
    121 	add	%o5, 32, %o5
    122 
    123 3:
    124 	and	%o2, 7, %o2		! o2 has the remaining bytes (<8)
    125 
    126 4:
    127 	deccc   %o2                     ! byte clearing loop
    128         inc     %o5
    129         bgeu,a,pt %ncc, 4b
    130         stb     %o1, [%o5 - 9]		! already incremented the address (-9: the 8-byte lead plus the inc)
    131 
    132 	retl
    133 	nop				! %o0 still preserved
    134 
    135 .blkwr:
        	! The condition codes here come from the andcc in the delay
        	! slot of the branch that jumped to .blkwr.
    136         bz,pn   %ncc, .blalign		! now block aligned
    137         sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
    138 	add	%o2, %o3, %o2		! o2 is the remainder
    139 
    140         ! Store -(%o3) bytes till dst is block (64 byte) aligned.
    141         ! Use double word stores.
    142 	! Recall that dst is already double word aligned
    143 1:
    144         stx     %o1, [%o5]
    145 	addcc   %o3, 8, %o3
    146 	bl,pt	%ncc, 1b
    147 	add     %o5, 8, %o5
    148 
    149 	! sp1 is block aligned
    150 .blalign:
    151         rd      %fprs, %g1              ! g1 = fprs
    152 
    153 	and	%o2, 63, %o3		! calc bytes left after blk store.
    154 
        	! Only bit 0x4 (fef) of the old fprs is kept in %g1.  .exit
        	! writes %g1 back, so du and dl end up 0 and fef ends up as
        	! it was on entry.
    155 	andcc	%g1, 0x4, %g1		! fprs.du = fprs.dl = 0
    156 	bz,a	%ncc, 2f		! Is fprs.fef == 0
    157         wr      %g0, 0x4, %fprs         ! fprs.fef = 1 (annulled unless fef was 0)
    158 2:
    159 	brnz,pn	%o1, 3f			! %o1 is safe to check all 64-bits
    160 	andn	%o2, 63, %o4		! calc size of blocks in bytes
        	! Fill pattern is zero: build it directly in %d0-%d12.
    161 	fzero   %d0
    162 	fzero   %d2
    163 	fzero   %d4
    164 	fzero   %d6
    165 	fmuld   %d0, %d0, %d8		! zero times zero: %d8 is zero as well
    166 	fzero   %d10
    167 	ba	4f
    168 	fmuld   %d0, %d0, %d12		! delay slot: %d12 is zero as well
    169 
    170 3:
    171 	! allocate scratch space on the stack; one doubleword of it is
        	! used to move the 8-byte pattern from %o1 into %d0
    172 	add	%sp, -SA(16), %sp
    173 	stx	%o1, [%sp + STACK_BIAS + ALIGN8(MINFRAME)]  ! move %o1 to %d0
    174 	ldd	[%sp + STACK_BIAS + ALIGN8(MINFRAME)], %d0
    175 
    176 	fmovd	%d0, %d2
    177 	add	%sp, SA(16), %sp	! deallocate the scratch space
    178 	fmovd	%d0, %d4
    179 	fmovd	%d0, %d6
    180 	fmovd	%d0, %d8
    181 	fmovd	%d0, %d10
    182 	fmovd	%d0, %d12
    183 4:
    184 	fmovd	%d0, %d14		! label 4 is also the loop top, so this repeats on every pass
    185 
    186 	! 1st quadrant has 64 bytes of c
    187 	! instructions 32-byte aligned here
    188 
    189         stda    %d0, [%o5]ASI_BLK_P
    190         subcc   %o4, 64, %o4
    191         bgu,pt	%ncc, 4b
    192         add     %o5, 64, %o5
    193 
    194 	! Set the remaining doubles
    195 	subcc   %o3, 8, %o3		! Can we store any doubles?
    196 	blu,pn  %ncc, 6f
    197 	and	%o2, 7, %o2		! calc bytes left after doubles (delay slot, runs either way)
    198 
    199 5:
    200 	std     %d0, [%o5]		! store the doubles
    201 	subcc   %o3, 8, %o3
    202 	bgeu,pt	%ncc, 5b
    203         add     %o5, 8, %o5
    204 6:
    205 	! Set the remaining bytes
        	! With the #else arm below compiled in, the delay slot of this
        	! brz is the deccc at 7:.  When the branch is taken, %o2 and
        	! the condition codes are not used again.
    206 	brz	%o2, .exit		! safe to check all 64-bits
    207 
    208 #if 0
    209 	! Terminate the copy with a partial store. (bug 1200071 does not apply)
    210 	! The data should be at d0
    211         dec     %o2                     ! needed to get the mask right
    212 	edge8n	%g0, %o2, %o4
    213 	stda	%d0, [%o5]%o4, ASI_PST8_P
    214 #else
    215 7:
    216 	deccc	%o2
    217 	stb	%o1, [%o5]
    218 	bgu,pt	%ncc, 7b		! repeat while bytes remain after the decrement
    219 	inc	%o5
    220 #endif
    221 
    222 .exit:
    223         membar  #StoreLoad|#StoreStore
    224         retl				! %o0 was preserved
    225         wr	%g1, %g0, %fprs         ! fprs = g1  restore fprs (delay slot)
    226 
    227 	SET_SIZE(memset)
    228