1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
|
/*
* (c) Copyright 1986 HEWLETT-PACKARD COMPANY
*
* To anyone who acknowledges that this file is provided "AS IS"
* without any express or implied warranty:
* permission to use, copy, modify, and distribute this file
* for any purpose is hereby granted without fee, provided that
* the above copyright notice and this notice appears in all
* copies, and that the name of Hewlett-Packard Company not be
* used in advertising or publicity pertaining to distribution
* of the software without specific, written prior permission.
* Hewlett-Packard Company makes no representations about the
* suitability of this software for any purpose.
*/
/*
A faster strcpy.
by
Jerry Huck (aligned case)
Daryl Odnert (equal-alignment case)
Edgar Circenis (non-aligned case)
*/
/*
* strcpy(s1, s2)
*
* Copy the null-terminated string s2, including its terminating null
* byte, to s1. s1 must be large enough to hold it.
* Returns s1.
*/
#include "DEFS.h"
/*
* Register aliases used by strcpy.
*
* d_addr is the destination pointer (s1) and s_addr the source pointer
* (s2); both are advanced as the copy proceeds.
*
* tmp1/evenside name the same register (r19) and tmp2/oddside name the
* same register (r20). The word-aligned loop uses the evenside/oddside
* names; the byte, halfword and non-aligned paths use the tmp names.
*
* save receives the word that is being tested for a null byte and is
* the word handed to the final-store code.
*
* tmp5 is defined but not referenced by the strcpy code visible in this
* file.
*/
#define d_addr r26
#define s_addr r25
#define tmp6 r24
#define tmp1 r19
#define evenside r19
#define tmp2 r20
#define oddside r20
#define tmp3 r21
#define tmp4 r22
#define tmp5 arg3
#define save r1
ENTRY(strcpy)
/*
* Entry: s_addr = s2 (source), d_addr = s1 (destination). ret0 is set to
* the original d_addr before d_addr is advanced; that is the value
* returned.
*
* tmp6 is built as a 4-bit index: the low two bits of d_addr in its upper
* half and the low two bits of s_addr in its lower half. When the index
* is zero both pointers are word aligned and control falls through to
* bothaligned; otherwise case_analysis dispatches on it.
*
* Reading aid: several instructions below conditionally nullify (skip)
* the instruction that follows them, and most branches are followed by a
* delay-slot instruction. The existing "delay slot" notes mark those.
*/
/* Quick alignment check. The fast path is both pointers word aligned. */
/* The ldwm below is skipped when the source is not word aligned. When it */
/* does run it also advances s_addr by 4 (see pos_aligned_copy0). */
extru,<> s_addr,31,2,tmp6 /*Is source word aligned? */
ldwm 4(0,s_addr),oddside /*Assume yes and guess that it
is double-word aligned. */
dep,= d_addr,29,2,tmp6 /*Is target word aligned? */
b case_analysis /* skipped when tmp6 == 0, i.e. both aligned */
copy d_addr,ret0 /* return value = s1; runs on both paths */
/* Both are aligned. First source word already loaded assuming that
source was oddword aligned. Fall through (therefore fastest) code
shuffles the registers to join the main loop */
bothaligned:
bb,>= s_addr,29,twoatatime /*Branch if source was odd aligned*/
uxor,nbz oddside,r0,save /* delay slot: save = first word; skip next insn if it has no null byte */
/* Even aligned source. save holds that operand.
Do one iteration of the main copy loop juggling the registers to avoid
one copy. */
b,n nullfound /* first word contains the terminator */
ldwm 4(s_addr),oddside /* load second word */
stwm save,4(d_addr) /* store first word */
uxor,nbz oddside,r0,save /* null byte in second word? */
b,n nullfound
ldwm 4(s_addr),evenside /* load third word */
stwm oddside,4(d_addr) /* store second word */
uxor,nbz evenside,r0,save /* null byte in third word? */
b,n nullfound
ldwm 4(s_addr),oddside /* load fourth word */
/* Main loop body. Entry expects evenside still to be stored, oddside
just loaded. Each pass copies two words. */
loop:
stwm evenside,4(d_addr)
uxor,nbz oddside,r0,save /* save = oddside; skip the branch below if no null byte */
/* mid loop entry */
twoatatime:
b,n nullfound /* oddside contains the terminator */
ldwm 4(s_addr),evenside
stwm oddside,4(d_addr)
uxor,sbz evenside,r0,save /* save = evenside; skip "b loop" if some byte is null */
b loop
ldwm 4(s_addr),oddside /* delay slot of "b loop"; also runs on the fall-through */
/* fall through when null found in evenside. oddside actually loaded */
/* NOTE(review): on that fall-through the load above reads the source */
/* word that follows the word holding the terminator. Confirm that this */
/* read past the end of the string is acceptable on the target system. */
/*
* nullfound: save holds the word that contains the terminating null and
* d_addr points at the place where that word belongs. The bytes of save
* are tested from left to right. For a null in byte 1, 2 or 3, d_addr is
* advanced past the null and stbys,e stores the leading bytes of save up
* to that point. If none of the first three bytes is null, the whole
* word is stored with stw.
*/
nullfound: /* adjust d_addr and store final word */
extru,<> save,7,8,r0 /* pick up leftmost byte */
addib,tr,n 1,d_addr,store_final /* null is byte 1: store 1 byte */
extru,<> save,15,8,r0 /* second byte non-null? then skip next */
addib,tr,n 2,d_addr,store_final /* null is byte 2: store 2 bytes */
extru,<> save,23,8,r0 /* third byte non-null? then skip next */
/* No ,n on the next branch: when it is taken, the bv below runs in its */
/* delay slot, then the stbys,e at store_final2, then control returns */
/* through rp. NOTE(review): relies on a branch in a delay slot; confirm */
/* against the architecture manual before changing anything here. */
addib,tr 3,d_addr,store_final2 /* null is byte 3: store 3 bytes */
bv 0(rp) /* return */
stw save,0(d_addr) /* delay slot: null is byte 4, store the whole word */
store_final:
bv 0(rp) /* return; the store below is its delay slot */
store_final2:
stbys,e save,0(d_addr) /* delay slot */
/*
* case_analysis: at least one pointer is not word aligned. blr indexes
* the table that follows by tmp6; every table entry is two instructions
* (a branch and its delay slot).
*/
case_analysis:
blr tmp6,r0
nop
/* NOTE: the delay slots for the non-aligned cases load a */
/* shift quantity which is TGT-SRC into tmp3. */
/* Note also, the case for both strings being word aligned */
/* is already checked before the BLR is executed, so that */
/* case can never occur. */
/* The "00" target cases use neg_aligned_copy even though the source */
/* word was not loaded; the entry code skips that load whenever the */
/* source is not word aligned. The pos_aligned_copy0 cases are the ones */
/* where the source is word aligned and the entry load did run. */
/* TGT SRC */
nop /* 00 00 can't happen */
nop
b neg_aligned_copy /* 00 01 */
ldi -1,tmp3 /* load shift quantity. delay slot */
b neg_aligned_copy /* 00 10 */
ldi -2,tmp3 /* load shift quantity. delay slot */
b neg_aligned_copy /* 00 11 */
ldi -3,tmp3 /* load shift quantity. delay slot */
b pos_aligned_copy0 /* 01 00 */
ldi 1,tmp3 /* load shift quantity. delay slot */
b equal_alignment_1 /* 01 01 */
ldbs,ma 1(s_addr),tmp1 /* delay slot: load first byte, advance s_addr */
b neg_aligned_copy /* 01 10 */
ldi -1,tmp3 /* load shift quantity. delay slot */
b neg_aligned_copy /* 01 11 */
ldi -2,tmp3 /* load shift quantity. delay slot */
b pos_aligned_copy0 /* 10 00 */
ldi 2,tmp3 /* load shift quantity. delay slot */
b pos_aligned_copy /* 10 01 */
ldi 1,tmp3 /* load shift quantity. delay slot */
b equal_alignment_2 /* 10 10 */
ldhs,ma 2(s_addr),tmp1 /* delay slot: load first halfword, advance s_addr */
b neg_aligned_copy /* 10 11 */
ldi -1,tmp3 /* load shift quantity. delay slot */
b pos_aligned_copy0 /* 11 00 */
ldi 3,tmp3 /* load shift quantity. delay slot */
b pos_aligned_copy /* 11 01 */
ldi 2,tmp3 /* load shift quantity. delay slot */
b pos_aligned_copy /* 11 10 */
ldi 1,tmp3 /* load shift quantity. delay slot */
/* Last table entry is handled inline: copy one byte, after which both */
/* pointers are word aligned. */
ldbs,ma 1(s_addr),tmp1 /* 11 11 */
comiclr,<> r0,tmp1,r0 /* nullify next if tmp1 <> 0 */
bv 0(rp) /* return if 1st byte was null */
stbs,ma tmp1,1(d_addr) /* store a byte to dst string */
b bothaligned /* can now goto word_aligned */
ldwm 4(s_addr),oddside /* load next word of source */
/* Both pointers are at offset 1 in their words: copy one byte, then */
/* continue with the halfword case. The byte was loaded in the table */
/* entry's delay slot. */
equal_alignment_1:
comiclr,<> r0,tmp1,r0 /* nullify next if tmp1 <> 0 */
bv 0(rp) /* return if null byte found */
stbs,ma tmp1,1(d_addr) /* store a byte to dst string */
ldhs,ma 2(s_addr),tmp1 /* load next halfword */
/* Both pointers are at offset 2: copy the halfword in tmp1 one byte at */
/* a time, returning as soon as a null byte has been stored. */
equal_alignment_2:
extru,<> tmp1,23,8,tmp6 /* look at left byte of halfword */
bv 0(rp) /* return if 1st byte was null */
stbs,ma tmp6,1(d_addr) /* store left byte (delay slot when returning) */
extru,<> tmp1,31,8,r0 /* look at right byte of halfword */
bv 0(rp) /* return if 2nd byte was null */
stbs,ma tmp1,1(d_addr) /* store right byte (delay slot when returning) */
b bothaligned
ldwm 4(s_addr),oddside /* load next word */
/* source and destination are not aligned, so we do it the hard way. */
/* target alignment is greater than source alignment */
/* Entry for a word-aligned source: undo the s_addr advance made by the */
/* load at function entry. */
pos_aligned_copy0:
addi -4,s_addr,s_addr
/* On entry tmp3 holds TGT-SRC in bytes, loaded by the case table. */
pos_aligned_copy:
extru d_addr,31,2,tmp6 /* Extract low 2 bits of the dest addr */
extru s_addr,31,2,tmp1 /* Extract low 2 bits of the src addr */
dep r0,31,2,s_addr /* Compute word address of the source. */
sh3add tmp3,r0,tmp4 /* compute shift amt: tmp4 = 8 * tmp3 (bits) */
ldwm 4(0,s_addr),tmp2 /* get 1st source word */
sh3add tmp1,r0,save /* setup mask shift amount */
mtctl save,r11 /* set-up cr11 for mask */
zvdepi -2,32,save /* create mask */
/* The mask makes the bytes in front of the string start nonzero, so */
/* that they are not mistaken for the terminator. */
or save,tmp2,tmp2 /* mask unused bytes in src */
ldi -1,tmp1 /* load tmp1 with 0xffffffff */
mtctl tmp4,r11 /* shift count -> shift count reg */
vshd tmp1,tmp2,tmp3 /* position data ! */
uxor,nbz tmp3,r0,save /* null in the positioned first chunk? */
b,n first_null
uxor,nbz tmp2,r0,save /* null elsewhere in the first source word? */
b nullfound1
mtctl tmp4,r11 /* re-load shift cnt (delay slot) */
b loop_entry
ldwm 4(0,s_addr),tmp1 /* get next word. delay slot */
/* target alignment is less than source alignment (tmp3 is negative) */
neg_aligned_copy:
extru d_addr,31,2,tmp6 /* Extract low 2 bits of the dest addr */
extru s_addr,31,2,tmp2 /* Extract low 2 bits of the src addr */
dep r0,31,2,s_addr /* Compute word address of the source. */
sh3add tmp3,r0,tmp4 /* compute shift amt */
ldwm 4(0,s_addr),tmp1 /* load first word from source. */
/* check to see if next word can be read safely */
sh3add tmp2,r0,save
mtctl save,r11 /* shift count -> shift count reg */
zvdepi -2,32,save /* create mask for the bytes in front of the string */
or save, tmp1, tmp1
uxor,nbz tmp1,r0,save /* any nulls in first word? */
b first_null0
mtctl tmp4,r11 /* delay slot: shift count for the data positioning */
ldwm 4(0,s_addr),tmp2 /* load second word from source */
combt,= tmp6,r0,chunk1 /* don't mask if whole word valid */
vshd tmp1,tmp2,tmp3 /* position data ! */
sh3add tmp6,r0,save /* setup r1 */
mtctl save,r11 /* set-up cr11 for mask */
zvdepi -2,32,save
or save, tmp3, tmp3 /* apply the mask built from the dest offset to tmp3 */
uxor,nbz tmp3,r0,save /* null in the positioned first chunk? */
b,n first_null
uxor,nbz tmp2,r0,save /* null in the second source word? */
b nullfound1
mtctl tmp4,r11 /* re-load shift cnt (delay slot) */
b loop_entry
ldwm 4(0,s_addr),tmp1 /* get next word. delay slot */
/* Destination is word aligned: no masking of tmp3 is needed. */
chunk1:
uxor,nbz tmp2,r0,save /* null in the second source word? */
b nullfound0
vshd tmp1,tmp2,tmp3 /* delay slot */
/*
* Non-aligned main loop. Each pass stores two words. tmp1 and tmp2 hold
* consecutive source words and tmp3 the word positioned for the
* destination.
*/
did_mask:
ldwm 4(0,s_addr),tmp1 /* get next word ! */
loop_entry:
stbys,b,m tmp3,4(0,d_addr) /* store ! */
uxor,nbz tmp1, r0, save /* null in tmp1? */
b nullfound2
vshd tmp2,tmp1,tmp3 /* position data ! */
ldwm 4(s_addr),tmp2
stwm tmp3,4(d_addr)
uxor,sbz tmp2,r0,save /* skip "b did_mask" if tmp2 has a null byte */
b did_mask
/* The instruction under nullfound0 doubles as the delay slot of the */
/* branch above. */
nullfound0:
vshd tmp1,tmp2,tmp3 /* delay slot */
uxor,nbz tmp3,r0,save /* terminator inside the positioned word? */
b,n nullfound
/* Terminator lies in tmp2 beyond what tmp3 holds: store tmp3, then let */
/* nullfound finish with the remaining bytes, zero filled. */
nullfound1:
stbys,b,m tmp3,4(0,d_addr)
b nullfound
vshd tmp2,r0,save /* delay slot */
/* Terminator is in tmp1; tmp3 was positioned from tmp2 and tmp1 in the */
/* delay slot of the branch that came here. */
nullfound2:
uxor,nbz tmp3,r0,save /* terminator inside the positioned word? */
b,n nullfound
stwm tmp3,4(d_addr)
b nullfound
/* notice that delay slot is in next routine */
/*
* first_null0 / first_null: the terminator was found before the main
* loop was entered. save holds the positioned data, and tmp6 holds the
* low two bits of the destination address. The remaining bytes are
* stored one at a time.
*/
first_null0: /* null found in first word of non-aligned (wrt d_addr) */
vshd tmp1,r0,save /* delay slot */
combt,= tmp6,r0,check4 /* dest word aligned: check all 4 bytes */
extru save,7,8,tmp4 /* delay slot: tmp4 = first byte */
first_null:
addibt,= -1,tmp6,check3 /* check last 3 bytes of word */
extru save,15,8,tmp4 /* delay slot: tmp4 = second byte */
addibt,=,n -1,tmp6,check2 /* check last 2 bytes */
bv 0(rp) /* null in last byte--store and exit */
stbys,b save, 0(d_addr) /* delay slot */
/* check4/check3/check2: store tmp4; if it was the null byte, finish at */
/* done, else move on to the next byte of save. */
check4:
combt,= tmp4,r0,done
stbs,ma tmp4,1(d_addr) /* delay slot: store the byte just tested */
extru,<> save,15,8,tmp4 /* tmp4 = second byte */
check3:
combt,= tmp4,r0,done
stbs,ma tmp4,1(d_addr) /* delay slot: store the byte just tested */
check2:
extru,<> save,23,8,tmp4 /* tmp4 = third byte; skip the return if non-null */
bv 0(rp) /* return if third byte was null */
stbs,ma tmp4,1(d_addr) /* store third byte (delay slot when returning) */
bv 0(rp) /* only the last byte is left; it is stored as null */
stbs r0,0(d_addr) /* delay slot: store the terminating null */
/* EXIT is a macro from DEFS.h, which is not visible here; presumably it */
/* emits the return and ends the procedure. TODO confirm. */
done:
EXIT(strcpy)
|