1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
/*
* Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2007 ARC International (UK) LTD
*
* Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
*/
#include <features.h>
#include <sysdep.h>
#include <asm.h>
ENTRY(strcmp)
#if defined(__ARC700__) || defined(__ARC64_ARCH32__)
/* This is optimized primarily for the ARC700.
It would be possible to speed up the loops by one cycle / word
respective one cycle / byte by forcing double source 1 alignment, unrolling
by a factor of two, and speculatively loading the second word / byte of
source 1; however, that would increase the overhead for loop setup / finish,
and strcmp might often terminate early. */
or r2,r0,r1
bmsk_s r2,r2,1
brne r2,0,.Lcharloop
mov_s r12,0x01010101
ror r5,r12
.Lwordloop:
ld.ab r2,[r0,4]
ld.ab r3,[r1,4]
nop_s
sub r4,r2,r12
bic r4,r4,r2
and r4,r4,r5
brne r4,0,.Lfound0
breq r2,r3,.Lwordloop
#ifdef __LITTLE_ENDIAN__
xor r0,r2,r3 ; mask for difference
SUBR_S r1,r0,1
bic_s r0,r0,r1 ; mask for least significant difference bit
sub r1,r5,r0
xor r0,r5,r1 ; mask for least significant difference byte
and_s r2,r2,r0
and_s r3,r3,r0
#endif /* LITTLE ENDIAN */
cmp_s r2,r3
mov_s r0,1
j_s.d [blink]
bset.lo r0,r0,31
.balign 4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
xor r0,r2,r3 ; mask for difference
or r0,r0,r4 ; or in zero indicator
SUBR_S r1,r0,1
bic_s r0,r0,r1 ; mask for least significant difference bit
sub r1,r5,r0
xor r0,r5,r1 ; mask for least significant difference byte
and_s r2,r2,r0
and_s r3,r3,r0
sub.f r0,r2,r3
mov.hi r0,1
j_s.d [blink]
bset.lo r0,r0,31
#else /* BIG ENDIAN */
/* The zero-detection above can mis-detect 0x01 bytes as zeroes
because of carry-propagateion from a lower significant zero byte.
We can compensate for this by checking that bit0 is zero.
This compensation is not necessary in the step where we
get a low estimate for r2, because in any affected bytes
we already have 0x00 or 0x01, which will remain unchanged
when bit 7 is cleared. */
.balign 4
.Lfound0:
lsr r0,r4,8
lsr_s r1,r2
bic_s r2,r2,r0 ; get low estimate for r2 and get ...
bic_s r0,r0,r1 ; <this is the adjusted mask for zeros>
or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ...
cmp_s r3,r2 ; ... be independent of trailing garbage
or_s r2,r2,r0 ; likewise for r3 > r2
bic_s r3,r3,r0
rlc r0,0 ; r0 := r2 > r3 ? 1 : 0
cmp_s r2,r3
j_s.d [blink]
bset.lo r0,r0,31
#endif /* ENDIAN */
.balign 4
.Lcharloop:
ldb.ab r2,[r0,1]
ldb.ab r3,[r1,1]
nop_s
breq r2,0,.Lcmpend
breq r2,r3,.Lcharloop
.Lcmpend:
j_s.d [blink]
sub r0,r2,r3
#elif defined(__ARCHS__)
or r2, r0, r1
bmsk_s r2, r2, 1
brne r2, 0, @.Lcharloop
;;; s1 and s2 are word aligned
mov_s r12, 0x01010101
ror r11, r12
.align 4
.LwordLoop:
ld.ab r2, [r0, 4]
sub r4, r2, r12
ld.ab r3, [r1, 4]
;; Detect NULL char in str1
bic r4, r4, r2
and r4, r4, r11
brne.d.nt r4, 0, .LfoundNULL
;; Check if the read locations are the same
cmp r2, r3
beq .LwordLoop
;; A match is found, spot it out
#ifdef __LITTLE_ENDIAN__
swape r3, r3
mov_s r0, 1
swape r2, r2
#else
mov_s r0, 1
#endif
cmp_s r2, r3
j_s.d [blink]
bset.lo r0, r0, 31
.align 4
.LfoundNULL:
#ifdef __BIG_ENDIAN__
swape r4, r4
swape r2, r2
swape r3, r3
#endif
;; Find null byte
ffs r0, r4
bmsk r2, r2, r0
bmsk r3, r3, r0
swape r2, r2
swape r3, r3
;; make the return value
sub.f r0, r2, r3
mov.hi r0, 1
j_s.d [blink]
bset.lo r0, r0, 31
.align 4
.Lcharloop:
ldb.ab r2, [r0, 1]
ldb.ab r3, [r1, 1]
nop
breq r2, 0, .Lcmpend
breq r2, r3, .Lcharloop
.align 4
.Lcmpend:
j_s.d [blink]
sub r0, r2, r3
#else
#error "Unsupported ARC CPU type"
#endif
END(strcmp)
libc_hidden_def(strcmp)
#ifndef __UCLIBC_HAS_LOCALE__
strong_alias(strcmp,strcoll)
libc_hidden_def(strcoll)
#endif
|