File: min_max_sse4_amd64.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (1044 lines) | stat: -rw-r--r-- 44,341 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// LCDATA1 is a 32-byte file-local constant table used by _int8_max_min_sse4,
// which addresses it through BP:
//   bytes  0..15: sixteen 0x80 bytes (-128 as int8), the initial "max" lanes
//   bytes 16..31: sixteen 0x7f bytes (+127 as int8), the initial "min" lanes
// The same two rows are also used as XOR masks in the horizontal reduction.
DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x010(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x018(SB)/8, $0x7f7f7f7f7f7f7f7f
// Flag value 8 is presumably RODATA from Go's textflag.h; $32 is the symbol size.
// NOTE(review): the table is read with movdqa, which needs 16-byte alignment;
// nothing in this file requests that alignment explicitly - confirm the linker provides it.
GLOBL LCDATA1<>(SB), 8, $32

// _int8_max_min_sse4 scans `length` signed 8-bit elements starting at `values`,
// then stores the smallest through `minout` and the largest through `maxout`.
//
// Arguments (four 8-byte slots, 32 bytes in total, no return values):
//   values+0(FP)  -> DI  address of the first element
//   length+8(FP)  -> SI  element count; only the low 32 bits are examined
//   minout+16(FP) -> DX  address receiving the minimum (1 byte written)
//   maxout+24(FP) -> CX  address receiving the maximum (1 byte written)
//
// If the count is <= 0 as a signed 32-bit value, min=127 and max=-128 are stored.
//
// Register roles: R9 = count, R11 = elements covered by the vector path and
// then the scalar index, SIL = running min, R8B = running max,
// X0/X2 = min accumulators, X1/X3 = max accumulators.
//
// The instructions are raw opcode bytes; the trailing comment on each line is
// the Intel-syntax disassembly emitted by the generator.
//
// NOTE(review): BP is overwritten by the LEAQ below and is not restored before
// RET, while the declared frame size is $0. Confirm this is acceptable with
// respect to Go's frame-pointer convention (go vet's framepointer check).
TEXT ·_int8_max_min_sse4(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA1<>(SB), BP

	// Empty/negative count -> LBB0_1. More than 31 elements -> vector path
	// at LBB0_4. Otherwise seed max=-128, min=127, index=0 and run the
	// scalar loop only.
	WORD $0xf685             // test    esi, esi
	JLE  LBB0_1
	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
	WORD $0xfe83; BYTE $0x1f // cmp    esi, 31
	JA   LBB0_4
	WORD $0xb041; BYTE $0x80 // mov    r8b, -128
	WORD $0xb640; BYTE $0x7f // mov    sil, 127
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d
	JMP  LBB0_11

// Nothing to scan: store the identity values (min=127, max=-128).
LBB0_1:
	WORD $0xb640; BYTE $0x7f // mov    sil, 127
	WORD $0xb041; BYTE $0x80 // mov    r8b, -128
	JMP  LBB0_12

// Vector setup. R11 = count rounded down to a multiple of 32;
// R8 = number of 32-element chunks; R10 = -(R8 rounded down to even), the
// counter for the two-chunks-per-iteration loop. Exactly one chunk
// (R11-32 == 0) skips the unrolled loop via LBB0_5.
LBB0_4:
	WORD $0x8945; BYTE $0xcb     // mov    r11d, r9d
	LONG $0xe0e38341             // and    r11d, -32
	LONG $0xe0438d49             // lea    rax, [r11 - 32]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x05e8c149             // shr    r8, 5
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB0_5
	WORD $0x894d; BYTE $0xc2     // mov    r10, r8
	LONG $0xfee28349             // and    r10, -2
	WORD $0xf749; BYTE $0xda     // neg    r10
	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
	LONG $0xd96f0f66             // movdqa    xmm3, xmm1

// Main loop: 64 bytes (two chunks) per iteration; RAX is the byte offset.
LBB0_7:
	LONG $0x246f0ff3; BYTE $0x07   // movdqu    xmm4, oword [rdi + rax]
	LONG $0x6c6f0ff3; WORD $0x1007 // movdqu    xmm5, oword [rdi + rax + 16]
	LONG $0x746f0ff3; WORD $0x2007 // movdqu    xmm6, oword [rdi + rax + 32]
	LONG $0x7c6f0ff3; WORD $0x3007 // movdqu    xmm7, oword [rdi + rax + 48]
	LONG $0x38380f66; BYTE $0xc4   // pminsb    xmm0, xmm4
	LONG $0x38380f66; BYTE $0xd5   // pminsb    xmm2, xmm5
	LONG $0x3c380f66; BYTE $0xcc   // pmaxsb    xmm1, xmm4
	LONG $0x3c380f66; BYTE $0xdd   // pmaxsb    xmm3, xmm5
	LONG $0x38380f66; BYTE $0xc6   // pminsb    xmm0, xmm6
	LONG $0x38380f66; BYTE $0xd7   // pminsb    xmm2, xmm7
	LONG $0x3c380f66; BYTE $0xce   // pmaxsb    xmm1, xmm6
	LONG $0x3c380f66; BYTE $0xdf   // pmaxsb    xmm3, xmm7
	LONG $0x40c08348               // add    rax, 64
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB0_7
	// An odd chunk count leaves one 32-byte chunk for LBB0_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB0_10

// Process the single remaining 32-byte chunk.
LBB0_9:
	LONG $0x246f0ff3; BYTE $0x07   // movdqu    xmm4, oword [rdi + rax]
	LONG $0x6c6f0ff3; WORD $0x1007 // movdqu    xmm5, oword [rdi + rax + 16]
	LONG $0x3c380f66; BYTE $0xdd   // pmaxsb    xmm3, xmm5
	LONG $0x3c380f66; BYTE $0xcc   // pmaxsb    xmm1, xmm4
	LONG $0x38380f66; BYTE $0xd5   // pminsb    xmm2, xmm5
	LONG $0x38380f66; BYTE $0xc4   // pminsb    xmm0, xmm4

// Horizontal reduction. The two accumulator pairs are merged first.
// Max: XOR with 0x7f turns "largest signed byte" into "smallest unsigned byte";
// psrlw 8 + pminub folds each word's two bytes into its low byte, phminposuw
// picks the smallest word, and the final XOR with 127 undoes the mapping.
// Min: same scheme using the 0x80 mask (signed order -> unsigned order).
LBB0_10:
	LONG $0x38380f66; BYTE $0xc2 // pminsb    xmm0, xmm2
	LONG $0x3c380f66; BYTE $0xcb // pmaxsb    xmm1, xmm3
	LONG $0x4def0f66; BYTE $0x10 // pxor    xmm1, oword 16[rbp] /* [rip + .LCPI0_1] */
	LONG $0xd16f0f66             // movdqa    xmm2, xmm1
	LONG $0xd2710f66; BYTE $0x08 // psrlw    xmm2, 8
	LONG $0xd1da0f66             // pminub    xmm2, xmm1
	LONG $0x41380f66; BYTE $0xca // phminposuw    xmm1, xmm2
	LONG $0x7e0f4166; BYTE $0xc8 // movd    r8d, xmm1
	LONG $0x7ff08041             // xor    r8b, 127
	LONG $0x45ef0f66; BYTE $0x00 // pxor    xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0xc86f0f66             // movdqa    xmm1, xmm0
	LONG $0xd1710f66; BYTE $0x08 // psrlw    xmm1, 8
	LONG $0xc8da0f66             // pminub    xmm1, xmm0
	LONG $0x41380f66; BYTE $0xc1 // phminposuw    xmm0, xmm1
	LONG $0xc67e0f66             // movd    esi, xmm0
	LONG $0x80f68040             // xor    sil, -128
	// No tail elements when the count was a multiple of 32.
	WORD $0x394d; BYTE $0xcb     // cmp    r11, r9
	JE   LBB0_12

// Scalar loop over elements R11..R9-1 using signed compares:
// SIL = min(SIL, elem), R8B = max(R8B, elem).
LBB0_11:
	LONG $0x04b60f42; BYTE $0x1f // movzx    eax, byte [rdi + r11]
	WORD $0x3840; BYTE $0xc6     // cmp    sil, al
	LONG $0xf6b60f40             // movzx    esi, sil
	WORD $0x4f0f; BYTE $0xf0     // cmovg    esi, eax
	WORD $0x3841; BYTE $0xc0     // cmp    r8b, al
	LONG $0xc0b60f45             // movzx    r8d, r8b
	LONG $0xc04c0f44             // cmovl    r8d, eax
	LONG $0x01c38349             // add    r11, 1
	WORD $0x394d; BYTE $0xd9     // cmp    r9, r11
	JNE  LBB0_11

// Write results: *maxout (RCX) = R8B, *minout (RDX) = SIL.
LBB0_12:
	WORD $0x8844; BYTE $0x01 // mov    byte [rcx], r8b
	WORD $0x8840; BYTE $0x32 // mov    byte [rdx], sil
	RET

// Single-chunk case: initialise the accumulators exactly as LBB0_4 does,
// then branch on the chunk-count parity without entering the unrolled loop.
LBB0_5:
	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
	LONG $0xd96f0f66             // movdqa    xmm3, xmm1
	LONG $0x01c0f641             // test    r8b, 1
	JNE  LBB0_9
	JMP  LBB0_10

// _uint8_max_min_sse4 scans `length` unsigned 8-bit elements starting at
// `values`, then stores the smallest through `minout` and the largest through
// `maxout`.
//
// Arguments (four 8-byte slots, 32 bytes in total, no return values):
//   values+0(FP)  -> DI  address of the first element
//   length+8(FP)  -> SI  element count; only the low 32 bits are examined
//   minout+16(FP) -> DX  address receiving the minimum (1 byte written)
//   maxout+24(FP) -> CX  address receiving the maximum (1 byte written)
//
// If the count is <= 0 as a signed 32-bit value, min=0xff and max=0 are stored.
//
// Register roles: R9 = count, R11 = elements covered by the vector path and
// then the scalar index, SIL = running min, AL = running max,
// X0/X2 = min accumulators (start all-ones), X1/X3 = max accumulators (start zero).
// No constant table is needed, so BP is not touched here.
TEXT ·_uint8_max_min_sse4(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	// Empty/negative count -> LBB1_1. More than 31 elements -> vector path.
	// Otherwise seed min=0xff, max=0, index=0 and use the scalar loop only.
	WORD $0xf685             // test    esi, esi
	JLE  LBB1_1
	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
	WORD $0xfe83; BYTE $0x1f // cmp    esi, 31
	JA   LBB1_4
	WORD $0xb640; BYTE $0xff // mov    sil, -1
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d
	WORD $0xc031             // xor    eax, eax
	JMP  LBB1_11

// Nothing to scan: store the identity values (min=0xff, max=0).
LBB1_1:
	WORD $0xb640; BYTE $0xff // mov    sil, -1
	WORD $0xc031             // xor    eax, eax
	JMP  LBB1_12

// Vector setup. R11 = count rounded down to a multiple of 32;
// R8 = number of 32-element chunks; R10 = -(R8 rounded down to even).
// Exactly one chunk skips the unrolled loop via LBB1_5.
LBB1_4:
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0xe0e38341         // and    r11d, -32
	LONG $0xe0438d49         // lea    rax, [r11 - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB1_5
	WORD $0x894d; BYTE $0xc2 // mov    r10, r8
	LONG $0xfee28349         // and    r10, -2
	WORD $0xf749; BYTE $0xda // neg    r10
	LONG $0xc9ef0f66         // pxor    xmm1, xmm1
	LONG $0xc0760f66         // pcmpeqd    xmm0, xmm0
	WORD $0xc031             // xor    eax, eax
	LONG $0xd2760f66         // pcmpeqd    xmm2, xmm2
	LONG $0xdbef0f66         // pxor    xmm3, xmm3

// Main loop: 64 bytes (two chunks) per iteration; RAX is the byte offset.
LBB1_7:
	LONG $0x246f0ff3; BYTE $0x07   // movdqu    xmm4, oword [rdi + rax]
	LONG $0x6c6f0ff3; WORD $0x1007 // movdqu    xmm5, oword [rdi + rax + 16]
	LONG $0x746f0ff3; WORD $0x2007 // movdqu    xmm6, oword [rdi + rax + 32]
	LONG $0x7c6f0ff3; WORD $0x3007 // movdqu    xmm7, oword [rdi + rax + 48]
	LONG $0xc4da0f66               // pminub    xmm0, xmm4
	LONG $0xd5da0f66               // pminub    xmm2, xmm5
	LONG $0xccde0f66               // pmaxub    xmm1, xmm4
	LONG $0xddde0f66               // pmaxub    xmm3, xmm5
	LONG $0xc6da0f66               // pminub    xmm0, xmm6
	LONG $0xd7da0f66               // pminub    xmm2, xmm7
	LONG $0xcede0f66               // pmaxub    xmm1, xmm6
	LONG $0xdfde0f66               // pmaxub    xmm3, xmm7
	LONG $0x40c08348               // add    rax, 64
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB1_7
	// An odd chunk count leaves one 32-byte chunk for LBB1_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB1_10

// Process the single remaining 32-byte chunk.
LBB1_9:
	LONG $0x246f0ff3; BYTE $0x07   // movdqu    xmm4, oword [rdi + rax]
	LONG $0x6c6f0ff3; WORD $0x1007 // movdqu    xmm5, oword [rdi + rax + 16]
	LONG $0xddde0f66               // pmaxub    xmm3, xmm5
	LONG $0xccde0f66               // pmaxub    xmm1, xmm4
	LONG $0xd5da0f66               // pminub    xmm2, xmm5
	LONG $0xc4da0f66               // pminub    xmm0, xmm4

// Horizontal reduction. Max: the accumulator is bit-inverted (XOR with
// all-ones) so the largest byte becomes the smallest, reduced with
// psrlw/pminub/phminposuw, and inverted back with `not al`.
// Min: reduced directly with the same psrlw/pminub/phminposuw sequence.
LBB1_10:
	LONG $0xc2da0f66             // pminub    xmm0, xmm2
	LONG $0xcbde0f66             // pmaxub    xmm1, xmm3
	LONG $0xd2760f66             // pcmpeqd    xmm2, xmm2
	LONG $0xd1ef0f66             // pxor    xmm2, xmm1
	LONG $0xca6f0f66             // movdqa    xmm1, xmm2
	LONG $0xd1710f66; BYTE $0x08 // psrlw    xmm1, 8
	LONG $0xcada0f66             // pminub    xmm1, xmm2
	LONG $0x41380f66; BYTE $0xc9 // phminposuw    xmm1, xmm1
	LONG $0xc87e0f66             // movd    eax, xmm1
	WORD $0xd0f6                 // not    al
	LONG $0xc86f0f66             // movdqa    xmm1, xmm0
	LONG $0xd1710f66; BYTE $0x08 // psrlw    xmm1, 8
	LONG $0xc8da0f66             // pminub    xmm1, xmm0
	LONG $0x41380f66; BYTE $0xc1 // phminposuw    xmm0, xmm1
	LONG $0xc67e0f66             // movd    esi, xmm0
	// No tail elements when the count was a multiple of 32.
	WORD $0x394d; BYTE $0xcb     // cmp    r11, r9
	JE   LBB1_12

// Scalar loop over elements R11..R9-1 using unsigned compares:
// SIL = min(SIL, elem), AL = max(AL, elem).
LBB1_11:
	LONG $0x04b60f46; BYTE $0x1f // movzx    r8d, byte [rdi + r11]
	WORD $0x3844; BYTE $0xc6     // cmp    sil, r8b
	LONG $0xf6b60f40             // movzx    esi, sil
	LONG $0xf0430f41             // cmovae    esi, r8d
	WORD $0x3844; BYTE $0xc0     // cmp    al, r8b
	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
	LONG $0xc0460f41             // cmovbe    eax, r8d
	LONG $0x01c38349             // add    r11, 1
	WORD $0x394d; BYTE $0xd9     // cmp    r9, r11
	JNE  LBB1_11

// Write results: *maxout (RCX) = AL, *minout (RDX) = SIL.
LBB1_12:
	WORD $0x0188             // mov    byte [rcx], al
	WORD $0x8840; BYTE $0x32 // mov    byte [rdx], sil
	RET

// Single-chunk case: initialise the accumulators exactly as LBB1_4 does,
// then branch on the chunk-count parity without entering the unrolled loop.
LBB1_5:
	LONG $0xc9ef0f66 // pxor    xmm1, xmm1
	LONG $0xc0760f66 // pcmpeqd    xmm0, xmm0
	WORD $0xc031     // xor    eax, eax
	LONG $0xd2760f66 // pcmpeqd    xmm2, xmm2
	LONG $0xdbef0f66 // pxor    xmm3, xmm3
	LONG $0x01c0f641 // test    r8b, 1
	JNE  LBB1_9
	JMP  LBB1_10

// LCDATA2 is a 32-byte file-local constant table used by _int16_max_min_sse4,
// which addresses it through BP:
//   bytes  0..15: eight 0x8000 words (-32768 as int16), the initial "max" lanes
//   bytes 16..31: eight 0x7fff words (+32767 as int16), the initial "min" lanes
// The same two rows are also used as XOR masks in the horizontal reduction.
DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x010(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x018(SB)/8, $0x7fff7fff7fff7fff
// Flag value 8 is presumably RODATA from Go's textflag.h; $32 is the symbol size.
GLOBL LCDATA2<>(SB), 8, $32

// _int16_max_min_sse4 scans `length` signed 16-bit elements starting at
// `values`, then stores the smallest through `minout` and the largest through
// `maxout`.
//
// Arguments (four 8-byte slots, 32 bytes in total, no return values):
//   values+0(FP)  -> DI  address of the first element
//   length+8(FP)  -> SI  element count; only the low 32 bits are examined
//   minout+16(FP) -> DX  address receiving the minimum (2 bytes written)
//   maxout+24(FP) -> CX  address receiving the maximum (2 bytes written)
//
// If the count is <= 0 as a signed 32-bit value, min=32767 and max=-32768 are stored.
//
// Register roles: R9 = count, R11 = elements covered by the vector path and
// then the scalar index, SI = running min, R8W = running max,
// X0/X2 = min accumulators, X1/X3 = max accumulators.
//
// NOTE(review): BP is overwritten by the LEAQ below and is not restored before
// RET, while the declared frame size is $0. Confirm this is acceptable with
// respect to Go's frame-pointer convention (go vet's framepointer check).
TEXT ·_int16_max_min_sse4(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA2<>(SB), BP

	// Empty/negative count -> LBB2_1. More than 15 elements -> vector path.
	// Otherwise seed max=-32768, min=32767, index=0 and use the scalar loop only.
	WORD $0xf685                 // test    esi, esi
	JLE  LBB2_1
	WORD $0x8941; BYTE $0xf1     // mov    r9d, esi
	WORD $0xfe83; BYTE $0x0f     // cmp    esi, 15
	JA   LBB2_4
	LONG $0x00b84166; BYTE $0x80 // mov    r8w, -32768
	LONG $0x7fffbe66             // mov    si, 32767
	WORD $0x3145; BYTE $0xdb     // xor    r11d, r11d
	JMP  LBB2_11

// Nothing to scan: store the identity values (min=32767, max=-32768).
LBB2_1:
	LONG $0x7fffbe66             // mov    si, 32767
	LONG $0x00b84166; BYTE $0x80 // mov    r8w, -32768
	JMP  LBB2_12

// Vector setup. R11 = count rounded down to a multiple of 16;
// R8 = number of 16-element (32-byte) chunks; R10 = -(R8 rounded down to even).
// Exactly one chunk skips the unrolled loop via LBB2_5.
LBB2_4:
	WORD $0x8945; BYTE $0xcb     // mov    r11d, r9d
	LONG $0xf0e38341             // and    r11d, -16
	LONG $0xf0438d49             // lea    rax, [r11 - 16]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x04e8c149             // shr    r8, 4
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB2_5
	WORD $0x894d; BYTE $0xc2     // mov    r10, r8
	LONG $0xfee28349             // and    r10, -2
	WORD $0xf749; BYTE $0xda     // neg    r10
	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
	LONG $0xd96f0f66             // movdqa    xmm3, xmm1

// Main loop: 32 elements (64 bytes, two chunks) per iteration;
// RAX is an element index, scaled by 2 in the addressing mode.
LBB2_7:
	LONG $0x246f0ff3; BYTE $0x47   // movdqu    xmm4, oword [rdi + 2*rax]
	LONG $0x6c6f0ff3; WORD $0x1047 // movdqu    xmm5, oword [rdi + 2*rax + 16]
	LONG $0x746f0ff3; WORD $0x2047 // movdqu    xmm6, oword [rdi + 2*rax + 32]
	LONG $0x7c6f0ff3; WORD $0x3047 // movdqu    xmm7, oword [rdi + 2*rax + 48]
	LONG $0xc4ea0f66               // pminsw    xmm0, xmm4
	LONG $0xd5ea0f66               // pminsw    xmm2, xmm5
	LONG $0xccee0f66               // pmaxsw    xmm1, xmm4
	LONG $0xddee0f66               // pmaxsw    xmm3, xmm5
	LONG $0xc6ea0f66               // pminsw    xmm0, xmm6
	LONG $0xd7ea0f66               // pminsw    xmm2, xmm7
	LONG $0xceee0f66               // pmaxsw    xmm1, xmm6
	LONG $0xdfee0f66               // pmaxsw    xmm3, xmm7
	LONG $0x20c08348               // add    rax, 32
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB2_7
	// An odd chunk count leaves one 16-element chunk for LBB2_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB2_10

// Process the single remaining 16-element chunk.
LBB2_9:
	LONG $0x246f0ff3; BYTE $0x47   // movdqu    xmm4, oword [rdi + 2*rax]
	LONG $0x6c6f0ff3; WORD $0x1047 // movdqu    xmm5, oword [rdi + 2*rax + 16]
	LONG $0xddee0f66               // pmaxsw    xmm3, xmm5
	LONG $0xccee0f66               // pmaxsw    xmm1, xmm4
	LONG $0xd5ea0f66               // pminsw    xmm2, xmm5
	LONG $0xc4ea0f66               // pminsw    xmm0, xmm4

// Horizontal reduction via phminposuw (unsigned 16-bit minimum).
// Max: XOR with 0x7fff maps the largest signed word to the smallest unsigned
// word; the scalar XOR with 32767 afterwards undoes the mapping.
// Min: XOR with 0x8000 maps signed order to unsigned order; undone by the
// scalar XOR with 32768.
LBB2_10:
	LONG $0xc2ea0f66                           // pminsw    xmm0, xmm2
	LONG $0xcbee0f66                           // pmaxsw    xmm1, xmm3
	LONG $0x4def0f66; BYTE $0x10               // pxor    xmm1, oword 16[rbp] /* [rip + .LCPI2_1] */
	LONG $0x41380f66; BYTE $0xc9               // phminposuw    xmm1, xmm1
	LONG $0x7e0f4166; BYTE $0xc8               // movd    r8d, xmm1
	LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor    r8d, 32767
	LONG $0x45ef0f66; BYTE $0x00               // pxor    xmm0, oword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0x41380f66; BYTE $0xc0               // phminposuw    xmm0, xmm0
	LONG $0xc67e0f66                           // movd    esi, xmm0
	LONG $0x8000f681; WORD $0x0000             // xor    esi, 32768
	// No tail elements when the count was a multiple of 16.
	WORD $0x394d; BYTE $0xcb                   // cmp    r11, r9
	JE   LBB2_12

// Scalar loop over elements R11..R9-1 using signed 16-bit compares:
// SI = min(SI, elem), R8W = max(R8W, elem).
LBB2_11:
	LONG $0x04b70f42; BYTE $0x5f // movzx    eax, word [rdi + 2*r11]
	WORD $0x3966; BYTE $0xc6     // cmp    si, ax
	WORD $0x4f0f; BYTE $0xf0     // cmovg    esi, eax
	LONG $0xc0394166             // cmp    r8w, ax
	LONG $0xc04c0f44             // cmovl    r8d, eax
	LONG $0x01c38349             // add    r11, 1
	WORD $0x394d; BYTE $0xd9     // cmp    r9, r11
	JNE  LBB2_11

// Write results: *maxout (RCX) = R8W, *minout (RDX) = SI.
LBB2_12:
	LONG $0x01894466         // mov    word [rcx], r8w
	WORD $0x8966; BYTE $0x32 // mov    word [rdx], si
	RET

// Single-chunk case: initialise the accumulators exactly as LBB2_4 does,
// then branch on the chunk-count parity without entering the unrolled loop.
LBB2_5:
	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
	LONG $0xd96f0f66             // movdqa    xmm3, xmm1
	LONG $0x01c0f641             // test    r8b, 1
	JNE  LBB2_9
	JMP  LBB2_10

// _uint16_max_min_sse4 scans `length` unsigned 16-bit elements starting at
// `values`, then stores the smallest through `minout` and the largest through
// `maxout`.
//
// Arguments (four 8-byte slots, 32 bytes in total, no return values):
//   values+0(FP)  -> DI  address of the first element
//   length+8(FP)  -> SI  element count; only the low 32 bits are examined
//   minout+16(FP) -> DX  address receiving the minimum (2 bytes written)
//   maxout+24(FP) -> CX  address receiving the maximum (2 bytes written)
//
// If the count is <= 0 as a signed 32-bit value, min=0xffff and max=0 are stored.
//
// Register roles (note they differ from the signed variant): R9 = count,
// R11 = elements covered by the vector path and then the scalar index,
// R8W = running min, SI = running max,
// X0/X2 = min accumulators (start all-ones), X1/X3 = max accumulators (start zero).
// No constant table is needed, so BP is not touched here.
TEXT ·_uint16_max_min_sse4(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	// Empty/negative count -> LBB3_1. More than 15 elements -> vector path.
	// Otherwise seed min=0xffff, max=0, index=0 and use the scalar loop only.
	WORD $0xf685                 // test    esi, esi
	JLE  LBB3_1
	WORD $0x8941; BYTE $0xf1     // mov    r9d, esi
	WORD $0xfe83; BYTE $0x0f     // cmp    esi, 15
	JA   LBB3_4
	LONG $0xffb84166; BYTE $0xff // mov    r8w, -1
	WORD $0x3145; BYTE $0xdb     // xor    r11d, r11d
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB3_11

// Nothing to scan: store the identity values (min=0xffff, max=0).
LBB3_1:
	LONG $0xffb84166; BYTE $0xff // mov    r8w, -1
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB3_12

// Vector setup. R11 = count rounded down to a multiple of 16;
// R8 = number of 16-element (32-byte) chunks; R10 = -(R8 rounded down to even).
// Exactly one chunk skips the unrolled loop via LBB3_5.
LBB3_4:
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0xf0e38341         // and    r11d, -16
	LONG $0xf0438d49         // lea    rax, [r11 - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_5
	WORD $0x894d; BYTE $0xc2 // mov    r10, r8
	LONG $0xfee28349         // and    r10, -2
	WORD $0xf749; BYTE $0xda // neg    r10
	LONG $0xc9ef0f66         // pxor    xmm1, xmm1
	LONG $0xc0760f66         // pcmpeqd    xmm0, xmm0
	WORD $0xc031             // xor    eax, eax
	LONG $0xd2760f66         // pcmpeqd    xmm2, xmm2
	LONG $0xdbef0f66         // pxor    xmm3, xmm3

// Main loop: 32 elements (64 bytes, two chunks) per iteration;
// RAX is an element index, scaled by 2 in the addressing mode.
LBB3_7:
	LONG $0x246f0ff3; BYTE $0x47   // movdqu    xmm4, oword [rdi + 2*rax]
	LONG $0x6c6f0ff3; WORD $0x1047 // movdqu    xmm5, oword [rdi + 2*rax + 16]
	LONG $0x746f0ff3; WORD $0x2047 // movdqu    xmm6, oword [rdi + 2*rax + 32]
	LONG $0x7c6f0ff3; WORD $0x3047 // movdqu    xmm7, oword [rdi + 2*rax + 48]
	LONG $0x3a380f66; BYTE $0xc4   // pminuw    xmm0, xmm4
	LONG $0x3a380f66; BYTE $0xd5   // pminuw    xmm2, xmm5
	LONG $0x3e380f66; BYTE $0xcc   // pmaxuw    xmm1, xmm4
	LONG $0x3e380f66; BYTE $0xdd   // pmaxuw    xmm3, xmm5
	LONG $0x3a380f66; BYTE $0xc6   // pminuw    xmm0, xmm6
	LONG $0x3a380f66; BYTE $0xd7   // pminuw    xmm2, xmm7
	LONG $0x3e380f66; BYTE $0xce   // pmaxuw    xmm1, xmm6
	LONG $0x3e380f66; BYTE $0xdf   // pmaxuw    xmm3, xmm7
	LONG $0x20c08348               // add    rax, 32
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB3_7
	// An odd chunk count leaves one 16-element chunk for LBB3_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_10

// Process the single remaining 16-element chunk.
LBB3_9:
	LONG $0x246f0ff3; BYTE $0x47   // movdqu    xmm4, oword [rdi + 2*rax]
	LONG $0x6c6f0ff3; WORD $0x1047 // movdqu    xmm5, oword [rdi + 2*rax + 16]
	LONG $0x3e380f66; BYTE $0xdd   // pmaxuw    xmm3, xmm5
	LONG $0x3e380f66; BYTE $0xcc   // pmaxuw    xmm1, xmm4
	LONG $0x3a380f66; BYTE $0xd5   // pminuw    xmm2, xmm5
	LONG $0x3a380f66; BYTE $0xc4   // pminuw    xmm0, xmm4

// Horizontal reduction via phminposuw (unsigned 16-bit minimum).
// Max: the accumulator is bit-inverted so the largest word becomes the
// smallest; `not esi` inverts the result back. Min: reduced directly.
LBB3_10:
	LONG $0x3a380f66; BYTE $0xc2 // pminuw    xmm0, xmm2
	LONG $0x3e380f66; BYTE $0xcb // pmaxuw    xmm1, xmm3
	LONG $0xd2760f66             // pcmpeqd    xmm2, xmm2
	LONG $0xd1ef0f66             // pxor    xmm2, xmm1
	LONG $0x41380f66; BYTE $0xca // phminposuw    xmm1, xmm2
	LONG $0xce7e0f66             // movd    esi, xmm1
	WORD $0xd6f7                 // not    esi
	LONG $0x41380f66; BYTE $0xc0 // phminposuw    xmm0, xmm0
	LONG $0x7e0f4166; BYTE $0xc0 // movd    r8d, xmm0
	// No tail elements when the count was a multiple of 16.
	WORD $0x394d; BYTE $0xcb     // cmp    r11, r9
	JE   LBB3_12

// Scalar loop over elements R11..R9-1 using unsigned 16-bit compares:
// R8W = min(R8W, elem), SI = max(SI, elem).
LBB3_11:
	LONG $0x04b70f42; BYTE $0x5f // movzx    eax, word [rdi + 2*r11]
	LONG $0xc0394166             // cmp    r8w, ax
	LONG $0xc0430f44             // cmovae    r8d, eax
	WORD $0x3966; BYTE $0xc6     // cmp    si, ax
	WORD $0x460f; BYTE $0xf0     // cmovbe    esi, eax
	LONG $0x01c38349             // add    r11, 1
	WORD $0x394d; BYTE $0xd9     // cmp    r9, r11
	JNE  LBB3_11

// Write results: *maxout (RCX) = SI, *minout (RDX) = R8W.
LBB3_12:
	WORD $0x8966; BYTE $0x31 // mov    word [rcx], si
	LONG $0x02894466         // mov    word [rdx], r8w
	RET

// Single-chunk case: initialise the accumulators exactly as LBB3_4 does,
// then branch on the chunk-count parity without entering the unrolled loop.
LBB3_5:
	LONG $0xc9ef0f66 // pxor    xmm1, xmm1
	LONG $0xc0760f66 // pcmpeqd    xmm0, xmm0
	WORD $0xc031     // xor    eax, eax
	LONG $0xd2760f66 // pcmpeqd    xmm2, xmm2
	LONG $0xdbef0f66 // pxor    xmm3, xmm3
	LONG $0x01c0f641 // test    r8b, 1
	JNE  LBB3_9
	JMP  LBB3_10

// LCDATA3 is a 32-byte file-local constant table used by _int32_max_min_sse4,
// which addresses it through BP:
//   bytes  0..15: four 0x80000000 dwords (int32 minimum), the initial "max" lanes
//   bytes 16..31: four 0x7fffffff dwords (int32 maximum), the initial "min" lanes
DATA LCDATA3<>+0x000(SB)/8, $0x8000000080000000
DATA LCDATA3<>+0x008(SB)/8, $0x8000000080000000
DATA LCDATA3<>+0x010(SB)/8, $0x7fffffff7fffffff
DATA LCDATA3<>+0x018(SB)/8, $0x7fffffff7fffffff
// Flag value 8 is presumably RODATA from Go's textflag.h; $32 is the symbol size.
GLOBL LCDATA3<>(SB), 8, $32

// _int32_max_min_sse4 scans `length` signed 32-bit elements starting at
// `values`, then stores the smallest through `minout` and the largest through
// `maxout`.
//
// Arguments (four 8-byte slots, 32 bytes in total, no return values):
//   values+0(FP)  -> DI  address of the first element
//   length+8(FP)  -> SI  element count; only the low 32 bits are examined
//   minout+16(FP) -> DX  address receiving the minimum (4 bytes written)
//   maxout+24(FP) -> CX  address receiving the maximum (4 bytes written)
//
// If the count is <= 0 as a signed 32-bit value, min=2147483647 and
// max=-2147483648 are stored.
//
// Register roles: R9 = count, R11 = elements covered by the vector path and
// then the scalar index, R8D = running min, EAX = running max (held in ESI
// inside the scalar loop), X0/X2 = min accumulators, X1/X3 = max accumulators.
// RAX doubles as the element index during the vector loop.
//
// NOTE(review): BP is overwritten by the LEAQ below and is not restored before
// RET, while the declared frame size is $0. Confirm this is acceptable with
// respect to Go's frame-pointer convention (go vet's framepointer check).
TEXT ·_int32_max_min_sse4(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA3<>(SB), BP

	// Empty/negative count -> LBB4_1. More than 7 elements -> vector path.
	// Otherwise seed max=INT32_MIN, min=INT32_MAX, index=0 and use the
	// scalar loop only.
	WORD $0xf685                   // test    esi, esi
	JLE  LBB4_1
	WORD $0x8941; BYTE $0xf1       // mov    r9d, esi
	WORD $0xfe83; BYTE $0x07       // cmp    esi, 7
	JA   LBB4_6
	LONG $0x000000b8; BYTE $0x80   // mov    eax, -2147483648
	LONG $0xffffb841; WORD $0x7fff // mov    r8d, 2147483647
	WORD $0x3145; BYTE $0xdb       // xor    r11d, r11d
	JMP  LBB4_4

// Nothing to scan: store the identity values.
LBB4_1:
	LONG $0xffffb841; WORD $0x7fff // mov    r8d, 2147483647
	LONG $0x000000b8; BYTE $0x80   // mov    eax, -2147483648
	JMP  LBB4_13

// Vector setup. R11 = count rounded down to a multiple of 8;
// R8 = number of 8-element (32-byte) chunks; R10 = -(R8 rounded down to even).
// Exactly one chunk skips the unrolled loop via LBB4_7.
LBB4_6:
	WORD $0x8945; BYTE $0xcb     // mov    r11d, r9d
	LONG $0xf8e38341             // and    r11d, -8
	LONG $0xf8438d49             // lea    rax, [r11 - 8]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x03e8c149             // shr    r8, 3
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB4_7
	WORD $0x894d; BYTE $0xc2     // mov    r10, r8
	LONG $0xfee28349             // and    r10, -2
	WORD $0xf749; BYTE $0xda     // neg    r10
	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
	LONG $0xd96f0f66             // movdqa    xmm3, xmm1

// Main loop: 16 elements (64 bytes, two chunks) per iteration;
// RAX is an element index, scaled by 4 in the addressing mode.
LBB4_9:
	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
	LONG $0x746f0ff3; WORD $0x2087 // movdqu    xmm6, oword [rdi + 4*rax + 32]
	LONG $0x7c6f0ff3; WORD $0x3087 // movdqu    xmm7, oword [rdi + 4*rax + 48]
	LONG $0x39380f66; BYTE $0xc4   // pminsd    xmm0, xmm4
	LONG $0x39380f66; BYTE $0xd5   // pminsd    xmm2, xmm5
	LONG $0x3d380f66; BYTE $0xcc   // pmaxsd    xmm1, xmm4
	LONG $0x3d380f66; BYTE $0xdd   // pmaxsd    xmm3, xmm5
	LONG $0x39380f66; BYTE $0xc6   // pminsd    xmm0, xmm6
	LONG $0x39380f66; BYTE $0xd7   // pminsd    xmm2, xmm7
	LONG $0x3d380f66; BYTE $0xce   // pmaxsd    xmm1, xmm6
	LONG $0x3d380f66; BYTE $0xdf   // pmaxsd    xmm3, xmm7
	LONG $0x10c08348               // add    rax, 16
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB4_9
	// An odd chunk count leaves one 8-element chunk for LBB4_11.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB4_12

// Process the single remaining 8-element chunk.
LBB4_11:
	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
	LONG $0x3d380f66; BYTE $0xdd   // pmaxsd    xmm3, xmm5
	LONG $0x3d380f66; BYTE $0xcc   // pmaxsd    xmm1, xmm4
	LONG $0x39380f66; BYTE $0xd5   // pminsd    xmm2, xmm5
	LONG $0x39380f66; BYTE $0xc4   // pminsd    xmm0, xmm4

// Horizontal reduction of the four 32-bit lanes: pshufd 78 swaps the two
// 64-bit halves, pshufd 229 brings lane 1 down to lane 0; each shuffle is
// followed by a pmaxsd/pminsd so lane 0 ends up holding the overall result.
LBB4_12:
	LONG $0x39380f66; BYTE $0xc2 // pminsd    xmm0, xmm2
	LONG $0x3d380f66; BYTE $0xcb // pmaxsd    xmm1, xmm3
	LONG $0xd1700f66; BYTE $0x4e // pshufd    xmm2, xmm1, 78
	LONG $0x3d380f66; BYTE $0xd1 // pmaxsd    xmm2, xmm1
	LONG $0xca700f66; BYTE $0xe5 // pshufd    xmm1, xmm2, 229
	LONG $0x3d380f66; BYTE $0xca // pmaxsd    xmm1, xmm2
	LONG $0xc87e0f66             // movd    eax, xmm1
	LONG $0xc8700f66; BYTE $0x4e // pshufd    xmm1, xmm0, 78
	LONG $0x39380f66; BYTE $0xc8 // pminsd    xmm1, xmm0
	LONG $0xc1700f66; BYTE $0xe5 // pshufd    xmm0, xmm1, 229
	LONG $0x39380f66; BYTE $0xc1 // pminsd    xmm0, xmm1
	LONG $0x7e0f4166; BYTE $0xc0 // movd    r8d, xmm0
	// No tail elements when the count was a multiple of 8.
	WORD $0x394d; BYTE $0xcb     // cmp    r11, r9
	JE   LBB4_13

// Scalar-loop entry: move the running max from EAX to ESI because EAX is
// reused to hold each loaded element.
LBB4_4:
	WORD $0xc689 // mov    esi, eax

// Scalar loop over elements R11..R9-1 using signed compares:
// R8D = min(R8D, elem); EAX = max(ESI, elem), copied back to ESI each pass
// so EAX holds the final max on exit.
LBB4_5:
	LONG $0x9f048b42         // mov    eax, dword [rdi + 4*r11]
	WORD $0x3941; BYTE $0xc0 // cmp    r8d, eax
	LONG $0xc04f0f44         // cmovg    r8d, eax
	WORD $0xc639             // cmp    esi, eax
	WORD $0x4d0f; BYTE $0xc6 // cmovge    eax, esi
	LONG $0x01c38349         // add    r11, 1
	WORD $0xc689             // mov    esi, eax
	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
	JNE  LBB4_5

// Write results: *maxout (RCX) = EAX, *minout (RDX) = R8D.
LBB4_13:
	WORD $0x0189             // mov    dword [rcx], eax
	WORD $0x8944; BYTE $0x02 // mov    dword [rdx], r8d
	RET

// Single-chunk case: initialise the accumulators exactly as LBB4_6 does,
// then branch on the chunk-count parity without entering the unrolled loop.
LBB4_7:
	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
	LONG $0xd96f0f66             // movdqa    xmm3, xmm1
	LONG $0x01c0f641             // test    r8b, 1
	JNE  LBB4_11
	JMP  LBB4_12

// _uint32_max_min_sse4 scans `length` unsigned 32-bit elements starting at
// `values`, then stores the smallest through `minout` and the largest through
// `maxout`.
//
// Arguments (four 8-byte slots, 32 bytes in total, no return values):
//   values+0(FP)  -> DI  address of the first element
//   length+8(FP)  -> SI  element count; only the low 32 bits are examined
//   minout+16(FP) -> DX  address receiving the minimum (4 bytes written)
//   maxout+24(FP) -> CX  address receiving the maximum (4 bytes written)
//
// If the count is <= 0 as a signed 32-bit value, min=0xffffffff and max=0 are stored.
//
// Register roles: R9 = count, R11 = elements covered by the vector path and
// then the scalar index, R8D = running min, ESI = running max (held in EAX
// inside the scalar loop), X0/X2 = min accumulators (start all-ones),
// X1/X3 = max accumulators (start zero).
// No constant table is needed, so BP is not touched here.
TEXT ·_uint32_max_min_sse4(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	// Empty/negative count -> LBB5_1. More than 7 elements -> vector path.
	// Otherwise seed min=0xffffffff, max=0, index=0 and use the scalar loop only.
	WORD $0xf685                   // test    esi, esi
	JLE  LBB5_1
	WORD $0x8941; BYTE $0xf1       // mov    r9d, esi
	WORD $0xfe83; BYTE $0x07       // cmp    esi, 7
	JA   LBB5_6
	WORD $0x3145; BYTE $0xdb       // xor    r11d, r11d
	LONG $0xffffb841; WORD $0xffff // mov    r8d, -1
	WORD $0xf631                   // xor    esi, esi
	JMP  LBB5_4

// Nothing to scan: store the identity values (min=0xffffffff, max=0).
LBB5_1:
	LONG $0xffffb841; WORD $0xffff // mov    r8d, -1
	WORD $0xf631                   // xor    esi, esi
	JMP  LBB5_13

// Vector setup. R11 = count rounded down to a multiple of 8;
// R8 = number of 8-element (32-byte) chunks; R10 = -(R8 rounded down to even).
// Exactly one chunk skips the unrolled loop via LBB5_7.
LBB5_6:
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0xf8e38341         // and    r11d, -8
	LONG $0xf8438d49         // lea    rax, [r11 - 8]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x03e8c149         // shr    r8, 3
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB5_7
	WORD $0x894d; BYTE $0xc2 // mov    r10, r8
	LONG $0xfee28349         // and    r10, -2
	WORD $0xf749; BYTE $0xda // neg    r10
	LONG $0xc9ef0f66         // pxor    xmm1, xmm1
	LONG $0xc0760f66         // pcmpeqd    xmm0, xmm0
	WORD $0xc031             // xor    eax, eax
	LONG $0xd2760f66         // pcmpeqd    xmm2, xmm2
	LONG $0xdbef0f66         // pxor    xmm3, xmm3

// Main loop: 16 elements (64 bytes, two chunks) per iteration;
// RAX is an element index, scaled by 4 in the addressing mode.
LBB5_9:
	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
	LONG $0x746f0ff3; WORD $0x2087 // movdqu    xmm6, oword [rdi + 4*rax + 32]
	LONG $0x7c6f0ff3; WORD $0x3087 // movdqu    xmm7, oword [rdi + 4*rax + 48]
	LONG $0x3b380f66; BYTE $0xc4   // pminud    xmm0, xmm4
	LONG $0x3b380f66; BYTE $0xd5   // pminud    xmm2, xmm5
	LONG $0x3f380f66; BYTE $0xcc   // pmaxud    xmm1, xmm4
	LONG $0x3f380f66; BYTE $0xdd   // pmaxud    xmm3, xmm5
	LONG $0x3b380f66; BYTE $0xc6   // pminud    xmm0, xmm6
	LONG $0x3b380f66; BYTE $0xd7   // pminud    xmm2, xmm7
	LONG $0x3f380f66; BYTE $0xce   // pmaxud    xmm1, xmm6
	LONG $0x3f380f66; BYTE $0xdf   // pmaxud    xmm3, xmm7
	LONG $0x10c08348               // add    rax, 16
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB5_9
	// An odd chunk count leaves one 8-element chunk for LBB5_11.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB5_12

// Process the single remaining 8-element chunk.
LBB5_11:
	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
	LONG $0x3f380f66; BYTE $0xdd   // pmaxud    xmm3, xmm5
	LONG $0x3f380f66; BYTE $0xcc   // pmaxud    xmm1, xmm4
	LONG $0x3b380f66; BYTE $0xd5   // pminud    xmm2, xmm5
	LONG $0x3b380f66; BYTE $0xc4   // pminud    xmm0, xmm4

// Horizontal reduction of the four 32-bit lanes: pshufd 78 swaps the two
// 64-bit halves, pshufd 229 brings lane 1 down to lane 0; each shuffle is
// followed by a pmaxud/pminud so lane 0 ends up holding the overall result.
LBB5_12:
	LONG $0x3b380f66; BYTE $0xc2 // pminud    xmm0, xmm2
	LONG $0x3f380f66; BYTE $0xcb // pmaxud    xmm1, xmm3
	LONG $0xd1700f66; BYTE $0x4e // pshufd    xmm2, xmm1, 78
	LONG $0x3f380f66; BYTE $0xd1 // pmaxud    xmm2, xmm1
	LONG $0xca700f66; BYTE $0xe5 // pshufd    xmm1, xmm2, 229
	LONG $0x3f380f66; BYTE $0xca // pmaxud    xmm1, xmm2
	LONG $0xce7e0f66             // movd    esi, xmm1
	LONG $0xc8700f66; BYTE $0x4e // pshufd    xmm1, xmm0, 78
	LONG $0x3b380f66; BYTE $0xc8 // pminud    xmm1, xmm0
	LONG $0xc1700f66; BYTE $0xe5 // pshufd    xmm0, xmm1, 229
	LONG $0x3b380f66; BYTE $0xc1 // pminud    xmm0, xmm1
	LONG $0x7e0f4166; BYTE $0xc0 // movd    r8d, xmm0
	// No tail elements when the count was a multiple of 8.
	WORD $0x394d; BYTE $0xcb     // cmp    r11, r9
	JE   LBB5_13

// Scalar-loop entry: move the running max from ESI to EAX because ESI is
// reused to hold each loaded element.
LBB5_4:
	WORD $0xf089 // mov    eax, esi

// Scalar loop over elements R11..R9-1 using unsigned compares:
// R8D = min(R8D, elem); ESI = max(EAX, elem), copied back to EAX each pass
// so ESI holds the final max on exit.
LBB5_5:
	LONG $0x9f348b42         // mov    esi, dword [rdi + 4*r11]
	WORD $0x3941; BYTE $0xf0 // cmp    r8d, esi
	LONG $0xc6430f44         // cmovae    r8d, esi
	WORD $0xf039             // cmp    eax, esi
	WORD $0x470f; BYTE $0xf0 // cmova    esi, eax
	LONG $0x01c38349         // add    r11, 1
	WORD $0xf089             // mov    eax, esi
	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
	JNE  LBB5_5

// Write results: *maxout (RCX) = ESI, *minout (RDX) = R8D.
LBB5_13:
	WORD $0x3189             // mov    dword [rcx], esi
	WORD $0x8944; BYTE $0x02 // mov    dword [rdx], r8d
	RET

// Single-chunk case: initialise the accumulators exactly as LBB5_6 does,
// then branch on the chunk-count parity without entering the unrolled loop.
LBB5_7:
	LONG $0xc9ef0f66 // pxor    xmm1, xmm1
	LONG $0xc0760f66 // pcmpeqd    xmm0, xmm0
	WORD $0xc031     // xor    eax, eax
	LONG $0xd2760f66 // pcmpeqd    xmm2, xmm2
	LONG $0xdbef0f66 // pxor    xmm3, xmm3
	LONG $0x01c0f641 // test    r8b, 1
	JNE  LBB5_11
	JMP  LBB5_12

// LCDATA4 is a 32-byte file-local constant table that _int64_max_min_sse4
// addresses through BP:
//   bytes  0..15: two 0x8000000000000000 qwords (the int64 minimum)
//   bytes 16..31: two 0x7fffffffffffffff qwords (the int64 maximum)
DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA4<>+0x008(SB)/8, $0x8000000000000000
DATA LCDATA4<>+0x010(SB)/8, $0x7fffffffffffffff
DATA LCDATA4<>+0x018(SB)/8, $0x7fffffffffffffff
// Flag value 8 is presumably RODATA from Go's textflag.h; $32 is the symbol size.
GLOBL LCDATA4<>(SB), 8, $32

// ·_int64_max_min_sse4 scans an array of signed 64-bit integers and stores the
// smallest element through minout and the largest through maxout.
//
// Arguments (32 bytes, read through the FP pseudo-register):
//   values+0(FP)  -> DI  base address of the int64 array
//   length+8(FP)  -> SI  element count; only the low 32 bits (ESI) are examined
//   minout+16(FP) -> DX  address that receives the minimum
//   maxout+24(FP) -> CX  address that receives the maximum
//
// If ESI <= 0 (signed 32-bit test) no element is read and the results are the
// seeds: *minout = 0x7fffffffffffffff, *maxout = 0x8000000000000000.
//
// Structure: counts of 4 or more go through a vector path that handles four
// elements per "group" (two 16-byte loads), with the main loop unrolled to two
// groups per iteration; any remaining 0..3 elements go through a scalar loop.
//
// Registers written by the body: AX, SI, R8-R11, BP, X0-X9, flags.
//
// The frame size is $8 rather than $0 so that the Go assembler emits a
// prologue/epilogue that saves and restores BP. The body overwrites BP with
// the address of LCDATA4<>; with a zero-size frame that value would be left
// in the caller's frame pointer on return.
TEXT ·_int64_max_min_sse4(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA4<>(SB), BP

	// r8 = running minimum, seeded with the largest int64.
	QUAD $0xffffffffffffb849; WORD $0x7fff // mov    r8, 9223372036854775807
	WORD $0xf685                           // test    esi, esi
	JLE  LBB6_1
	// r9 = element count (zero-extended from the low 32 bits of length).
	WORD $0x8941; BYTE $0xf1               // mov    r9d, esi
	WORD $0xfe83; BYTE $0x03               // cmp    esi, 3
	JA   LBB6_6
	// 1..3 elements: scalar loop only. rsi = r8 + 1 wraps to the smallest
	// int64 (running-maximum seed); r11 = index of the first element to visit.
	LONG $0x01708d49                       // lea    rsi, [r8 + 1]
	WORD $0x3145; BYTE $0xdb               // xor    r11d, r11d
	JMP  LBB6_4

// Empty input: publish the seeds (rsi = smallest int64, r8 = largest int64).
LBB6_1:
	LONG $0x01708d49 // lea    rsi, [r8 + 1]
	JMP  LBB6_13

// Vector path setup.
//   r11 = count rounded down to a multiple of 4 (elements covered by SIMD)
//   r8  = number of 4-element groups = ((r11 - 4) >> 2) + 1
//   r10 = -(r8 & ~1), counted up to zero by the two-groups-per-pass loop
LBB6_6:
	WORD $0x8945; BYTE $0xcb       // mov    r11d, r9d
	LONG $0xfce38341               // and    r11d, -4
	LONG $0xfc438d49               // lea    rax, [r11 - 4]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x02e8c149               // shr    r8, 2
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	// Exactly one group: skip the unrolled loop.
	JE   LBB6_7
	WORD $0x894d; BYTE $0xc2       // mov    r10, r8
	LONG $0xfee28349               // and    r10, -2
	WORD $0xf749; BYTE $0xda       // neg    r10
	// xmm9/xmm6 = max accumulators (seeded from table offset 0),
	// xmm8/xmm2 = min accumulators (seeded from table offset 16), rax = index.
	LONG $0x6f0f4466; WORD $0x004d // movdqa    xmm9, oword 0[rbp] /* [rip + .LCPI6_0] */
	LONG $0x6f0f4466; WORD $0x1045 // movdqa    xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */
	WORD $0xc031                   // xor    eax, eax
	LONG $0x6f0f4166; BYTE $0xd0   // movdqa    xmm2, xmm8
	LONG $0x6f0f4166; BYTE $0xf1   // movdqa    xmm6, xmm9

// Main loop: 8 elements (four 16-byte unaligned loads) per iteration.
// Each min/max is a pcmpgtq that builds a mask in xmm0 followed by a blendvpd
// that selects per 64-bit lane using that mask (xmm0 is blendvpd's implicit
// mask operand). On exit: xmm8/xmm2 hold the minima and xmm3/xmm4 the maxima
// (xmm9/xmm6 are copies of xmm3/xmm4 carried into the next iteration).
LBB6_9:
	LONG $0x3c6f0ff3; BYTE $0xc7   // movdqu    xmm7, oword [rdi + 8*rax]
	LONG $0xc76f0f66               // movdqa    xmm0, xmm7
	LONG $0x380f4166; WORD $0xc037 // pcmpgtq    xmm0, xmm8
	LONG $0xe76f0f66               // movdqa    xmm4, xmm7
	LONG $0x380f4166; WORD $0xe015 // blendvpd    xmm4, xmm8, xmm0
	LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu    xmm1, oword [rdi + 8*rax + 16]
	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
	LONG $0x15380f66; BYTE $0xea   // blendvpd    xmm5, xmm2, xmm0
	LONG $0x6f0f4166; BYTE $0xc1   // movdqa    xmm0, xmm9
	LONG $0x37380f66; BYTE $0xc7   // pcmpgtq    xmm0, xmm7
	LONG $0x380f4166; WORD $0xf915 // blendvpd    xmm7, xmm9, xmm0
	LONG $0xc66f0f66               // movdqa    xmm0, xmm6
	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
	LONG $0x15380f66; BYTE $0xce   // blendvpd    xmm1, xmm6, xmm0
	LONG $0x5c6f0ff3; WORD $0x20c7 // movdqu    xmm3, oword [rdi + 8*rax + 32]
	LONG $0xc36f0f66               // movdqa    xmm0, xmm3
	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
	LONG $0x6f0f4466; BYTE $0xc3   // movdqa    xmm8, xmm3
	LONG $0x380f4466; WORD $0xc415 // blendvpd    xmm8, xmm4, xmm0
	LONG $0x646f0ff3; WORD $0x30c7 // movdqu    xmm4, oword [rdi + 8*rax + 48]
	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
	LONG $0x37380f66; BYTE $0xc5   // pcmpgtq    xmm0, xmm5
	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
	LONG $0x15380f66; BYTE $0xd5   // blendvpd    xmm2, xmm5, xmm0
	LONG $0xc7280f66               // movapd    xmm0, xmm7
	LONG $0x37380f66; BYTE $0xc3   // pcmpgtq    xmm0, xmm3
	LONG $0x15380f66; BYTE $0xdf   // blendvpd    xmm3, xmm7, xmm0
	LONG $0xc1280f66               // movapd    xmm0, xmm1
	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
	LONG $0x15380f66; BYTE $0xe1   // blendvpd    xmm4, xmm1, xmm0
	LONG $0x08c08348               // add    rax, 8
	LONG $0x280f4466; BYTE $0xcb   // movapd    xmm9, xmm3
	LONG $0xf4280f66               // movapd    xmm6, xmm4
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB6_9
	// An odd group count leaves one more 4-element group to fold in.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB6_12

// Fold one 4-element group at index rax into the accumulators:
// xmm3/xmm4 <- max, xmm8/xmm2 <- min.
LBB6_11:
	LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu    xmm1, oword [rdi + 8*rax + 16]
	LONG $0xc4280f66               // movapd    xmm0, xmm4
	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
	LONG $0x15380f66; BYTE $0xec   // blendvpd    xmm5, xmm4, xmm0
	LONG $0x246f0ff3; BYTE $0xc7   // movdqu    xmm4, oword [rdi + 8*rax]
	LONG $0xc3280f66               // movapd    xmm0, xmm3
	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
	LONG $0xf46f0f66               // movdqa    xmm6, xmm4
	LONG $0x15380f66; BYTE $0xf3   // blendvpd    xmm6, xmm3, xmm0
	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
	LONG $0x15380f66; BYTE $0xca   // blendvpd    xmm1, xmm2, xmm0
	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
	LONG $0x380f4166; WORD $0xc037 // pcmpgtq    xmm0, xmm8
	LONG $0x380f4166; WORD $0xe015 // blendvpd    xmm4, xmm8, xmm0
	LONG $0x280f4466; BYTE $0xc4   // movapd    xmm8, xmm4
	LONG $0xd1280f66               // movapd    xmm2, xmm1
	LONG $0xde280f66               // movapd    xmm3, xmm6
	LONG $0xe5280f66               // movapd    xmm4, xmm5

// Horizontal reduction. Merge the two max accumulators, swap the 64-bit
// halves (pshufd imm 78) and take the max again -> rsi. Same for the two min
// accumulators -> r8. If r11 == r9 there is no scalar tail.
LBB6_12:
	LONG $0xc3280f66               // movapd    xmm0, xmm3
	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
	LONG $0x15380f66; BYTE $0xe3   // blendvpd    xmm4, xmm3, xmm0
	LONG $0xcc700f66; BYTE $0x4e   // pshufd    xmm1, xmm4, 78
	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
	LONG $0x15380f66; BYTE $0xcc   // blendvpd    xmm1, xmm4, xmm0
	LONG $0x7e0f4866; BYTE $0xce   // movq    rsi, xmm1
	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
	LONG $0x380f4166; WORD $0xc037 // pcmpgtq    xmm0, xmm8
	LONG $0x380f4166; WORD $0xd015 // blendvpd    xmm2, xmm8, xmm0
	LONG $0xca700f66; BYTE $0x4e   // pshufd    xmm1, xmm2, 78
	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
	LONG $0x15380f66; BYTE $0xca   // blendvpd    xmm1, xmm2, xmm0
	LONG $0x7e0f4966; BYTE $0xc8   // movq    r8, xmm1
	WORD $0x394d; BYTE $0xcb       // cmp    r11, r9
	JE   LBB6_13

// Scalar tail entry: rax takes over the running maximum from rsi.
LBB6_4:
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi

// Scalar loop over elements r11 .. r9-1 (signed compares):
//   r8  = min(r8, v)   via cmovg
//   rsi = max(rax, v)  via cmovge, then copied back into rax
LBB6_5:
	LONG $0xdf348b4a         // mov    rsi, qword [rdi + 8*r11]
	WORD $0x3949; BYTE $0xf0 // cmp    r8, rsi
	LONG $0xc64f0f4c         // cmovg    r8, rsi
	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
	LONG $0xf04d0f48         // cmovge    rsi, rax
	LONG $0x01c38349         // add    r11, 1
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
	JNE  LBB6_5

// Publish results: *maxout (rcx) = rsi, *minout (rdx) = r8.
LBB6_13:
	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
	WORD $0x894c; BYTE $0x02 // mov    qword [rdx], r8
	RET

// Single-group setup (r8 == 1): seed xmm3/xmm4 (max) and xmm8/xmm2 (min) from
// the constant table, rax = 0, then let the parity test route to LBB6_11.
LBB6_7:
	LONG $0x5d280f66; BYTE $0x00   // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI6_0] */
	LONG $0x6f0f4466; WORD $0x1045 // movdqa    xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */
	WORD $0xc031                   // xor    eax, eax
	LONG $0x6f0f4166; BYTE $0xd0   // movdqa    xmm2, xmm8
	LONG $0xe3280f66               // movapd    xmm4, xmm3
	LONG $0x01c0f641               // test    r8b, 1
	JNE  LBB6_11
	JMP  LBB6_12

// LCDATA5 is the 16-byte constant addressed through BP by
// ·_uint64_max_min_sse4: two quadwords with only the top bit set. The function
// XORs operands with it before pcmpgtq and reads it with movdqa/movapd, which
// require 16-byte alignment.
DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA5<>+0x008(SB)/8, $0x8000000000000000
// Flag value 8 is RODATA in Go's textflag.h; $16 is the symbol size in bytes.
GLOBL LCDATA5<>(SB), 8, $16

// ·_uint64_max_min_sse4 scans an array of unsigned 64-bit integers and stores
// the smallest element through minout and the largest through maxout.
//
// Arguments (32 bytes, read through the FP pseudo-register):
//   values+0(FP)  -> DI  base address of the uint64 array
//   length+8(FP)  -> SI  element count; only the low 32 bits (ESI) are examined
//   minout+16(FP) -> DX  address that receives the minimum
//   maxout+24(FP) -> CX  address that receives the maximum
//
// If ESI <= 0 (signed 32-bit test) no element is read and the results are the
// seeds: *minout = 0xffffffffffffffff, *maxout = 0.
//
// Structure: counts of 4 or more go through a vector path that handles four
// elements per "group" (two 16-byte loads), with the main loop unrolled to two
// groups per iteration; any remaining 0..3 elements go through a scalar loop.
// pcmpgtq compares signed quadwords, so both operands are XORed with the
// top-bit mask from LCDATA5<> first, which turns it into an unsigned compare.
//
// Registers written by the body: AX, SI, R8-R11, BP, X0-X13, flags.
//
// The frame size is $8 rather than $0 so that the Go assembler emits a
// prologue/epilogue that saves and restores BP. The body overwrites BP with
// the address of LCDATA5<>; with a zero-size frame that value would be left
// in the caller's frame pointer on return.
TEXT ·_uint64_max_min_sse4(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA5<>(SB), BP

	WORD $0xf685                               // test    esi, esi
	JLE  LBB7_1
	// r9 = element count (zero-extended from the low 32 bits of length).
	WORD $0x8941; BYTE $0xf1                   // mov    r9d, esi
	WORD $0xfe83; BYTE $0x03                   // cmp    esi, 3
	JA   LBB7_6
	// 1..3 elements: scalar loop only. r8 = running min seed (all ones),
	// rax = running max seed (0), r11 = index of the first element to visit.
	LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov    r8, -1
	WORD $0x3145; BYTE $0xdb                   // xor    r11d, r11d
	WORD $0xc031                               // xor    eax, eax
	JMP  LBB7_4

// Empty input: publish the seeds (r8 = all ones, rax = 0).
LBB7_1:
	LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov    r8, -1
	WORD $0xc031                               // xor    eax, eax
	JMP  LBB7_13

// Vector path setup.
//   r11 = count rounded down to a multiple of 4 (elements covered by SIMD)
//   r8  = number of 4-element groups = ((r11 - 4) >> 2) + 1
//   r10 = -(r8 & ~1), counted up to zero by the two-groups-per-pass loop
LBB7_6:
	WORD $0x8945; BYTE $0xcb       // mov    r11d, r9d
	LONG $0xfce38341               // and    r11d, -4
	LONG $0xfc438d49               // lea    rax, [r11 - 4]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x02e8c149               // shr    r8, 2
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	// Exactly one group: skip the unrolled loop.
	JE   LBB7_7
	WORD $0x894d; BYTE $0xc2       // mov    r10, r8
	LONG $0xfee28349               // and    r10, -2
	WORD $0xf749; BYTE $0xda       // neg    r10
	// xmm9/xmm12 = max accumulators (0), xmm10/xmm11 = min accumulators
	// (all ones), xmm8 = top-bit mask, rax = element index.
	LONG $0xef0f4566; BYTE $0xc9   // pxor    xmm9, xmm9
	LONG $0x760f4566; BYTE $0xd2   // pcmpeqd    xmm10, xmm10
	WORD $0xc031                   // xor    eax, eax
	LONG $0x6f0f4466; WORD $0x0045 // movdqa    xmm8, oword 0[rbp] /* [rip + .LCPI7_0] */
	LONG $0x760f4566; BYTE $0xdb   // pcmpeqd    xmm11, xmm11
	LONG $0xef0f4566; BYTE $0xe4   // pxor    xmm12, xmm12

// Main loop: 8 elements (four 16-byte unaligned loads) per iteration.
// Operands are biased with xmm8 before each pcmpgtq; blendvpd then selects
// per 64-bit lane using the mask in xmm0 (its implicit operand).
// On exit: xmm10/xmm11 hold the minima and xmm13/xmm6 the maxima
// (xmm9/xmm12 are copies of xmm13/xmm6 carried into the next iteration).
LBB7_9:
	LONG $0x6f0f4166; BYTE $0xd2               // movdqa    xmm2, xmm10
	LONG $0xef0f4166; BYTE $0xd0               // pxor    xmm2, xmm8
	LONG $0x246f0ff3; BYTE $0xc7               // movdqu    xmm4, oword [rdi + 8*rax]
	LONG $0x6c6f0ff3; WORD $0x10c7             // movdqu    xmm5, oword [rdi + 8*rax + 16]
	LONG $0x6f0f44f3; WORD $0xc76c; BYTE $0x20 // movdqu    xmm13, oword [rdi + 8*rax + 32]
	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
	LONG $0x6f0f4166; BYTE $0xc9               // movdqa    xmm1, xmm9
	LONG $0xef0f4166; BYTE $0xc8               // pxor    xmm1, xmm8
	LONG $0x37380f66; BYTE $0xc8               // pcmpgtq    xmm1, xmm0
	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
	LONG $0xdc6f0f66                           // movdqa    xmm3, xmm4
	LONG $0x380f4166; WORD $0xda15             // blendvpd    xmm3, xmm10, xmm0
	LONG $0x746f0ff3; WORD $0x30c7             // movdqu    xmm6, oword [rdi + 8*rax + 48]
	LONG $0x6f0f4166; BYTE $0xfb               // movdqa    xmm7, xmm11
	LONG $0xef0f4166; BYTE $0xf8               // pxor    xmm7, xmm8
	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
	LONG $0x6f0f4166; BYTE $0xd4               // movdqa    xmm2, xmm12
	LONG $0xef0f4166; BYTE $0xd0               // pxor    xmm2, xmm8
	LONG $0x37380f66; BYTE $0xd0               // pcmpgtq    xmm2, xmm0
	LONG $0x37380f66; BYTE $0xc7               // pcmpgtq    xmm0, xmm7
	LONG $0xfd6f0f66                           // movdqa    xmm7, xmm5
	LONG $0x380f4166; WORD $0xfb15             // blendvpd    xmm7, xmm11, xmm0
	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
	LONG $0x380f4166; WORD $0xe115             // blendvpd    xmm4, xmm9, xmm0
	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
	LONG $0x380f4166; WORD $0xec15             // blendvpd    xmm5, xmm12, xmm0
	LONG $0xd3280f66                           // movapd    xmm2, xmm3
	LONG $0x570f4166; BYTE $0xd0               // xorpd    xmm2, xmm8
	LONG $0x6f0f4166; BYTE $0xc5               // movdqa    xmm0, xmm13
	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
	LONG $0xcc280f66                           // movapd    xmm1, xmm4
	LONG $0x570f4166; BYTE $0xc8               // xorpd    xmm1, xmm8
	LONG $0x37380f66; BYTE $0xc8               // pcmpgtq    xmm1, xmm0
	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
	LONG $0x6f0f4566; BYTE $0xd5               // movdqa    xmm10, xmm13
	LONG $0x380f4466; WORD $0xd315             // blendvpd    xmm10, xmm3, xmm0
	LONG $0xdf280f66                           // movapd    xmm3, xmm7
	LONG $0x570f4166; BYTE $0xd8               // xorpd    xmm3, xmm8
	LONG $0xc66f0f66                           // movdqa    xmm0, xmm6
	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
	LONG $0xd5280f66                           // movapd    xmm2, xmm5
	LONG $0x570f4166; BYTE $0xd0               // xorpd    xmm2, xmm8
	LONG $0x37380f66; BYTE $0xd0               // pcmpgtq    xmm2, xmm0
	LONG $0x37380f66; BYTE $0xc3               // pcmpgtq    xmm0, xmm3
	LONG $0x6f0f4466; BYTE $0xde               // movdqa    xmm11, xmm6
	LONG $0x380f4466; WORD $0xdf15             // blendvpd    xmm11, xmm7, xmm0
	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
	LONG $0x380f4466; WORD $0xec15             // blendvpd    xmm13, xmm4, xmm0
	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
	LONG $0x15380f66; BYTE $0xf5               // blendvpd    xmm6, xmm5, xmm0
	LONG $0x08c08348                           // add    rax, 8
	LONG $0x280f4566; BYTE $0xcd               // movapd    xmm9, xmm13
	LONG $0x280f4466; BYTE $0xe6               // movapd    xmm12, xmm6
	LONG $0x02c28349                           // add    r10, 2
	JNE  LBB7_9
	// An odd group count leaves one more 4-element group to fold in.
	LONG $0x01c0f641                           // test    r8b, 1
	JE   LBB7_12

// Fold one 4-element group at index rax into the accumulators:
// xmm13/xmm6 <- max, xmm10/xmm11 <- min. xmm5 holds the top-bit mask here.
LBB7_11:
	LONG $0x24100f66; BYTE $0xc7   // movupd    xmm4, oword [rdi + 8*rax]
	LONG $0x5c100f66; WORD $0x10c7 // movupd    xmm3, oword [rdi + 8*rax + 16]
	LONG $0x6d280f66; BYTE $0x00   // movapd    xmm5, oword 0[rbp] /* [rip + .LCPI7_0] */
	LONG $0xc6280f66               // movapd    xmm0, xmm6
	LONG $0xc5570f66               // xorpd    xmm0, xmm5
	LONG $0xcb280f66               // movapd    xmm1, xmm3
	LONG $0xcd570f66               // xorpd    xmm1, xmm5
	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
	LONG $0xfb280f66               // movapd    xmm7, xmm3
	LONG $0x15380f66; BYTE $0xfe   // blendvpd    xmm7, xmm6, xmm0
	LONG $0x280f4166; BYTE $0xc5   // movapd    xmm0, xmm13
	LONG $0xc5570f66               // xorpd    xmm0, xmm5
	LONG $0xd4280f66               // movapd    xmm2, xmm4
	LONG $0xd5570f66               // xorpd    xmm2, xmm5
	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
	LONG $0xf4280f66               // movapd    xmm6, xmm4
	LONG $0x380f4166; WORD $0xf515 // blendvpd    xmm6, xmm13, xmm0
	LONG $0x280f4166; BYTE $0xc3   // movapd    xmm0, xmm11
	LONG $0xc5570f66               // xorpd    xmm0, xmm5
	LONG $0x37380f66; BYTE $0xc8   // pcmpgtq    xmm1, xmm0
	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
	LONG $0x380f4166; WORD $0xdb15 // blendvpd    xmm3, xmm11, xmm0
	LONG $0x570f4166; BYTE $0xea   // xorpd    xmm5, xmm10
	LONG $0x37380f66; BYTE $0xd5   // pcmpgtq    xmm2, xmm5
	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
	LONG $0x380f4166; WORD $0xe215 // blendvpd    xmm4, xmm10, xmm0
	LONG $0x280f4466; BYTE $0xd4   // movapd    xmm10, xmm4
	LONG $0x280f4466; BYTE $0xdb   // movapd    xmm11, xmm3
	LONG $0x280f4466; BYTE $0xee   // movapd    xmm13, xmm6
	LONG $0xf7280f66               // movapd    xmm6, xmm7

// Horizontal reduction (xmm1 = top-bit mask). Merge the two max accumulators,
// swap the 64-bit halves (pshufd imm 78) and take the max again -> rax. Same
// for the two min accumulators -> r8. If r11 == r9 there is no scalar tail.
LBB7_12:
	LONG $0x4d280f66; BYTE $0x00   // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI7_0] */
	LONG $0xd6280f66               // movapd    xmm2, xmm6
	LONG $0xd1570f66               // xorpd    xmm2, xmm1
	LONG $0x280f4166; BYTE $0xc5   // movapd    xmm0, xmm13
	LONG $0xc1570f66               // xorpd    xmm0, xmm1
	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
	LONG $0x380f4166; WORD $0xf515 // blendvpd    xmm6, xmm13, xmm0
	LONG $0xd6700f66; BYTE $0x4e   // pshufd    xmm2, xmm6, 78
	LONG $0xc6280f66               // movapd    xmm0, xmm6
	LONG $0xc1570f66               // xorpd    xmm0, xmm1
	LONG $0xda6f0f66               // movdqa    xmm3, xmm2
	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
	LONG $0x37380f66; BYTE $0xc3   // pcmpgtq    xmm0, xmm3
	LONG $0x15380f66; BYTE $0xd6   // blendvpd    xmm2, xmm6, xmm0
	LONG $0x7e0f4866; BYTE $0xd0   // movq    rax, xmm2
	LONG $0x6f0f4166; BYTE $0xd2   // movdqa    xmm2, xmm10
	LONG $0xd1ef0f66               // pxor    xmm2, xmm1
	LONG $0x6f0f4166; BYTE $0xc3   // movdqa    xmm0, xmm11
	LONG $0xc1ef0f66               // pxor    xmm0, xmm1
	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
	LONG $0x380f4566; WORD $0xda15 // blendvpd    xmm11, xmm10, xmm0
	LONG $0x700f4166; WORD $0x4ed3 // pshufd    xmm2, xmm11, 78
	LONG $0x6f0f4166; BYTE $0xc3   // movdqa    xmm0, xmm11
	LONG $0xc1ef0f66               // pxor    xmm0, xmm1
	LONG $0xcaef0f66               // pxor    xmm1, xmm2
	LONG $0x37380f66; BYTE $0xc8   // pcmpgtq    xmm1, xmm0
	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
	LONG $0x380f4166; WORD $0xd315 // blendvpd    xmm2, xmm11, xmm0
	LONG $0x7e0f4966; BYTE $0xd0   // movq    r8, xmm2
	WORD $0x394d; BYTE $0xcb       // cmp    r11, r9
	JE   LBB7_13

// Scalar tail entry: rsi takes over the running maximum from rax.
LBB7_4:
	WORD $0x8948; BYTE $0xc6 // mov    rsi, rax

// Scalar loop over elements r11 .. r9-1 (unsigned compares):
//   r8  = min(r8, v)   via cmovae
//   rax = max(rsi, v)  via cmova, then copied back into rsi
LBB7_5:
	LONG $0xdf048b4a         // mov    rax, qword [rdi + 8*r11]
	WORD $0x3949; BYTE $0xc0 // cmp    r8, rax
	LONG $0xc0430f4c         // cmovae    r8, rax
	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
	LONG $0xc6470f48         // cmova    rax, rsi
	LONG $0x01c38349         // add    r11, 1
	WORD $0x8948; BYTE $0xc6 // mov    rsi, rax
	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
	JNE  LBB7_5

// Publish results: *maxout (rcx) = rax, *minout (rdx) = r8.
LBB7_13:
	WORD $0x8948; BYTE $0x01 // mov    qword [rcx], rax
	WORD $0x894c; BYTE $0x02 // mov    qword [rdx], r8
	RET

// Single-group setup (r8 == 1): xmm13/xmm6 = 0 (max seeds), xmm10/xmm11 = all
// ones (min seeds), rax = 0, then let the parity test route to LBB7_11.
LBB7_7:
	LONG $0x570f4566; BYTE $0xed // xorpd    xmm13, xmm13
	LONG $0x760f4566; BYTE $0xd2 // pcmpeqd    xmm10, xmm10
	WORD $0xc031                 // xor    eax, eax
	LONG $0x760f4566; BYTE $0xdb // pcmpeqd    xmm11, xmm11
	LONG $0xf6570f66             // xorpd    xmm6, xmm6
	LONG $0x01c0f641             // test    r8b, 1
	JNE  LBB7_11
	JMP  LBB7_12