File: min_max_avx2_amd64.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (927 lines) | stat: -rw-r--r-- 41,397 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// LCDATA1: 96-byte constant table read by ·_int8_max_min_avx2 through BP.
//   0x00-0x1f  32 x 0x80 (-128)  starting value of the running-maximum lanes
//   0x20-0x3f  32 x 0x7f (+127)  starting value of the running-minimum lanes
//   0x40-0x4f  16 x 0x7f         XOR mask applied to the maximum before the
//                                horizontal reduction
//   0x50-0x5f  16 x 0x80         XOR mask applied to the minimum before the
//                                horizontal reduction
// NOTE(review): the numeric GLOBL flag 8 is presumably RODATA from
// textflag.h — confirm.
DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x010(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x018(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x020(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x028(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x030(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x038(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080
GLOBL LCDATA1<>(SB), 8, $96

// _int8_max_min_avx2 scans `length` signed 8-bit values starting at `values`
// and stores the smallest one to *minout and the largest one to *maxout.
//
// Stack arguments (32 bytes, no results):
//   values+0(FP)   -> DI  address of the first element
//   length+8(FP)   -> SI  element count; only the low 32 bits (ESI) are
//                         examined, as a signed value
//   minout+16(FP)  -> DX  address that receives the minimum (1 byte)
//   maxout+24(FP)  -> CX  address that receives the maximum (1 byte)
//
// Behaviour by length:
//   length <= 0   stores 127 to *minout and -128 to *maxout
//   length <  64  scalar loop only (LBB0_11)
//   length >= 64  AVX2 path over the first (length & -64) elements, then the
//                 scalar loop for whatever is left
//
// The body is raw machine code emitted through WORD/LONG/BYTE; the trailing
// comment on each line is the instruction it encodes.
// NOTE(review): the Go prototype is declared outside this file; confirm the
// parameter types there.
TEXT ·_int8_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	// BP = base address of the LCDATA1 constant table used by the vector path.
	// NOTE(review): BP is overwritten here; presumably the non-zero frame size
	// is what lets the assembler preserve the caller's BP — confirm.
	LEAQ LCDATA1<>(SB), BP

	// Dispatch on the 32-bit length: <= 0 -> LBB0_1, > 63 -> LBB0_4.
	// Otherwise prepare the scalar loop: r9 = length, r8b = -128 (max),
	// sil = 127 (min), r10 = 0 (element index).
	WORD $0xf685             // test    esi, esi
	JLE  LBB0_1
	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
	WORD $0xfe83; BYTE $0x3f // cmp    esi, 63
	JA   LBB0_4
	WORD $0xb041; BYTE $0x80 // mov    r8b, -128
	WORD $0xb640; BYTE $0x7f // mov    sil, 127
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	JMP  LBB0_11

	// length <= 0: report min = 127 and max = -128 without reading any input.
LBB0_1:
	WORD $0xb640; BYTE $0x7f // mov    sil, 127
	WORD $0xb041; BYTE $0x80 // mov    r8b, -128
	JMP  LBB0_12

	// Vector setup.
	//   r10 = length & -64        elements handled by the AVX2 path
	//   r8  = r10 / 64            number of 64-byte chunks
	//   rsi = -(r8 & -2)          chunk-pair counter, stepped by 2 up to zero
	//   rax = 0                   byte offset into the input
	//   ymm1, ymm3                running maxima, every lane starts at -128
	//   ymm0, ymm2                running minima, every lane starts at 127
	// When there is exactly one chunk (r10 - 64 == 0) control goes to LBB0_5.
	// NOTE(review): vmovdqa needs LCDATA1 to be 32-byte aligned; confirm the
	// linker guarantees that.
LBB0_4:
	WORD $0x8945; BYTE $0xca     // mov    r10d, r9d
	LONG $0xc0e28341             // and    r10d, -64
	LONG $0xc0428d49             // lea    rax, [r10 - 64]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x06e8c149             // shr    r8, 6
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB0_5
	WORD $0x894c; BYTE $0xc6     // mov    rsi, r8
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1

	// Main loop: two 64-byte chunks (128 elements) per iteration.
LBB0_7:
	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
	LONG $0x746ffec5; WORD $0x4007 // vmovdqu    ymm6, yword [rdi + rax + 64]
	LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu    ymm7, yword [rdi + rax + 96]
	LONG $0x387de2c4; BYTE $0xc4   // vpminsb    ymm0, ymm0, ymm4
	LONG $0x386de2c4; BYTE $0xd5   // vpminsb    ymm2, ymm2, ymm5
	LONG $0x3c75e2c4; BYTE $0xcc   // vpmaxsb    ymm1, ymm1, ymm4
	LONG $0x3c65e2c4; BYTE $0xdd   // vpmaxsb    ymm3, ymm3, ymm5
	LONG $0x387de2c4; BYTE $0xc6   // vpminsb    ymm0, ymm0, ymm6
	LONG $0x386de2c4; BYTE $0xd7   // vpminsb    ymm2, ymm2, ymm7
	LONG $0x3c75e2c4; BYTE $0xce   // vpmaxsb    ymm1, ymm1, ymm6
	LONG $0x3c65e2c4; BYTE $0xdf   // vpmaxsb    ymm3, ymm3, ymm7
	LONG $0x80e88348               // sub    rax, -128
	LONG $0x02c68348               // add    rsi, 2
	JNE  LBB0_7
	// An odd chunk count leaves one more 64-byte chunk for LBB0_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB0_10

	// Fold a single 64-byte chunk at byte offset rax into the accumulators.
LBB0_9:
	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
	LONG $0x3c65e2c4; BYTE $0xdd   // vpmaxsb    ymm3, ymm3, ymm5
	LONG $0x3c75e2c4; BYTE $0xcc   // vpmaxsb    ymm1, ymm1, ymm4
	LONG $0x386de2c4; BYTE $0xd5   // vpminsb    ymm2, ymm2, ymm5
	LONG $0x387de2c4; BYTE $0xc4   // vpminsb    ymm0, ymm0, ymm4

	// Horizontal reduction of the vector accumulators.
	//   max: ymm1 = max(ymm1, ymm3), fold the two 128-bit halves, XOR every
	//        byte with 0x7f (table offset 64) so the largest signed byte
	//        becomes the smallest unsigned byte, fold byte pairs with
	//        vpsrlw/vpminub, take the minimum word with vphminposuw, then
	//        XOR with 127 again to recover the value in r8b.
	//   min: same scheme on ymm0/ymm2 with XOR mask 0x80 (table offset 80);
	//        the result ends up in sil.
	// If r10 == r9 the vector path covered every element and the scalar loop
	// is skipped.
LBB0_10:
	LONG $0x3c75e2c4; BYTE $0xcb   // vpmaxsb    ymm1, ymm1, ymm3
	LONG $0x397de3c4; WORD $0x01cb // vextracti128    xmm3, ymm1, 1
	LONG $0x3c71e2c4; BYTE $0xcb   // vpmaxsb    xmm1, xmm1, xmm3
	LONG $0x4deff1c5; BYTE $0x40   // vpxor    xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI0_2] */
	LONG $0x387de2c4; BYTE $0xc2   // vpminsb    ymm0, ymm0, ymm2
	LONG $0xd171e9c5; BYTE $0x08   // vpsrlw    xmm2, xmm1, 8
	LONG $0xcadaf1c5               // vpminub    xmm1, xmm1, xmm2
	LONG $0x4179e2c4; BYTE $0xc9   // vphminposuw    xmm1, xmm1
	LONG $0x7e79c1c4; BYTE $0xc8   // vmovd    r8d, xmm1
	LONG $0x7ff08041               // xor    r8b, 127
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
	LONG $0x3879e2c4; BYTE $0xc1   // vpminsb    xmm0, xmm0, xmm1
	LONG $0x45eff9c5; BYTE $0x50   // vpxor    xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI0_3] */
	LONG $0xd071f1c5; BYTE $0x08   // vpsrlw    xmm1, xmm0, 8
	LONG $0xc1daf9c5               // vpminub    xmm0, xmm0, xmm1
	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
	LONG $0xc67ef9c5               // vmovd    esi, xmm0
	LONG $0x80f68040               // xor    sil, -128
	WORD $0x394d; BYTE $0xca       // cmp    r10, r9
	JE   LBB0_12

	// Scalar loop over elements r10 .. r9-1 using signed byte compares:
	// sil keeps the minimum, r8b keeps the maximum.
LBB0_11:
	LONG $0x04b60f42; BYTE $0x17 // movzx    eax, byte [rdi + r10]
	WORD $0x3840; BYTE $0xc6     // cmp    sil, al
	LONG $0xf6b60f40             // movzx    esi, sil
	WORD $0x4f0f; BYTE $0xf0     // cmovg    esi, eax
	WORD $0x3841; BYTE $0xc0     // cmp    r8b, al
	LONG $0xc0b60f45             // movzx    r8d, r8b
	LONG $0xc04c0f44             // cmovl    r8d, eax
	LONG $0x01c28349             // add    r10, 1
	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
	JNE  LBB0_11

	// Store the results: *maxout (rcx) = r8b, *minout (rdx) = sil.
LBB0_12:
	WORD $0x8844; BYTE $0x01 // mov    byte [rcx], r8b
	WORD $0x8840; BYTE $0x32 // mov    byte [rdx], sil
	VZEROUPPER
	RET

	// Exactly one 64-byte chunk: same accumulator setup as LBB0_4, then the
	// chunk is processed at LBB0_9 (r8 is 1 on this path, so bit 0 is set).
LBB0_5:
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1
	LONG $0x01c0f641             // test    r8b, 1
	JNE  LBB0_9
	JMP  LBB0_10

// _uint8_max_min_avx2 scans `length` unsigned 8-bit values starting at
// `values` and stores the smallest one to *minout and the largest one to
// *maxout.
//
// Stack arguments (32 bytes, no results):
//   values+0(FP)   -> DI  address of the first element
//   length+8(FP)   -> SI  element count; only the low 32 bits (ESI) are
//                         examined, as a signed value
//   minout+16(FP)  -> DX  address that receives the minimum (1 byte)
//   maxout+24(FP)  -> CX  address that receives the maximum (1 byte)
//
// Behaviour by length:
//   length <= 0   stores 0xff to *minout and 0 to *maxout
//   length <  64  scalar loop only (LBB1_11)
//   length >= 64  AVX2 path over the first (length & -64) elements, then the
//                 scalar loop for whatever is left
//
// The body is raw machine code emitted through WORD/LONG/BYTE; the trailing
// comment on each line is the instruction it encodes.
//
// The TEXT line carries no symbolic flag: this file does not include
// textflag.h, so a name such as NOSPLIT is not defined here.
// NOTE(review): the Go prototype is declared outside this file; confirm the
// parameter types there.
TEXT ·_uint8_max_min_avx2(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	// Dispatch on the 32-bit length: <= 0 -> LBB1_1, > 63 -> LBB1_4.
	// Otherwise prepare the scalar loop: r9 = length, sil = 0xff (min),
	// eax = 0 (max), r10 = 0 (element index).
	WORD $0xf685             // test    esi, esi
	JLE  LBB1_1
	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
	WORD $0xfe83; BYTE $0x3f // cmp    esi, 63
	JA   LBB1_4
	WORD $0xb640; BYTE $0xff // mov    sil, -1
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	WORD $0xc031             // xor    eax, eax
	JMP  LBB1_11

	// length <= 0: report min = 0xff and max = 0 without reading any input.
LBB1_1:
	WORD $0xb640; BYTE $0xff // mov    sil, -1
	WORD $0xc031             // xor    eax, eax
	JMP  LBB1_12

	// Vector setup.
	//   r10 = length & -64        elements handled by the AVX2 path
	//   r8  = r10 / 64            number of 64-byte chunks
	//   rsi = -(r8 & -2)          chunk-pair counter, stepped by 2 up to zero
	//   rax = 0                   byte offset into the input
	//   ymm0, ymm3                running maxima, cleared to zero
	//   ymm1, ymm2                running minima, set to all ones
	// When there is exactly one chunk (r10 - 64 == 0) control goes to LBB1_5.
LBB1_4:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xc0e28341         // and    r10d, -64
	LONG $0xc0428d49         // lea    rax, [r10 - 64]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x06e8c149         // shr    r8, 6
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB1_5
	WORD $0x894c; BYTE $0xc6 // mov    rsi, r8
	LONG $0xfee68348         // and    rsi, -2
	WORD $0xf748; BYTE $0xde // neg    rsi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	WORD $0xc031             // xor    eax, eax
	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
	LONG $0xdbefe1c5         // vpxor    xmm3, xmm3, xmm3

	// Main loop: two 64-byte chunks (128 elements) per iteration.
LBB1_7:
	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
	LONG $0x746ffec5; WORD $0x4007 // vmovdqu    ymm6, yword [rdi + rax + 64]
	LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu    ymm7, yword [rdi + rax + 96]
	LONG $0xccdaf5c5               // vpminub    ymm1, ymm1, ymm4
	LONG $0xd5daedc5               // vpminub    ymm2, ymm2, ymm5
	LONG $0xc4defdc5               // vpmaxub    ymm0, ymm0, ymm4
	LONG $0xdddee5c5               // vpmaxub    ymm3, ymm3, ymm5
	LONG $0xcedaf5c5               // vpminub    ymm1, ymm1, ymm6
	LONG $0xd7daedc5               // vpminub    ymm2, ymm2, ymm7
	LONG $0xc6defdc5               // vpmaxub    ymm0, ymm0, ymm6
	LONG $0xdfdee5c5               // vpmaxub    ymm3, ymm3, ymm7
	LONG $0x80e88348               // sub    rax, -128
	LONG $0x02c68348               // add    rsi, 2
	JNE  LBB1_7
	// An odd chunk count leaves one more 64-byte chunk for LBB1_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB1_10

	// Fold a single 64-byte chunk at byte offset rax into the accumulators.
LBB1_9:
	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
	LONG $0xdddee5c5               // vpmaxub    ymm3, ymm3, ymm5
	LONG $0xc4defdc5               // vpmaxub    ymm0, ymm0, ymm4
	LONG $0xd5daedc5               // vpminub    ymm2, ymm2, ymm5
	LONG $0xccdaf5c5               // vpminub    ymm1, ymm1, ymm4

	// Horizontal reduction of the vector accumulators.
	//   max: ymm0 = max(ymm0, ymm3), fold the two 128-bit halves, invert all
	//        bits so the largest byte becomes the smallest, fold byte pairs
	//        with vpsrlw/vpminub, take the minimum word with vphminposuw,
	//        then `not al` to recover the maximum in al.
	//   min: ymm1 = min(ymm1, ymm2), then the same fold without inversion;
	//        the result ends up in sil.
	// If r10 == r9 the vector path covered every element and the scalar loop
	// is skipped.
LBB1_10:
	LONG $0xcadaf5c5               // vpminub    ymm1, ymm1, ymm2
	LONG $0xc3defdc5               // vpmaxub    ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c2 // vextracti128    xmm2, ymm0, 1
	LONG $0xc2def9c5               // vpmaxub    xmm0, xmm0, xmm2
	LONG $0xd276e9c5               // vpcmpeqd    xmm2, xmm2, xmm2
	LONG $0xc2eff9c5               // vpxor    xmm0, xmm0, xmm2
	LONG $0xd071e9c5; BYTE $0x08   // vpsrlw    xmm2, xmm0, 8
	LONG $0xc2daf9c5               // vpminub    xmm0, xmm0, xmm2
	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
	LONG $0xc07ef9c5               // vmovd    eax, xmm0
	WORD $0xd0f6                   // not    al
	LONG $0x397de3c4; WORD $0x01c8 // vextracti128    xmm0, ymm1, 1
	LONG $0xc0daf1c5               // vpminub    xmm0, xmm1, xmm0
	LONG $0xd071f1c5; BYTE $0x08   // vpsrlw    xmm1, xmm0, 8
	LONG $0xc1daf9c5               // vpminub    xmm0, xmm0, xmm1
	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
	LONG $0xc67ef9c5               // vmovd    esi, xmm0
	WORD $0x394d; BYTE $0xca       // cmp    r10, r9
	JE   LBB1_12

	// Scalar loop over elements r10 .. r9-1 using unsigned byte compares:
	// sil keeps the minimum, al keeps the maximum.
LBB1_11:
	LONG $0x04b60f46; BYTE $0x17 // movzx    r8d, byte [rdi + r10]
	WORD $0x3844; BYTE $0xc6     // cmp    sil, r8b
	LONG $0xf6b60f40             // movzx    esi, sil
	LONG $0xf0430f41             // cmovae    esi, r8d
	WORD $0x3844; BYTE $0xc0     // cmp    al, r8b
	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
	LONG $0xc0460f41             // cmovbe    eax, r8d
	LONG $0x01c28349             // add    r10, 1
	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
	JNE  LBB1_11

	// Store the results: *maxout (rcx) = al, *minout (rdx) = sil.
LBB1_12:
	WORD $0x0188             // mov    byte [rcx], al
	WORD $0x8840; BYTE $0x32 // mov    byte [rdx], sil
	VZEROUPPER
	RET

	// Exactly one 64-byte chunk: same accumulator setup as LBB1_4, then the
	// chunk is processed at LBB1_9 (r8 is 1 on this path, so bit 0 is set).
LBB1_5:
	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5 // vpcmpeqd    ymm1, ymm1, ymm1
	WORD $0xc031     // xor    eax, eax
	LONG $0xd276edc5 // vpcmpeqd    ymm2, ymm2, ymm2
	LONG $0xdbefe1c5 // vpxor    xmm3, xmm3, xmm3
	LONG $0x01c0f641 // test    r8b, 1
	JNE  LBB1_9
	JMP  LBB1_10

// LCDATA2: 96-byte constant table read by ·_int16_max_min_avx2 through BP.
//   0x00-0x1f  16 x 0x8000 (-32768)  starting value of the running-maximum lanes
//   0x20-0x3f  16 x 0x7fff (+32767)  starting value of the running-minimum lanes
//   0x40-0x4f   8 x 0x7fff           XOR mask applied to the maximum before
//                                    vphminposuw
//   0x50-0x5f   8 x 0x8000           XOR mask applied to the minimum before
//                                    vphminposuw
// NOTE(review): the numeric GLOBL flag 8 is presumably RODATA from
// textflag.h — confirm.
DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x010(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x018(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x020(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x028(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x030(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x038(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x040(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x048(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x050(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x058(SB)/8, $0x8000800080008000
GLOBL LCDATA2<>(SB), 8, $96

// _int16_max_min_avx2 scans `length` signed 16-bit values starting at
// `values` and stores the smallest one to *minout and the largest one to
// *maxout.
//
// Stack arguments (32 bytes, no results):
//   values+0(FP)   -> DI  address of the first element
//   length+8(FP)   -> SI  element count; only the low 32 bits (ESI) are
//                         examined, as a signed value
//   minout+16(FP)  -> DX  address that receives the minimum (2 bytes)
//   maxout+24(FP)  -> CX  address that receives the maximum (2 bytes)
//
// Behaviour by length:
//   length <= 0   stores 32767 to *minout and -32768 to *maxout
//   length <  32  scalar loop only (LBB2_11)
//   length >= 32  AVX2 path over the first (length & -32) elements, then the
//                 scalar loop for whatever is left
//
// The body is raw machine code emitted through WORD/LONG/BYTE; the trailing
// comment on each line is the instruction it encodes.
//
// The TEXT line carries no symbolic flag: this file does not include
// textflag.h, so a name such as NOSPLIT is not defined here.
// NOTE(review): the Go prototype is declared outside this file; confirm the
// parameter types there.
TEXT ·_int16_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	// BP = base address of the LCDATA2 constant table used by the vector path.
	// NOTE(review): BP is overwritten here; presumably the non-zero frame size
	// is what lets the assembler preserve the caller's BP — confirm.
	LEAQ LCDATA2<>(SB), BP

	// Dispatch on the 32-bit length: <= 0 -> LBB2_1, > 31 -> LBB2_4.
	// Otherwise prepare the scalar loop: r9 = length, r8w = -32768 (max),
	// si = 32767 (min), r10 = 0 (element index).
	WORD $0xf685                 // test    esi, esi
	JLE  LBB2_1
	WORD $0x8941; BYTE $0xf1     // mov    r9d, esi
	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
	JA   LBB2_4
	LONG $0x00b84166; BYTE $0x80 // mov    r8w, -32768
	LONG $0x7fffbe66             // mov    si, 32767
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	JMP  LBB2_11

	// length <= 0: report min = 32767 and max = -32768 without reading input.
LBB2_1:
	LONG $0x7fffbe66             // mov    si, 32767
	LONG $0x00b84166; BYTE $0x80 // mov    r8w, -32768
	JMP  LBB2_12

	// Vector setup.
	//   r10 = length & -32        elements handled by the AVX2 path
	//   r8  = r10 / 32            number of 32-element (64-byte) chunks
	//   rsi = -(r8 & -2)          chunk-pair counter, stepped by 2 up to zero
	//   rax = 0                   element offset (scaled by 2 when addressing)
	//   ymm1, ymm3                running maxima, every lane starts at -32768
	//   ymm0, ymm2                running minima, every lane starts at 32767
	// When there is exactly one chunk (r10 - 32 == 0) control goes to LBB2_5.
	// NOTE(review): vmovdqa needs LCDATA2 to be 32-byte aligned; confirm the
	// linker guarantees that.
LBB2_4:
	WORD $0x8945; BYTE $0xca     // mov    r10d, r9d
	LONG $0xe0e28341             // and    r10d, -32
	LONG $0xe0428d49             // lea    rax, [r10 - 32]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x05e8c149             // shr    r8, 5
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB2_5
	WORD $0x894c; BYTE $0xc6     // mov    rsi, r8
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1

	// Main loop: two chunks (64 elements, 128 bytes) per iteration.
LBB2_7:
	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
	LONG $0x746ffec5; WORD $0x4047 // vmovdqu    ymm6, yword [rdi + 2*rax + 64]
	LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu    ymm7, yword [rdi + 2*rax + 96]
	LONG $0xc4eafdc5               // vpminsw    ymm0, ymm0, ymm4
	LONG $0xd5eaedc5               // vpminsw    ymm2, ymm2, ymm5
	LONG $0xcceef5c5               // vpmaxsw    ymm1, ymm1, ymm4
	LONG $0xddeee5c5               // vpmaxsw    ymm3, ymm3, ymm5
	LONG $0xc6eafdc5               // vpminsw    ymm0, ymm0, ymm6
	LONG $0xd7eaedc5               // vpminsw    ymm2, ymm2, ymm7
	LONG $0xceeef5c5               // vpmaxsw    ymm1, ymm1, ymm6
	LONG $0xdfeee5c5               // vpmaxsw    ymm3, ymm3, ymm7
	LONG $0x40c08348               // add    rax, 64
	LONG $0x02c68348               // add    rsi, 2
	JNE  LBB2_7
	// An odd chunk count leaves one more 32-element chunk for LBB2_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB2_10

	// Fold a single 32-element chunk at element offset rax into the
	// accumulators.
LBB2_9:
	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
	LONG $0xddeee5c5               // vpmaxsw    ymm3, ymm3, ymm5
	LONG $0xcceef5c5               // vpmaxsw    ymm1, ymm1, ymm4
	LONG $0xd5eaedc5               // vpminsw    ymm2, ymm2, ymm5
	LONG $0xc4eafdc5               // vpminsw    ymm0, ymm0, ymm4

	// Horizontal reduction of the vector accumulators.
	//   max: ymm1 = max(ymm1, ymm3), fold the two 128-bit halves, XOR every
	//        word with 0x7fff (table offset 64) so the largest signed word
	//        becomes the smallest unsigned word, take it with vphminposuw,
	//        then XOR with 32767 again; the value is the low word of r8d.
	//   min: same scheme on ymm0/ymm2 with XOR mask 0x8000 (table offset 80);
	//        the value is the low word of esi.
	// Only the low 16 bits of r8d/esi are compared and stored afterwards.
	// If r10 == r9 the vector path covered every element and the scalar loop
	// is skipped.
LBB2_10:
	LONG $0xcbeef5c5                           // vpmaxsw    ymm1, ymm1, ymm3
	LONG $0x397de3c4; WORD $0x01cb             // vextracti128    xmm3, ymm1, 1
	LONG $0xcbeef1c5                           // vpmaxsw    xmm1, xmm1, xmm3
	LONG $0x4deff1c5; BYTE $0x40               // vpxor    xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI2_2] */
	LONG $0xc2eafdc5                           // vpminsw    ymm0, ymm0, ymm2
	LONG $0x4179e2c4; BYTE $0xc9               // vphminposuw    xmm1, xmm1
	LONG $0x7e79c1c4; BYTE $0xc8               // vmovd    r8d, xmm1
	LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor    r8d, 32767
	LONG $0x397de3c4; WORD $0x01c1             // vextracti128    xmm1, ymm0, 1
	LONG $0xc1eaf9c5                           // vpminsw    xmm0, xmm0, xmm1
	LONG $0x45eff9c5; BYTE $0x50               // vpxor    xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI2_3] */
	LONG $0x4179e2c4; BYTE $0xc0               // vphminposuw    xmm0, xmm0
	LONG $0xc67ef9c5                           // vmovd    esi, xmm0
	LONG $0x8000f681; WORD $0x0000             // xor    esi, 32768
	WORD $0x394d; BYTE $0xca                   // cmp    r10, r9
	JE   LBB2_12

	// Scalar loop over elements r10 .. r9-1 using signed 16-bit compares:
	// si keeps the minimum, r8w keeps the maximum.
LBB2_11:
	LONG $0x04b70f42; BYTE $0x57 // movzx    eax, word [rdi + 2*r10]
	WORD $0x3966; BYTE $0xc6     // cmp    si, ax
	WORD $0x4f0f; BYTE $0xf0     // cmovg    esi, eax
	LONG $0xc0394166             // cmp    r8w, ax
	LONG $0xc04c0f44             // cmovl    r8d, eax
	LONG $0x01c28349             // add    r10, 1
	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
	JNE  LBB2_11

	// Store the results: *maxout (rcx) = r8w, *minout (rdx) = si.
LBB2_12:
	LONG $0x01894466         // mov    word [rcx], r8w
	WORD $0x8966; BYTE $0x32 // mov    word [rdx], si
	VZEROUPPER
	RET

	// Exactly one 32-element chunk: same accumulator setup as LBB2_4, then
	// the chunk is processed at LBB2_9 (r8 is 1 on this path, so bit 0 is set).
LBB2_5:
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1
	LONG $0x01c0f641             // test    r8b, 1
	JNE  LBB2_9
	JMP  LBB2_10

// _uint16_max_min_avx2 scans `length` unsigned 16-bit values starting at
// `values` and stores the smallest one to *minout and the largest one to
// *maxout.
//
// Stack arguments (32 bytes, no results):
//   values+0(FP)   -> DI  address of the first element
//   length+8(FP)   -> SI  element count; only the low 32 bits (ESI) are
//                         examined, as a signed value
//   minout+16(FP)  -> DX  address that receives the minimum (2 bytes)
//   maxout+24(FP)  -> CX  address that receives the maximum (2 bytes)
//
// Behaviour by length:
//   length <= 0   stores 0xffff to *minout and 0 to *maxout
//   length <  32  scalar loop only (LBB3_11)
//   length >= 32  AVX2 path over the first (length & -32) elements, then the
//                 scalar loop for whatever is left
//
// The body is raw machine code emitted through WORD/LONG/BYTE; the trailing
// comment on each line is the instruction it encodes.
// NOTE(review): the Go prototype is declared outside this file; confirm the
// parameter types there.
TEXT ·_uint16_max_min_avx2(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	// Dispatch on the 32-bit length: <= 0 -> LBB3_1, > 31 -> LBB3_4.
	// Otherwise prepare the scalar loop: r9 = length, r8w = 0xffff (min),
	// esi = 0 (max), r10 = 0 (element index).
	WORD $0xf685                 // test    esi, esi
	JLE  LBB3_1
	WORD $0x8941; BYTE $0xf1     // mov    r9d, esi
	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
	JA   LBB3_4
	LONG $0xffb84166; BYTE $0xff // mov    r8w, -1
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB3_11

	// length <= 0: report min = 0xffff and max = 0 without reading any input.
LBB3_1:
	LONG $0xffb84166; BYTE $0xff // mov    r8w, -1
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB3_12

	// Vector setup.
	//   r10 = length & -32        elements handled by the AVX2 path
	//   r8  = r10 / 32            number of 32-element (64-byte) chunks
	//   rsi = -(r8 & -2)          chunk-pair counter, stepped by 2 up to zero
	//   rax = 0                   element offset (scaled by 2 when addressing)
	//   ymm0, ymm3                running maxima, cleared to zero
	//   ymm1, ymm2                running minima, set to all ones
	// When there is exactly one chunk (r10 - 32 == 0) control goes to LBB3_5.
LBB3_4:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xe0e28341         // and    r10d, -32
	LONG $0xe0428d49         // lea    rax, [r10 - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_5
	WORD $0x894c; BYTE $0xc6 // mov    rsi, r8
	LONG $0xfee68348         // and    rsi, -2
	WORD $0xf748; BYTE $0xde // neg    rsi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	WORD $0xc031             // xor    eax, eax
	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
	LONG $0xdbefe1c5         // vpxor    xmm3, xmm3, xmm3

	// Main loop: two chunks (64 elements, 128 bytes) per iteration.
LBB3_7:
	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
	LONG $0x746ffec5; WORD $0x4047 // vmovdqu    ymm6, yword [rdi + 2*rax + 64]
	LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu    ymm7, yword [rdi + 2*rax + 96]
	LONG $0x3a75e2c4; BYTE $0xcc   // vpminuw    ymm1, ymm1, ymm4
	LONG $0x3a6de2c4; BYTE $0xd5   // vpminuw    ymm2, ymm2, ymm5
	LONG $0x3e7de2c4; BYTE $0xc4   // vpmaxuw    ymm0, ymm0, ymm4
	LONG $0x3e65e2c4; BYTE $0xdd   // vpmaxuw    ymm3, ymm3, ymm5
	LONG $0x3a75e2c4; BYTE $0xce   // vpminuw    ymm1, ymm1, ymm6
	LONG $0x3a6de2c4; BYTE $0xd7   // vpminuw    ymm2, ymm2, ymm7
	LONG $0x3e7de2c4; BYTE $0xc6   // vpmaxuw    ymm0, ymm0, ymm6
	LONG $0x3e65e2c4; BYTE $0xdf   // vpmaxuw    ymm3, ymm3, ymm7
	LONG $0x40c08348               // add    rax, 64
	LONG $0x02c68348               // add    rsi, 2
	JNE  LBB3_7
	// An odd chunk count leaves one more 32-element chunk for LBB3_9.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_10

	// Fold a single 32-element chunk at element offset rax into the
	// accumulators.
LBB3_9:
	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
	LONG $0x3e65e2c4; BYTE $0xdd   // vpmaxuw    ymm3, ymm3, ymm5
	LONG $0x3e7de2c4; BYTE $0xc4   // vpmaxuw    ymm0, ymm0, ymm4
	LONG $0x3a6de2c4; BYTE $0xd5   // vpminuw    ymm2, ymm2, ymm5
	LONG $0x3a75e2c4; BYTE $0xcc   // vpminuw    ymm1, ymm1, ymm4

	// Horizontal reduction of the vector accumulators.
	//   max: ymm0 = max(ymm0, ymm3), fold the two 128-bit halves, invert all
	//        bits so the largest word becomes the smallest, take it with
	//        vphminposuw, then `not esi`; the maximum is the low word of esi.
	//   min: ymm1 = min(ymm1, ymm2), fold the halves, vphminposuw; the
	//        minimum is the low word of r8d.
	// Only the low 16 bits of esi/r8d are compared and stored afterwards.
	// If r10 == r9 the vector path covered every element and the scalar loop
	// is skipped.
LBB3_10:
	LONG $0x3a75e2c4; BYTE $0xca   // vpminuw    ymm1, ymm1, ymm2
	LONG $0x3e7de2c4; BYTE $0xc3   // vpmaxuw    ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c2 // vextracti128    xmm2, ymm0, 1
	LONG $0x3e79e2c4; BYTE $0xc2   // vpmaxuw    xmm0, xmm0, xmm2
	LONG $0xd276e9c5               // vpcmpeqd    xmm2, xmm2, xmm2
	LONG $0xc2eff9c5               // vpxor    xmm0, xmm0, xmm2
	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
	LONG $0xc67ef9c5               // vmovd    esi, xmm0
	WORD $0xd6f7                   // not    esi
	LONG $0x397de3c4; WORD $0x01c8 // vextracti128    xmm0, ymm1, 1
	LONG $0x3a71e2c4; BYTE $0xc0   // vpminuw    xmm0, xmm1, xmm0
	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
	LONG $0x7e79c1c4; BYTE $0xc0   // vmovd    r8d, xmm0
	WORD $0x394d; BYTE $0xca       // cmp    r10, r9
	JE   LBB3_12

	// Scalar loop over elements r10 .. r9-1 using unsigned 16-bit compares:
	// r8w keeps the minimum, si keeps the maximum.
LBB3_11:
	LONG $0x04b70f42; BYTE $0x57 // movzx    eax, word [rdi + 2*r10]
	LONG $0xc0394166             // cmp    r8w, ax
	LONG $0xc0430f44             // cmovae    r8d, eax
	WORD $0x3966; BYTE $0xc6     // cmp    si, ax
	WORD $0x460f; BYTE $0xf0     // cmovbe    esi, eax
	LONG $0x01c28349             // add    r10, 1
	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
	JNE  LBB3_11

	// Store the results: *maxout (rcx) = si, *minout (rdx) = r8w.
LBB3_12:
	WORD $0x8966; BYTE $0x31 // mov    word [rcx], si
	LONG $0x02894466         // mov    word [rdx], r8w
	VZEROUPPER
	RET

	// Exactly one 32-element chunk: same accumulator setup as LBB3_4, then
	// the chunk is processed at LBB3_9 (r8 is 1 on this path, so bit 0 is set).
LBB3_5:
	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5 // vpcmpeqd    ymm1, ymm1, ymm1
	WORD $0xc031     // xor    eax, eax
	LONG $0xd276edc5 // vpcmpeqd    ymm2, ymm2, ymm2
	LONG $0xdbefe1c5 // vpxor    xmm3, xmm3, xmm3
	LONG $0x01c0f641 // test    r8b, 1
	JNE  LBB3_9
	JMP  LBB3_10

// LCDATA3: 8-byte constant pair read by ·_int32_max_min_avx2 through BP.
// Stored little-endian, so the two 32-bit halves are:
//   +0  0x80000000 (-2147483648)  broadcast as the starting running maximum
//   +4  0x7fffffff (+2147483647)  broadcast as the starting running minimum
// NOTE(review): the numeric GLOBL flag 8 is presumably RODATA from
// textflag.h — confirm.
DATA LCDATA3<>+0x000(SB)/8, $0x7fffffff80000000
GLOBL LCDATA3<>(SB), 8, $8

// _int32_max_min_avx2 scans `length` signed 32-bit values starting at
// `values` and stores the smallest one to *minout and the largest one to
// *maxout.
//
// Stack arguments (32 bytes, no results):
//   values+0(FP)   -> DI  address of the first element
//   length+8(FP)   -> SI  element count; only the low 32 bits (ESI) are
//                         examined, as a signed value
//   minout+16(FP)  -> DX  address that receives the minimum (4 bytes)
//   maxout+24(FP)  -> CX  address that receives the maximum (4 bytes)
//
// Behaviour by length:
//   length <= 0   stores 2147483647 to *minout and -2147483648 to *maxout
//   length <  32  scalar loop only (LBB4_7)
//   length >= 32  AVX2 loop over the first (length & -32) elements, then the
//                 scalar loop for whatever is left
//
// The body is raw machine code emitted through WORD/LONG/BYTE; the trailing
// comment on each line is the instruction it encodes.
//
// The TEXT line carries no symbolic flag: this file does not include
// textflag.h, so a name such as NOSPLIT is not defined here.
// NOTE(review): the Go prototype is declared outside this file; confirm the
// parameter types there.
TEXT ·_int32_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	// BP = base address of the LCDATA3 constant pair used by the vector path.
	// NOTE(review): BP is overwritten here; presumably the non-zero frame size
	// is what lets the assembler preserve the caller's BP — confirm.
	LEAQ LCDATA3<>(SB), BP

	// Dispatch on the 32-bit length: <= 0 -> LBB4_1, > 31 -> LBB4_4.
	// Otherwise prepare the scalar loop: r8 = length, r10d = INT32_MIN (max),
	// eax = INT32_MAX (min), r9 = 0 (element index).
	WORD $0xf685                   // test    esi, esi
	JLE  LBB4_1
	WORD $0x8941; BYTE $0xf0       // mov    r8d, esi
	WORD $0xfe83; BYTE $0x1f       // cmp    esi, 31
	JA   LBB4_4
	LONG $0x0000ba41; WORD $0x8000 // mov    r10d, -2147483648
	LONG $0xffffffb8; BYTE $0x7f   // mov    eax, 2147483647
	WORD $0x3145; BYTE $0xc9       // xor    r9d, r9d
	JMP  LBB4_7

	// length <= 0: report min = INT32_MAX (eax) and max = INT32_MIN (esi)
	// without reading any input.
LBB4_1:
	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647
	LONG $0x000000be; BYTE $0x80 // mov    esi, -2147483648
	JMP  LBB4_8

	// Vector setup.
	//   r9  = length & -32        elements handled by the AVX2 loop
	//   rax = 0                   element offset (scaled by 4 when addressing)
	//   ymm0..ymm3                running minima, every lane starts at INT32_MAX
	//   ymm4..ymm7                running maxima, every lane starts at INT32_MIN
LBB4_4:
	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
	LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd    ymm4, dword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0xe0e18341               // and    r9d, -32
	LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd    ymm0, dword 4[rbp] /* [rip + .LCPI4_1] */
	WORD $0xc031                   // xor    eax, eax
	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4
	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4

	// Main loop: 32 elements (128 bytes) per iteration until rax == r9.
LBB4_5:
	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
	LONG $0x397dc2c4; BYTE $0xc0   // vpminsd    ymm0, ymm0, ymm8
	LONG $0x3975c2c4; BYTE $0xc9   // vpminsd    ymm1, ymm1, ymm9
	LONG $0x396dc2c4; BYTE $0xd2   // vpminsd    ymm2, ymm2, ymm10
	LONG $0x3965c2c4; BYTE $0xdb   // vpminsd    ymm3, ymm3, ymm11
	LONG $0x3d5dc2c4; BYTE $0xe0   // vpmaxsd    ymm4, ymm4, ymm8
	LONG $0x3d55c2c4; BYTE $0xe9   // vpmaxsd    ymm5, ymm5, ymm9
	LONG $0x3d4dc2c4; BYTE $0xf2   // vpmaxsd    ymm6, ymm6, ymm10
	LONG $0x3d45c2c4; BYTE $0xfb   // vpmaxsd    ymm7, ymm7, ymm11
	LONG $0x20c08348               // add    rax, 32
	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
	JNE  LBB4_5
	// Horizontal reduction: merge the four maximum accumulators into ymm4,
	// fold 256 -> 128 -> 64 -> 32 bits (vextracti128, vpshufd 78, vpshufd
	// 229) and move the result to r10d.
	LONG $0x3d5de2c4; BYTE $0xe5   // vpmaxsd    ymm4, ymm4, ymm5
	LONG $0x3d5de2c4; BYTE $0xe6   // vpmaxsd    ymm4, ymm4, ymm6
	LONG $0x3d5de2c4; BYTE $0xe7   // vpmaxsd    ymm4, ymm4, ymm7
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
	// Same reduction for the four minimum accumulators; result in eax.
	LONG $0x397de2c4; BYTE $0xc1   // vpminsd    ymm0, ymm0, ymm1
	LONG $0x397de2c4; BYTE $0xc2   // vpminsd    ymm0, ymm0, ymm2
	LONG $0x397de2c4; BYTE $0xc3   // vpminsd    ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
	LONG $0xc07ef9c5               // vmovd    eax, xmm0
	// esi = maximum so far, ready for the store at LBB4_8 when no scalar
	// remainder exists (r9 == r8).
	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
	JE   LBB4_8

	// Scalar loop over elements r9 .. r8-1 using signed compares:
	// eax keeps the minimum; esi (copied back to r10d each pass) keeps the
	// maximum.
LBB4_7:
	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
	WORD $0xf039             // cmp    eax, esi
	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
	LONG $0xf24d0f41         // cmovge    esi, r10d
	LONG $0x01c18349         // add    r9, 1
	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
	JNE  LBB4_7

	// Store the results: *maxout (rcx) = esi, *minout (rdx) = eax.
LBB4_8:
	WORD $0x3189 // mov    dword [rcx], esi
	WORD $0x0289 // mov    dword [rdx], eax
	VZEROUPPER
	RET

// _uint32_max_min_avx2 scans `length` unsigned 32-bit values starting at
// `values` and stores the smallest one to *minout and the largest one to
// *maxout.
//
// Stack arguments (32 bytes, no results):
//   values+0(FP)   -> DI  address of the first element
//   length+8(FP)   -> SI  element count; only the low 32 bits (ESI) are
//                         examined, as a signed value
//   minout+16(FP)  -> DX  address that receives the minimum (4 bytes)
//   maxout+24(FP)  -> CX  address that receives the maximum (4 bytes)
//
// Behaviour by length:
//   length <= 0   stores 0xffffffff to *minout and 0 to *maxout
//   length <  32  scalar loop only (LBB5_7)
//   length >= 32  AVX2 loop over the first (length & -32) elements, then the
//                 scalar loop for whatever is left
//
// The body is raw machine code emitted through WORD/LONG/BYTE; the trailing
// comment on each line is the instruction it encodes.
// NOTE(review): the Go prototype is declared outside this file; confirm the
// parameter types there.
TEXT ·_uint32_max_min_avx2(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	// Dispatch on the 32-bit length: <= 0 -> LBB5_1, > 31 -> LBB5_4.
	// Otherwise prepare the scalar loop: r8 = length, r9 = 0 (element index),
	// eax = 0xffffffff (min), r10d = 0 (max).
	WORD $0xf685                 // test    esi, esi
	JLE  LBB5_1
	WORD $0x8941; BYTE $0xf0     // mov    r8d, esi
	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
	JA   LBB5_4
	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	JMP  LBB5_7

	// length <= 0: report min = 0xffffffff (eax) and max = 0 (esi) without
	// reading any input.
LBB5_1:
	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB5_8

	// Vector setup.
	//   r9  = length & -32        elements handled by the AVX2 loop
	//   rax = 0                   element offset (scaled by 4 when addressing)
	//   ymm0..ymm3                running minima, set to all ones
	//   ymm4..ymm7                running maxima, cleared to zero
LBB5_4:
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0xe0e18341         // and    r9d, -32
	LONG $0xe4efd9c5         // vpxor    xmm4, xmm4, xmm4
	LONG $0xc076fdc5         // vpcmpeqd    ymm0, ymm0, ymm0
	WORD $0xc031             // xor    eax, eax
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
	LONG $0xdb76e5c5         // vpcmpeqd    ymm3, ymm3, ymm3
	LONG $0xedefd1c5         // vpxor    xmm5, xmm5, xmm5
	LONG $0xf6efc9c5         // vpxor    xmm6, xmm6, xmm6
	LONG $0xffefc1c5         // vpxor    xmm7, xmm7, xmm7

	// Main loop: 32 elements (128 bytes) per iteration until rax == r9.
LBB5_5:
	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
	LONG $0x3b7dc2c4; BYTE $0xc0   // vpminud    ymm0, ymm0, ymm8
	LONG $0x3b75c2c4; BYTE $0xc9   // vpminud    ymm1, ymm1, ymm9
	LONG $0x3b6dc2c4; BYTE $0xd2   // vpminud    ymm2, ymm2, ymm10
	LONG $0x3b65c2c4; BYTE $0xdb   // vpminud    ymm3, ymm3, ymm11
	LONG $0x3f5dc2c4; BYTE $0xe0   // vpmaxud    ymm4, ymm4, ymm8
	LONG $0x3f55c2c4; BYTE $0xe9   // vpmaxud    ymm5, ymm5, ymm9
	LONG $0x3f4dc2c4; BYTE $0xf2   // vpmaxud    ymm6, ymm6, ymm10
	LONG $0x3f45c2c4; BYTE $0xfb   // vpmaxud    ymm7, ymm7, ymm11
	LONG $0x20c08348               // add    rax, 32
	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
	JNE  LBB5_5
	// Horizontal reduction: merge the four maximum accumulators into ymm4,
	// fold 256 -> 128 -> 64 -> 32 bits (vextracti128, vpshufd 78, vpshufd
	// 229) and move the result to r10d.
	LONG $0x3f5de2c4; BYTE $0xe5   // vpmaxud    ymm4, ymm4, ymm5
	LONG $0x3f5de2c4; BYTE $0xe6   // vpmaxud    ymm4, ymm4, ymm6
	LONG $0x3f5de2c4; BYTE $0xe7   // vpmaxud    ymm4, ymm4, ymm7
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
	// Same reduction for the four minimum accumulators; result in eax.
	LONG $0x3b7de2c4; BYTE $0xc1   // vpminud    ymm0, ymm0, ymm1
	LONG $0x3b7de2c4; BYTE $0xc2   // vpminud    ymm0, ymm0, ymm2
	LONG $0x3b7de2c4; BYTE $0xc3   // vpminud    ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
	LONG $0xc07ef9c5               // vmovd    eax, xmm0
	// esi = maximum so far, ready for the store at LBB5_8 when no scalar
	// remainder exists (r9 == r8).
	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
	JE   LBB5_8

	// Scalar loop over elements r9 .. r8-1 using unsigned compares:
	// eax keeps the minimum; esi (copied back to r10d each pass) keeps the
	// maximum.
LBB5_7:
	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
	WORD $0xf039             // cmp    eax, esi
	WORD $0x430f; BYTE $0xc6 // cmovae    eax, esi
	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
	LONG $0xf2470f41         // cmova    esi, r10d
	LONG $0x01c18349         // add    r9, 1
	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
	JNE  LBB5_7

	// Store the results: *maxout (rcx) = esi, *minout (rdx) = eax.
LBB5_8:
	WORD $0x3189 // mov    dword [rcx], esi
	WORD $0x0289 // mov    dword [rdx], eax
	VZEROUPPER
	RET

// Constant table used by ·_int64_max_min_avx2 (addressed through BP/rbp):
//   +0x000: 0x8000000000000000, the smallest int64; seed for the max accumulators
//   +0x008: 0x7fffffffffffffff, the largest int64; seed for the min accumulators
DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA4<>+0x008(SB)/8, $0x7fffffffffffffff
GLOBL LCDATA4<>(SB), 8, $16

// ·_int64_max_min_avx2 scans signed 64-bit values starting at `values` and
// stores the smallest through `minout` and the largest through `maxout`.
//
// The body is emitted as raw opcode bytes; the trailing comment on each line is
// the Intel-syntax disassembly of those bytes.
//
// Arguments (copied from the Go frame into registers):
//   values -> DI (rdi), length -> SI (rsi), minout -> DX (rdx), maxout -> CX (rcx)
//
// Only the low 32 bits of length are examined (test esi, esi / mov r8d, esi).
// If that 32-bit value is <= 0, no element is read and the routine stores
// min = 0x7fffffffffffffff and max = 0x8000000000000000.
//
// Register roles:
//   r8         element count (low 32 bits of length, zero-extended)
//   r9         count rounded down to a multiple of 16, then the scalar tail index
//   rax        element index inside the vector loop; otherwise the running minimum
//   r10 / rsi  running maximum
//   ymm0-ymm3  four independent min accumulators (4 x int64 each)
//   ymm4-ymm7  four independent max accumulators (4 x int64 each)
//   ymm8-ymm12 loaded data and compare masks
//
// NOTE(review): BP is overwritten with the constant-table address; presumably the
// non-zero frame size ($8) is what makes the assembler save/restore BP — confirm.
TEXT ·_int64_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA4<>(SB), BP

	// rax starts as the largest int64, i.e. the initial minimum.
	QUAD $0xffffffffffffb848; WORD $0x7fff // mov    rax, 9223372036854775807
	WORD $0xf685                           // test    esi, esi
	// Nothing to scan: store the seed values.
	JLE  LBB6_1
	WORD $0x8941; BYTE $0xf0               // mov    r8d, esi
	WORD $0xfe83; BYTE $0x0f               // cmp    esi, 15
	// More than 15 elements: take the vectorised path.
	JA   LBB6_4
	// 1..15 elements: scalar loop only. rax + 1 wraps to the smallest int64,
	// which is the initial maximum; r9 = 0 is the starting index.
	LONG $0x01508d4c                       // lea    r10, [rax + 1]
	WORD $0x3145; BYTE $0xc9               // xor    r9d, r9d
	JMP  LBB6_7

// Empty input: rsi (max) = rax + 1 = smallest int64, rax (min) = largest int64.
LBB6_1:
	LONG $0x01708d48 // lea    rsi, [rax + 1]
	JMP  LBB6_8

// Vector path setup: r9 = count & -16 (elements handled by the vector loop),
// ymm4..ymm7 = broadcast smallest int64 (max seeds),
// ymm0..ymm3 = broadcast largest int64 (min seeds), rax = 0 (element index).
LBB6_4:
	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
	LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq    ymm4, qword 0[rbp] /* [rip + .LCPI6_0] */
	LONG $0xf0e18341               // and    r9d, -16
	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq    ymm0, qword 8[rbp] /* [rip + .LCPI6_1] */
	WORD $0xc031                   // xor    eax, eax
	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4
	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4

// Main loop: 16 elements (four 32-byte unaligned loads) per iteration.
// Each min/max is a signed compare (vpcmpgtq) followed by a per-lane select
// (vblendvpd picks its third operand where the mask is set, else its second).
// First half updates the min accumulators ymm0/ymm3/ymm2/ymm1 with the loads in
// ymm8/ymm9/ymm10/ymm11; second half updates the max accumulators
// ymm4/ymm7/ymm6/ymm5 with the same loaded data.
LBB6_5:
	LONG $0x046f7ec5; BYTE $0xc7   // vmovdqu    ymm8, yword [rdi + 8*rax]
	LONG $0x373d62c4; BYTE $0xc8   // vpcmpgtq    ymm9, ymm8, ymm0
	LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd    ymm0, ymm8, ymm0, ymm9
	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
	LONG $0x373562c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm9, ymm3
	LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd    ymm3, ymm9, ymm3, ymm10
	LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu    ymm10, yword [rdi + 8*rax + 64]
	LONG $0x372d62c4; BYTE $0xda   // vpcmpgtq    ymm11, ymm10, ymm2
	LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd    ymm2, ymm10, ymm2, ymm11
	LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 96]
	LONG $0x372562c4; BYTE $0xe1   // vpcmpgtq    ymm12, ymm11, ymm1
	LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd    ymm1, ymm11, ymm1, ymm12
	LONG $0x375d42c4; BYTE $0xe0   // vpcmpgtq    ymm12, ymm4, ymm8
	LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd    ymm4, ymm8, ymm4, ymm12
	LONG $0x374542c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm7, ymm9
	LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd    ymm7, ymm9, ymm7, ymm8
	LONG $0x374d42c4; BYTE $0xc2   // vpcmpgtq    ymm8, ymm6, ymm10
	LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd    ymm6, ymm10, ymm6, ymm8
	LONG $0x375542c4; BYTE $0xc3   // vpcmpgtq    ymm8, ymm5, ymm11
	LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm11, ymm5, ymm8
	LONG $0x10c08348               // add    rax, 16
	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
	JNE  LBB6_5
	// Fold the four max accumulators into ymm4, then reduce horizontally:
	// high 128 bits vs low 128 bits, then the two 64-bit halves
	// (vpermilps with imm 78 swaps them). Result lands in r10.
	LONG $0x375d62c4; BYTE $0xc7   // vpcmpgtq    ymm8, ymm4, ymm7
	LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd    ymm4, ymm7, ymm4, ymm8
	LONG $0x375de2c4; BYTE $0xfe   // vpcmpgtq    ymm7, ymm4, ymm6
	LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd    ymm4, ymm6, ymm4, ymm7
	LONG $0x375de2c4; BYTE $0xf5   // vpcmpgtq    ymm6, ymm4, ymm5
	LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd    ymm4, ymm5, ymm4, ymm6
	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
	LONG $0x0479e3c4; WORD $0x4eec // vpermilps    xmm5, xmm4, 78
	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
	LONG $0x7ef9c1c4; BYTE $0xe2   // vmovq    r10, xmm4
	// Same reduction for the four min accumulators, folded into ymm0;
	// the result lands in rax (which stops being the loop index here).
	LONG $0x3765e2c4; BYTE $0xe0   // vpcmpgtq    ymm4, ymm3, ymm0
	LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd    ymm0, ymm3, ymm0, ymm4
	LONG $0x376de2c4; BYTE $0xd8   // vpcmpgtq    ymm3, ymm2, ymm0
	LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd    ymm0, ymm2, ymm0, ymm3
	LONG $0x3775e2c4; BYTE $0xd0   // vpcmpgtq    ymm2, ymm1, ymm0
	LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd    ymm0, ymm1, ymm0, ymm2
	LONG $0x197de3c4; WORD $0x01c1 // vextractf128    xmm1, ymm0, 1
	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
	LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps    xmm1, xmm0, 78
	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
	// rsi carries the max into the store block; skip the scalar tail when the
	// count was an exact multiple of 16 (r9 == r8).
	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
	JE   LBB6_8

// Scalar tail: one element per iteration for indices r9 .. r8-1.
// rax = min(rax, value) via signed cmovg; rsi = max(r10, value) via signed
// cmovge, then copied back into r10 for the next iteration.
LBB6_7:
	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
	LONG $0xc64f0f48         // cmovg    rax, rsi
	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
	LONG $0xf24d0f49         // cmovge    rsi, r10
	LONG $0x01c18349         // add    r9, 1
	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
	JNE  LBB6_7

// Store results: *maxout (rcx) = rsi, *minout (rdx) = rax.
LBB6_8:
	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
	VZEROUPPER
	RET

// Constant used by ·_uint64_max_min_avx2 (addressed through BP/rbp):
//   +0x000: 0x8000000000000000, the 64-bit sign bit. XOR-ing both operands with
//           it lets the signed vpcmpgtq compare order values as unsigned.
DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000
GLOBL LCDATA5<>(SB), 8, $8

// ·_uint64_max_min_avx2 scans unsigned 64-bit values starting at `values` and
// stores the smallest through `minout` and the largest through `maxout`.
//
// The body is emitted as raw opcode bytes; the trailing comment on each line is
// the Intel-syntax disassembly of those bytes.
//
// Arguments (copied from the Go frame into registers):
//   values -> DI (rdi), length -> SI (rsi), minout -> DX (rdx), maxout -> CX (rcx)
//
// Only the low 32 bits of length are examined (test esi, esi / mov r8d, esi).
// If that 32-bit value is <= 0, no element is read and the routine stores
// min = 0xffffffffffffffff (rax = -1) and max = 0.
//
// Register roles:
//   r8         element count (low 32 bits of length, zero-extended)
//   r9         count rounded down to a multiple of 16, then the scalar tail index
//   rax        element index inside the vector loop; otherwise the running minimum
//   r10 / rsi  running maximum
//   ymm0       sign-bit mask broadcast to every 64-bit lane
//   ymm1, ymm4, ymm3, ymm2  min accumulators, seeded with all-ones
//   ymm5, ymm8, ymm7, ymm6  max accumulators, seeded with zero
//   ymm9-ymm11 loaded data, sign-flipped temporaries and compare masks
//
// NOTE(review): BP is overwritten with the constant address; presumably the
// non-zero frame size ($8) is what makes the assembler save/restore BP — confirm.
TEXT ·_uint64_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA5<>(SB), BP

	WORD $0xf685                               // test    esi, esi
	// Nothing to scan: store the seed values.
	JLE  LBB7_1
	WORD $0x8941; BYTE $0xf0                   // mov    r8d, esi
	WORD $0xfe83; BYTE $0x0f                   // cmp    esi, 15
	// More than 15 elements: take the vectorised path.
	JA   LBB7_4
	// 1..15 elements: scalar loop only, with min seed = all-ones (rax),
	// index = 0 (r9) and max seed = 0 (r10).
	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1
	WORD $0x3145; BYTE $0xc9                   // xor    r9d, r9d
	WORD $0x3145; BYTE $0xd2                   // xor    r10d, r10d
	JMP  LBB7_7

// Empty input: min (rax) = all-ones, max (rsi) = 0.
LBB7_1:
	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB7_8

// Vector path setup: r9 = count & -16 (elements handled by the vector loop),
// rax = 0 (element index), ymm0 = broadcast sign bit.
// vpcmpeqd reg,reg,reg sets a register to all-ones (min seeds);
// vpxor reg,reg,reg clears it (max seeds).
LBB7_4:
	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
	LONG $0xf0e18341               // and    r9d, -16
	LONG $0xedefd1c5               // vpxor    xmm5, xmm5, xmm5
	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
	WORD $0xc031                   // xor    eax, eax
	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq    ymm0, qword 0[rbp] /* [rip + .LCPI7_0] */
	LONG $0xe476ddc5               // vpcmpeqd    ymm4, ymm4, ymm4
	LONG $0xdb76e5c5               // vpcmpeqd    ymm3, ymm3, ymm3
	LONG $0xd276edc5               // vpcmpeqd    ymm2, ymm2, ymm2
	LONG $0xef3941c4; BYTE $0xc0   // vpxor    xmm8, xmm8, xmm8
	LONG $0xffefc1c5               // vpxor    xmm7, xmm7, xmm7
	LONG $0xf6efc9c5               // vpxor    xmm6, xmm6, xmm6

// Main loop: 16 elements (four 32-byte unaligned loads) per iteration.
// For each load, both the accumulator and the data are XOR-ed with the sign bit
// (ymm0) and compared with the signed vpcmpgtq; vblendvpd then selects, per
// lane, its third operand where the mask is set and its second otherwise.
// Accumulator pairs (min, max) per load: (ymm1, ymm5), (ymm4, ymm8),
// (ymm3, ymm7), (ymm2, ymm6).
LBB7_5:
	LONG $0x0c6f7ec5; BYTE $0xc7   // vmovdqu    ymm9, yword [rdi + 8*rax]
	LONG $0xd0ef75c5               // vpxor    ymm10, ymm1, ymm0
	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd    ymm1, ymm9, ymm1, ymm10
	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
	LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd    ymm5, ymm9, ymm5, ymm10
	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
	LONG $0xd0ef5dc5               // vpxor    ymm10, ymm4, ymm0
	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd    ymm4, ymm9, ymm4, ymm10
	LONG $0xd0ef3dc5               // vpxor    ymm10, ymm8, ymm0
	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
	LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 64]
	LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd    ymm8, ymm9, ymm8, ymm10
	LONG $0xc8ef65c5               // vpxor    ymm9, ymm3, ymm0
	LONG $0xd0ef25c5               // vpxor    ymm10, ymm11, ymm0
	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
	LONG $0x4b25e3c4; WORD $0x90db // vblendvpd    ymm3, ymm11, ymm3, ymm9
	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
	LONG $0x373542c4; BYTE $0xca   // vpcmpgtq    ymm9, ymm9, ymm10
	LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd    ymm7, ymm11, ymm7, ymm9
	LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 96]
	LONG $0xd0ef6dc5               // vpxor    ymm10, ymm2, ymm0
	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd    ymm2, ymm9, ymm2, ymm10
	LONG $0xd0ef4dc5               // vpxor    ymm10, ymm6, ymm0
	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
	LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd    ymm6, ymm9, ymm6, ymm10
	LONG $0x10c08348               // add    rax, 16
	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
	JNE  LBB7_5
	// Fold the four max accumulators (ymm5, ymm8, ymm7, ymm6) into ymm5, then
	// reduce horizontally: high 128 bits vs low 128 bits, then the two 64-bit
	// halves (vpermilps with imm 78 swaps them). The max ends up in xmm5.
	LONG $0xc8ef3dc5               // vpxor    ymm9, ymm8, ymm0
	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
	LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd    ymm5, ymm8, ymm5, ymm9
	LONG $0xc05755c5               // vxorpd    ymm8, ymm5, ymm0
	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
	LONG $0x373d42c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm8, ymm9
	LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm7, ymm5, ymm8
	LONG $0xf857d5c5               // vxorpd    ymm7, ymm5, ymm0
	LONG $0xc0ef4dc5               // vpxor    ymm8, ymm6, ymm0
	LONG $0x3745c2c4; BYTE $0xf8   // vpcmpgtq    ymm7, ymm7, ymm8
	LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd    ymm5, ymm6, ymm5, ymm7
	LONG $0x197de3c4; WORD $0x01ee // vextractf128    xmm6, ymm5, 1
	LONG $0xc05749c5               // vxorpd    xmm8, xmm6, xmm0
	LONG $0xf857d1c5               // vxorpd    xmm7, xmm5, xmm0
	LONG $0x3741c2c4; BYTE $0xf8   // vpcmpgtq    xmm7, xmm7, xmm8
	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
	LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps    xmm6, xmm5, 78
	LONG $0xc05751c5               // vxorpd    xmm8, xmm5, xmm0
	LONG $0xf857c9c5               // vxorpd    xmm7, xmm6, xmm0
	LONG $0x3739e2c4; BYTE $0xff   // vpcmpgtq    xmm7, xmm8, xmm7
	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
	// Same reduction for the min accumulators (ymm1, ymm4, ymm3, ymm2), folded
	// into ymm1. The move of the max into r10 is interleaved below.
	LONG $0xf0eff5c5               // vpxor    ymm6, ymm1, ymm0
	LONG $0xf8efddc5               // vpxor    ymm7, ymm4, ymm0
	LONG $0x3745e2c4; BYTE $0xf6   // vpcmpgtq    ymm6, ymm7, ymm6
	LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd    ymm1, ymm4, ymm1, ymm6
	LONG $0xe057f5c5               // vxorpd    ymm4, ymm1, ymm0
	LONG $0xf0efe5c5               // vpxor    ymm6, ymm3, ymm0
	LONG $0x374de2c4; BYTE $0xe4   // vpcmpgtq    ymm4, ymm6, ymm4
	LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd    ymm1, ymm3, ymm1, ymm4
	LONG $0x7ef9c1c4; BYTE $0xea   // vmovq    r10, xmm5
	LONG $0xd857f5c5               // vxorpd    ymm3, ymm1, ymm0
	LONG $0xe0efedc5               // vpxor    ymm4, ymm2, ymm0
	LONG $0x375de2c4; BYTE $0xdb   // vpcmpgtq    ymm3, ymm4, ymm3
	LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd    ymm1, ymm2, ymm1, ymm3
	LONG $0x197de3c4; WORD $0x01ca // vextractf128    xmm2, ymm1, 1
	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
	LONG $0xe057e9c5               // vxorpd    xmm4, xmm2, xmm0
	LONG $0x3759e2c4; BYTE $0xdb   // vpcmpgtq    xmm3, xmm4, xmm3
	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd    xmm1, xmm2, xmm1, xmm3
	LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps    xmm2, xmm1, 78
	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
	// The sign-bit mask in xmm0 is consumed here; it is not needed afterwards.
	LONG $0xc057e9c5               // vxorpd    xmm0, xmm2, xmm0
	LONG $0x3779e2c4; BYTE $0xc3   // vpcmpgtq    xmm0, xmm0, xmm3
	LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd    xmm0, xmm2, xmm1, xmm0
	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
	// rsi carries the max into the store block; skip the scalar tail when the
	// count was an exact multiple of 16 (r9 == r8).
	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
	JE   LBB7_8

// Scalar tail: one element per iteration for indices r9 .. r8-1.
// rax = min(rax, value) via unsigned cmovae; rsi = max(r10, value) via unsigned
// cmova, then copied back into r10 for the next iteration.
LBB7_7:
	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
	LONG $0xc6430f48         // cmovae    rax, rsi
	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
	LONG $0xf2470f49         // cmova    rsi, r10
	LONG $0x01c18349         // add    r9, 1
	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
	JNE  LBB7_7

// Store results: *maxout (rcx) = rsi, *minout (rdx) = rax.
LBB7_8:
	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
	VZEROUPPER
	RET