1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
|
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !noasm
// +build !noasm
package bmi
import (
"fmt"
"os"
"strings"
"github.com/klauspost/cpuid/v2"
)
// init applies optional CPU-feature overrides taken from the environment and
// then fills in the function table (funclist) for this build.
//
// The ARM_ENABLE_EXT environment variable overrides what cpuid detected,
// which is mainly useful when testing:
//
//	ARM_ENABLE_EXT=NEON,AES go test   // enable the listed extensions
//	ARM_ENABLE_EXT=DISABLE go test    // disable ASIMD, AESARM and PMULL
//
// Recognized extension names are NEON, AES and PMULL, separated by commas.
// Whitespace around a name and empty entries (e.g. a trailing comma) are
// ignored; any other value is reported on stderr and skipped.
func init() {
	if ext, ok := os.LookupEnv("ARM_ENABLE_EXT"); ok {
		if strings.TrimSpace(ext) == "DISABLE" {
			cpuid.CPU.Disable(cpuid.ASIMD, cpuid.AESARM, cpuid.PMULL)
		} else {
			for _, raw := range strings.Split(ext, ",") {
				// Tolerate "NEON, AES" and "NEON," style values instead of
				// rejecting them as unknown extensions.
				name := strings.TrimSpace(raw)
				if name == "" {
					continue
				}
				switch name {
				case "NEON":
					cpuid.CPU.Enable(cpuid.ASIMD)
				case "AES":
					cpuid.CPU.Enable(cpuid.AESARM)
				case "PMULL":
					cpuid.CPU.Enable(cpuid.PMULL)
				default:
					fmt.Fprintln(os.Stderr, "unrecognized value for ARM_ENABLE_EXT:", name)
				}
			}
		}
	}

	// Benchmarking showed the pure Go lookup-table version to be nearly
	// twice as fast as the non-lookup-table assembly, because ARM has no
	// PEXT instruction, so the Go implementation is always used here.
	funclist.extractBits = extractBitsGo

	// Pick the NEON bitmap comparison only when ASIMD is available (after
	// any environment override above); otherwise use the Go fallback.
	if cpuid.CPU.Has(cpuid.ASIMD) {
		funclist.gtbitmap = greaterThanBitmapNEON
	} else {
		funclist.gtbitmap = greaterThanBitmapGo
	}
}
|