File: internal.h

package info (click to toggle)
rust-ring 0.17.14-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,292 kB
  • sloc: asm: 138,125; perl: 33,634; ansic: 26,517; makefile: 2
file content (154 lines) | stat: -rw-r--r-- 6,077 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
// Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPENSSL_HEADER_BN_INTERNAL_H
#define OPENSSL_HEADER_BN_INTERNAL_H

#include <ring-core/base.h>

#if defined(OPENSSL_X86_64) && defined(_MSC_VER) && !defined(__clang__)
#pragma warning(push, 3)
#include <intrin.h>
#pragma warning(pop)
#pragma intrinsic(_umul128)
#endif

#include "../../internal.h"

// BN_ULONG is the single-word (limb) type used by the bignum routines declared
// in this header.
typedef crypto_word_t BN_ULONG;

// Word-size configuration. Exactly one of OPENSSL_64_BIT or OPENSSL_32_BIT
// must be defined; otherwise compilation stops with an #error.
#if defined(OPENSSL_64_BIT)

#if defined(BORINGSSL_HAS_UINT128)
// BN_ULLONG is the double-word type. On 64-bit targets it is only defined when
// a 128-bit integer type is available: MSVC doesn't support two-word integers
// on 64-bit.
#define BN_ULLONG uint128_t
#endif

// BN_BITS2 is the shift applied to a BN_ULLONG product to extract its high
// word (see |bn_umult_lohi|).
// |BN_MONT_CTX_N0| expands to two comma-separated initializer values: the
// combined word followed by a literal 0.
// |TOBN| packs two 32-bit halves into one word, with |hi| in the upper 32 bits.
#define BN_BITS2 64
#define BN_MONT_CTX_N0_LIMBS 1
#define BN_MONT_CTX_N0(hi, lo) TOBN(hi, lo), 0
#define TOBN(hi, lo) ((BN_ULONG)(hi) << 32 | (lo))

#elif defined(OPENSSL_32_BIT)

#define BN_ULLONG uint64_t
#define BN_BITS2 32
// On some 32-bit platforms, Montgomery multiplication is done using 64-bit
// arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
// needs to be two words long. Only certain 32-bit platforms actually make use
// of n0[1]; a shorter R value would suffice for the others. However,
// currently only the assembly files know which is which.
//
// |TOBN| expands to two comma-separated initializer values, low half first,
// and |BN_MONT_CTX_N0| is exactly that pair.
#define BN_MONT_CTX_N0_LIMBS 2
#define BN_MONT_CTX_N0(hi, lo) TOBN(hi, lo)
#define TOBN(hi, lo) (lo), (hi)

#else
#error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
#endif



// BN_MONTGOMERY_MAX_WORDS is the maximum number of words allowed in a |BIGNUM|
// used with Montgomery reduction. Ideally this limit would be applied to all
// |BIGNUM|s, in |bn_wexpand|, but the exactfloat library needs to create 8 MiB
// values for other operations.
// #define BN_MONTGOMERY_MAX_WORDS (8 * 1024 / sizeof(BN_ULONG))

// bn_mul_mont writes |ap| * |bp| mod |np| to |rp|, each |num| words
// long. Inputs and outputs are in Montgomery form. |n0| is a pointer to
// an |N0|.
//
// If at least one of |ap| or |bp| is fully reduced, |rp| will be fully reduced.
// If neither is fully-reduced, the output may not be either.
//
// This function allocates |num| words on the stack, so |num| should be at most
// |BN_MONTGOMERY_MAX_WORDS|.
//
// TODO(davidben): The x86_64 implementation expects a 32-bit input and masks
// off upper bits. The aarch64 implementation expects a 64-bit input and does
// not. |size_t| is the safer option but not strictly correct for x86_64. But
// the |BN_MONTGOMERY_MAX_WORDS| bound makes this moot.
//
// See also discussion in |ToWord| in abi_test.h for notes on smaller-than-word
// inputs.
//
// |num| must be at least 4, at least on x86.
//
// In other forks, |bn_mul_mont| returns an |int| indicating whether it
// actually did the multiplication. All our implementations always do the
// multiplication, and forcing callers to deal with the possibility of it
// failing just leads to further problems.
//
// The compile-time assertion below accepts only two ABIs: |int| the same size
// as |size_t|, or a 4-byte |int| with an 8-byte |size_t|. Any other
// combination fails the build.
// NOTE(review): presumably this protects the |num| argument, which some
// assembly implementations may read as a 32-bit value -- confirm.
OPENSSL_STATIC_ASSERT(sizeof(int) == sizeof(size_t) ||
                      (sizeof(int) == 4 && sizeof(size_t) == 8),
                      "int and size_t ABI mismatch");
#if defined(OPENSSL_X86_64)
// x86-64: |bn_mul_mont_nohw| is only declared here; its definition is not in
// this header. Its parameter list matches the one documented for |bn_mul_mont|.
void bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_mul_mont_small forwards all six arguments, unchanged and in order, to
// |bn_mul_mont_nohw|. On x86-64 no other implementation is selected here.
static inline void bn_mul_mont_small(
    BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
    const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
    bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
}
#elif defined(OPENSSL_AARCH64)
// AArch64: |bn_mul_mont_nohw| is only declared here; its definition is not in
// this header. Its parameter list matches the one documented for |bn_mul_mont|.
void bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_mul_mont_small forwards all six arguments, unchanged and in order, to
// |bn_mul_mont_nohw|; there is no size-specific dispatch on AArch64.
static inline void bn_mul_mont_small(
    BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
    const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
    // No point in optimizing for P-256 because P-256 doesn't call into
    // this on AArch64.
    bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
}
#elif defined(OPENSSL_ARM)
// 32-bit ARM: two implementations are declared here; neither is defined in
// this header. Both take the parameter list documented for |bn_mul_mont|.
// |bn_mul8x_mont_neon| is only reached through the wrapper below, and only
// when |num| is 8 and |neon_available| is nonzero.
void bn_mul8x_mont_neon(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, size_t num);
void bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      const BN_ULONG *np, const BN_ULONG *n0, size_t num);

// bn_mul_mont_small picks an implementation and forwards all six arguments to
// it unchanged: the NEON routine for 8-word inputs when NEON is available,
// and |bn_mul_mont_nohw| in every other case.
static inline void bn_mul_mont_small(
    BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
    const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
    // Approximate the dispatch `bn_mul_mont` used to perform, so that the
    // NEON version is used for P-256 when practical. |num| is tested first,
    // so `neon_available` is only read for 8-word inputs.
    // XXX: This should not be accessing `neon_available` directly.
    if (num == 8 && neon_available) {
        bn_mul8x_mont_neon(rp, ap, bp, np, n0, num);
        return;
    }

    bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
}
#else
// All other targets: |bn_mul_mont| is only declared here; its definition is
// not in this header.
void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                 const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_mul_mont_small forwards all six arguments, unchanged and in order, to
// |bn_mul_mont|.
static inline void bn_mul_mont_small(
    BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
    const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
    bn_mul_mont(rp, ap, bp, np, n0, num);
}
#endif

// bn_umult_lohi computes the full double-word product |a| * |b|, storing the
// low word in |*low_out| and the high word in |*high_out|.
//
// With MSVC (not clang) on x86-64 the |_umul128| intrinsic is used. Every
// other configuration multiplies in |BN_ULLONG| and splits the result, so
// |BN_ULLONG| must be defined there.
static inline void bn_umult_lohi(BN_ULONG *low_out, BN_ULONG *high_out,
                                 BN_ULONG a, BN_ULONG b) {
#if defined(OPENSSL_X86_64) && defined(_MSC_VER) && !defined(__clang__)
  // |_umul128| returns the low word and writes the high word via its third
  // argument.
  *low_out = _umul128(a, b, high_out);
#else
  // Widen before multiplying so the upper half of the product is kept.
  const BN_ULLONG product = (BN_ULLONG)a * (BN_ULLONG)b;
  // Store the low word first, then the high word.
  *low_out = (BN_ULONG)product;
  *high_out = (BN_ULONG)(product >> BN_BITS2);
#endif
}

#endif  // OPENSSL_HEADER_BN_INTERNAL_H