File: ArmFPURoundMode.cpp

package info (click to toggle)
dolphin-emu 2512%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 76,328 kB
  • sloc: cpp: 499,023; ansic: 119,674; python: 6,547; sh: 2,338; makefile: 1,093; asm: 726; pascal: 257; javascript: 183; perl: 97; objc: 75; xml: 30
file content (90 lines) | stat: -rw-r--r-- 2,460 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
// Copyright 2021 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "Common/FPURoundMode.h"

#ifdef _MSC_VER
#include <intrin.h>
#endif

#include "Common/CPUDetect.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"

namespace Common::FPU
{
// Returns the current contents of the FPCR system register.
// MSVC has no inline assembly on this target, so it goes through the status-register intrinsic;
// every other compiler reads the register with an MRS instruction.
static u64 GetFPCR()
{
#ifndef _MSC_VER
  u64 current_value;
  __asm__ __volatile__("mrs %0, fpcr" : "=r"(current_value));
  return current_value;
#else
  return _ReadStatusReg(ARM64_FPCR);
#endif
}

// Writes `fpcr` to the FPCR system register.
// MSVC goes through the status-register intrinsic; every other compiler emits an MSR instruction.
static void SetFPCR(u64 fpcr)
{
#ifdef _MSC_VER
  _WriteStatusReg(ARM64_FPCR, fpcr);
#else
  // "msr fpcr, <op>" only exists in the register-operand form, so the input must be constrained
  // to a general-purpose register ("r"). Also permitting an immediate ("i") would allow the
  // compiler to substitute a literal when the argument is a compile-time constant, which produces
  // assembly that cannot be encoded.
  __asm__ __volatile__("msr fpcr, %0" : : "r"(fpcr));
#endif
}

// FPCR value read once while this file's static objects are initialized. SetSIMDMode() uses it as
// the base value it modifies, and LoadDefaultSIMDState() writes it back unchanged.
static const u64 default_fpcr{GetFPCR()};

// FPCR snapshot written by SaveSIMDState() and restored by LoadSIMDState(). Starts out equal to
// default_fpcr so that a load without a prior save restores the default value.
static u64 saved_fpcr{default_fpcr};

// Programs the host FPCR with the requested rounding mode and flush-to-zero behavior.
//
// rounding_mode: used as an index into the FPSCR.RN-to-FPCR.RMode table below
//                (0 = nearest, 1 = zero, 2 = +inf, 3 = -inf).
// non_ieee_mode: when true, FZ and AH are set; when false, FZ, AH and FIZ are all cleared.
//
// Every other FPCR field is taken from default_fpcr, not from the register's current value.
// A warning is logged once (the first time it applies) if non-IEEE mode is requested while
// cpu_info.bAFP is false.
void SetSIMDMode(RoundMode rounding_mode, bool non_ieee_mode)
{
  // All masks are 64 bits wide. FPCR is read and written as a u64, and inverting a 32-bit mask
  // would zero-extend to 64 bits and clear the upper half of default_fpcr instead of preserving it.

  // When AH is disabled, FZ controls flush-to-zero for both inputs and outputs. When AH is enabled,
  // FZ controls flush-to-zero for outputs, and FIZ controls flush-to-zero for inputs.
  constexpr u64 FZ = u64{1} << 24;
  constexpr u64 AH = u64{1} << 1;
  constexpr u64 FIZ = u64{1} << 0;
  constexpr u64 flush_to_zero_mask = FZ | AH | FIZ;

  // On CPUs with FEAT_AFP support, setting AH = 1, FZ = 1, FIZ = 0 emulates the GC/Wii CPU's
  // "non-IEEE mode". Unfortunately, FEAT_AFP didn't exist until 2020, so we can't count on setting
  // AH actually doing anything. But flushing both inputs and outputs seems to cause less problems
  // than flushing nothing, so let's just set FZ and AH and roll with whatever behavior we get.
  const u64 flush_to_zero_bits = (non_ieee_mode ? FZ | AH : 0);
  static bool afp_warning_shown = false;
  if (!afp_warning_shown && !cpu_info.bAFP && non_ieee_mode)
  {
    afp_warning_shown = true;
    WARN_LOG_FMT(POWERPC,
                 "Non-IEEE mode was requested, but host CPU is not known to support FEAT_AFP");
  }

  // lookup table for FPSCR.RN-to-FPCR.RMode translation
  constexpr u64 rounding_mode_table[] = {
      (u64{0} << 22),  // nearest
      (u64{3} << 22),  // zero
      (u64{1} << 22),  // +inf
      (u64{2} << 22),  // -inf
  };
  constexpr u64 rounding_mode_mask = u64{3} << 22;
  const u64 rounding_mode_bits = rounding_mode_table[rounding_mode];

  // Start from the default FPCR with only the fields we control cleared, then OR in the new values.
  const u64 base = default_fpcr & ~(flush_to_zero_mask | rounding_mode_mask);
  SetFPCR(base | rounding_mode_bits | flush_to_zero_bits);
}

void SaveSIMDState()
{
  saved_fpcr = GetFPCR();
}

void LoadSIMDState()
{
  SetFPCR(saved_fpcr);
}

void LoadDefaultSIMDState()
{
  SetFPCR(default_fpcr);
}

}  // namespace Common::FPU