File: fix_stats_bivariate_normal_cdf_PR23815.patch

package info (click to toggle)
scipy 1.16.2-4
links: PTS, VCS
area: main
in suites:
size: 235,092 kB
sloc: cpp: 503,162; python: 344,898; ansic: 195,670; javascript: 89,566; fortran: 56,210; cs: 3,081; f90: 1,150; sh: 856; makefile: 791; pascal: 284; csh: 135; lisp: 134; xml: 56; perl: 51
file content (244 lines) | stat: -rw-r--r-- 10,341 bytes
parent folder | download | duplicates (2)
From d91eb9e61aa3b74240b79e3a33077cef179518d1 Mon Sep 17 00:00:00 2001
From: Matt Haberland <matt.haberland@gmail.com>
Date: Sun, 19 Oct 2025 07:41:20 -0700
Subject: [PATCH 1/5] ENH: scipy.stats: speed up bivariate normal cdf

---
 scipy/stats/_qmvnt.py                  |  21 ++++
 scipy/stats/_stats_pythran.py          | 142 +++++++++++++++++++++++++
 scipy/stats/tests/test_multivariate.py |   4 +-
 3 files changed, 165 insertions(+), 2 deletions(-)

Index: scipy/scipy/stats/_qmvnt.py
===================================================================
--- scipy.orig/scipy/stats/_qmvnt.py	2025-10-27 00:20:27.016948021 +0100
+++ scipy/scipy/stats/_qmvnt.py	2025-10-27 00:20:27.006089715 +0100
@@ -32,11 +32,13 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
+import math
 import numpy as np
 
 from scipy.fft import fft, ifft
 from scipy.special import ndtr as phi, ndtri as phinv
 from scipy.stats._qmc import primes_from_2_to
+from scipy.stats._stats_pythran import _bvnu
 
 from ._qmvnt_cy import _qmvn_inner, _qmvt_inner
 
@@ -178,6 +180,9 @@
         prob = phi(high / covar**0.5) - phi(low / covar**0.5)
         # More or less
         est_error = 1e-15
+    elif n == 2:
+        prob = _bvn(low, high, covar)
+        est_error = 1e-15
     else:
         mi = min(limit, n * 1000)
         prob = 0.0
@@ -452,3 +457,19 @@
     t = x[slc1].copy()
     x[slc1] = x[slc2].copy()
     x[slc2] = t
+
+
+def _bvn(a, b, A):
+    # covariance matrix is written [[s1**2, rho*s1*s2], [rho*s1*s2, s2**2]]
+    # e.g. https://en.wikipedia.org/wiki/Multivariate_normal_distribution
+    # therefore, s12 = rho*s1*s2 -> rho = s12/(s1*s2)
+    s1 = math.sqrt(A[0, 0])
+    s2 = math.sqrt(A[1, 1])
+    s12 = A[0, 1]
+    r = s12 / (s1 * s2)
+    # _bvnu assumes unit variances, so scale each limit by its standard deviation
+    xl, xu = a[0] / s1, b[0] / s1
+    yl, yu = a[1] / s2, b[1] / s2
+    p = _bvnu(xl, yl, r) - _bvnu(xu, yl, r) - _bvnu(xl, yu, r) + _bvnu(xu, yu, r)
+    p = max( 0., min( p, 1. ) )
+    return p
Index: scipy/scipy/stats/_stats_pythran.py
===================================================================
--- scipy.orig/scipy/stats/_stats_pythran.py	2025-10-27 00:20:27.016948021 +0100
+++ scipy/scipy/stats/_stats_pythran.py	2025-10-27 00:20:27.010392490 +0100
@@ -1,3 +1,4 @@
+import math
 import numpy as np
 
 
@@ -209,3 +210,146 @@
             cache[p] = np.cumsum(pmf) if tp=='cdf' else pmf
         out[i] = cache[p][k[i]]
     return out
+
+
+# function p = phid(z), p = erfc( -z/sqrt(2) )/2; % Normal cdf
+def phid(z):
+    return math.erfc(-z / math.sqrt(2)) / 2
+
+
+def np_dot(x, y):
+    return np.sum(x * y)
+
+
+# function p = bvnu( dh, dk, r )
+#pythran export _bvnu(float64, float64, float64)
+def _bvnu(dh, dk, r):
+    # Adapted from the original MATLAB implementation by Dr. Alan Genz;
+    # see license information in _qmvnt.py
+    math_inf, math_pi = np.inf, np.pi
+    # if dh ==  inf | dk ==  inf, p = 0;
+    if (dh == math_inf) or (dk == math_inf):
+        p = 0.
+    # elseif dh == -inf, if dk == -inf, p = 1; else p = phid(-dk); end
+    elif dh == -math_inf:
+        if dk == -math_inf:
+            p = 1.
+        else:
+            p = phid(-dk)
+    # elseif dk == -inf, p = phid(-dh);
+    elif dk == -math_inf:
+        p = phid(-dh)
+    # elseif r == 0, p = phid(-dh)*phid(-dk);
+    elif r == 0:
+        p = phid(-dh) * phid(-dk)
+    # else, tp = 2*pi; h = dh; k = dk; hk = h*k; bvn = 0;
+    else:
+        tp = 2*math_pi
+        h = dh
+        k = dk
+        hk = h*k
+        bvn = 0.
+        # if abs(r) < 0.3      % Gauss Legendre points and weights, n =  6
+        #     w(1:3) = [0.1713244923791705 0.3607615730481384 0.4679139345726904];
+        #     x(1:3) = [0.9324695142031522 0.6612093864662647 0.2386191860831970];
+        if abs(r) < 0.3:
+            w = [0.1713244923791705, 0.3607615730481384, 0.4679139345726904]
+            x = [0.9324695142031522, 0.6612093864662647, 0.2386191860831970]
+        # elseif abs(r) < 0.75 % Gauss Legendre points and weights, n = 12
+        #     w(1:3) = [.04717533638651177 0.1069393259953183 0.1600783285433464];
+        #     w(4:6) = [0.2031674267230659 0.2334925365383547 0.2491470458134029];
+        #     x(1:3) = [0.9815606342467191 0.9041172563704750 0.7699026741943050];
+        #     x(4:6) = [0.5873179542866171 0.3678314989981802 0.1252334085114692];
+        elif abs(r) < 0.75:
+            w = [.04717533638651177, 0.1069393259953183, 0.1600783285433464,
+                 0.2031674267230659, 0.2334925365383547, 0.2491470458134029]
+            x = [0.9815606342467191, 0.9041172563704750, 0.7699026741943050,
+                 0.5873179542866171, 0.3678314989981802, 0.1252334085114692]
+        # else,                % Gauss Legendre points and weights, n = 20
+        #     w(1:3) = [.01761400713915212 .04060142980038694 .06267204833410906];
+        #     w(4:6) = [.08327674157670475 0.1019301198172404 0.1181945319615184];
+        #     w(7:9) = [0.1316886384491766 0.1420961093183821 0.1491729864726037];
+        #     w(10) =   0.1527533871307259;
+        #     x(1:3) = [0.9931285991850949 0.9639719272779138 0.9122344282513259];
+        #     x(4:6) = [0.8391169718222188 0.7463319064601508 0.6360536807265150];
+        #     x(7:9) = [0.5108670019508271 0.3737060887154196 0.2277858511416451];
+        #     x(10) =   0.07652652113349733;
+        else:
+            w = [.01761400713915212, .04060142980038694, .06267204833410906,
+                 .08327674157670475, 0.1019301198172404, 0.1181945319615184,
+                 0.1316886384491766, 0.1420961093183821, 0.1491729864726037,
+                 0.1527533871307259]
+            x = [0.9931285991850949, 0.9639719272779138, 0.9122344282513259,
+                 0.8391169718222188, 0.7463319064601508, 0.6360536807265150,
+                 0.5108670019508271, 0.3737060887154196, 0.2277858511416451,
+                 0.07652652113349733]
+        # end, w = [w  w]; x = [1-x 1+x];
+        w, x = np.asarray(w), np.asarray(x)
+        w = np.concatenate((w, w))
+        x = np.concatenate((1-x, 1+x))
+        # if abs(r) < 0.925, hs = ( h*h + k*k )/2; asr = asin(r)/2;
+        if abs(r) < 0.925:
+            hs = ( h*h + k*k )/2
+            asr = math.asin(r)/2
+        #     sn = sin(asr*x); bvn = exp((sn*hk-hs)./(1-sn.^2))*w';
+            sn = np.sin(asr*x)
+            bvn = np_dot(np.exp((sn*hk-hs) / (1-sn**2)), w)
+        #     bvn = bvn*asr/tp + phid(-h)*phid(-k);
+            bvn = bvn*asr/tp + phid(-h)*phid(-k)
+        # else, if r < 0, k = -k; hk = -hk; end
+        else:
+            if r < 0:
+                k = -k
+                hk = -hk
+            # if abs(r) < 1, as = 1-r^2; a = sqrt(as); bs = (h-k)^2;
+            if abs(r) < 1:
+                as_ = 1-r**2
+                a = math.sqrt(as_)
+                bs = (h-k)**2
+                # asr = -( bs/as + hk )/2; c = (4-hk)/8 ; d = (12-hk)/80;
+                asr = -( bs/as_ + hk )/2
+                c = (4-hk)/8
+                d = (12-hk)/80
+                # if asr > -100, bvn = a*exp(asr)*(1-c*(bs-as)*(1-d*bs)/3+c*d*as^2); end
+                if asr > -100:
+                    bvn = a*math.exp(asr)*(1-c*(bs-as_)*(1-d*bs)/3+c*d*as_**2)
+                # if hk  > -100, b = sqrt(bs); sp = sqrt(tp)*phid(-b/a);
+                if hk  > -100:
+                    b = math.sqrt(bs)
+                    sp = math.sqrt(tp)*phid(-b/a)
+                    # bvn = bvn - exp(-hk/2)*sp*b*( 1 - c*bs*(1-d*bs)/3 );
+                    bvn = bvn - math.exp(-hk/2)*sp*b*( 1 - c*bs*(1-d*bs)/3 )
+
+                # end, a = a/2; xs = (a*x).^2; asr = -( bs./xs + hk )/2;
+                a = a/2
+                xs = (a*x)**2
+                asr = -( bs / xs + hk )/2
+                # ix = find( asr > -100 ); xs = xs(ix); sp = ( 1 + c*xs.*(1+5*d*xs) );
+                ix = asr > -100
+                xs = xs[ix]
+                sp = 1 + c*xs * (1+5*d*xs)
+                # rs = sqrt(1-xs); ep = exp( -(hk/2)*xs./(1+rs).^2 )./rs;
+                rs = np.sqrt(1-xs)
+                ep = np.exp( -(hk/2)*xs / (1+rs)**2 )/rs
+                # bvn = ( a*( (exp(asr(ix)).*(sp-ep))*w(ix)' ) - bvn )/tp;
+                bvn = ( a*np_dot( (np.exp(asr[ix]) * (sp-ep)), w[ix] ) - bvn )/tp
+            # end
+            # if r > 0, bvn =  bvn + phid( -max( h, k ) );
+            if r > 0:
+                bvn =  bvn + phid( -max( h, k ) )
+            # elseif h >= k, bvn = -bvn;
+            elif h >= k:
+                bvn = -bvn
+            # else, if h < 0, L = phid(k)-phid(h); else, L = phid(-h)-phid(-k); end
+            else:
+                if h < 0:
+                    L = phid(k)-phid(h)
+                else:
+                    L = phid(-h)-phid(-k)
+                # bvn =  L - bvn;
+                bvn =  L - bvn
+            # end
+        # end, p = max( 0, min( 1, bvn ) );
+        p = max( 0, min( 1, bvn ) )
+    # end
+    return p
Index: scipy/scipy/stats/tests/test_multivariate.py
===================================================================
--- scipy.orig/scipy/stats/tests/test_multivariate.py	2025-10-27 00:20:27.016948021 +0100
+++ scipy/scipy/stats/tests/test_multivariate.py	2025-10-27 00:20:27.008391611 +0100
@@ -1039,8 +1039,8 @@
         assert_allclose(cdf, cdf[0]*expected_signs)
 
     @pytest.mark.slow
-    def test_cdf_vs_cubature(self):
-        ndim = 3
+    @pytest.mark.parametrize("ndim", [2, 3])
+    def test_cdf_vs_cubature(self, ndim):
         rng = np.random.default_rng(123)
         a = rng.uniform(size=(ndim, ndim))
         cov = a.T @ a
Index: scipy/scipy/stats/meson.build
===================================================================
--- scipy.orig/scipy/stats/meson.build	2025-10-27 00:20:27.016948021 +0100
+++ scipy/scipy/stats/meson.build	2025-10-27 00:20:27.011288415 +0100
@@ -77,7 +77,8 @@
 if use_pythran
   py3.extension_module('_stats_pythran',
     pythran_gen.process('_stats_pythran.py'),
-    cpp_args: cpp_args_pythran,
+    # `_cpp_Wno_unused_local_typedefs` can be removed when we require pythran >= 0.19.0
+    cpp_args: [cpp_args_pythran, _cpp_Wno_unused_local_typedefs],
     dependencies: [pythran_dep, np_dep],
     link_args: version_link_args,
     install: true,