File: test_distance.py

package info (click to toggle)
python-scipy 0.18.1-2
links: PTS, VCS
area: main
in suites: stretch
size: 75,464 kB
ctags: 79,406
sloc: python: 143,495; cpp: 89,357; fortran: 81,650; ansic: 79,778; makefile: 364; sh: 265
file content (1666 lines) | stat: -rw-r--r-- 57,149 bytes
#! /usr/bin/env python
#
# Author: Damian Eads
# Date: April 17, 2008
#
# Copyright (C) 2008 Damian Eads
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following
#    disclaimer in the documentation and/or other materials provided
#    with the distribution.
#
# 3. The name of the author may not be used to endorse or promote
#    products derived from this software without specific prior
#    written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import division, print_function, absolute_import

import os.path
from scipy._lib.six import xrange, u

import numpy as np
from numpy.linalg import norm
from numpy.testing import (verbose, TestCase, run_module_suite, assert_,
        assert_raises, assert_array_equal, assert_equal, assert_almost_equal,
        assert_allclose)

from scipy.spatial.distance import (squareform, pdist, cdist, matching,
        jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule,
        num_obs_y, num_obs_dm, is_valid_dm, is_valid_y, minkowski, wminkowski,
        euclidean, sqeuclidean, cosine, correlation, hamming, mahalanobis,
        canberra, braycurtis, sokalmichener, _validate_vector)


_filenames = ["iris.txt",
              "cdist-X1.txt",
              "cdist-X2.txt",
              "pdist-hamming-ml.txt",
              "pdist-boolean-inp.txt",
              "pdist-jaccard-ml.txt",
              "pdist-cityblock-ml-iris.txt",
              "pdist-minkowski-3.2-ml-iris.txt",
              "pdist-cityblock-ml.txt",
              "pdist-correlation-ml-iris.txt",
              "pdist-minkowski-5.8-ml-iris.txt",
              "pdist-correlation-ml.txt",
              "pdist-minkowski-3.2-ml.txt",
              "pdist-cosine-ml-iris.txt",
              "pdist-seuclidean-ml-iris.txt",
              "pdist-cosine-ml.txt",
              "pdist-seuclidean-ml.txt",
              "pdist-double-inp.txt",
              "pdist-spearman-ml.txt",
              "pdist-euclidean-ml.txt",
              "pdist-euclidean-ml-iris.txt",
              "pdist-chebychev-ml.txt",
              "pdist-chebychev-ml-iris.txt",
              "random-bool-data.txt"]

_tdist = np.array([[0, 662, 877, 255, 412, 996],
                      [662, 0, 295, 468, 268, 400],
                      [877, 295, 0, 754, 564, 138],
                      [255, 468, 754, 0, 219, 869],
                      [412, 268, 564, 219, 0, 669],
                      [996, 400, 138, 869, 669, 0]], dtype='double')

_ytdist = squareform(_tdist)

# A hashmap of expected output arrays for the tests. These arrays
# come from a list of text files, which are read prior to testing.
# Each test loads inputs and outputs from this dictionary.
eo = {}


def load_testing_files():
    for fn in _filenames:
        name = fn.replace(".txt", "").replace("-ml", "")
        fqfn = os.path.join(os.path.dirname(__file__), 'data', fn)
        fp = open(fqfn)
        eo[name] = np.loadtxt(fp)
        fp.close()
    eo['pdist-boolean-inp'] = np.bool_(eo['pdist-boolean-inp'])

load_testing_files()


class TestCdist(TestCase):

    def test_cdist_euclidean_random(self):
        eps = 1e-07
        # Get the data: the input matrix and the right output.
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'euclidean')
        Y2 = cdist(X1, X2, 'test_euclidean')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_euclidean_random_unicode(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, u('euclidean'))
        Y2 = cdist(X1, X2, u('test_euclidean'))
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_sqeuclidean_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'sqeuclidean')
        Y2 = cdist(X1, X2, 'test_sqeuclidean')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_cityblock_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'cityblock')
        Y2 = cdist(X1, X2, 'test_cityblock')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_hamming_double_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'hamming')
        Y2 = cdist(X1, X2, 'test_hamming')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_hamming_bool_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'hamming')
        Y2 = cdist(X1, X2, 'test_hamming')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_jaccard_double_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'jaccard')
        Y2 = cdist(X1, X2, 'test_jaccard')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_jaccard_bool_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'jaccard')
        Y2 = cdist(X1, X2, 'test_jaccard')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_chebychev_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'chebychev')
        Y2 = cdist(X1, X2, 'test_chebychev')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_minkowski_random_p3d8(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'minkowski', p=3.8)
        Y2 = cdist(X1, X2, 'test_minkowski', p=3.8)
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_minkowski_random_p4d6(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'minkowski', p=4.6)
        Y2 = cdist(X1, X2, 'test_minkowski', p=4.6)
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_minkowski_random_p1d23(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'minkowski', p=1.23)
        Y2 = cdist(X1, X2, 'test_minkowski', p=1.23)
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_wminkowski_random_p3d8(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        w = 1.0 / X1.std(axis=0)
        Y1 = cdist(X1, X2, 'wminkowski', p=3.8, w=w)
        Y2 = cdist(X1, X2, 'test_wminkowski', p=3.8, w=w)
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_wminkowski_int_weights(self):
        # regression test when using integer weights
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        w = np.arange(X1.shape[1])
        Y1 = cdist(X1, X2, 'wminkowski', p=3.8, w=w)
        Y2 = cdist(X1, X2, 'test_wminkowski', p=3.8, w=w)
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_wminkowski_random_p4d6(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        w = 1.0 / X1.std(axis=0)
        Y1 = cdist(X1, X2, 'wminkowski', p=4.6, w=w)
        Y2 = cdist(X1, X2, 'test_wminkowski', p=4.6, w=w)
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_wminkowski_random_p1d23(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        w = 1.0 / X1.std(axis=0)
        Y1 = cdist(X1, X2, 'wminkowski', p=1.23, w=w)
        Y2 = cdist(X1, X2, 'test_wminkowski', p=1.23, w=w)
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_seuclidean_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'seuclidean')
        Y2 = cdist(X1, X2, 'test_seuclidean')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_cosine_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'cosine')

        # Naive implementation
        def norms(X):
            # NumPy 1.7: np.linalg.norm(X, axis=1).reshape(-1, 1)
            return np.asarray([np.linalg.norm(row)
                               for row in X]).reshape(-1, 1)

        Y2 = 1 - np.dot((X1 / norms(X1)), (X2 / norms(X2)).T)

        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_correlation_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'correlation')
        Y2 = cdist(X1, X2, 'test_correlation')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_mahalanobis_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = cdist(X1, X2, 'mahalanobis')
        Y2 = cdist(X1, X2, 'test_mahalanobis')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_mahalanobis(self):
        # 1-dimensional observations
        x1 = np.array([[2], [3]])
        x2 = np.array([[2], [5]])
        dist = cdist(x1, x2, metric='mahalanobis')
        assert_allclose(dist, [[0.0, np.sqrt(4.5)], [np.sqrt(0.5), np.sqrt(2)]])

        # 2-dimensional observations
        x1 = np.array([[0, 0], [-1, 0]])
        x2 = np.array([[0, 2], [1, 0], [0, -2]])
        dist = cdist(x1, x2, metric='mahalanobis')
        rt2 = np.sqrt(2)
        assert_allclose(dist, [[rt2, rt2, rt2], [2, 2*rt2, 2]])

        # Too few observations
        assert_raises(ValueError,
                      cdist, [[0, 1]], [[2, 3]], metric='mahalanobis')

    def test_cdist_canberra_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'canberra')
        Y2 = cdist(X1, X2, 'test_canberra')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_braycurtis_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'braycurtis')
        Y2 = cdist(X1, X2, 'test_braycurtis')
        if verbose > 2:
            print(Y1, Y2)
            print((Y1-Y2).max())
        _assert_within_tol(Y1, Y2, eps)

    def test_cdist_yule_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'yule')
        Y2 = cdist(X1, X2, 'test_yule')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_matching_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'matching')
        Y2 = cdist(X1, X2, 'test_matching')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_kulsinski_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'kulsinski')
        Y2 = cdist(X1, X2, 'test_kulsinski')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_dice_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'dice')
        Y2 = cdist(X1, X2, 'test_dice')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_rogerstanimoto_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'rogerstanimoto')
        Y2 = cdist(X1, X2, 'test_rogerstanimoto')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_russellrao_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'russellrao')
        Y2 = cdist(X1, X2, 'test_russellrao')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_sokalmichener_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'sokalmichener')
        Y2 = cdist(X1, X2, 'test_sokalmichener')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)

    def test_cdist_sokalsneath_random(self):
        eps = 1e-07
        X1 = eo['cdist-X1'] < 0.5
        X2 = eo['cdist-X2'] < 0.5
        Y1 = cdist(X1, X2, 'sokalsneath')
        Y2 = cdist(X1, X2, 'test_sokalsneath')
        _assert_within_tol(Y1, Y2, eps, verbose > 2)


class TestPdist(TestCase):

    def test_pdist_euclidean_random(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-euclidean']
        Y_test1 = pdist(X, 'euclidean')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_euclidean_random_u(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-euclidean']
        Y_test1 = pdist(X, u('euclidean'))
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_euclidean_random_float32(self):
        eps = 1e-07
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-euclidean']
        Y_test1 = pdist(X, 'euclidean')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_euclidean_random_nonC(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-euclidean']
        Y_test2 = pdist(X, 'test_euclidean')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_euclidean_iris_double(self):
        eps = 1e-07
        X = eo['iris']
        Y_right = eo['pdist-euclidean-iris']
        Y_test1 = pdist(X, 'euclidean')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_euclidean_iris_float32(self):
        eps = 1e-06
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-euclidean-iris']
        Y_test1 = pdist(X, 'euclidean')
        _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)

    def test_pdist_euclidean_iris_nonC(self):
        # Test pdist(X, 'test_euclidean') [the non-C implementation] on the
        # Iris data set.
        eps = 1e-07
        X = eo['iris']
        Y_right = eo['pdist-euclidean-iris']
        Y_test2 = pdist(X, 'test_euclidean')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_seuclidean_random(self):
        eps = 1e-05
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-seuclidean']
        Y_test1 = pdist(X, 'seuclidean')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_seuclidean_random_float32(self):
        eps = 1e-05
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-seuclidean']
        Y_test1 = pdist(X, 'seuclidean')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_seuclidean_random_nonC(self):
        # Test pdist(X, 'test_sqeuclidean') [the non-C implementation]
        eps = 1e-05
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-seuclidean']
        Y_test2 = pdist(X, 'test_sqeuclidean')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_seuclidean_iris(self):
        eps = 1e-05
        X = eo['iris']
        Y_right = eo['pdist-seuclidean-iris']
        Y_test1 = pdist(X, 'seuclidean')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_seuclidean_iris_float32(self):
        # Tests pdist(X, 'seuclidean') on the Iris data set (float32).
        eps = 1e-05
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-seuclidean-iris']
        Y_test1 = pdist(X, 'seuclidean')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_seuclidean_iris_nonC(self):
        # Test pdist(X, 'test_seuclidean') [the non-C implementation] on the
        # Iris data set.
        eps = 1e-05
        X = eo['iris']
        Y_right = eo['pdist-seuclidean-iris']
        Y_test2 = pdist(X, 'test_sqeuclidean')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_cosine_random(self):
        eps = 1e-08
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cosine']
        Y_test1 = pdist(X, 'cosine')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_cosine_random_float32(self):
        eps = 1e-08
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-cosine']
        Y_test1 = pdist(X, 'cosine')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_cosine_random_nonC(self):
        # Test pdist(X, 'test_cosine') [the non-C implementation]
        eps = 1e-08
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cosine']
        Y_test2 = pdist(X, 'test_cosine')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_cosine_iris(self):
        eps = 1e-08
        X = eo['iris']
        Y_right = eo['pdist-cosine-iris']
        Y_test1 = pdist(X, 'cosine')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_cosine_iris_float32(self):
        eps = 1e-07
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-cosine-iris']
        Y_test1 = pdist(X, 'cosine')
        _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)

    def test_pdist_cosine_iris_nonC(self):
        eps = 1e-08
        X = eo['iris']
        Y_right = eo['pdist-cosine-iris']
        Y_test2 = pdist(X, 'test_cosine')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_cosine_bounds(self):
        # Test adapted from @joernhees's example at gh-5208: case were
        # cosine distance used to be negative. XXX: very sensitive to the
        # specific norm computation.
        x = np.abs(np.random.RandomState(1337).rand(91))
        X = np.vstack([x, x])
        assert_(pdist(X, 'cosine')[0] >= 0,
                msg='cosine distance should be non-negative')

    def test_pdist_cityblock_random(self):
        eps = 1e-06
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cityblock']
        Y_test1 = pdist(X, 'cityblock')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_cityblock_random_float32(self):
        eps = 1e-06
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-cityblock']
        Y_test1 = pdist(X, 'cityblock')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_cityblock_random_nonC(self):
        eps = 1e-06
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-cityblock']
        Y_test2 = pdist(X, 'test_cityblock')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_cityblock_iris(self):
        eps = 1e-14
        X = eo['iris']
        Y_right = eo['pdist-cityblock-iris']
        Y_test1 = pdist(X, 'cityblock')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_cityblock_iris_float32(self):
        eps = 1e-06
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-cityblock-iris']
        Y_test1 = pdist(X, 'cityblock')
        _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)

    def test_pdist_cityblock_iris_nonC(self):
        # Test pdist(X, 'test_cityblock') [the non-C implementation] on the
        # Iris data set.
        eps = 1e-14
        X = eo['iris']
        Y_right = eo['pdist-cityblock-iris']
        Y_test2 = pdist(X, 'test_cityblock')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_correlation_random(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-correlation']
        Y_test1 = pdist(X, 'correlation')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_correlation_random_float32(self):
        eps = 1e-07
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-correlation']
        Y_test1 = pdist(X, 'correlation')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_correlation_random_nonC(self):
        eps = 1e-07
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-correlation']
        Y_test2 = pdist(X, 'test_correlation')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_correlation_iris(self):
        eps = 1e-08
        X = eo['iris']
        Y_right = eo['pdist-correlation-iris']
        Y_test1 = pdist(X, 'correlation')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_correlation_iris_float32(self):
        eps = 1e-07
        X = eo['iris']
        Y_right = np.float32(eo['pdist-correlation-iris'])
        Y_test1 = pdist(X, 'correlation')
        _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)

    def test_pdist_correlation_iris_nonC(self):
        eps = 1e-08
        X = eo['iris']
        Y_right = eo['pdist-correlation-iris']
        Y_test2 = pdist(X, 'test_correlation')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_minkowski_random(self):
        eps = 1e-05
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-minkowski-3.2']
        Y_test1 = pdist(X, 'minkowski', 3.2)
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_minkowski_random_float32(self):
        eps = 1e-05
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-minkowski-3.2']
        Y_test1 = pdist(X, 'minkowski', 3.2)
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_minkowski_random_nonC(self):
        eps = 1e-05
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-minkowski-3.2']
        Y_test2 = pdist(X, 'test_minkowski', 3.2)
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_minkowski_3_2_iris(self):
        eps = 1e-07
        X = eo['iris']
        Y_right = eo['pdist-minkowski-3.2-iris']
        Y_test1 = pdist(X, 'minkowski', 3.2)
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_minkowski_3_2_iris_float32(self):
        eps = 1e-06
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-minkowski-3.2-iris']
        Y_test1 = pdist(X, 'minkowski', 3.2)
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_minkowski_3_2_iris_nonC(self):
        eps = 1e-07
        X = eo['iris']
        Y_right = eo['pdist-minkowski-3.2-iris']
        Y_test2 = pdist(X, 'test_minkowski', 3.2)
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_minkowski_5_8_iris(self):
        eps = 1e-07
        X = eo['iris']
        Y_right = eo['pdist-minkowski-5.8-iris']
        Y_test1 = pdist(X, 'minkowski', 5.8)
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_minkowski_5_8_iris_float32(self):
        eps = 1e-06
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-minkowski-5.8-iris']
        Y_test1 = pdist(X, 'minkowski', 5.8)
        _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)

    def test_pdist_minkowski_5_8_iris_nonC(self):
        eps = 1e-07
        X = eo['iris']
        Y_right = eo['pdist-minkowski-5.8-iris']
        Y_test2 = pdist(X, 'test_minkowski', 5.8)
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_wminkowski(self):
        x = np.array([[0.0, 0.0, 0.0],
                      [1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0],
                      [1.0, 1.0, 1.0]])

        p2_expected = [1.0, 1.0, np.sqrt(3),
                       np.sqrt(2), np.sqrt(2),
                       np.sqrt(2)]
        p1_expected = [0.5, 1.0, 3.5,
                       1.5, 3.0,
                       2.5]
        dist = pdist(x, metric=wminkowski, w=[1.0, 1.0, 1.0])
        assert_allclose(dist, p2_expected, rtol=1e-14)

        dist = pdist(x, metric=wminkowski, w=[0.5, 1.0, 2.0], p=1)
        assert_allclose(dist, p1_expected, rtol=1e-14)

        dist = pdist(x, metric='wminkowski', w=[1.0, 1.0, 1.0])
        assert_allclose(dist, p2_expected, rtol=1e-14)

        dist = pdist(x, metric='wminkowski', w=[0.5, 1.0, 2.0], p=1)
        assert_allclose(dist, p1_expected, rtol=1e-14)

    def test_pdist_wminkowski_int_weights(self):
        # regression test for int weights
        x = np.array([[0.0, 0.0, 0.0],
                      [1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0],
                      [1.0, 1.0, 1.0]])
        dist1 = pdist(x, metric='wminkowski', w=np.arange(3), p=1)
        dist2 = pdist(x, metric='wminkowski', w=[0., 1., 2.], p=1)
        assert_allclose(dist1, dist2, rtol=1e-14)

    def test_pdist_mahalanobis(self):
        # 1-dimensional observations
        x = np.array([2.0, 2.0, 3.0, 5.0]).reshape(-1, 1)
        dist = pdist(x, metric='mahalanobis')
        assert_allclose(dist, [0.0, np.sqrt(0.5), np.sqrt(4.5),
                               np.sqrt(0.5), np.sqrt(4.5), np.sqrt(2.0)])

        # 2-dimensional observations
        x = np.array([[0, 0], [-1, 0], [0, 2], [1, 0], [0, -2]])
        dist = pdist(x, metric='mahalanobis')
        rt2 = np.sqrt(2)
        assert_allclose(dist, [rt2, rt2, rt2, rt2, 2, 2*rt2, 2, 2, 2*rt2, 2])

        # Too few observations
        assert_raises(ValueError,
                      pdist, [[0, 1], [2, 3]], metric='mahalanobis')

    def test_pdist_hamming_random(self):
        eps = 1e-07
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-hamming']
        Y_test1 = pdist(X, 'hamming')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_hamming_random_float32(self):
        eps = 1e-07
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test1 = pdist(X, 'hamming')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_hamming_random_nonC(self):
        eps = 1e-07
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-hamming']
        Y_test2 = pdist(X, 'test_hamming')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_dhamming_random(self):
        eps = 1e-07
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test1 = pdist(X, 'hamming')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_dhamming_random_float32(self):
        eps = 1e-07
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test1 = pdist(X, 'hamming')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_dhamming_random_nonC(self):
        eps = 1e-07
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-hamming']
        Y_test2 = pdist(X, 'test_hamming')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_jaccard_random(self):
        eps = 1e-08
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-jaccard']
        Y_test1 = pdist(X, 'jaccard')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_jaccard_random_float32(self):
        eps = 1e-08
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test1 = pdist(X, 'jaccard')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_jaccard_random_nonC(self):
        eps = 1e-08
        X = eo['pdist-boolean-inp']
        Y_right = eo['pdist-jaccard']
        Y_test2 = pdist(X, 'test_jaccard')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_djaccard_random(self):
        eps = 1e-08
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test1 = pdist(X, 'jaccard')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_djaccard_random_float32(self):
        eps = 1e-08
        X = np.float32(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test1 = pdist(X, 'jaccard')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_djaccard_random_nonC(self):
        eps = 1e-08
        X = np.float64(eo['pdist-boolean-inp'])
        Y_right = eo['pdist-jaccard']
        Y_test2 = pdist(X, 'test_jaccard')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_chebychev_random(self):
        eps = 1e-08
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-chebychev']
        Y_test1 = pdist(X, 'chebychev')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_chebychev_random_float32(self):
        eps = 1e-07
        X = np.float32(eo['pdist-double-inp'])
        Y_right = eo['pdist-chebychev']
        Y_test1 = pdist(X, 'chebychev')
        _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)

    def test_pdist_chebychev_random_nonC(self):
        eps = 1e-08
        X = eo['pdist-double-inp']
        Y_right = eo['pdist-chebychev']
        Y_test2 = pdist(X, 'test_chebychev')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_chebychev_iris(self):
        eps = 1e-15
        X = eo['iris']
        Y_right = eo['pdist-chebychev-iris']
        Y_test1 = pdist(X, 'chebychev')
        _assert_within_tol(Y_test1, Y_right, eps)

    def test_pdist_chebychev_iris_float32(self):
        eps = 1e-06
        X = np.float32(eo['iris'])
        Y_right = eo['pdist-chebychev-iris']
        Y_test1 = pdist(X, 'chebychev')
        _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)

    def test_pdist_chebychev_iris_nonC(self):
        eps = 1e-15
        X = eo['iris']
        Y_right = eo['pdist-chebychev-iris']
        Y_test2 = pdist(X, 'test_chebychev')
        _assert_within_tol(Y_test2, Y_right, eps)

    def test_pdist_matching_mtica1(self):
        # Test matching(*,*) with mtica example #1 (nums).
        m = matching(np.array([1, 0, 1, 1, 0]),
                     np.array([1, 1, 0, 1, 1]))
        m2 = matching(np.array([1, 0, 1, 1, 0], dtype=bool),
                      np.array([1, 1, 0, 1, 1], dtype=bool))
        assert_allclose(m, 0.6, rtol=0, atol=1e-10)
        assert_allclose(m2, 0.6, rtol=0, atol=1e-10)

    def test_pdist_matching_mtica2(self):
        # Test matching(*,*) with mtica example #2.
        m = matching(np.array([1, 0, 1]),
                     np.array([1, 1, 0]))
        m2 = matching(np.array([1, 0, 1], dtype=bool),
                      np.array([1, 1, 0], dtype=bool))
        assert_allclose(m, 2/3, rtol=0, atol=1e-10)
        assert_allclose(m2, 2/3, rtol=0, atol=1e-10)

    def test_pdist_matching_match(self):
        # Test pdist(X, 'matching') to see if the two implementations match on
        # random boolean input data.
        D = eo['random-bool-data']
        B = np.bool_(D)
        if verbose > 2:
            print(B.shape, B.dtype)
        eps = 1e-10
        y1 = pdist(B, "matching")
        y2 = pdist(B, "test_matching")
        y3 = pdist(D, "test_matching")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y1-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_jaccard_mtica1(self):
        m = jaccard(np.array([1, 0, 1, 1, 0]),
                    np.array([1, 1, 0, 1, 1]))
        m2 = jaccard(np.array([1, 0, 1, 1, 0], dtype=bool),
                     np.array([1, 1, 0, 1, 1], dtype=bool))
        assert_allclose(m, 0.6, rtol=0, atol=1e-10)
        assert_allclose(m2, 0.6, rtol=0, atol=1e-10)

    def test_pdist_jaccard_mtica2(self):
        m = jaccard(np.array([1, 0, 1]),
                    np.array([1, 1, 0]))
        m2 = jaccard(np.array([1, 0, 1], dtype=bool),
                     np.array([1, 1, 0], dtype=bool))
        assert_allclose(m, 2/3, rtol=0, atol=1e-10)
        assert_allclose(m2, 2/3, rtol=0, atol=1e-10)

    def test_pdist_jaccard_match(self):
        # Test pdist(X, 'jaccard') to see if the two implementations match on
        # random double input data.
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "jaccard")
        y2 = pdist(D, "test_jaccard")
        y3 = pdist(np.bool_(D), "test_jaccard")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y2-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_yule_mtica1(self):
        m = yule(np.array([1, 0, 1, 1, 0]),
                 np.array([1, 1, 0, 1, 1]))
        m2 = yule(np.array([1, 0, 1, 1, 0], dtype=bool),
                  np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 2, rtol=0, atol=1e-10)
        assert_allclose(m2, 2, rtol=0, atol=1e-10)

    def test_pdist_yule_mtica2(self):
        m = yule(np.array([1, 0, 1]),
                 np.array([1, 1, 0]))
        m2 = yule(np.array([1, 0, 1], dtype=bool),
                  np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 2, rtol=0, atol=1e-10)
        assert_allclose(m2, 2, rtol=0, atol=1e-10)

    def test_pdist_yule_match(self):
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "yule")
        y2 = pdist(D, "test_yule")
        y3 = pdist(np.bool_(D), "test_yule")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y2-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_dice_mtica1(self):
        m = dice(np.array([1, 0, 1, 1, 0]),
                 np.array([1, 1, 0, 1, 1]))
        m2 = dice(np.array([1, 0, 1, 1, 0], dtype=bool),
                  np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3/7, rtol=0, atol=1e-10)
        assert_allclose(m2, 3/7, rtol=0, atol=1e-10)

    def test_pdist_dice_mtica2(self):
        m = dice(np.array([1, 0, 1]),
                 np.array([1, 1, 0]))
        m2 = dice(np.array([1, 0, 1], dtype=bool),
                  np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 0.5, rtol=0, atol=1e-10)
        assert_allclose(m2, 0.5, rtol=0, atol=1e-10)

    def test_pdist_dice_match(self):
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "dice")
        y2 = pdist(D, "test_dice")
        y3 = pdist(D, "test_dice")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y2-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_sokalsneath_mtica1(self):
        m = sokalsneath(np.array([1, 0, 1, 1, 0]),
                        np.array([1, 1, 0, 1, 1]))
        m2 = sokalsneath(np.array([1, 0, 1, 1, 0], dtype=bool),
                         np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3/4, rtol=0, atol=1e-10)
        assert_allclose(m2, 3/4, rtol=0, atol=1e-10)

    def test_pdist_sokalsneath_mtica2(self):
        m = sokalsneath(np.array([1, 0, 1]),
                        np.array([1, 1, 0]))
        m2 = sokalsneath(np.array([1, 0, 1], dtype=bool),
                         np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 4/5, rtol=0, atol=1e-10)
        assert_allclose(m2, 4/5, rtol=0, atol=1e-10)

    def test_pdist_sokalsneath_match(self):
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "sokalsneath")
        y2 = pdist(D, "test_sokalsneath")
        y3 = pdist(np.bool_(D), "test_sokalsneath")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y2-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_rogerstanimoto_mtica1(self):
        m = rogerstanimoto(np.array([1, 0, 1, 1, 0]),
                           np.array([1, 1, 0, 1, 1]))
        m2 = rogerstanimoto(np.array([1, 0, 1, 1, 0], dtype=bool),
                            np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3/4, rtol=0, atol=1e-10)
        assert_allclose(m2, 3/4, rtol=0, atol=1e-10)

    def test_pdist_rogerstanimoto_mtica2(self):
        m = rogerstanimoto(np.array([1, 0, 1]),
                           np.array([1, 1, 0]))
        m2 = rogerstanimoto(np.array([1, 0, 1], dtype=bool),
                            np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 4/5, rtol=0, atol=1e-10)
        assert_allclose(m2, 4/5, rtol=0, atol=1e-10)

    def test_pdist_rogerstanimoto_match(self):
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "rogerstanimoto")
        y2 = pdist(D, "test_rogerstanimoto")
        y3 = pdist(np.bool_(D), "test_rogerstanimoto")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y2-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_russellrao_mtica1(self):
        m = russellrao(np.array([1, 0, 1, 1, 0]),
                       np.array([1, 1, 0, 1, 1]))
        m2 = russellrao(np.array([1, 0, 1, 1, 0], dtype=bool),
                        np.array([1, 1, 0, 1, 1], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 3/5, rtol=0, atol=1e-10)
        assert_allclose(m2, 3/5, rtol=0, atol=1e-10)

    def test_pdist_russellrao_mtica2(self):
        m = russellrao(np.array([1, 0, 1]),
                       np.array([1, 1, 0]))
        m2 = russellrao(np.array([1, 0, 1], dtype=bool),
                        np.array([1, 1, 0], dtype=bool))
        if verbose > 2:
            print(m)
        assert_allclose(m, 2/3, rtol=0, atol=1e-10)
        assert_allclose(m2, 2/3, rtol=0, atol=1e-10)

    def test_pdist_russellrao_match(self):
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "russellrao")
        y2 = pdist(D, "test_russellrao")
        y3 = pdist(np.bool_(D), "test_russellrao")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y2-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_sokalmichener_match(self):
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "sokalmichener")
        y2 = pdist(D, "test_sokalmichener")
        y3 = pdist(np.bool_(D), "test_sokalmichener")
        if verbose > 2:
            print(np.abs(y1-y2).max())
            print(np.abs(y2-y3).max())
        _assert_within_tol(y1, y2, eps)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_kulsinski_match(self):
        D = eo['random-bool-data']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "kulsinski")
        y2 = pdist(D, "test_kulsinski")
        y3 = pdist(np.bool_(D), "test_kulsinski")
        _assert_within_tol(y1, y2, eps, verbose > 2)
        _assert_within_tol(y2, y3, eps)

    def test_pdist_canberra_match(self):
        D = eo['iris']
        if verbose > 2:
            print(D.shape, D.dtype)
        eps = 1e-10
        y1 = pdist(D, "canberra")
        y2 = pdist(D, "test_canberra")
        _assert_within_tol(y1, y2, eps, verbose > 2)

    def test_pdist_canberra_ticket_711(self):
        # Test pdist(X, 'canberra') to see if Canberra gives the right result
        # as reported on gh-1238.
        eps = 1e-8
        pdist_y = pdist(([3.3], [3.4]), "canberra")
        right_y = 0.01492537
        _assert_within_tol(pdist_y, right_y, eps, verbose > 2)


def within_tol(a, b, tol):
    return np.abs(a - b).max() < tol


def _assert_within_tol(a, b, atol, verbose_=False):
    if verbose_:
        print(np.abs(a-b).max())
    assert_allclose(a, b, rtol=0, atol=atol)


class TestSomeDistanceFunctions(TestCase):

    def setUp(self):
        # 1D arrays
        x = np.array([1.0, 2.0, 3.0])
        y = np.array([1.0, 1.0, 5.0])
        # 3x1 arrays
        x31 = x[:,np.newaxis]
        y31 = y[:,np.newaxis]
        # 1x3 arrays
        x13 = x31.T
        y13 = y31.T

        self.cases = [(x,y), (x31, y31), (x13, y13)]

    def test_minkowski(self):
        for x, y in self.cases:
            dist1 = minkowski(x, y, p=1)
            assert_almost_equal(dist1, 3.0)
            dist1p5 = minkowski(x, y, p=1.5)
            assert_almost_equal(dist1p5, (1.0+2.0**1.5)**(2./3))
            dist2 = minkowski(x, y, p=2)
            assert_almost_equal(dist2, np.sqrt(5))

    def test_wminkowski(self):
        w = np.array([1.0, 2.0, 0.5])
        for x, y in self.cases:
            dist1 = wminkowski(x, y, p=1, w=w)
            assert_almost_equal(dist1, 3.0)
            dist1p5 = wminkowski(x, y, p=1.5, w=w)
            assert_almost_equal(dist1p5, (2.0**1.5+1.0)**(2./3))
            dist2 = wminkowski(x, y, p=2, w=w)
            assert_almost_equal(dist2, np.sqrt(5))

    def test_euclidean(self):
        for x, y in self.cases:
            dist = euclidean(x, y)
            assert_almost_equal(dist, np.sqrt(5))

    def test_sqeuclidean(self):
        for x, y in self.cases:
            dist = sqeuclidean(x, y)
            assert_almost_equal(dist, 5.0)

    def test_cosine(self):
        for x, y in self.cases:
            dist = cosine(x, y)
            assert_almost_equal(dist, 1.0 - 18.0/(np.sqrt(14)*np.sqrt(27)))

    def test_correlation(self):
        xm = np.array([-1.0, 0, 1.0])
        ym = np.array([-4.0/3, -4.0/3, 5.0-7.0/3])
        for x, y in self.cases:
            dist = correlation(x, y)
            assert_almost_equal(dist, 1.0 - np.dot(xm, ym)/(norm(xm)*norm(ym)))

    def test_mahalanobis(self):
        x = np.array([1.0, 2.0, 3.0])
        y = np.array([1.0, 1.0, 5.0])
        vi = np.array([[2.0, 1.0, 0.0],[1.0, 2.0, 1.0], [0.0, 1.0, 2.0]])
        for x, y in self.cases:
            dist = mahalanobis(x, y, vi)
            assert_almost_equal(dist, np.sqrt(6.0))


class TestSquareForm(TestCase):

    def test_squareform_empty_matrix(self):
        A = np.zeros((0,0))
        rA = squareform(np.array(A, dtype='double'))
        assert_equal(rA.shape, (0,))

    def test_squareform_empty_vector(self):
        v = np.zeros((0,))
        rv = squareform(np.array(v, dtype='double'))
        assert_equal(rv.shape, (1,1))
        assert_equal(rv[0, 0], 0)

    def test_squareform_1by1_matrix(self):
        A = np.zeros((1,1))
        rA = squareform(np.array(A, dtype='double'))
        assert_equal(rA.shape, (0,))

    def test_squareform_one_vector(self):
        v = np.ones((1,)) * 8.3
        rv = squareform(np.array(v, dtype='double'))
        assert_equal(rv.shape, (2,2))
        assert_equal(rv[0,1], 8.3)
        assert_equal(rv[1,0], 8.3)

    def test_squareform_one_binary_vector(self):
        # Tests squareform on a 1x1 binary matrix; conversion to double was
        # causing problems (see pull request 73).
        v = np.ones((1,), dtype=bool)
        rv = squareform(v)
        assert_equal(rv.shape, (2,2))
        assert_(rv[0,1])

    def test_squareform_2by2_matrix(self):
        A = np.zeros((2,2))
        A[0,1] = 0.8
        A[1,0] = 0.8
        rA = squareform(np.array(A, dtype='double'))
        assert_equal(rA.shape, (1,))
        assert_equal(rA[0], 0.8)

    def test_squareform_multi_matrix(self):
        for n in xrange(2, 5):
            yield self.check_squareform_multi_matrix(n)

    def check_squareform_multi_matrix(self, n):
        X = np.random.rand(n, 4)
        Y = pdist(X)
        assert_equal(len(Y.shape), 1)
        A = squareform(Y)
        Yr = squareform(A)
        s = A.shape
        k = 0
        if verbose >= 3:
            print(A.shape, Y.shape, Yr.shape)
        assert_equal(len(s), 2)
        assert_equal(len(Yr.shape), 1)
        assert_equal(s[0], s[1])
        for i in xrange(0, s[0]):
            for j in xrange(i+1, s[1]):
                if i != j:
                    assert_equal(A[i, j], Y[k])
                    k += 1
                else:
                    assert_equal(A[i, j], 0)


class TestNumObsY(TestCase):

    def test_num_obs_y_multi_matrix(self):
        for n in xrange(2, 10):
            X = np.random.rand(n, 4)
            Y = pdist(X)
            assert_equal(num_obs_y(Y), n)

    def test_num_obs_y_1(self):
        # Tests num_obs_y(y) on a condensed distance matrix over 1
        # observations. Expecting exception.
        assert_raises(ValueError, self.check_y, 1)

    def test_num_obs_y_2(self):
        # Tests num_obs_y(y) on a condensed distance matrix over 2
        # observations.
        assert_(self.check_y(2))

    def test_num_obs_y_3(self):
        assert_(self.check_y(3))

    def test_num_obs_y_4(self):
        assert_(self.check_y(4))

    def test_num_obs_y_5_10(self):
        for i in xrange(5, 16):
            self.minit(i)

    def test_num_obs_y_2_100(self):
        # Tests num_obs_y(y) on 100 improper condensed distance matrices.
        # Expecting exception.
        a = set([])
        for n in xrange(2, 16):
            a.add(n*(n-1)/2)
        for i in xrange(5, 105):
            if i not in a:
                assert_raises(ValueError, self.bad_y, i)

    def minit(self, n):
        assert_(self.check_y(n))

    def bad_y(self, n):
        y = np.random.rand(n)
        return num_obs_y(y)

    def check_y(self, n):
        return num_obs_y(self.make_y(n)) == n

    def make_y(self, n):
        return np.random.rand((n * (n - 1)) // 2)


class TestNumObsDM(TestCase):

    def test_num_obs_dm_multi_matrix(self):
        for n in xrange(1, 10):
            X = np.random.rand(n, 4)
            Y = pdist(X)
            A = squareform(Y)
            if verbose >= 3:
                print(A.shape, Y.shape)
            assert_equal(num_obs_dm(A), n)

    def test_num_obs_dm_0(self):
        # Tests num_obs_dm(D) on a 0x0 distance matrix. Expecting exception.
        assert_(self.check_D(0))

    def test_num_obs_dm_1(self):
        # Tests num_obs_dm(D) on a 1x1 distance matrix.
        assert_(self.check_D(1))

    def test_num_obs_dm_2(self):
        assert_(self.check_D(2))

    def test_num_obs_dm_3(self):
        assert_(self.check_D(2))

    def test_num_obs_dm_4(self):
        assert_(self.check_D(4))

    def check_D(self, n):
        return num_obs_dm(self.make_D(n)) == n

    def make_D(self, n):
        return np.random.rand(n, n)


def is_valid_dm_throw(D):
    return is_valid_dm(D, throw=True)


class TestIsValidDM(TestCase):

    def test_is_valid_dm_int16_array_E(self):
        # Tests is_valid_dm(*) on an int16 array. Exception expected.
        D = np.zeros((5, 5), dtype='i')
        assert_raises(TypeError, is_valid_dm_throw, (D))

    def test_is_valid_dm_int16_array_F(self):
        D = np.zeros((5, 5), dtype='i')
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_improper_shape_1D_E(self):
        D = np.zeros((5,), dtype=np.double)
        assert_raises(ValueError, is_valid_dm_throw, (D))

    def test_is_valid_dm_improper_shape_1D_F(self):
        D = np.zeros((5,), dtype=np.double)
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_improper_shape_3D_E(self):
        D = np.zeros((3,3,3), dtype=np.double)
        assert_raises(ValueError, is_valid_dm_throw, (D))

    def test_is_valid_dm_improper_shape_3D_F(self):
        D = np.zeros((3,3,3), dtype=np.double)
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_nonzero_diagonal_E(self):
        y = np.random.rand(10)
        D = squareform(y)
        for i in xrange(0, 5):
            D[i, i] = 2.0
        assert_raises(ValueError, is_valid_dm_throw, (D))

    def test_is_valid_dm_nonzero_diagonal_F(self):
        y = np.random.rand(10)
        D = squareform(y)
        for i in xrange(0, 5):
            D[i, i] = 2.0
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_asymmetric_E(self):
        y = np.random.rand(10)
        D = squareform(y)
        D[1,3] = D[3,1] + 1
        assert_raises(ValueError, is_valid_dm_throw, (D))

    def test_is_valid_dm_asymmetric_F(self):
        y = np.random.rand(10)
        D = squareform(y)
        D[1,3] = D[3,1] + 1
        assert_equal(is_valid_dm(D), False)

    def test_is_valid_dm_correct_1_by_1(self):
        D = np.zeros((1,1), dtype=np.double)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_2_by_2(self):
        y = np.random.rand(1)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_3_by_3(self):
        y = np.random.rand(3)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_4_by_4(self):
        y = np.random.rand(6)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)

    def test_is_valid_dm_correct_5_by_5(self):
        y = np.random.rand(10)
        D = squareform(y)
        assert_equal(is_valid_dm(D), True)


def is_valid_y_throw(y):
    return is_valid_y(y, throw=True)


class TestIsValidY(TestCase):
    # If test case name ends on "_E" then an exception is expected for the
    # given input, if it ends in "_F" then False is expected for the is_valid_y
    # check.  Otherwise the input is expected to be valid.

    def test_is_valid_y_int16_array_E(self):
        y = np.zeros((10,), dtype='i')
        assert_raises(TypeError, is_valid_y_throw, (y))

    def test_is_valid_y_int16_array_F(self):
        y = np.zeros((10,), dtype='i')
        assert_equal(is_valid_y(y), False)

    def test_is_valid_y_improper_shape_2D_E(self):
        y = np.zeros((3,3,), dtype=np.double)
        assert_raises(ValueError, is_valid_y_throw, (y))

    def test_is_valid_y_improper_shape_2D_F(self):
        y = np.zeros((3,3,), dtype=np.double)
        assert_equal(is_valid_y(y), False)

    def test_is_valid_y_improper_shape_3D_E(self):
        y = np.zeros((3,3,3), dtype=np.double)
        assert_raises(ValueError, is_valid_y_throw, (y))

    def test_is_valid_y_improper_shape_3D_F(self):
        y = np.zeros((3,3,3), dtype=np.double)
        assert_equal(is_valid_y(y), False)

    def test_is_valid_y_correct_2_by_2(self):
        y = self.correct_n_by_n(2)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_correct_3_by_3(self):
        y = self.correct_n_by_n(3)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_correct_4_by_4(self):
        y = self.correct_n_by_n(4)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_correct_5_by_5(self):
        y = self.correct_n_by_n(5)
        assert_equal(is_valid_y(y), True)

    def test_is_valid_y_2_100(self):
        a = set([])
        for n in xrange(2, 16):
            a.add(n*(n-1)/2)
        for i in xrange(5, 105):
            if i not in a:
                assert_raises(ValueError, self.bad_y, i)

    def bad_y(self, n):
        y = np.random.rand(n)
        return is_valid_y(y, throw=True)

    def correct_n_by_n(self, n):
        y = np.random.rand((n * (n - 1)) // 2)
        return y


def test_bad_p():
    # Raise ValueError if p < 1.
    p = 0.5
    assert_raises(ValueError, minkowski, [1, 2], [3, 4], p)
    assert_raises(ValueError, wminkowski, [1, 2], [3, 4], p, [1, 1])


def test_sokalsneath_all_false():
    # Regression test for ticket #876
    assert_raises(ValueError, sokalsneath, [False, False, False], [False, False, False])


def test_canberra():
    # Regression test for ticket #1430.
    assert_equal(canberra([1,2,3], [2,4,6]), 1)
    assert_equal(canberra([1,1,0,0], [1,0,1,0]), 2)


def test_braycurtis():
    # Regression test for ticket #1430.
    assert_almost_equal(braycurtis([1,2,3], [2,4,6]), 1./3, decimal=15)
    assert_almost_equal(braycurtis([1,1,0,0], [1,0,1,0]), 0.5, decimal=15)


def test_euclideans():
    # Regression test for ticket #1328.
    x1 = np.array([1, 1, 1])
    x2 = np.array([0, 0, 0])

    # Basic test of the calculation.
    assert_almost_equal(sqeuclidean(x1, x2), 3.0, decimal=14)
    assert_almost_equal(euclidean(x1, x2), np.sqrt(3), decimal=14)

    # Check flattening for (1, N) or (N, 1) inputs
    assert_almost_equal(euclidean(x1[np.newaxis, :], x2[np.newaxis, :]),
                        np.sqrt(3), decimal=14)
    assert_almost_equal(sqeuclidean(x1[np.newaxis, :], x2[np.newaxis, :]),
                        3.0, decimal=14)
    assert_almost_equal(sqeuclidean(x1[:, np.newaxis], x2[:, np.newaxis]),
                        3.0, decimal=14)

    # Distance metrics only defined for vectors (= 1-D)
    x = np.arange(4).reshape(2, 2)
    assert_raises(ValueError, euclidean, x, x)
    assert_raises(ValueError, sqeuclidean, x, x)

    # Another check, with random data.
    rs = np.random.RandomState(1234567890)
    x = rs.rand(10)
    y = rs.rand(10)
    d1 = euclidean(x, y)
    d2 = sqeuclidean(x, y)
    assert_almost_equal(d1**2, d2, decimal=14)


def test_hamming_unequal_length():
    # Regression test for gh-4290.
    x = [0, 0, 1]
    y = [1, 0, 1, 0]
    # Used to give an AttributeError from ndarray.mean called on bool
    assert_raises(ValueError, hamming, x, y)


def test_hamming_string_array():
    # https://github.com/scikit-learn/scikit-learn/issues/4014
    a = np.array(['eggs', 'spam', 'spam', 'eggs', 'spam', 'spam', 'spam',
                  'spam', 'spam', 'spam', 'spam', 'eggs', 'eggs', 'spam',
                  'eggs', 'eggs', 'eggs', 'eggs', 'eggs', 'spam'],
                  dtype='|S4')
    b = np.array(['eggs', 'spam', 'spam', 'eggs', 'eggs', 'spam', 'spam',
                  'spam', 'spam', 'eggs', 'spam', 'eggs', 'spam', 'eggs',
                  'spam', 'spam', 'eggs', 'spam', 'spam', 'eggs'],
                  dtype='|S4')
    desired = 0.45
    assert_allclose(hamming(a, b), desired)


def test_sqeuclidean_dtypes():
    # Assert that sqeuclidean returns the right types of values.
    # Integer types should be converted to floating for stability.
    # Floating point types should be the same as the input.
    x = [1, 2, 3]
    y = [4, 5, 6]

    for dtype in [np.int8, np.int16, np.int32, np.int64]:
        d = sqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
        assert_(np.issubdtype(d.dtype, np.floating))

    for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
        d1 = sqeuclidean([0], np.asarray([-1], dtype=dtype))
        d2 = sqeuclidean(np.asarray([-1], dtype=dtype), [0])

        assert_equal(d1, d2)
        assert_equal(d1, np.float64(np.iinfo(dtype).max) ** 2)

    dtypes = [np.float32, np.float64, np.complex64, np.complex128]
    for dtype in ['float16', 'float128']:
        # These aren't present in older numpy versions; float128 may also not
        # be present on all platforms.
        if hasattr(np, dtype):
            dtypes.append(getattr(np, dtype))

    for dtype in dtypes:
        d = sqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
        assert_equal(d.dtype, dtype)


def test_sokalmichener():
    # Test that sokalmichener has the same result for bool and int inputs.
    p = [True, True, False]
    q = [True, False, True]
    x = [int(b) for b in p]
    y = [int(b) for b in q]
    dist1 = sokalmichener(p, q)
    dist2 = sokalmichener(x, y)
    # These should be exactly the same.
    assert_equal(dist1, dist2)


def test__validate_vector():
    x = [1, 2, 3]
    y = _validate_vector(x)
    assert_array_equal(y, x)

    y = _validate_vector(x, dtype=np.float64)
    assert_array_equal(y, x)
    assert_equal(y.dtype, np.float64)

    x = [1]
    y = _validate_vector(x)
    assert_equal(y.ndim, 1)
    assert_equal(y, x)

    x = 1
    y = _validate_vector(x)
    assert_equal(y.ndim, 1)
    assert_equal(y, [x])

    x = np.arange(5).reshape(1, -1, 1)
    y = _validate_vector(x)
    assert_equal(y.ndim, 1)
    assert_array_equal(y, x[0, :, 0])

    x = [[1, 2], [3, 4]]
    assert_raises(ValueError, _validate_vector, x)


if __name__ == "__main__":
    run_module_suite()