Source code for mars.learn.metrics.pairwise.pairwise

# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings
from functools import partial

try:
    from sklearn.exceptions import DataConversionWarning
except ImportError:  # pragma: no cover
    DataConversionWarning = None

from ....tensor.spatial import distance
from ...utils.validation import check_non_negative
from .core import PairwiseDistances
from .manhattan import manhattan_distances
from .cosine import cosine_distances
from .euclidean import euclidean_distances
from .haversine import haversine_distances


_VALID_METRICS = ['euclidean', 'l2', 'l1', 'manhattan', 'cityblock',
                  'braycurtis', 'canberra', 'chebyshev', 'correlation',
                  'cosine', 'dice', 'hamming', 'jaccard', 'kulsinski',
                  'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
                  'russellrao', 'seuclidean', 'sokalmichener',
                  'sokalsneath', 'sqeuclidean', 'yule', 'wminkowski',
                  'haversine']

# Helper functions - distance
PAIRWISE_DISTANCE_FUNCTIONS = {
    # If updating this dictionary, update the doc in both distance_metrics()
    # and also in pairwise_distances()!
    'cityblock': manhattan_distances,
    'cosine': cosine_distances,
    'euclidean': euclidean_distances,
    'haversine': haversine_distances,
    'l2': euclidean_distances,
    'l1': manhattan_distances,
    'manhattan': manhattan_distances,
    'precomputed': None,  # HACK: precomputed is always allowed, never called
}

# These distances recquire boolean tensors, when using mars.tensor.spatial.distance
PAIRWISE_BOOLEAN_FUNCTIONS = [
    'dice',
    'jaccard',
    'kulsinski',
    'matching',
    'rogerstanimoto',
    'russellrao',
    'sokalmichener',
    'sokalsneath',
    'yule',
]


[docs]def pairwise_distances(X, Y=None, metric="euclidean", **kwds): if (metric not in _VALID_METRICS and not callable(metric) and metric != "precomputed"): raise ValueError(f"Unknown metric {metric}. Valid metrics are {_VALID_METRICS}, " "or 'precomputed', or a callable") if metric == "precomputed": X, _ = PairwiseDistances.check_pairwise_arrays(X, Y, precomputed=True) whom = ("`pairwise_distances`. Precomputed distance " " need to have non-negative values.") X = check_non_negative(X, whom=whom) return X elif metric in PAIRWISE_DISTANCE_FUNCTIONS: func = PAIRWISE_DISTANCE_FUNCTIONS[metric] else: # including when metric is callable dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None if (dtype == bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)) and DataConversionWarning is not None): msg = f"Data was converted to boolean for metric {metric}" warnings.warn(msg, DataConversionWarning) X, Y = PairwiseDistances.check_pairwise_arrays(X, Y, dtype=dtype) if X is Y: return distance.squareform(distance.pdist(X, metric=metric, **kwds)) func = partial(distance.cdist, metric=metric, **kwds) return func(X, Y, **kwds)