# Source code for tmap.tda.metric
# -*- coding: utf-8 -*-
import numpy as np
from scipy.spatial import distance
# Names accepted as the ``metric`` argument of ``Metric``.
# "precomputed" means the input already is a distance matrix; every other
# name is forwarded unchanged to ``scipy.spatial.distance.pdist``.
# NOTE(review): some names (e.g. "kulsinski") may no longer be accepted by
# recent SciPy releases -- confirm against the installed SciPy version.
_METRIC_ALLOWED = [
    "braycurtis",
    "canberra",
    "chebyshev",
    "cityblock",
    "correlation",
    "cosine",
    "dice",
    "euclidean",
    "hamming",
    "jaccard",
    "kulsinski",
    "mahalanobis",
    "matching",
    "minkowski",
    "rogerstanimoto",
    "russellrao",
    "seuclidean",
    "sokalmichener",
    "sokalsneath",
    "sqeuclidean",
    "yule",
    "precomputed",
]
class Metric(object):
    """
    ``metric + data -> distance matrix``

    Define a distance metric and transform data points into a distance matrix.

    :param str metric: name of the distance metric; it must be one of the
        names listed in ``_METRIC_ALLOWED``. For example:

        * cosine
        * euclidean
        * hamming
        * minkowski
        * precomputed: for precomputed distance matrix.

    :raises ValueError: if `metric` is not one of the allowed names.
    """

    def __init__(self, metric="euclidean"):
        if metric not in _METRIC_ALLOWED:
            # Wrap in a 1-tuple so that a tuple-valued `metric` is formatted
            # as a single value instead of breaking the %-formatting.
            raise ValueError("The metric is not allowed: %s." % (metric,))
        self.name = metric

    def fit_transform(self, data):
        """
        Create and return a distance matrix based on the specified metric.

        :param np.ndarray/pd.DataFrame data: raw data (one observation per
            row) or, when the metric is ``precomputed``, a square distance
            matrix.
        :return: for ``precomputed`` the input itself as a ``np.ndarray``;
            otherwise the square matrix of pairwise distances between the
            rows of `data`.
        :rtype: np.ndarray
        :raises ValueError: if `data` is None, or if the metric is
            ``precomputed`` and `data` is not a square 2-D matrix.
        """
        if data is None:
            raise ValueError("Data must not be None.")

        # Exact type test on purpose: anything that is not a plain ndarray
        # (DataFrame, list, ndarray subclass) is converted to one.
        if type(data) is not np.ndarray:
            data = np.array(data)

        if self.name == "precomputed":
            # Only the shape is validated here; symmetry and a zero diagonal
            # are not checked.
            if data.ndim != 2 or data.shape[0] != data.shape[1]:
                raise ValueError(
                    "A precomputed distance matrix must be a square 2-D "
                    "matrix, got shape %s." % (data.shape,)
                )
            return data

        # todo: the pdist may be too slow, to speed up for big data...
        # pdist yields the condensed (upper-triangle) distances; squareform
        # expands them into the full square matrix.
        dist_vec = distance.pdist(data, metric=self.name)
        return distance.squareform(dist_vec)