Source code for tmap.tda.cover

from __future__ import print_function
import numpy as np
import itertools


class Cover(object):
    """
    Cover the projected data with a grid of overlapping hypercubes.

    :param numpy.ndarray/pandas.DataFrame projected_data: Normally, `projected_data` should be
        the data transformed by *MDS* or *t-SNE* or *PCA*. It decides the way of partition
        for the original point cloud. It must be two-dimensional (samples x dimensions).
    :param integer resolution: It decides the number of partitions of each axis of
        ``projected_data``.
    :param float overlap: `overlap` must be greater than 0. It decides the level of expansion
        of each partition. If `overlap` equals 0.5, each partition (including the first and
        the last) is expanded on both sides by 0.5 times its original width.
    :raises ValueError: if `overlap` is not greater than 0.
    """

    def __init__(self, projected_data, resolution=10, overlap=0.5):
        """
        :param projected_data: the projection data used for covering
        :param resolution: resolution of covering
        :param overlap: overlap of adjacent covers
        :raises ValueError: if `overlap` is not greater than 0
        """
        # Validate before touching any state. Library code must not terminate the
        # interpreter, so an invalid argument is reported as an exception.
        if overlap <= 0:
            raise ValueError("overlap must greater than 0")

        self.resolution = resolution
        self.overlap = overlap

        self.n_points, self.n_dimensions = projected_data.shape
        self.data = projected_data

        # Per-dimension lower/upper bounds of the projected space, the width of one
        # (non-overlapping) chunk, and the amount each chunk is widened on each side.
        self.floor = np.min(projected_data, axis=0)
        self.roof = np.max(projected_data, axis=0)
        self.chunk_width = (self.roof - self.floor) / resolution
        self.overlap_width = self.chunk_width * overlap

    @property
    def hypercubes(self):
        """
        Boolean membership masks of every hypercube of the covering.

        :return: A mask for the projected data with one row per hypercube
            (``resolution ** n_dimensions`` rows) and one column per sample. An entry is
            True when the sample lies within that hypercube (overlap included).
        :rtype: numpy.ndarray
        """
        return self._get_hypercubes()

    def _expanded_bounds(self, bin_index):
        """Return the (lower, upper) bounds, overlap included, of the bin at `bin_index`."""
        lower_bound = self.floor + np.asarray(bin_index) * self.chunk_width
        upper_bound = lower_bound + self.chunk_width + self.overlap_width
        lower_bound = lower_bound - self.overlap_width
        return lower_bound, upper_bound

    def _get_hypercubes(self, output_bounds=False):
        """
        Build the covering, either as membership masks or as bin bounds.

        Bins are enumerated with ``itertools.product`` over ``range(resolution)`` for every
        dimension, so the last dimension varies fastest.

        NOTE: membership uses the half-open interval ``[lower, upper)``. If a dimension of
        the data is constant, its chunk and overlap widths are 0, the interval is empty and
        no sample falls into any hypercube.

        :param bool output_bounds: when True, return the bounds of the bins instead of the
            membership masks.
        :return: if `output_bounds` is False, a boolean array of shape
            ``(resolution ** n_dimensions, n_points)``; otherwise a tuple
            ``(bounds_with_overlap, bounds_without_overlap)`` of two lists holding one
            ``(lower_bound, upper_bound)`` pair per bin.
        """
        # Index of every hypercube along each axis (how many and where the hypercubes are).
        bin_indices = itertools.product(np.arange(self.resolution), repeat=self.n_dimensions)

        if output_bounds:
            # Only the bounds are requested: skip the mask computation entirely.
            bounds_with_overlap = []
            bounds_without_overlap = []
            for bin_index in bin_indices:
                lower_bound, upper_bound = self._expanded_bounds(bin_index)
                bounds_with_overlap.append((lower_bound, upper_bound))
                bounds_without_overlap.append(
                    (lower_bound + self.overlap_width, upper_bound - self.overlap_width)
                )
            return bounds_with_overlap, bounds_without_overlap

        n_bins = self.resolution ** self.n_dimensions
        hypercubes = np.zeros((n_bins, self.n_points), dtype=bool)
        for row, bin_index in enumerate(bin_indices):
            lower_bound, upper_bound = self._expanded_bounds(bin_index)
            # A sample belongs to the cube only if it is inside the bounds on every axis.
            hypercubes[row, :] = np.all(
                (self.data >= lower_bound) & (self.data < upper_bound), axis=1
            )
        return hypercubes