Source code for ugtm.ugtm_landscape

"""Builds continuous GTM class maps or landscapes using labels or activities.
"""
# Authors: Helena A. Gaspar <hagax8@gmail.com>
# License: MIT

from __future__ import print_function
import numpy as np


[docs] class ClassMap(object): r"""Class for ClassMap: Bayesian classification model for each GTM node. Arguments ========= nodeClassT : array of shape (n_nodes, n_classes) Likelihood of each node :math:`k` given class :math:`C_i`: :math:`P(k|C_i) = \frac{\sum_{i_{c}}R_{i_{c},k}}{N_c}`. nodeClassP : array of shape (n_nodes, n_classes) Posterior probabilities of each class :math:`C_i` for each node :math:`k`: :math:`P(C_i|k) =\frac{P(k|C_i)P(C_i)}{\sum_i P(k|C_i)P(C_i)}` activityModel : array of shape (n_nodes,1) Class label attributed to each GTM node on the GTM node grid. Computed using argmax of posterior probabilities. uniqClasses : array of shape (n_classes,1) Unique class labels. """ def __init__(self, nodeClassP, nodeClassT, activityModel, uniqClasses): r"""Constructor of ClassMap. Parameters ========== nodeClassT : array of shape (n_nodes, n_classes) Likelihood of each node :math:`k` given class :math:`C_i`: :math:`P(k|C_i) = \frac{\sum_{i_{c}}R_{i_{c},k}}{N_c}`. nodeClassP : array of shape (n_nodes, n_classes) Posterior probabilities of each class :math:`C_i` for each node :math:`k`: :math:`P(C_i|k) =\frac{P(k|C_i)P(C_i)}{\sum_i P(k|C_i)P(C_i)}` activityModel : array of shape (n_nodes,1) Class label attributed to each GTM node on the GTM node grid. Computed using argmax of posterior probabilities. uniqClasses : array of shape (n_classes,1) Unique class labels. """ self.nodeClassP = nodeClassP self.nodeClassT = nodeClassT self.activityModel = activityModel self.uniqClasses = uniqClasses
[docs] def landscape(optimizedModel, activity): r"""Computes GTM landscapes based on activities (= continuous labels). Parameters ========== optimizedModel: an instance of :class:`~ugtm.ugtm_classes.OptimizedGTM` The optimized GTM model. activity: array of shape (n_individuals,1) Activity vector (continuous labels) associated with the data used to compute the optimized GTM model. Returns ======= array of shape (n_nodes,1) Activity landscape: associates each GTM node :math:`k` on the GTM node grid with an activity value, which is computed as an average mean of data activity values (continuous labels). If a = activities, r_k = vector of optimized GTM responsibilities for node k, and N = n_individuals: :math:`landscape_k = \frac{\mathbf{a \cdot r}_k}{\sum_i^{N}r_{ik}}` """ epsilon = 10e-8 sums = np.sum(optimizedModel.matR+epsilon, axis=0) landscape = np.dot(activity.flatten(), optimizedModel.matR+epsilon) / sums[None, :] return np.asarray(landscape)[0, :]
[docs] def classMap(optimizedModel, activity, prior="estimated"): r"""Computes GTM class map based on discrete activities (= discrete labels) Parameters ========== optimizedModel: an instance of :class:`~ugtm.ugtm_classes.OptimizedGTM` The optimized GTM model. activity: array of shape (n_individuals,1) Activity vector (discrete labels) associated with the data used to compute the optimized GTM model. prior: {estimated, equiprobable}, optional Type of prior used for Bayesian classifier. "equiprobable" assigns the same weight to all classes: :math:`P(C_i)=1/N_{classes}`. "estimated" accounts for class imbalance using the number of individuals in each class :math:`N(C_i)`: :math:`P(C_i)=N_{C_i}/N_{total}` Returns ======= instance of :class:`~ugtm.ugtm_landscape.ClassMap` Computes a GTM bayesian model and returns an instance of :class:`~ugtm.ugtm_landscape.ClassMap`. Notes ===== This function computes the likelihood of each GTM node given a class, the posterior probabilities of each class (using Bayes' theorem), and the class attributed to each node: 1. output.nodeClassT: likelihood of each node :math:`k` given class :math:`C_i`: :math:`P(k|C_i) = \frac{\sum_{i_{c}}R_{i_{c},k}}{N_c}`. 2. output.nodeClassP: posterior probabilities of each class :math:`C_i` for each node :math:`k`, using piors :math:`P(C_i)`: :math:`P(C_i|k) =\frac{P(k|C_i)P(C_i)}{\sum_i P(k|C_i)P(C_i)}` 3. output.activityModel: Class label attributed to each GTM node on the GTM node grid. Computed using argmax of posterior probabilities. """ uniqClasses, classVector = np.unique(activity, return_inverse=True) nClasses = uniqClasses.shape[0] n_nodes = optimizedModel.matR.shape[1] # posterior distribution nodeClassP = np.zeros([n_nodes, nClasses]) # likelihood nodeClassT = np.zeros([n_nodes, nClasses]) sumClass = np.zeros([nClasses]) summe = np.zeros([n_nodes]) for i in range(nClasses): sumClass[i] = (classVector == i).sum() if prior == "estimated": priors = sumClass/sumClass.sum() elif prior == "equiprobable": priors = np.zeros([nClasses])+(1.0/nClasses) for i in range(nClasses): for k in range(n_nodes): nodeClassT[k, i] = optimizedModel.matR[classVector == i, k].sum()/sumClass[i] for i in range(nClasses): for k in range(n_nodes): nodeClassP[k, i] = nodeClassT[k, i]*priors[i] summe[k] += nodeClassP[k, i] for i in range(nClasses): for k in range(n_nodes): if summe[k] != 0.0: nodeClassP[k, i] = nodeClassP[k, i]/summe[k] for k in range(n_nodes): if summe[k] == 0.0: for i in range(nClasses): nodeClassP[k, i] = 1/nClasses nodeClass = np.argmax(nodeClassP, axis=1) return(ClassMap(nodeClassP, nodeClassT, nodeClass, uniqClasses))