Source code for ugtm.ugtm_classes

"""Defines classes for initial and optimized GTM model.
"""
# Authors: Helena A. Gaspar <hagax8@gmail.com>
# License: MIT

from __future__ import print_function
import numpy as np


class ReturnU(object):
    """Plain container holding a ``matU`` value and a ``betaInv`` value.

    Both constructor arguments are stored unchanged as attributes of the
    same name; no validation or copying is performed.

    Attributes
    ----------
    matU : object
        Value passed to the constructor.
        NOTE(review): presumably a matrix, going by the ``mat`` prefix used
        in this module -- confirm against the code that builds it.
    betaInv : object
        Value passed to the constructor.
        NOTE(review): presumably the noise variance, as for the ``betaInv``
        attribute of the GTM classes in this module -- confirm.
    """

    def __init__(self, matU, betaInv):
        """Store ``matU`` and ``betaInv`` on the instance."""
        self.matU = matU
        self.betaInv = betaInv
class InitialGTM(object):
    r"""Class for initial GTM model.

    Plain data holder: every constructor argument is stored unchanged as
    an attribute of the same name.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Coordinates of nodes defining a grid in the 2D space.
    matM : array of shape (n_rbf_centers, 2)
        Coordinates of radial basis function (RBF) centers,
        defining a grid in the 2D space.
    n_nodes : int
        The number of nodes defining a grid in the 2D space.
    n_rbf_centers : int
        The number of radial basis function (RBF) centers.
    rbfWidth : float
        Initial radial basis function (RBF) width.
        This is set to the average of the minimum distance
        between RBF centers:
        :math:`rbfWidth=\sigma \times average(\mathbf{distances(rbf)}_{min})`,
        where :math:`\sigma` is the GTM hyperparameter s.
        NB: if GTM hyperparameter s = 0 (not recommended),
        rbfWidth is set to the maximum distance between RBF centers.
    matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
        RBF matrix plus one dimension to include a term for bias.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in n-dimensional space
        (projection of matX in data space);
        A point matY[:,i] is a center of Gaussian component in data space.
        :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`
    betaInv : float
        Noise variance parameter for the data distribution.
        Written as :math:`\beta^{-1}` in the original paper.
        Initialized to be the larger between:
        (1) the 3rd eigenvalue of the data covariance matrix,
        (2) half the average distance between Gaussian component centers
        in the data space (matY matrix).
    n_dimensions : int
        Data space dimensionality (number of variables).
    """

    def __init__(self, matX, matM, n_nodes, n_rbf_centers, rbfWidth,
                 matPhiMPlusOne, matW, matY, betaInv, n_dimensions):
        r"""Constructor for InitialGTM class.

        Parameters
        ----------
        matX : array of shape (n_nodes, 2)
            Coordinates of nodes defining a grid in the 2D space.
        matM : array of shape (n_rbf_centers, 2)
            Coordinates of radial basis function (RBF) centers,
            defining a grid in the 2D space.
        n_nodes : int
            The number of nodes defining a grid in the 2D space.
        n_rbf_centers : int
            The number of radial basis function (RBF) centers.
        rbfWidth : float
            Initial radial basis function (RBF) width.
            This is set to the average of the minimum distance
            between RBF centers:
            :math:`rbfWidth=\sigma \times average(\mathbf{distances(rbf)}_{min})`,
            where :math:`\sigma` is the GTM hyperparameter s.
            NB: if GTM hyperparameter s = 0 (not recommended),
            rbfWidth is set to the maximum distance between RBF centers.
        matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
            RBF matrix plus one dimension to include a term for bias.
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in n-dimensional space
            (projection of matX in data space);
            A point matY[:,i] is a Gaussian component center in data space.
            :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`
        betaInv : float
            Noise variance parameter for the data distribution.
            Written as :math:`\beta^{-1}` in the original paper.
            Initialized to be the larger between:
            (1) the 3rd eigenvalue of the data covariance matrix,
            (2) half the average distance between Gaussian component
            centers in the data space (matY matrix).
        n_dimensions : int
            Data space dimensionality (number of variables).
        """
        self.matX = matX
        self.matM = matM
        self.n_rbf_centers = n_rbf_centers
        self.n_nodes = n_nodes
        self.rbfWidth = rbfWidth
        self.matPhiMPlusOne = matPhiMPlusOne
        self.matW = matW
        self.matY = matY
        self.betaInv = betaInv
        self.n_dimensions = n_dimensions
class OptimizedGTM(object):
    r"""Class for optimized GTM model.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Coordinates of nodes defining a grid in the 2D space.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in n-dimensional space (projection of matX in data space).
        matY = np.dot(matW, np.transpose(matPhiMPlusOne))
    matP : array of shape (n_individuals, n_nodes)
        Data distribution with variance betaInv.
    matR : array of shape (n_individuals, n_nodes)
        Responsibilities (posterior probabilities),
        used to compute data representations:
        means (matMeans) and modes (matModes).
        Responsibilities are the main output of GTM.
        matR[i,:] represents the responsibility vector for an instance i.
        The columns in matR correspond to rows in matX (nodes).
    betaInv : float
        Noise variance parameter for the data distribution.
        Written as :math:`\beta^{-1}` in the original paper.
    matMeans : array of shape (n_individuals, 2)
        Data representation in 2D space: means (most commonly used for GTM).
    matModes : array of shape (n_individuals, 2)
        Data representation in 2D space: modes
        (for each instance, coordinate with highest responsibility).
    n_dimensions : int
        Data space dimensionality (number of variables).
    converged : bool
        True if the model has converged; otherwise False.
    """

    def __init__(self, matW, matY, matP, matR, betaInv, matMeans, matModes,
                 matX, n_dimensions, converged):
        r"""Constructor for OptimizedGTM class.

        Every argument is stored unchanged as an attribute of the same name.

        Parameters
        ----------
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in n-dimensional space
            (projection of matX in data space).
            matY = np.dot(matW, np.transpose(matPhiMPlusOne))
        matP : array of shape (n_individuals, n_nodes)
            Data distribution with variance betaInv.
        matR : array of shape (n_individuals, n_nodes)
            Responsibilities (posterior probabilities),
            used to compute data representations:
            means (matMeans) and modes (matModes).
            Responsibilities are the main output of GTM.
            matR[i,:] represents the responsibility vector for an instance i.
            The columns in matR correspond to rows in matX (nodes).
        betaInv : float
            Noise variance parameter for the data distribution.
            Written as :math:`\beta^{-1}` in the original paper.
        matMeans : array of shape (n_individuals, 2)
            Data representation in 2D space: means
            (most commonly used for GTM).
        matModes : array of shape (n_individuals, 2)
            Data representation in 2D space: modes
            (for each instance, coordinate with highest responsibility).
        matX : array of shape (n_nodes, 2)
            Coordinates of nodes defining a grid in the 2D space.
        n_dimensions : int
            Data space dimensionality (number of variables).
        converged : bool
            True if the model has converged; otherwise False.
        """
        self.matW = matW
        self.matY = matY
        self.matP = matP
        self.matR = matR
        self.betaInv = betaInv
        self.matMeans = matMeans
        self.matModes = matModes
        self.matX = matX
        self.n_dimensions = n_dimensions
        self.converged = converged

    def _save_projections(self, output):
        """Save responsibilities, means and modes as comma-separated files."""
        np.savetxt(fname=output+"_responsibilities.csv",
                   X=self.matR, delimiter=",")
        np.savetxt(fname=output+"_coordinates.csv",
                   X=self.matMeans, delimiter=",")
        np.savetxt(fname=output+"_modes.csv",
                   X=self.matModes, delimiter=",")

    @staticmethod
    def _report_projections(output):
        """Print the report header and one entry per projection file.

        Ends with a single blank line so callers can either append further
        entries or close the report with one more blank line.
        """
        print("")
        print("Wrote to disk:")
        print("")
        print("%s: responsibilities, which represent "
              "each individual's encoding "
              "on the map (dimensions=n_individuals*n_nodes_on_the_map)"
              % (output+"_responsibilities.csv"))
        print("")
        print("%s: coordinates to plot, which represent each individual's "
              "mean position on the map (dimensions = "
              "n_individuals*n_latent_dimensions)"
              % (output+"_coordinates.csv"))
        print("")
        print("%s: modes positions for each individual on the map "
              "(node with max probability for the individual; "
              "dimensions = n_individuals*n_latent_dimensions)"
              % (output+"_modes.csv"))
        print("")

    def write(self, output="output"):
        """Write optimized GTM model: means, modes and responsibilities.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; each file name is this prefix followed by
            a fixed suffix.

        Returns
        -------
        None
            The results are CSV files written to disk:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point).
            A short description of each file is printed to stdout.
        """
        self._save_projections(output)
        self._report_projections(output)
        print("")

    def write_all(self, output="output"):
        """Write optimized GTM model and optimized parameters.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; each file name is this prefix followed by
            a fixed suffix.

        Returns
        -------
        None
            The results are files written to disk:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point),
            (4) ``<output>_dimensionsAndVariance.csv``: initial space
            dimension and data distribution variance, as two
            ``key:value`` text lines,
            (5) ``<output>_manifold.csv``: manifold coordinates (matY),
            (6) ``<output>_parametersMatrix.csv``: parameter matrix (matW).
            A short description of each file is printed to stdout.
        """
        outparams = "n_dimensions:"+str(self.n_dimensions) + \
            "\n"+"variance:"+str(self.betaInv)
        self._save_projections(output)
        np.savetxt(fname=output+"_manifold.csv",
                   X=self.matY, delimiter=",")
        np.savetxt(fname=output+"_parametersMatrix.csv",
                   X=self.matW, delimiter=",")
        # The summary is free text, not an array: np.savetxt rejects a bare
        # string (it becomes a 0-dimensional array), so write it directly.
        with open(output+"_dimensionsAndVariance.csv", "w") as handle:
            handle.write(outparams+"\n")
        self._report_projections(output)
        print("%s: manifold coordinates in the initial data space "
              "(dimensions: n_data_dimensions*n_points_on_manifold"
              % (output+"_manifold.csv"))
        print("")
        print("%s: parameters matrix"
              % (output+"_parametersMatrix.csv"))
        print("")
        print("%s: initial space and variance"
              % (output+"_dimensionsAndVariance.csv"))
        print("")
        print("")